如何将自定义数据集分割为训练集和测试集?
如何将自定义数据集分割为训练集和测试集?
import pandas as pd
import numpy as np
import cv2
from torch.utils.data.dataset import Dataset
class CustomDatasetFromCSV(Dataset):
    """Dataset of single-channel 48x48 images stored as pixel strings.

    Each row of the underlying table provides an ``emotion`` column (the class
    label, one-hot encoded here) and a ``pixels`` column holding the
    whitespace-separated pixel intensities of one image.
    """

    def __init__(self, csv_path, transform=None):
        """Load the table and pre-compute the one-hot labels.

        Args:
            csv_path: Path of the CSV file to read, or an already loaded
                ``pandas.DataFrame`` (this is what ``split_train_test``
                passes when it builds the two partitions).
            transform: Optional callable applied to every image array
                returned by ``__getitem__``.
        """
        if isinstance(csv_path, pd.DataFrame):
            # Re-number the rows from 0 so positional access stays aligned
            # with ``self.labels`` even when the frame is a slice of a larger
            # table; reset_index also returns a new frame, so the caller's
            # object is not modified.
            self.data = csv_path.reset_index(drop=True)
        else:
            self.data = pd.read_csv(csv_path)
        self.labels = pd.get_dummies(self.data['emotion']).values
        self.height = 48
        self.width = 48
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, one_hot_label)`` pair stored at ``index``.

        Only the requested row is decoded. The image is a ``float32`` array of
        shape ``(height, width, 1)`` unless ``transform`` changes it.
        """
        pixel_sequence = self.data['pixels'].iloc[index]
        # split() without an argument tolerates repeated or trailing blanks.
        face = np.asarray([int(pixel) for pixel in pixel_sequence.split()])
        face = face.reshape(self.height, self.width)
        # Keep the uint8 round-trip of the earlier implementation; the resize
        # that used to follow targeted the very same size, so it was a no-op.
        face = face.astype('uint8').astype('float32')
        face = np.expand_dims(face, -1)
        if self.transform is not None:
            face = self.transform(face)
        return face, self.labels[index]

    def __len__(self):
        """Return the number of samples (rows) in the dataset."""
        return len(self.data)

    def split_train_test(self, train_ratio):
        """Split the dataset sequentially into a train and a test dataset.

        The first ``int(train_ratio * len(self))`` rows form the training set
        and the remaining rows form the test set; rows are not shuffled.

        Args:
            train_ratio: Fraction of the rows assigned to the training set,
                between 0 and 1 inclusive.

        Returns:
            A ``(train_dataset, test_dataset)`` tuple of
            ``CustomDatasetFromCSV`` instances sharing this dataset's
            ``transform``.

        Raises:
            ValueError: If ``train_ratio`` is outside ``[0, 1]``.
        """
        if not 0 <= train_ratio <= 1:
            raise ValueError(
                "train_ratio must be between 0 and 1, got {!r}".format(train_ratio)
            )
        train_size = int(train_ratio * len(self.data))
        train_dataset = CustomDatasetFromCSV(
            self.data.iloc[:train_size], transform=self.transform
        )
        test_dataset = CustomDatasetFromCSV(
            self.data.iloc[train_size:], transform=self.transform
        )
        # Reuse the parent's one-hot rows so both partitions keep the full
        # set of label columns even if a class is missing from one of them.
        train_dataset.labels = self.labels[:train_size]
        test_dataset.labels = self.labels[train_size:]
        return train_dataset, test_dataset
# Usage example: build the dataset from the CSV file, then split it.
# NOTE(review): csv_path, transforms and train_ratio are not defined in the
# visible code; they must be assigned before these statements run.
dataset = CustomDatasetFromCSV(csv_path=csv_path, transform=transforms)
train_dataset, test_dataset = dataset.split_train_test(train_ratio=train_ratio)