如何将自定义数据集分割为训练集和测试集?

5 浏览
0 Comments

如何将自定义数据集分割为训练集和测试集?

import pandas as pd

import numpy as np

import cv2

from torch.utils.data.dataset import Dataset

class CustomDatasetFromCSV(Dataset):
    """Dataset of grayscale face images with one-hot encoded emotion labels.

    Every row of the source table must provide an ``emotion`` column (the
    class label) and a ``pixels`` column holding ``height * width``
    space-separated integer intensities.
    """

    def __init__(self, csv_path, transform=None):
        """Load the table and one-hot encode its labels.

        Args:
            csv_path: Path (or anything ``pandas.read_csv`` accepts) of the
                CSV file. An already loaded ``pandas.DataFrame`` is accepted
                as well, which is what ``split_train_test`` relies on.
            transform: Optional callable applied to every image returned by
                ``__getitem__``.
        """
        if isinstance(csv_path, pd.DataFrame):
            # Re-number the rows so a sliced frame is addressed from 0 again.
            self.data = csv_path.reset_index(drop=True)
        else:
            self.data = pd.read_csv(csv_path)
        self.labels = pd.get_dummies(self.data['emotion']).values
        self.height = 48
        self.width = 48
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at position ``index``.

        Only the requested row is decoded; the image is a ``float32`` array
        of shape ``(height, width, 1)`` before ``transform`` (if any) is
        applied to it.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        pixel_sequence = self.data['pixels'].iloc[index]
        face = np.asarray(
            [int(pixel) for pixel in pixel_sequence.split()],
            dtype='float32',
        )
        # Rows first, columns second, plus a trailing channel axis. No
        # resize is needed: the image already has the target size.
        face = face.reshape(self.height, self.width, 1)
        if self.transform is not None:
            face = self.transform(face)
        return face, self.labels[index]

    def __len__(self):
        """Return the number of samples (rows of the table)."""
        return len(self.data)

    def split_train_test(self, train_ratio):
        """Split the dataset, in row order, into a train and a test part.

        The first ``int(train_ratio * len(self))`` rows form the training
        set and the remaining rows form the test set. Rows are not
        shuffled, so shuffle the table beforehand if its order is
        meaningful.

        Args:
            train_ratio: Fraction of the rows, between 0 and 1, assigned to
                the training set.

        Returns:
            A ``(train_dataset, test_dataset)`` tuple of
            ``CustomDatasetFromCSV`` objects sharing this dataset's
            transform and label encoding.

        Raises:
            ValueError: If ``train_ratio`` is not within ``[0, 1]``.
        """
        if not 0 <= train_ratio <= 1:
            raise ValueError(
                'train_ratio must be between 0 and 1, got %r' % (train_ratio,)
            )
        train_size = int(train_ratio * len(self.data))
        train_dataset = self._subset(slice(None, train_size))
        test_dataset = self._subset(slice(train_size, None))
        return train_dataset, test_dataset

    def _subset(self, rows):
        """Build a dataset restricted to the positional slice ``rows``."""
        subset = CustomDatasetFromCSV(
            self.data.iloc[rows], transform=self.transform
        )
        # Reuse the parent's encoding: re-encoding a part that lacks some
        # class would yield label vectors of a different width.
        subset.labels = self.labels[rows]
        subset.height = self.height
        subset.width = self.width
        return subset

# Build the full dataset from the CSV file, then split it by ratio.
# NOTE(review): csv_path, transforms and train_ratio are not defined in the
# visible snippet; they must be bound before these lines run.
dataset = CustomDatasetFromCSV(csv_path, transform=transforms)

# split_train_test returns a (train, test) pair of datasets.
train_dataset, test_dataset = dataset.split_train_test(train_ratio)

0