-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata.py
91 lines (74 loc) · 3.12 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
import scanpy as sc
import os
from PIL import Image
from os.path import join as pjoin
from utils import load_idx
import torch
def load_aml():
DATA_DIR = "./data/bmmc/"
pretransplant1 = pd.read_csv(
pjoin(DATA_DIR, "clean", "pretransplant1.csv"), index_col=0
)
pretransplant2 = pd.read_csv(
pjoin(DATA_DIR, "clean", "pretransplant2.csv"), index_col=0
)
posttransplant1 = pd.read_csv(
pjoin(DATA_DIR, "clean", "posttransplant1.csv"), index_col=0
)
posttransplant2 = pd.read_csv(
pjoin(DATA_DIR, "clean", "posttransplant2.csv"), index_col=0
)
healthy1 = pd.read_csv(pjoin(DATA_DIR, "clean", "healthy1.csv"), index_col=0)
healthy2 = pd.read_csv(pjoin(DATA_DIR, "clean", "healthy2.csv"), index_col=0)
data1 = np.concatenate([healthy1.values, healthy2.values]).astype('float32')
data2 = np.concatenate([pretransplant1.values, pretransplant2.values]).astype('float32')
data3 = np.concatenate([posttransplant1.values, posttransplant2.values]).astype('float32')
return (data1, np.zeros(data1.shape[0])), (data2, np.ones(data2.shape[0])), (data3, np.ones(data3.shape[0]) * 2)
def load_epithel():
data1 = pd.read_csv("./data/epithel/data/Control.csv", index_col=0).T.values
labels1 = np.zeros(data1.shape[0])
data2 = pd.read_csv("./data/epithel/data/Salmonella.csv", index_col=0).T.values
labels2 = np.ones(data2.shape[0])
data3 = pd.read_csv("./data/epithel/data/Hpoly.Day10.csv", index_col=0).T.values
labels3 = np.ones(data3.shape[0]) * 2 # Array of 2's
return (data1.astype('float32'), labels1), (data2.astype('float32'), labels2), (data3.astype('float32'), labels3)
class SimpleDataset(Dataset):
def __init__(self, X: np.ndarray, y: np.ndarray):
self.X = X.copy()
self.y = y.copy()
def __len__(self):
return self.X.shape[0]
def __getitem__(self, index):
return self.X[index], self.y[index]
class CelebADataset(Dataset):
def __init__(self, image_files, labels=None, transform=None):
"""
Args:
root_dir (string): Directory with all the images
transform (callable, optional): transform to be applied to each image sample
"""
# Read names of images in the root directory
self.transform = transform
self.image_names = image_files
self.labels = labels
def __len__(self):
return len(self.image_names)
def __getitem__(self, idx):
# Get the path to the image
img_path = os.path.join(self.image_names[idx])
# Load image and convert it to RGB
image = Image.open(img_path).convert('RGB')
face_width = face_height = 108
j = (image.size[0] - face_width) // 2
i = (image.size[1] - face_height) // 2
image = image.crop([j, i, j + face_width, i + face_height])
image = image.resize([64, 64], Image.BILINEAR)
# Apply transformations to the image
image = np.array(image.convert('RGB')) / 255
if self.labels is not None:
return image.reshape(3, 64, 64).astype('float32'), self.labels[idx]
else:
return image.reshape(3, 64, 64).astype('float32')