-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_f_mnist.py
67 lines (50 loc) · 2.21 KB
/
preprocess_f_mnist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#Imports
from keras.datasets import fashion_mnist
import numpy as np
from sklearn.model_selection import train_test_split
def flatten_images(dataset):
    """
    Converts each image into a 1-D input vector.

    dataset: array-like whose first axis indexes the images, e.g. shape
    (no_of_images, 28, 28) for 28x28 images.
    Shape returned = (no_of_images, pixels_per_image), i.e. (no_of_images, 784)
    for 28x28 images. An empty dataset keeps this 2-D layout, so an input of
    shape (0, 28, 28) gives (0, 784).
    The result is a fresh array; the input is never modified or aliased.
    """
    # np.array copies, so the reshaped result cannot share memory with the
    # caller's data.
    images = np.array(dataset)
    no_of_images = images.shape[0]
    # The per-image size is computed explicitly because reshape(..., -1)
    # cannot infer a dimension when there are zero images.
    pixels_per_image = int(np.prod(images.shape[1:]))
    # reshape uses row-major (C) order by default, the same order as
    # ndarray.flatten().
    return images.reshape(no_of_images, pixels_per_image)
def one_hot_encoder(labels, no_classes):
    """
    Builds the one hot representation of every label.

    labels: array of integer class indices, one per image.
    no_classes: width of each one hot row.
    Shape returned = (no_of_images, no_classes), filled with 0.0 except for
    a 1.0 at each image's class index.
    """
    sample_count = labels.shape[0]
    encoded = np.zeros((sample_count, no_classes))
    # Each `row` is a view into `encoded`, so writing to it updates the matrix.
    for row, class_index in zip(encoded, labels):
        row[class_index] = 1
    return encoded
#generate val set from train set and return updated train set and val set.
def generate_val_set(train_images, train_labels, val_size):
    """
    Splits a validation set off the training data.

    val_size is forwarded to train_test_split as test_size: a float is the
    fraction of samples to hold out, an int is an absolute number of samples.
    None falls back to a 10% hold-out.
    The split uses a fixed seed (random_state=1).
    Returns train_images, train_labels, val_images, val_labels.
    """
    # Fall back to a 10% hold-out when no size is given.
    if val_size is None:
        val_size = 0.1
    train_images, val_images, train_labels, val_labels = train_test_split(
        train_images, train_labels, test_size=val_size, random_state=1
    )
    return train_images, train_labels, val_images, val_labels
def preprocess(train_images, train_labels, test_images, test_labels, val_size):
    """
    Prepares the raw image and label arrays for training.
    1. Normalization (pixel values divided by 255).
    2. Flattens the images.
    3. Splits a validation set off the training data.
    4. One hot representation for the labels.
    Returns train_images, train_labels, val_images, val_labels,
    test_images, test_labels.
    """
    # The class count comes from the distinct training labels, taken before
    # the validation split.
    class_count = np.unique(train_labels).shape[0]

    # Grayscale pixels are expected in 0..255, so dividing by 255 rescales
    # them to 0..1; each image is then flattened into a vector.
    flat_train = flatten_images(train_images / 255.0)
    flat_test = flatten_images(test_images / 255.0)

    # Carve the validation set out of the (already flattened) training data.
    split = generate_val_set(flat_train, train_labels, val_size)
    flat_train, train_targets, flat_val, val_targets = split

    encoded_train = one_hot_encoder(train_targets, class_count)
    encoded_val = one_hot_encoder(val_targets, class_count)
    encoded_test = one_hot_encoder(test_labels, class_count)

    return (
        flat_train,
        encoded_train,
        flat_val,
        encoded_val,
        flat_test,
        encoded_test,
    )