preprocess.py
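"""Preprocess thumbnail images into a pickled train/test dataset.

Expected directory layout (inferred from the code below):
    ../Pictures/<class>/<group>/*.thumbnail

Each image is converted to a 64x64 grayscale array scaled to roughly
[-0.5, 0.5]; each class is shuffled and split into train/test portions, and
the combined arrays are pickled to avDetect.pickle.
"""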
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
from six.moves import cPickle as pickle
from PIL import Image

image_size = 64      # pixel width and height
pixel_depth = 255.0  # number of grey levels per pixel
pTestData = 0.2      # fraction of the data held out for testing
picturePath = "../Pictures/"
# Keep only directories: the per-class .pickle files written below also end up
# in picturePath, so a plain listdir() would pick them up on a second run.
classFolders = [d for d in os.listdir(picturePath)
                if os.path.isdir(os.path.join(picturePath, d))]


def load_folder(folder, min_num_images):
    """Load all *.thumbnail images in one folder into a 3-D float array."""
    image_files = glob.glob(folder + "*.thumbnail")
    dataset = np.ndarray(shape=(len(image_files), image_size, image_size),
                         dtype=np.float32)
    print(folder)
    num_images = 0
    for image in image_files:
        try:
            # Convert to grayscale and rescale pixel values to about [-0.5, 0.5].
            img = Image.open(image).convert("L")
            image_data = (np.asarray(img).astype(float) -
                          pixel_depth / 2) / pixel_depth
            if image_data.shape != (image_size, image_size):
                raise Exception('Unexpected image shape: %s' % str(image_data.shape))
            dataset[num_images, :, :] = image_data
            num_images = num_images + 1
        except IOError as e:
            print('Could not read:', image, ':', e, '- it\'s ok, skipping.')
    dataset = dataset[0:num_images, :, :]
    if num_images < min_num_images:
        raise Exception('Many fewer images than expected: %d < %d' %
                        (num_images, min_num_images))
    print('Full dataset tensor:', dataset.shape)
    print('Mean:', np.mean(dataset))
    print('Standard deviation:', np.std(dataset))
    return dataset


def class_data(class_folder, min_num_images_per_class, force=False):
    """Load every sub-folder of one class and pickle the resulting list of arrays."""
    set_filename = class_folder + '.pickle'
    data_folders = os.listdir(class_folder)
    if os.path.exists(set_filename) and not force:
        # You may override by setting force=True.
        print('%s already present - Skipping pickling.' % set_filename)
    else:
        print('Pickling %s.' % set_filename)
        dataset = []
        for folder in data_folders:
            dataset.append(load_folder(class_folder + "/" + folder + "/",
                                       min_num_images_per_class))
        try:
            with open(set_filename, 'wb') as f:
                pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
        except Exception as e:
            print('Unable to save data to', set_filename, ':', e)
    return set_filename


# Build (or reuse) one pickle per class; require at least 10 images per sub-folder.
class1 = class_data(picturePath + classFolders[0], 10)
class2 = class_data(picturePath + classFolders[1], 10)


def randomize(dataset, labels):
    """Shuffle dataset and labels with the same random permutation."""
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels


def combine_data(class_datafiles, pTest=0.2):
    """Merge the per-class pickles into train/test arrays with integer class labels."""
    train_dataset = np.empty([0, image_size, image_size], dtype=np.float32)
    train_labels = np.empty(shape=0, dtype=np.int32)
    test_dataset = np.empty([0, image_size, image_size], dtype=np.float32)
    test_labels = np.empty(shape=0, dtype=np.int32)
    label = 0
    for c in class_datafiles:
        dataset = np.empty([0, image_size, image_size], dtype=np.float32)
        labels = np.empty(shape=0, dtype=np.int32)
        with open(c, 'rb') as f:
            data = pickle.load(f)
        # Each pickle holds one array per sub-folder; stack them into one block.
        for d in data:
            dataset = np.vstack((dataset, d))
            labels = np.hstack((labels, np.tile(label, len(d))))
        label += 1
        # Shuffle within the class, then split off the first pTest fraction as test data.
        dataset, labels = randomize(dataset, labels)
        splitindex = int(pTest * dataset.shape[0])
        train_dataset = np.vstack((train_dataset, dataset[splitindex:, :, :]))
        train_labels = np.hstack((train_labels, labels[splitindex:]))
        test_dataset = np.vstack((test_dataset, dataset[0:splitindex, :, :]))
        test_labels = np.hstack((test_labels, labels[0:splitindex]))
    return train_dataset, train_labels, test_dataset, test_labels


train_dataset, train_labels, test_dataset, test_labels = combine_data([class1, class2], pTestData)
train_dataset, train_labels = randomize(train_dataset, train_labels)
test_dataset, test_labels = randomize(test_dataset, test_labels)

pickle_file = 'avDetect.pickle'
try:
    with open(pickle_file, 'wb') as f:
        save = {
            'train_dataset': train_dataset,
            'train_labels': train_labels,
            'test_dataset': test_dataset,
            'test_labels': test_labels,
        }
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise
#print(train_dataset.shape[0])
#print(test_dataset.shape[0])
#plt.figure()
#plt.imshow(train_dataset[2,:,:],cmap="jet")
#plt.show()
#print(train_dataset[2,:,:].min())
#print(train_dataset[2,:,:].max())
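
# A minimal sanity check (sketch): reload the pickle we just wrote and confirm
# the arrays round-trip with the expected shapes. The dictionary keys mirror
# the `save` dict above; nothing here is required by the rest of the pipeline.
with open(pickle_file, 'rb') as f:
    reloaded = pickle.load(f)
print('Reloaded train set:', reloaded['train_dataset'].shape, reloaded['train_labels'].shape)
print('Reloaded test set:', reloaded['test_dataset'].shape, reloaded['test_labels'].shape)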