# ======================================================================================================================
# |                                        A General Neural Network class                                             |
# ======================================================================================================================
# Justification for a class approach:
# ======================================================================================================================
# Every neural network defines a common set of methods/functions on a given input, e.g. a minimization function, a
# sigmoid activation function, weight assignment, and forward & backward propagation.
# These qualify as attributes and methods of the Neural Network class.
# The number of nodes in the input and output layers may vary for different tasks, e.g. alphanumeric recognition here
# uses an 8-node output layer (one node per bit of the label's ASCII encoding); other tasks, such as speech
# recognition, would size these layers differently.
# The polymorphic property of classes allows for generalization while keeping each Neural Network unique.
# ======================================================================================================================
# standard imports
from __future__ import division
from math import sqrt

import numpy as np
from scipy.optimize import minimize

# project imports (load_weights comes from the local weights module)
from weights import *
class NeuralNetwork(object):
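    # Class-level attributes: note these are shared by all instances of NeuralNetwork.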
activations_dict = {}
weights_list = []
def __init__(self, architecture, training_set, training_level, training_type, training_parameters):
""" :param architecture: A dictionary containing the architecture of the neural network i.e
{'features_layer':400,'hidden_layers':[1,1,2,3],'output_layer':8}
:param training_set: The training set is a dictionary containing a matrix of training data
{'X': features_data, 'y': labels},
shape of features_data = (no_of_features x no_of_examples)
:param training_level: a string "initial" or "continue" to specify whether this is the first training
or otherwise.
:param training_parameters: A dictionary containing the regularization lambda_value and bits_number
eg. {"lambda_value" : 1, "bits_no" : 8}
"""
self.architecture = architecture
self.training_set = training_set
self.training_level = training_level
self.training_type = training_type
self.m = training_set["X"].shape[1]
self.lambda_value = training_parameters["lambda_value"]
self.bits_no = training_parameters["bits_no"]
self.backup_y = training_set["y"]
self.training_set["y"] = self.recode_labels("encode2", self.bits_no, self.backup_y)
@staticmethod
def recode_labels(recode_type, bits_no, labels):
"""
        :param recode_type: binary : {encode1, decode1}, ASCII then binary : {encode2, decode2}
        :param bits_no: number of bits in the binary representation of each entry in labels
        :param labels: the input matrix to be recoded
        :return: a recoding of labels, either to binary (encode1) or to ASCII then binary (encode2)
"""
if recode_type == "encode1":
labels = np.matrix(np.array(labels).flatten())
recode_vector = np.matrix(np.arange(1, bits_no+1)).T
if labels.shape[0] != 1:
labels = labels.T
encoded_labels = (np.matrix(recode_vector) == labels).astype(int)
encoded_labels = np.matrix(encoded_labels)
return encoded_labels
elif recode_type == "decode1":
decoded_labels = np.argmax(labels, axis=0)+1
decoded_labels = np.matrix(decoded_labels)
return decoded_labels
if recode_type == "encode2":
if labels.shape[1] != 1:
labels = labels.T
labels = np.array(labels)
labels = labels.flatten()
encoded_labels = [[bin(ord(str(entry)))[2:].zfill(bits_no)] for entry in labels.T]
encoded_labels = np.array(encoded_labels).flatten()
encoded_labels = np.matrix([[int(m) for m in M] for M in encoded_labels]).T
return encoded_labels
elif recode_type == "decode2":
if labels.shape[0] == bits_no:
pass
else:
labels = labels.T
assert labels.shape[0] == bits_no, "error in shape of labels : "+str(labels.shape)
decoded_labels = np.array(labels.T)
decoded_labels = (decoded_labels > 0.5).astype(int)
decoded_list = []
for M in decoded_labels:
binary_str = ''
for m in M:
binary_str += str(m)
ascii_ = int(binary_str, 2)
decoded_list.append([int(chr(ascii_))])
decoded_labels = np.matrix(decoded_list)
return decoded_labels
        else:
            raise ValueError("invalid recode_type input; choose a decode type to convert back to decimal "
                             "(the original encoding) or an encode type to convert to binary / ASCII-then-binary.")
def assign_weights(self, training_level):
"""
:param self:
        :param training_level: "initial" for the first training of the neural network, or "continue" otherwise;
                "continue" loads a weight matrix stored in the weights file.
        :return: the weights for the neural network, flattened into a one-dimensional row vector
"""
architecture = self.architecture
training_type = self.training_type
if training_level == 'initial':
hidden_layers_units = architecture['hidden_layers']
features_layer_units = architecture['features_layer']
output_layer_units = architecture['output_layer']
# model_seq is a list containing all the units in each layer of the Neural Network
model_seq = [features_layer_units]+hidden_layers_units+[output_layer_units]
weights_row_vector = np.matrix([])
for layer in range(1, len(model_seq)):
epsilon = sqrt(6 / (model_seq[layer-1] + model_seq[layer]))
weight = 2 * epsilon * np.random.rand(model_seq[layer], 1 + model_seq[layer-1]) - epsilon
weights_row_vector = np.concatenate((weights_row_vector, np.reshape(weight, (1, -1))), axis=1)
else:
weights_row_vector = load_weights(training_type)
weights = np.array(weights_row_vector).flatten()
NeuralNetwork.weights = weights
return weights
def to_weights_list(self, weights):
"""
:param self:
:param weights: An array containing the weights for the neural network as a row vector.
:return: a list containing the weights for the mapping between any two layers of the Neural Network.
"""
        try:
            # weights.shape[1] raises IndexError for a one-dimensional (rolled-out) array, so reaching the
            # next line means weights is already a matrix and only needs wrapping in a list.
            _ = weights.shape[1]
            weights_list = [weights]
except IndexError:
architecture = self.architecture
features_layer_size = architecture["features_layer"]
output_layer_size = architecture["output_layer"]
hidden_layers = architecture["hidden_layers"]
layer = 0
weights_copy = weights[:]
weights_list = []
weights_copy = weights_copy[hidden_layers[0] * (features_layer_size + 1):]
weights_rolled = weights[0:(hidden_layers[0] * (features_layer_size + 1))]
weights_list.append(np.matrix(np.reshape(weights_rolled,
(hidden_layers[0], features_layer_size + 1), order='F')))
for layer in range(1, len(hidden_layers)):
current_layer = hidden_layers[layer]
prev_layer = hidden_layers[layer - 1]
weights_rolled = weights_copy[0:(current_layer * (prev_layer + 1))]
weights_list.append(
np.reshape(weights_rolled, (current_layer, prev_layer + 1), order='F'))
weights_copy = weights_copy[current_layer * (prev_layer + 1):]
prev_layer = hidden_layers[layer]
weights_rolled = weights_copy[0:output_layer_size * (prev_layer + 1)]
weights_list.append(
np.reshape(weights_rolled, (output_layer_size, prev_layer + 1), order='F'))
weights_copy = weights_copy[output_layer_size * (prev_layer + 1):]
            assert np.shape(weights_copy) == (0,), 'error in rebuilding the weights list; leftover shape = ' + str(
                np.shape(weights_copy))
except AttributeError:
weights_list = weights
return weights_list
@staticmethod
def sigmoid_function(input_examples):
"""
        :param input_examples: a matrix of pre-activation values; the sigmoid is applied elementwise, so any
                shape is accepted.
        :return: a matrix of the same shape containing the sigmoid of each entry
"""
sigmoid = 1.0 / (1.0 + np.exp(-input_examples))
return sigmoid
def sigmoid_gradient(self, input_examples):
"""
:param self: utilizes the sigmoid_function
        :param input_examples: a matrix of pre-activation values; the gradient is applied elementwise.
        :return: a matrix of the same shape containing sigmoid(x) * (1 - sigmoid(x)) for each entry
"""
activated_layer = self.sigmoid_function(input_examples)
sigmoid_grad = np.multiply(activated_layer, (1 - activated_layer))
return sigmoid_grad
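    # Quick sanity values for the two activations above: sigmoid_function(0) = 0.5 and
    # sigmoid_gradient(0) = 0.5 * (1 - 0.5) = 0.25, the maximum of the sigmoid's derivative.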
def hypothesis(self, features, weights_list):
"""
:param self:
:param features: training features
:param weights_list: a list containing the weights for the mapping between any two layers of the Neural Network.
        :return: a list [hypothesis, activations_dict]; hypothesis is a matrix of size
                (no. of output layer nodes x no. of examples)
"""
activation_unit = features
activations_dict = {'features_layer': activation_unit}
bias_unit = np.matrix(np.ones((1, activation_unit.shape[1])))
activated_unit = activation_unit
layer = 1
for weight in weights_list:
activation_unit = np.concatenate((bias_unit, activation_unit), axis=0)
activation = weight * activation_unit
if layer != len(weights_list):
activations_dict['activation_hidden_layer'+str(layer)] = activation
activated_unit = self.sigmoid_function(activation)
activation_unit = activated_unit
layer += 1
hypothesis = activated_unit
activations_dict['hypothesis'] = hypothesis
# NeuralNetwork.activations_dict = activations_dict
h_list = [hypothesis, activations_dict]
return h_list
def regularization(self, weights_list, lambda_value):
"""
:param self:
        :param weights_list: list containing the weight matrices, i.e. [w1, w2, ..., wL], where w1 holds the
                weights for layer 1 and so on.
        :param lambda_value: the lambda value for regularization
        :return: a regularization_value, a float.
"""
m = self.m
        # The first column of each weight matrix multiplies the bias unit and is not regularized,
        # which is why we truncate it.
squared_weights = [np.sum(np.square(weight[:, 1:])) for weight in weights_list]
regularization_value = (lambda_value / (2 * m)) * sum(squared_weights)
return regularization_value
def nn_cost_function(self, regularization, hypothesis):
"""The parameters is a dictionary containing the necessary parameters needed to compute the training cost .
for every iteration of training a Neural Network.
"""
assert np.shape(self.training_set["y"]) == np.shape(hypothesis),\
"The shape of y : "+str(np.shape(self.training_set["y"])) + \
" does not correspond with the shape of the neural network hypothesis : "\
+ str(np.shape(hypothesis))+"," " a possible error in building hypothesis"
label = np.matrix.ravel(self.training_set["y"])
hypothesis_flat = (np.matrix.ravel(hypothesis)).T
m = self.m
cost = (-1 / m) * (label * np.log(hypothesis_flat) + (1 - label) * np.log(1 - hypothesis_flat))[0, 0]
regularized_cost = cost + regularization
return regularized_cost
def feed_forward_propagation(self, weights):
"""
:param self:
:param weights: An array containing the weights for the neural network as a row vector.
:return: The cost {a real number} for training the Neural network model with the given weights.
"""
training_features = self.training_set['X']
weights_list = self.to_weights_list(weights)
lambda_value = self.lambda_value
hypothesis = self.hypothesis(training_features, weights_list)[0]
regularization = self.regularization(weights_list, lambda_value)
cost = self.nn_cost_function(regularization, hypothesis)
return cost
def back_propagation(self, weights):
"""
:param self:
        :param weights: an array containing the weights for the neural network as a row vector
        :return: the weight_gradient for the neural network model, flattened into a one-dimensional array
"""
m = self.m
no_hidden_layers = len(self.architecture['hidden_layers'])
y = self.training_set['y']
lambda_value = self.lambda_value
features = self.training_set["X"]
weights_list = self.to_weights_list(weights)
hypothesis_list = self.hypothesis(features, weights_list)
activations_dict = hypothesis_list[1]
# activations_dict = NeuralNetwork.activations_dict
# hypothesis = activations_dict['hypothesis']
hypothesis = hypothesis_list[0]
layer_error = hypothesis - y
weight_gradient = np.matrix(np.empty((0, 1)))
for layer in range(no_hidden_layers, -1, -1):
if layer == 0:
activation = activations_dict['features_layer']
activation_with_bias = np.concatenate((np.ones((1, m)), activation), axis=0)
else:
activation = activations_dict['activation_hidden_layer'+str(layer)]
activation_with_bias = np.concatenate((np.ones((1, m)), self.sigmoid_function(activation)), axis=0)
activation_gradient = self.sigmoid_gradient(activation)
layer_weight_unbiased = np.matrix(weights_list[layer])[:, 1:]
no_rows_weight = layer_weight_unbiased.shape[0]
normalization = np.zeros((no_rows_weight, 1))
normalized_layer_weights = np.concatenate((normalization, layer_weight_unbiased), axis=1)
regularization = lambda_value * normalized_layer_weights
theta_gradient = (1 / m) * (np.matrix((layer_error * activation_with_bias.T)) + regularization)
theta_gradient = theta_gradient.reshape((1, -1), order='F').T
weight_gradient = np.concatenate((theta_gradient, weight_gradient))
layer_error = np.multiply((layer_weight_unbiased.T * layer_error), activation_gradient)
weight_gradient = np.ndarray.flatten(np.array(weight_gradient))
return weight_gradient
def compute_numerical_gradient(self, weights):
"""
        :param weights: an array containing the weights for the neural network as a row vector
        :return: a one-dimensional array containing the numerical estimate of the cost gradient at weights
"""
weight_shape = weights.shape
numerical_grad = np.zeros(weight_shape)
perturb = np.zeros(weight_shape)
epsilon = 1e-4
weights = np.array(weights).flatten()
perturb = np.array(perturb).flatten()
for i in range(0, np.size(weights)):
perturb[i] = epsilon
loss1 = self.feed_forward_propagation(weights - perturb)
loss2 = self.feed_forward_propagation(weights + perturb)
numerical_grad[i] = (loss2 - loss1) / (2 * epsilon)
perturb[i] = 0
return numerical_grad
def gradient_checking(self, weights):
"""
        :param weights: an array containing the weights for the neural network as a row vector
        :return: a separator string, after printing the numerical and backprop gradients side by side
                together with their relative difference
"""
numerical_gradient = self.compute_numerical_gradient(weights)
backprop_gradient = self.back_propagation(weights)
        assert numerical_gradient.shape == backprop_gradient.shape,\
            "the shapes of the numerical weight_gradient = " + str(numerical_gradient.shape) + \
            " and the backprop weight_gradient = " + str(backprop_gradient.shape) + " differ"
loss = np.linalg.norm((backprop_gradient - numerical_gradient), 2)
gain = np.linalg.norm((backprop_gradient + numerical_gradient), 2)
difference = loss / gain
print("\n===================== compare ===============================")
print(np.concatenate((np.matrix(numerical_gradient).T, np.matrix(backprop_gradient).T), axis=1))
print("=============================================================\n")
print("==============================================================")
print('the weight_gradient difference = '+str(difference))
print("===============================================================")
print("test completed...")
return "=============================================================\n"
def train(self, weights_initial):
"""
        The train method trains the artificial neural network on the stored training examples and labels by
        minimizing the regularized cost with the conjugate gradient ('CG') method.
        :param weights_initial: the initial weights, the starting point of the optimization process
        :return: the optimal weights for prediction as a flattened array
"""
forward_propagation = self.feed_forward_propagation
back_propagation = self.back_propagation
result = minimize(forward_propagation, weights_initial, method='CG', jac=back_propagation,
options={'disp': True, 'maxiter': 30000})
optimal_weights = result.x
return optimal_weights
def prediction(self, features, weights):
"""
        :param features: input features, shape (no_of_features x no_of_examples)
        :param weights: an array containing the trained weights for the neural network as a row vector
        :return: an m x 1 column vector of the predicted labels for the inputs, decoded back from their
                binary/ASCII encoding
"""
weights_list = self.to_weights_list(weights)
prediction = self.hypothesis(features, weights_list)[0]
prediction = self.recode_labels("decode2", self.bits_no, prediction)
return prediction
    def optimal_weights(self, max_iter):
        """
        :param max_iter: the number of random restarts; the network is re-initialized and retrained each time
        :return: the trained weights that achieved the highest training accuracy across all restarts
        """
        features = self.training_set["X"]
        # Compare predictions against the original (un-encoded) labels saved in __init__, since
        # training_set["y"] now holds the binary-encoded labels.
        labels = self.backup_y
        optimum_weights = np.zeros(())
        maximum_accuracy = 0
        for i in range(max_iter):
            initial_weights = self.assign_weights(self.training_level)
            weights = self.train(initial_weights)
            prediction = self.prediction(features, weights)
            accuracy = np.mean(prediction == labels) * 100
            print(accuracy)
            if accuracy >= maximum_accuracy:
                optimum_weights = weights
                maximum_accuracy = accuracy
        return optimum_weights
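

# ======================================================================================================================
# Minimal usage sketch (illustrative, not part of the original pipeline): it builds a tiny network on random
# data and runs gradient_checking to verify back_propagation against the numerical gradient. The architecture,
# sizes, seed and "demo" training_type below are assumptions chosen only for this example; real use would
# supply genuine features/labels and call train or optimal_weights instead.
# ======================================================================================================================
if __name__ == "__main__":
    np.random.seed(0)
    demo_examples = 20
    demo_architecture = {'features_layer': 10, 'hidden_layers': [5], 'output_layer': 8}
    demo_features = np.matrix(np.random.rand(10, demo_examples))             # shape: (features x examples)
    demo_labels = np.matrix(np.random.randint(1, 10, (demo_examples, 1)))    # single-digit labels, one per example
    demo_network = NeuralNetwork(demo_architecture,
                                 {'X': demo_features, 'y': demo_labels},
                                 'initial', 'demo', {'lambda_value': 1, 'bits_no': 8})
    demo_weights = demo_network.assign_weights('initial')
    demo_network.gradient_checking(demo_weights)  # the printed relative difference should be very small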