%% MatDeepRep – Deep representation learning tool for Image Classification using Transfer Learning, the BVLC Caffe Matlab interface (matcaffe) & various pretrained .caffemodel binaries
%
% ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾
% ◾ ◾
% ◾ MatDeepRep is a Matlab tool, built on top of Caffe framework, ◾
% ◾ capable of learning general deep feature representations ◾
% ◾ for image classification using pre-trained Deep ConvNet Models ◾
% ◾ ◾
% ◾ Version 1.0 (8 September 2016 ) Initial release ◾
% ◾ ◾
% ◾ Author: ©Grigorios Kalliatakis ([email protected]) ◾
% ◾ Homepage: gkalliatakis.com ◾
% ◾ Embedded and Intelligent Systems Laboratory (EIS), ◾
% ◾ University of Essex,UK ◾
% ◾ ◾
% ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾ ◾
function [code,code_v] = matdeeprep(model,dataset,category)
% ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
% Usage Example: [code,code_v] = matdeeprep('ResNet50','FMD','fabric');
% ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
% Possible Settings for Inputs:
% [1] model =
% 'bvlc_alexnet' (AlexNet trained on ILSVRC 2012, almost exactly as described in "ImageNet classification with deep convolutional neural networks" dim: 227x227)
% 'bvlc_googlenet' (GoogLeNet trained on ILSVRC 2012, almost exactly as described in "Going Deeper with Convolutions" dim: 224x224)
% 'bvlc_reference_caffenet' (AlexNet trained on ILSVRC 2012, with a minor variation from the version as described in "ImageNet classification with deep convolutional neural networks" dim: 227x227)
% 'bvlc_reference_rcnn_ilsvrc13' (pure Caffe implementation of Region-based Convolutional Neural Networks-R-CNN dim: 227x227)
% 'placesCNN' (AlexNet CNN trained on 205 scene categories of Places Database with 2.5 million images as described in "Learning Deep Features for Scene Recognition using Places Database" dim: 227x227)
% 'ResNet50' (50-layer ResNet as described in "Deep Residual Learning for Image Recognition" dim: 224x224)
% 'ResNet101' (101-layer ResNet as described in "Deep Residual Learning for Image Recognition" dim: 224x224)
% 'ResNet152' (152-layer ResNet as described in "Deep Residual Learning for Image Recognition" dim: 224x224)
% 'VGG_CNN_F' (CNN_F trained on the ILSVRC-2012, as described in "Return of the Devil in the Details: Delving Deep into Convolutional Nets" dim: 224x224)
% 'VGG_CNN_M' (CNN_M trained on the ILSVRC-2012, as described in "Return of the Devil in the Details: Delving Deep into Convolutional Nets" dim: 224x224)
% 'VGG_CNN_S' (CNN_S trained on the ILSVRC-2012, as described in "Return of the Devil in the Details: Delving Deep into Convolutional Nets" dim: 224x224)
% 'VGGNet16' (16-layer VGG-Net as described in "Very deep convolutional networks for large-scale image recognition" dim: 224x224)
% 'VGGNet19' (19-layer VGG-Net as described in "Very deep convolutional networks for large-scale image recognition" dim: 224x224)
% [2] dataset = name of the dataset folder under ./datasets/ (e.g. 'FMD')
% [3] category = name of the image-category sub-folder inside that dataset (e.g. 'fabric')
% Outputs:
% (1) code = the extracted features for the training images
% (2) code_v = the extracted features for the test images
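%
% Example of using the returned features (a minimal sketch, not part of the
% original pipeline; it assumes the Statistics and Machine Learning Toolbox
% and that label vectors train_labels/test_labels have been built to match
% the column order of code/code_v, positive images first, then negatives):
%   [code, code_v] = matdeeprep('ResNet50','FMD','fabric');
%   svm       = fitcsvm(code', train_labels);   % one column of code per training image
%   predicted = predict(svm, code_v');          % classify the test images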
%% (1) Network initialisation
% Set caffe mode - At the moment only CPU is available
caffe.set_mode_cpu();
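% If Caffe was compiled with GPU support, the standard matcaffe calls below
% could be used instead of CPU mode (left commented out, as this tool is
% currently CPU-only):
%   caffe.set_mode_gpu();
%   caffe.set_device(0);   % id of the GPU to use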
% Initialise the selected pre-trained network for feature extraction
model_dir = sprintf('../../models/%s/', model);
prototxt_file = dir( fullfile(model_dir,'*deploy.prototxt') ); %# list all *.prototxt files
caffemodel_file = dir( fullfile(model_dir,'*.caffemodel') ); %# list all *.caffemodel files
net_model = [model_dir prototxt_file.name];
net_weights = [model_dir caffemodel_file.name];
phase = 'test'; % run with phase test (so that dropout isn't applied)
if ~exist(net_weights, 'file')
error('Missing .caffemodel weights for %s; please download the model from the Caffe Model Zoo before running this demo', model);
end
% Initialise a network
fprintf('\n');
cprintf('*text', 'Initialising caffe network...\n');
net = caffe.Net(net_model, net_weights, phase);
% Some models require the dimensions of the images to be 224x224 while others require 227x227
if isequal(model,'bvlc_alexnet')==1 || isequal(model,'bvlc_reference_caffenet')==1 ||isequal(model,'bvlc_reference_rcnn_ilsvrc13')==1 ||isequal(model,'placesCNN')==1
net.blobs('data').reshape([227 227 3 1]);
else
net.blobs('data').reshape([224 224 3 1]);
end
%% (2) Set paths & calculate number of images inside folders
pos_train_dir = sprintf('./datasets/%s/POS_TRAIN/%s/', dataset, category);
pos_num_train_images = length(dir(pos_train_dir))-4; % dir() also lists '.', '..' and other non-image entries, hence the offset
neg_train_dir = sprintf('./datasets/%s/NEG_TRAIN/%s/', dataset, category);
neg_num_train_images = length(dir(neg_train_dir))-4; % dir() also lists '.', '..' and other non-image entries, hence the offset
pos_test_dir = sprintf('./datasets/%s/POS_TEST/%s/', dataset, category);
pos_num_test_images = length(dir(pos_test_dir))-4; % dir() also lists '.', '..' and other non-image entries, hence the offset
neg_test_dir = './datasets/animals_val/';
neg_num_test_images = length(dir(neg_test_dir))-3; % dir() also lists '.' and '..', hence the offset
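% Expected on-disk layout (a sketch implied by the sprintf patterns above,
% with images named 00001.jpg, 00002.jpg, ... inside each folder):
%   ./datasets/<dataset>/POS_TRAIN/<category>/
%   ./datasets/<dataset>/NEG_TRAIN/<category>/
%   ./datasets/<dataset>/POS_TEST/<category>/
%   ./datasets/animals_val/          (negative test images)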
tStart = tic;
%% (3) Training
% Positive Training
h = waitbar(0,'Positive Training...','Name', 'TRAINING IN PROGRESS');
for i = 1:pos_num_train_images
sample_image_path =strcat(pos_train_dir,sprintf('%05d.jpg',i)); % Concatenate strings horizontally
im = imread(sample_image_path);
im = standardizeImage(im); % ensure the image is single precision with three channels
if isequal(model,'placesCNN')==1
mean_flag=1;
else
mean_flag=0;
end
% Prepare the image for the models requiring 224x224
if isequal(model,'bvlc_googlenet')==1 || isequal(model,'ResNet50')==1 ||isequal(model,'ResNet101')==1 ||isequal(model,'ResNet152')==1 ||isequal(model,'VGG_CNN_M')==1 ||isequal(model,'VGG_CNN_S')==1||isequal(model,'VGG_CNN_F')==1||isequal(model,'VGGNet16')==1 ||isequal(model,'VGGNet19')==1
input_data = {prepare_image_224_224(im,mean_flag)};
temp=net.forward(input_data);
% For ResNet models, read features from the res5c_branch2c blob instead of the default fc7 used by the other models (or inception_5b/output for bvlc_googlenet)
if isequal(model,'ResNet50') || isequal(model,'ResNet101') || isequal(model,'ResNet152')
data = net.blobs('res5c_branch2c').get_data;
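% For the standard 224x224 deploy network this blob is a 7x7 spatial map;
% sampling positions 2,4,6 in each spatial dimension below keeps a 3x3 grid
% of activations, which is then flattened into one feature vector.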
data = data(2:2:6, 2:2:6, : );
code(:,i) = data( : );
elseif isequal(model,'VGG_CNN_M')==1 || isequal(model,'VGGNet16')==1 || isequal(model,'VGGNet19')==1 || isequal(model,'VGG_CNN_S')==1 || isequal(model,'VGG_CNN_F')==1
code(:,i) = net.blobs('fc7').get_data();
elseif isequal(model,'bvlc_googlenet')==1
data = net.blobs('inception_5b/output').get_data;
data = data(2:2:6, 2:2:6, : );
code(:,i) = data( : );
end
else
input_data = {prepare_image(im,mean_flag)};
temp=net.forward(input_data);
code(:,i) = net.blobs('fc7').get_data();
end
per = i / pos_num_train_images * 100;
waitbar(i / pos_num_train_images,h,sprintf('Positive Training...%.2f%%',per));
end
close(h)
delete(h)
fprintf('\n');
cprintf('*text', ' Positive Training \n');
% Negative Training
h = waitbar(0,'Negative Training...','Name', 'TRAINING IN PROGRESS');
for i = 1:neg_num_train_images
j = i + pos_num_train_images;
sample_image_path =strcat(neg_train_dir,sprintf('%05d.jpg',i)); % Concatenate strings horizontally
im = imread(sample_image_path);
im = standardizeImage(im); % ensure the image is single precision with three channels
if isequal(model,'placesCNN')==1
mean_flag=1;
else
mean_flag=0;
end
% Prepare the image for the models requiring 224x224
if isequal(model,'bvlc_googlenet')==1 || isequal(model,'ResNet50')==1 ||isequal(model,'ResNet101')==1 ||isequal(model,'ResNet152')==1 ||isequal(model,'VGG_CNN_M')==1 ||isequal(model,'VGG_CNN_S')==1||isequal(model,'VGG_CNN_F')==1||isequal(model,'VGGNet16')==1 ||isequal(model,'VGGNet19')==1
input_data = {prepare_image_224_224(im,mean_flag)};
temp=net.forward(input_data);
% For ResNet models, read features from the res5c_branch2c blob instead of the default fc7 used by the other models (or inception_5b/output for bvlc_googlenet)
if isequal(model,'ResNet50') || isequal(model,'ResNet101') || isequal(model,'ResNet152')
data = net.blobs('res5c_branch2c').get_data;
data = data(2:2:6, 2:2:6, : );
code(:,j) = data( : );
elseif isequal(model,'VGG_CNN_M')==1 || isequal(model,'VGGNet16')==1 || isequal(model,'VGGNet19')==1 || isequal(model,'VGG_CNN_S')==1 || isequal(model,'VGG_CNN_F')==1
code(:,j) = net.blobs('fc7').get_data();
elseif isequal(model,'bvlc_googlenet')==1
data = net.blobs('inception_5b/output').get_data;
data = data(2:2:6, 2:2:6, : );
code(:,j) = data( : );
end
else
input_data = {prepare_image(im,mean_flag)};
temp=net.forward(input_data);
code(:,j) = net.blobs('fc7').get_data();
end
per = i / neg_num_train_images * 100;
waitbar(i / neg_num_train_images,h,sprintf('Negative Training...%.2f%%',per));
end
close(h)
delete(h)
fprintf('\n');
cprintf('*text', ' Negative Training \n');
%% (4) Testing
% Positive testing
h = waitbar(0,'Positive Testing...','Name', 'TESTING IN PROGRESS');
for i = 1:pos_num_test_images
sample_image_path=strcat(pos_test_dir,sprintf('%05d.jpg',i));
im = imread(sample_image_path);
im = standardizeImage(im); % ensure the image is single precision with three channels
if isequal(model,'placesCNN')==1
mean_flag=1;
else
mean_flag=0;
end
% Prepare the image for the models requiring 224x224
if isequal(model,'bvlc_googlenet')==1 || isequal(model,'ResNet50')==1 ||isequal(model,'ResNet101')==1 ||isequal(model,'ResNet152')==1 ||isequal(model,'VGG_CNN_M')==1 ||isequal(model,'VGG_CNN_S')==1||isequal(model,'VGG_CNN_F')==1||isequal(model,'VGGNet16')==1 ||isequal(model,'VGGNet19')==1
input_data = {prepare_image_224_224(im,mean_flag)};
temp=net.forward(input_data);
% For ResNet models, read features from the res5c_branch2c blob instead of the default fc7 used by the other models (or inception_5b/output for bvlc_googlenet)
if isequal(model,'ResNet50') || isequal(model,'ResNet101') || isequal(model,'ResNet152')
data = net.blobs('res5c_branch2c').get_data;
data = data(2:2:6, 2:2:6, : );
code_v(:,i) = data( : );
elseif isequal(model,'VGG_CNN_M')==1 || isequal(model,'VGGNet16')==1 || isequal(model,'VGGNet19')==1 || isequal(model,'VGG_CNN_S')==1 || isequal(model,'VGG_CNN_F')==1
code_v(:,i) = net.blobs('fc7').get_data();
elseif isequal(model,'bvlc_googlenet')==1
data = net.blobs('inception_5b/output').get_data;
data = data(2:2:6, 2:2:6, : );
code_v(:,i) = data( : );
end
else
input_data = {prepare_image(im,mean_flag)};
temp=net.forward(input_data);
code_v(:,i) = net.blobs('fc7').get_data();
end
per = i / pos_num_test_images * 100;
waitbar(i / pos_num_test_images,h,sprintf('Positive Testing...%.2f%%',per));
end
close(h)
delete(h)
fprintf('\n');
cprintf('*text', ' Positive Testing \n');
% Negative testing (Random images of animals)
h = waitbar(0,'Negative Testing...','Name', 'TESTING IN PROGRESS');
for i = 1:neg_num_test_images
j = i + pos_num_test_images;
sample_image_path =strcat(neg_test_dir,sprintf('%05d.jpg',i)); % Concatenate strings horizontally
im = imread(sample_image_path);
im = standardizeImage(im); % ensure the image is single precision with three channels
if isequal(model,'placesCNN')==1
mean_flag=1;
else
mean_flag=0;
end
% Prepare the image for the models requiring 224x224
if isequal(model,'bvlc_googlenet')==1 || isequal(model,'ResNet50')==1 ||isequal(model,'ResNet101')==1 ||isequal(model,'ResNet152')==1 ||isequal(model,'VGG_CNN_M')==1 ||isequal(model,'VGG_CNN_S')==1||isequal(model,'VGG_CNN_F')==1||isequal(model,'VGGNet16')==1 ||isequal(model,'VGGNet19')==1
input_data = {prepare_image_224_224(im,mean_flag)};
temp=net.forward(input_data);
% For ResNet models, read features from the res5c_branch2c blob instead of the default fc7 used by the other models (or inception_5b/output for bvlc_googlenet)
if isequal(model,'ResNet50') || isequal(model,'ResNet101') || isequal(model,'ResNet152')
data = net.blobs('res5c_branch2c').get_data;
data = data(2:2:6, 2:2:6, : );
code_v(:,j) = data( : );
elseif isequal(model,'VGG_CNN_M')==1 || isequal(model,'VGGNet16')==1 || isequal(model,'VGGNet19')==1 || isequal(model,'VGG_CNN_S')==1 || isequal(model,'VGG_CNN_F')==1
code_v(:,j) = net.blobs('fc7').get_data();
elseif isequal(model,'bvlc_googlenet')==1
data = net.blobs('inception_5b/output').get_data;
data = data(2:2:6, 2:2:6, : );
code_v(:,j) = data( : );
end
else
input_data = {prepare_image(im,mean_flag)};
temp=net.forward(input_data);
code_v(:,j) = net.blobs('fc7').get_data();
end
per = i / neg_num_test_images * 100;
waitbar(i / neg_num_test_images,h,sprintf('Negative Testing...%.2f%%',per));
end
close(h)
delete(h)
fprintf('\n');
cprintf('*text', ' Negative Testing \n');
fprintf('\n');
fprintf('\n');
%% Time elapsed for the whole process
fprintf('\n');
tEnd = toc(tStart);
hours = floor(tEnd / 3600);
tEnd = tEnd - hours * 3600;
mins = floor(tEnd / 60);
secs = tEnd - mins * 60;
fprintf('Total Time Elapsed: %d hours, %d minutes & %.0f seconds\n', hours, mins, secs);
%% Functions for preparing images for caffe
function cropped_data = prepare_image(im,mean_flag)
if mean_flag==1
d = load('../../models/placesCNN/places_mean.mat');
mean_data = d.image_mean;
else
d = load('../+caffe/imagenet/ilsvrc_2012_mean.mat');
mean_data = d.mean_data;
end
IMAGE_DIM = 256;
% Convert an image returned by Matlab's imread to im_data in caffe's data format: W x H x C with BGR channels
im_data = im(:, :, [3, 2, 1]); % permute channels from RGB to BGR
im_data = permute(im_data, [2, 1, 3]); % flip width and height
im_data = single(im_data); % convert from uint8 to single
im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear'); % resize im_data
im_data = im_data - mean_data; % subtract mean_data (already in W x H x C, BGR)
cropped_data = im_data(15:241, 15:241, :); % take 227 x 227 center crop
function cropped_data = prepare_image_224_224(im,mean_flag)
if mean_flag==1
d = load('../../models/placesCNN/places_mean.mat');
mean_data = d.image_mean;
else
d = load('../+caffe/imagenet/ilsvrc_2012_mean.mat');
mean_data = d.mean_data;
end
IMAGE_DIM = 256;
% Convert an image returned by Matlab's imread to im_data in caffe's data format: W x H x C with BGR channels
im_data = im(:, :, [3, 2, 1]); % permute channels from RGB to BGR
im_data = permute(im_data, [2, 1, 3]); % flip width and height
im_data = single(im_data); % convert from uint8 to single
im_data = imresize(im_data, [IMAGE_DIM IMAGE_DIM], 'bilinear'); % resize im_data
im_data = im_data - mean_data; % subtract mean_data (already in W x H x C, BGR)
cropped_data = im_data(15:238, 15:238, :); % take 224 x 224 center crop