# Project hiatus
# script with metrics
# 26/11/2020
# Cédric BARON
# importing libraries
import numpy as np
from matplotlib import pyplot
from sklearn import metrics
from sklearn import preprocessing
from sklearn.neighbors import KDTree
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, KFold
from scipy.special import digamma
import scipy.spatial as spatial
import torch
# importing functions from other files
import utils as fun

class ConfusionMatrixBinary:
    def __init__(self, n_class, class_names):
        self.CM = np.zeros((n_class, n_class))
        self.n_class = n_class
        self.class_names = class_names

    def clear(self):
        self.CM = np.zeros((self.n_class, self.n_class))

    def add_batch(self, gt, pred):
        self.CM += confusion_matrix(gt, pred, labels=list(range(self.n_class)))

    def overall_accuracy(self):
        # percentage of correct classifications
        return np.trace(self.CM) / np.sum(self.CM)

    def class_IoU(self, show=1):
        ious = np.full(self.n_class, 0.)
        for i_class in range(self.n_class):
            error_matrix = [i for i in range(self.n_class) if i != i_class]
            ious[i_class] = self.CM[i_class, i_class] / (
                np.sum(self.CM[i_class, error_matrix])
                + np.sum(self.CM[error_matrix, i_class])
                + self.CM[i_class, i_class])
        if show:
            print(' | '.join('{} : {:3.2f}%'.format(name, 100 * iou)
                             for name, iou in zip(self.class_names, ious)))
        # do not count classes that are not present in the dataset in the mean IoU
        return 100 * np.nansum(ious) / np.logical_not(np.isnan(ious)).sum()
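
# Hedged usage sketch for ConfusionMatrixBinary (variable names are
# illustrative, not from the project): gt and pred are 1-D integer arrays.
#   cm = ConfusionMatrixBinary(2, ["no change", "change"])
#   cm.add_batch(gt, pred)
#   print(cm.overall_accuracy(), cm.class_IoU(show=0))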

def visualize_roc(y, pred, return_thresh=False):
    """
    Computes the ROC AUC, plots the ROC curve and optionally
    returns the optimal decision threshold.
    """
    ## making the ROC curve
    fpr, tpr, thresholds = metrics.roc_curve(y, pred)
    auc = metrics.roc_auc_score(y, pred)
    print("AUC: {:1.2f}".format(auc))
    # optimal threshold via Youden's J statistic (tpr - fpr)
    youden_j = tpr - fpr
    idx = np.argmax(youden_j)
    optimal_threshold = thresholds[idx]
    # plot the roc curve for the model
    pyplot.plot(fpr, tpr, linestyle='--',
                label='AUC: %1.2f opt_thresh: %2.2f' % (auc, optimal_threshold))
    # axis labels
    pyplot.xlabel('False Positive Rate')
    pyplot.ylabel('True Positive Rate')
    # show the legend
    pyplot.legend()
    # show the plot
    pyplot.show()
    ## returning the threshold if requested
    result = None
    if return_thresh:
        result = optimal_threshold
    return result
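
# Hedged usage sketch: y holds binary ground-truth labels and pred continuous
# scores; the returned value is the threshold maximising tpr - fpr.
#   opt_thresh = visualize_roc(y, pred, return_thresh=True)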

def confusion_matrix_visualize(pred, y, thresh):
    """
    Computes and prints the confusion matrix for binary predictions
    obtained at a given threshold.
    """
    # loading the confusion matrix
    m = ConfusionMatrixBinary(2, ["no change", "change"])
    # putting the predictions into the confusion matrix
    m.add_batch(y, pred)
    # printing the result for the given threshold
    print("Threshold is " + str(thresh))
    print(m.CM)
    print('IoU : {:3.2f}%'.format(m.class_IoU()))
    print('Overall accuracy : {:3.2f}%'.format(m.overall_accuracy() * 100))
    print('\n')
    return None

def class_precision(binary_vec, y, classes):
    """
    Evaluates, for each land-cover class, the fraction of samples whose
    change/no-change prediction is correct.
    args are the binary vector (predictions), the binary ground truth and the classes
    """
    # getting boolean vectors of correct and incorrect predictions
    false_values = binary_vec != y
    true_values = binary_vec == y
    # converting to numpy
    classes = np.array(classes)
    ## per class, counting correct and incorrect predictions
    for class_id, name in zip((1, 2, 3), ("one", "two", "three")):
        n_true = np.count_nonzero(classes[true_values] == class_id)
        n_false = np.count_nonzero(classes[false_values] == class_id)
        # getting the percentage of correctly predicted values
        precision = n_true / (n_true + n_false)
        # printing the result
        print("Precision for class {} is {:3.2f} ".format(name, precision))
    print("\n")
    return None
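
# Hedged usage sketch: binary_vec and y are 0/1 arrays of the same length,
# classes holds one land-cover id per sample (1, 2 or 3 here).
#   class_precision(binary_vec, y, classes)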

def NMI_continuous_discrete(labels_discrete, data_continuous, nb_classes, labels, classes_idx):
    """
    Computes a nearest-neighbour estimate of the mutual information between
    a discrete variable (the labels) and a continuous one (the data),
    averaged over the number of samples.
    """
    # number of samples
    N = len(labels_discrete)
    # loading the kd-tree on the full continuous data
    tree = spatial.cKDTree(data_continuous)
    # variable to store the score
    MI = 0
    # number of neighbours (the query returns the point itself, hence k-1 of them)
    k = 4
    for i in range(len(labels)):
        # loading the number of samples from this class
        Nxi = nb_classes[i]
        # loading the class
        label_class = labels[i]
        # index into the class-restricted continuous data
        idx_class = 0
        # loading the index matrix for the class
        class_idx = classes_idx[i]
        # loading the values corresponding to the class
        data_class = data_continuous[class_idx]
        # loading the tree for this class
        tree_class = KDTree(data_class)
        # looping through our data
        for j, label_disc in enumerate(labels_discrete):
            # checking if the sample has the correct class
            if label_disc == label_class:
                # getting the distances to the nearest neighbours within the class
                dist, ind = tree_class.query(data_class[idx_class][None, :], k=k)
                # getting the max distance
                dist_max = np.max(dist)
                # counting the samples of the full dataset within that distance
                ind = tree.query_ball_point(data_continuous[j], dist_max)
                Mi = len(ind)
                # updating the index into data_class
                idx_class += 1
                # accumulating the per-sample MI contribution
                MI += digamma(N) - digamma(Nxi) + digamma(k - 1) - digamma(Mi)
    # averaging to get the final score
    NMI_avg = MI / N
    return NMI_avg
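
# Hedged usage sketch (argument semantics inferred from the function body):
# labels_discrete is a 1-D array of class ids, data_continuous an (N, d)
# array of embeddings, labels the distinct class ids, nb_classes their
# per-class sample counts, and classes_idx a per-class index (or boolean
# mask) into data_continuous.
#   score = NMI_continuous_discrete(labels_discrete, data_continuous,
#                                   nb_classes, labels, classes_idx)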

def svm_accuracy_estimation(data, labels, cv=False):
    """
    Estimates classification performance with a linear SVM.
    This function builds the train/validation split itself.
    """
    ## linear svm
    # loading the data
    dataset = fun.train_val_dataset(data, labels)
    tensor_train = torch.tensor(dataset['train'])
    tensor_val = torch.tensor(dataset['val'])
    tensor_gt_val = torch.tensor(dataset['gt_val'])
    # adding a feature dimension if the data is one-dimensional
    if len(list(tensor_train.shape)) == 1:
        tensor_train = tensor_train[:, None]
        tensor_val = tensor_val[:, None]
        tensor_gt_val = tensor_gt_val[:, None]
    # standardising the features; the scaler is fitted on the training set only
    scaler = preprocessing.StandardScaler()
    tensor_train = scaler.fit_transform(tensor_train)
    tensor_val = scaler.transform(tensor_val)
    # loading the model, 'ovr' is one-vs-rest, C is the soft-margin parameter
    svclassifier = SVC(kernel='linear', decision_function_shape='ovr', C=0.01,
                       class_weight="balanced")
    # training the model
    svclassifier.fit(tensor_train, dataset['gt_train'])
    # predicting the labels
    pred_label = svclassifier.predict(tensor_val)
    # computing the results
    conf_mat = confusion_matrix(tensor_gt_val, pred_label)
    class_report = classification_report(tensor_gt_val, pred_label)
    # performing a cross-validation (optional)
    if cv:
        # prepare the cross-validation procedure
        cv = KFold(n_splits=10, random_state=1, shuffle=True)
        # performing a k-fold validation
        scores_cv = cross_val_score(svclassifier, tensor_val, tensor_gt_val,
                                    cv=cv, scoring='f1_macro')
    else:
        scores_cv = None
    return conf_mat, class_report, scores_cv
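
# Hedged usage sketch: data is an (N, d) feature array and labels its 1-D
# targets; utils.train_val_dataset is assumed to return the keyed splits
# consumed above.
#   conf_mat, report, scores = svm_accuracy_estimation(data, labels, cv=True)
#   print(report)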

def svm_accuracy_estimation_2(data_train, data_test, labels_train, labels_test, cv=False):
    """
    Estimates classification performance with a linear SVM.
    This function works with pre-built train/test splits.
    """
    ## linear svm
    # loading the data
    tensor_train = torch.tensor(data_train)
    tensor_val = torch.tensor(data_test)
    tensor_gt_val = torch.tensor(labels_test)
    # adding a feature dimension if the data is one-dimensional
    if len(list(tensor_train.shape)) == 1:
        tensor_train = tensor_train[:, None]
        tensor_val = tensor_val[:, None]
        tensor_gt_val = tensor_gt_val[:, None]
    # standardising the features; the scaler is fitted on the training set only
    scaler = preprocessing.StandardScaler()
    tensor_train = scaler.fit_transform(tensor_train)
    tensor_val = scaler.transform(tensor_val)
    # loading the model, 'ovr' is one-vs-rest, C is the soft-margin parameter
    svclassifier = SVC(kernel='linear', decision_function_shape='ovr', C=0.01,
                       class_weight="balanced")
    # training the model
    svclassifier.fit(tensor_train, labels_train)
    # predicting the labels
    pred_label = svclassifier.predict(tensor_val)
    # computing the results
    conf_mat = confusion_matrix(tensor_gt_val, pred_label)
    class_report = classification_report(tensor_gt_val, pred_label)
    # performing a cross-validation (optional)
    if cv:
        # prepare the cross-validation procedure
        cv = KFold(n_splits=10, random_state=1, shuffle=True)
        # performing a k-fold validation
        scores_cv = cross_val_score(svclassifier, tensor_val, tensor_gt_val,
                                    cv=cv, scoring='f1_macro')
    else:
        scores_cv = None
    return conf_mat, class_report, scores_cv

def iou_accuracy(pred, threshold, y, classes):
    """
    Converts the continuous predictions to binary values at the given
    threshold and prints the confusion matrix, IoU and overall accuracy.
    """
    # converting to binary
    binary_vec = fun.convert_binary(pred, threshold)
    # visualizing the confusion matrix
    confusion_matrix_visualize(binary_vec, y, threshold)
    return None
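
if __name__ == "__main__":
    # Minimal sanity check on synthetic data (illustrative only, not part of
    # the original experiments): random scores against random binary labels
    # should give an AUC close to 0.5.
    rng = np.random.default_rng(0)
    y_demo = rng.integers(0, 2, size=200)
    scores_demo = rng.random(200)
    thresh_demo = visualize_roc(y_demo, scores_demo, return_thresh=True)
    confusion_matrix_visualize((scores_demo > thresh_demo).astype(int),
                               y_demo, thresh_demo)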