-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
86 lines (64 loc) · 3.17 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from collections import defaultdict
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from scipy.stats import pearsonr, spearmanr, kendalltau
from data import names
def cross_validate(dataset, model, names, *, n_splits=5):
    """Run video-grouped K-fold cross-validation on a single CSV dataset.

    The folds are built over the unique values of the ``video`` column, so
    all rows of one video land on the same side of every split. For each
    fold the model is refitted on the training videos and then scored on
    each held-out video separately.

    Args:
        dataset: Path (or buffer) accepted by ``pandas.read_csv``. The table
            must provide the columns ``video``, ``value`` and every column
            listed in ``names``.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refitted in place once per fold.
        names: Column labels used as model inputs.
        n_splits: Number of folds handed to ``KFold``; defaults to 5, the
            value that used to be hard-coded.

    Returns:
        Dict mapping ``"PLCC"``, ``"SRCC"``, ``"KRCC"`` and ``"RMSE"`` to the
        mean over folds of the per-fold mean of the per-video scores.
    """
    data = pd.read_csv(dataset)
    videos = data.video.unique()

    # One entry per fold and metric: the fold's mean over its test videos.
    fold_means = defaultdict(list)
    # KFold is otherwise left at its defaults (no shuffling requested).
    for train_idxs, test_idxs in KFold(n_splits=n_splits).split(videos):
        data_train = data[data.video.isin(videos[train_idxs])]
        model.fit(data_train.loc[:, names], data_train.value)

        per_video = defaultdict(list)
        for video in videos[test_idxs]:
            data_test = data[data.video == video]
            y_true = data_test.value
            y_pred = model.predict(data_test.loc[:, names])
            per_video["PLCC"].append(pearsonr(y_pred, y_true).statistic)
            per_video["SRCC"].append(spearmanr(y_pred, y_true).statistic)
            per_video["KRCC"].append(kendalltau(y_pred, y_true).statistic)
            per_video["RMSE"].append(np.sqrt(np.mean(np.square(y_pred - y_true))))

        # Average within the fold first so every fold carries equal weight
        # in the final result, regardless of how many videos it holds.
        for metric, scores in per_video.items():
            fold_means[metric].append(np.mean(scores))

    return {metric: np.mean(means) for metric, means in fold_means.items()}
def cross_dataset(train_dataset, test_dataset, model, names):
    """Fit on one CSV dataset and score per video on another.

    Args:
        train_dataset: CSV source for fitting, read with ``pandas.read_csv``.
            Needs a ``value`` column plus the columns in ``names``.
        test_dataset: CSV source for evaluation. Needs ``video`` and
            ``value`` columns plus the columns in ``names``.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is
            fitted in place on the whole training table.
        names: Column labels used as model inputs.

    Returns:
        Dict mapping ``"PLCC"``, ``"SRCC"``, ``"KRCC"`` and ``"RMSE"`` to the
        mean of that score over all videos of the test dataset.
    """
    fit_frame = pd.read_csv(train_dataset)
    eval_frame = pd.read_csv(test_dataset)
    model.fit(fit_frame.loc[:, names], fit_frame.value)

    # Correlation measures, in the order their keys appear in the result.
    correlations = {"PLCC": pearsonr, "SRCC": spearmanr, "KRCC": kendalltau}

    per_video = defaultdict(list)
    for video in eval_frame.video.unique():
        clip = eval_frame[eval_frame.video == video]
        target = clip.value
        y_pred = model.predict(clip.loc[:, names])
        for label, correlate in correlations.items():
            per_video[label].append(correlate(y_pred, target).statistic)
        residual = y_pred - target
        per_video["RMSE"].append(np.sqrt(np.mean(np.square(residual))))

    return {label: np.mean(scores) for label, scores in per_video.items()}
if __name__ == "__main__":
    # Script entry point: evaluate one random-forest pipeline on both
    # datasets, printing a cross-validation row and a cross-dataset row
    # for each.
    forest = RandomForestRegressor(n_estimators=400, max_features="sqrt", random_state=8)
    model = Pipeline([("regressor", forest)])
    table_header = "Method\t\t | PLCC | SRCC | KRCC | RMSE"

    print("Dataset: dataset_based.csv")
    print(table_header)
    # Model inputs are all columns of dataset_based.csv except the three
    # below; list.remove raises ValueError if one of them is missing.
    names = list(pd.read_csv("dataset_based.csv").columns)
    for non_feature in ("video", "value", "method"):
        names.remove(non_feature)
    # Both evaluations finish before the first result row is printed.
    based_rows = [
        ("based[cv]", cross_validate("dataset_based.csv", model, names)),
        ("based[cd]", cross_dataset("dataset_rsblur.csv", "dataset_based.csv", model, names)),
    ]
    for method, stats in based_rows:
        print(f"{method}\t | {stats['PLCC']:.4f} | {stats['SRCC']:.4f} | {stats['KRCC']:.4f} | {stats['RMSE']:15.4f}")

    print("Dataset: dataset_rsblur.csv")
    print(table_header)
    # The column list derived from dataset_based.csv is reused here.
    rsblur_rows = [
        ("based[cv]", cross_validate("dataset_rsblur.csv", model, names)),
        ("based[cd]", cross_dataset("dataset_based.csv", "dataset_rsblur.csv", model, names)),
    ]
    for method, stats in rsblur_rows:
        print(f"{method}\t | {stats['PLCC']:.4f} | {stats['SRCC']:.4f} | {stats['KRCC']:.4f} | {stats['RMSE']:15.4f}")