# train.py
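# Behavioral cloning training script: reads recorded (image, action) pairs from
# gzipped pickle files, optionally stacks a history of grayscale frames, trains a
# BCAgent with minibatch updates, logs learning curves to tensorboard, and saves
# the trained model.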
from __future__ import print_function

import os
import pickle
import gzip
import glob
import random as rnd
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
import torch

from agent.bc_agent import BCAgent
from utils import rgb2gray
from tensorboard_evaluation import Evaluation
from config import Config

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
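# Note: agent.predict() below is fed tensors that are moved to `device`; this
# assumes BCAgent keeps its network on the same device (CUDA if available).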


def read_data(datasets_dir="./data", frac=0.1):
    """Load all recorded .gzip pickle files and split them into training and validation sets."""
    print("... read data")
    file_names = glob.glob(os.path.join(datasets_dir, "*.gzip"))
    print(file_names)

    X = []
    y = []
    n_samples = 0
    for data_file in file_names:
        with gzip.open(data_file, 'rb') as f:
            data = pickle.load(f)
        n_samples += len(data["state"])
        X.extend(data["state_img"])  # Hint: to access images use state_img here!
        y.extend(data["action"])

    X = np.array(X).astype('float32')
    y = np.array(y).astype('float32')

    # split data into training and validation set
    X_train, y_train = X[:int((1 - frac) * n_samples)], y[:int((1 - frac) * n_samples)]
    X_valid, y_valid = X[int((1 - frac) * n_samples):], y[int((1 - frac) * n_samples):]
    return X_train, y_train, X_valid, y_valid
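# Note: the split above is sequential, not shuffled: the first (1 - frac) of the
# concatenated samples become training data and the last frac become validation
# data, so the validation set comes from the most recently recorded episodes.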


def onehot(y_train):
    y_train_temp = np.zeros((y_train.shape[0], 4))
    for idx in range(len(y_train)):
        y_train_temp[idx] = np.eye(4)[int(y_train[idx][0])]
    return y_train_temp
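# Note: onehot() encodes an integer action id (0-3) as a one-hot vector of length 4.
# It is not called anywhere in this script; presumably it is kept for agents that
# expect one-hot targets instead of integer class labels.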


def preprocessing(X_train, y_train, X_valid, y_valid, conf):
    # TODO: preprocess your data here. For the images:
    # 1. convert the images in X_train/X_valid to grayscale. If you use rgb2gray() from utils.py,
    #    the output has shape (100, 150, 1); here it is reshaped to (1, 100, 150) (channels first).
    X_train = np.array([rgb2gray(img).reshape(1, 100, 150) for img in X_train])
    X_valid = np.array([rgb2gray(img).reshape(1, 100, 150) for img in X_valid])

    # History:
    # At first you should only use the current image as input to your network to learn the next
    # action. Then the input states have shape (100, 150, 1). Later, add a history of the last N
    # images to your state so that a state has shape (100, 150, N).
    # Hint: you can also implement frame skipping.
    skip_frames, history_length = conf.skip_frames, conf.history_length
    X_tr, y_tr = skip_history(X_train, y_train, history_length, skip_frames)
    X_val, y_val = skip_history(X_valid, y_valid, history_length, skip_frames)
    return X_tr, y_tr, X_val, y_val
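# Note: after preprocessing, each state returned by skip_history() has shape
# (history_length, 100, 150), i.e. history_length stacked grayscale frames, and
# X_tr/X_val have shape (n_states, history_length, 100, 150).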


def train_model(X_train, y_train, X_valid, y_valid, config, model_dir="./models", tensorboard_dir="./tensorboard"):
    n_minibatches = config.n_minibatches
    batch_size = config.batch_size
    lr = config.lr
    hidden_units = config.hidden_units
    history_length = config.history_length
    agent_type = config.agent_type

    # create result and model folders
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    print("... train model")

    # TODO: specify your agent with the neural network in agents/bc_agent.py
    agent = BCAgent(agent_type, lr, hidden_units, history_length)

    states = ['training_accuracy', 'validation_accuracy', 'loss']
    tensorboard_eval = Evaluation(tensorboard_dir, "Learning_curves", states)
    # TODO: implement the training
    # 1. write a method sample_minibatch and perform an update step
    def sample_minibatch(X, y):
        """Draw a random minibatch of batch_size samples from (X, y)."""
        length = X.shape[0]
        X_batch = []
        y_batch = []
        # sample batch_size distinct indices over the whole dataset
        sampled_idx = rnd.sample(range(length), batch_size)
        for idx in sampled_idx:
            X_batch.append(X[idx])
            y_batch.append(y[idx])
        return X_batch, y_batch
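    # Note: rnd.sample draws indices without replacement within a batch, but
    # consecutive batches are sampled independently, so the same frame can appear
    # in several minibatches. This assumes batch_size <= number of training samples.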

    # 2. compute training/validation accuracy and loss for the batch and visualize them with
    #    tensorboard. You can watch the progress of your training *during* the training in
    #    your web browser.

    # training loop
    for i in range(n_minibatches):
        X_batch, y_batch = sample_minibatch(X_train, y_train)
        loss = agent.update(X_batch, y_batch)

        if i % 10 == 0:
            training_correct = 0
            training_total = 0
            validation_correct = 0
            validation_total = 0

            with torch.no_grad():
                output_training = agent.predict(torch.tensor(np.array(X_batch)).to(device))
                torch.cuda.empty_cache()
                output_validation = agent.predict(torch.tensor(X_valid).to(device))

                # count correct predictions on the current training batch
                for idx, label in enumerate(output_training):
                    if torch.argmax(label) == torch.tensor(y_batch[idx], dtype=torch.long, device=device):
                        training_correct += 1
                    training_total += 1

                # count correct predictions on the full validation set
                for idx, label in enumerate(output_validation):
                    if torch.argmax(label) == torch.tensor(y_valid[idx], dtype=torch.long, device=device):
                        validation_correct += 1
                    validation_total += 1

            training_accuracy = training_correct / training_total
            validation_accuracy = validation_correct / validation_total

            print("Minibatch %d of %d" % (i, n_minibatches))
            print("Training accuracy: %f" % training_accuracy)
            print("Validation accuracy: %f" % validation_accuracy)

            # write training/validation accuracy and loss to tensorboard
            eval_dic = {'training_accuracy': training_accuracy,
                        'validation_accuracy': validation_accuracy,
                        'loss': loss.item()}
            tensorboard_eval.write_episode_data(i, eval_dic)

    # save your agent
    save_file = datetime.now().strftime("%Y%m%d-%H%M%S") + "_bc_agent.pt"
    save_path = agent.save(os.path.join(model_dir, save_file))
    print("Model saved in file: %s" % save_path)


def skip_history(X, y, hist_len, n_skip):
    """Stack hist_len frames (taking every (n_skip + 1)-th frame) into one state.

    The label of a stacked state is the action recorded for the last frame in the stack.
    """
    length = len(X)
    idx = 0
    X_seq = []
    y_seq = []
    while idx + n_skip + 1 < length:
        X_tmp = []
        y_tmp = np.zeros((1, 100, 150))  # placeholder, overwritten in the inner loop
        while len(X_tmp) < hist_len:
            X_tmp.append(X[idx][0])
            y_tmp = y[idx]
            if idx + n_skip + 1 < length:
                idx = idx + n_skip + 1
            else:
                # not enough frames left for a full history: discard this partial state
                X_tmp = []
                break
        if len(X_tmp) != 0:
            X_tmp = np.array(X_tmp).astype('float32').reshape(hist_len, 100, 150)
            X_seq.append(X_tmp)
            y_seq.append(y_tmp)

    X_seq = np.array(X_seq).astype('float32')
    y_seq = np.array(y_seq).astype('float32')
    return X_seq, y_seq
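# Example (assuming hist_len=2 and n_skip=1): frames 0 and 2 form the first stacked
# state with the action of frame 2 as its label, the next state starts at frame 4,
# and a trailing partial stack at the end of the data is dropped.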


if __name__ == "__main__":
    # read data
    X_train, y_train, X_valid, y_valid = read_data("./data")

    conf = Config()

    # preprocess data
    if conf.agent_type == "ResNet":
        X_train, y_train, X_valid, y_valid = preprocessing(X_train, y_train, X_valid, y_valid, conf)

    # train model (you can change the parameters!)
    train_model(X_train, y_train, X_valid, y_valid, conf)