settings.py
'''All hyperparameters and settings are defined here.'''
class AgentSetting():
    '''Commented values are the ones used in the Nature paper; use them if you have that much computing power.'''
    minibatch = 32
    replay_strt_size = 15000  # 50000 ; act with a random policy for this many steps to fill the replay memory before learning
    replay_memory = 50000  # 1000000
    t_net_update_freq = 10000  # 10000 ; target-network update frequency (steps)
    discount_factor = 0.99
    update_freq = 1  # 4 ; update the network every 4 actions
    # nature paper: 50 million frames of training
    # double DQN paper: 200 million frames, i.e. 50M steps
    training_steps = 1000000
    # RMSProp
    learning_rate = 0.00025  # not dueling
    duel_learining_rate = 0.0000625  # dueling lr (6.25*10^-5)
    momentum = 0.95
    # deprecated
    # grad_momentum = 0.95
    # sq_grad_momentum = 0.95
    # min_sq_grad = 0.01
    # epsilon-greedy exploration (see the annealing sketch after this class)
    e_greedy_init = 1.0
    e_greedy_final = 0.1
    e_final_at = 100000  # 1000000 (training_steps // 50) ; steps over which epsilon is annealed to its final value
    # TODO: apply evaluation every 1M steps!
    no_op_max = 30  # max number of no-op actions at the start of an episode
    eval_every = 1000000  # steps
    epsilon_eval = 0.05
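
# The following helper is a minimal sketch (hypothetical, not referenced elsewhere in this repo)
# of the epsilon-greedy schedule implied by the settings above: epsilon is annealed linearly from
# e_greedy_init to e_greedy_final over e_final_at steps, then held constant.
def linear_epsilon(step,
                   start=AgentSetting.e_greedy_init,
                   final=AgentSetting.e_greedy_final,
                   final_at=AgentSetting.e_final_at):
    """Return the exploration rate for a given training step (hypothetical helper)."""
    if step >= final_at:
        return final
    return start + (final - start) * (step / float(final_at))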
class ArchitectureSetting():
    # layer 1
    in_shape = [84, 84, 4]
    f1_no = 32
    f1_size = [8, 8]
    stride1 = 4
    # layer 2
    f2_no = 64
    f2_size = [4, 4]
    stride2 = 2
    # layer 3
    f3_no = 64
    f3_size = [3, 3]
    stride3 = 1
    # layer 4: fully connected
    nodes = 512  # if dueling: 512 for the value stream and 512 for the advantage stream
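
# A small, self-contained sketch (not used by the rest of the code) that derives the feature-map
# sizes implied by the layer settings above, assuming "valid" convolutions as in the Nature DQN:
# out = (in - filter) // stride + 1, i.e. 84 -> 20 -> 9 -> 7, so the flattened conv output that
# feeds the fully connected layer has 7 * 7 * 64 = 3136 units.
def conv_output_sizes():
    size = ArchitectureSetting.in_shape[0]
    for f, s in [(ArchitectureSetting.f1_size[0], ArchitectureSetting.stride1),
                 (ArchitectureSetting.f2_size[0], ArchitectureSetting.stride2),
                 (ArchitectureSetting.f3_size[0], ArchitectureSetting.stride3)]:
        size = (size - f) // s + 1
    return size  # spatial side length after the last conv layer (7 for the settings above)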
class StateProcessorSetting():
    history_length = 4
    observation_dims = [84, 84]
class EnvSetting():
    recEvery = 100  # record every 100th episode
    '''gym: each action is repeatedly performed for a duration of k frames, where k is uniformly sampled from {2, 3, 4}.'''
    action_repeat = 1  # 4
    # reward clipping (see the sketch after this class)
    max_reward = 1.0
    min_reward = -1.0
    # display
    frame_dim = [210, 160, 3]
    render = True
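
# Minimal sketch (hypothetical helper, not referenced elsewhere) of the reward clipping that
# max_reward / min_reward describe: every environment reward is clipped into [-1, 1], as in the
# Nature DQN paper.
def clip_reward(reward):
    return max(EnvSetting.min_reward, min(EnvSetting.max_reward, float(reward)))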
class UtilSettings():
    '''useful paths'''
    trainDir = 'graphVars/train'  # dir for checkpoints during training
    playDir = 'graphVars/play'  # dir for model weights for playing/eval
    monitorDir = 'gymRecordings'
    # experienceDir = 'expMemory'  # TODO-kill
    trainSummaryDir = 'summaries/train'
    playSummaryDir = 'summaries/play'
class PerSettings():
    # double DQN (tuned); PER uses the proportional method
    # priority parameters
    epsilon = 1.0  # 0.1 # 0.01
    alpha = 0.6
    # importance-sampling parameters
    beta_init = 0.4
    beta_final = 1.0  # annealed linearly until the end of training
    beta_finalAt = AgentSetting.training_steps
    step_size = AgentSetting.learning_rate / 4.0
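
# The helpers below are a minimal sketch of how the prioritized-replay settings above are
# typically used (proportional variant of prioritized experience replay); the helper names are
# hypothetical and nothing else in this file calls them.
def priority(td_error):
    """Unnormalized priority: p_i = (|delta_i| + epsilon) ** alpha."""
    return (abs(td_error) + PerSettings.epsilon) ** PerSettings.alpha

def current_beta(step):
    """Linearly anneal beta from beta_init to beta_final over beta_finalAt steps."""
    frac = min(1.0, step / float(PerSettings.beta_finalAt))
    return PerSettings.beta_init + (PerSettings.beta_final - PerSettings.beta_init) * frac

def importance_weight(sample_prob, memory_size, beta):
    """w_i = (N * P(i)) ** -beta, usually normalized by the maximum weight in the batch."""
    return (memory_size * sample_prob) ** (-beta)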