hydra_config.yaml
defaults:
  - override hydra/launcher: submitit_slurm
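# The submitit_slurm launcher (hydra-submitit-launcher plugin) runs each job in a
# multirun sweep as its own SLURM batch job, using the per-job resources below.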
hydra:
  launcher:
    timeout_min: 900
    mem_gb: 6
    cpus_per_task: 4
  sweep:
    dir: /scratch/wf541/multirun/${now:%Y-%m-%d}/${now:%H:%M:%S}
    subdir: ${hydra.job.override_dirname}
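# ${hydra.job.override_dirname} names each sweep subdirectory after the job's
# command-line overrides, so every hyperparameter combination gets its own folder.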
model: DDPG
# Parameters relating to stock and option prices and quantities
init_stock_holdings: 0 # give the algorithm no hint and see whether it can find the hedge on its own
init_option_holdings: 100
init_wealth: 0 # no initial wealth; stock purchases are financed by borrowing
gbm_mu: 0.02 # we assume 5 rebalances per day, which amounts to 0.1, or a 10% annual return
gbm_sigma: 0.09 # we assume 5 rebalances, which comes to roughly 20% annual volatility
gbm_r: 0.0 # interest is negligible over such a short horizon
episode_length: 50
num_out_of_sample_path: 10000
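# (Presumably each episode simulates a GBM price path one rebalance at a time,
#  i.e. S_{t+1} = S_t * exp(mu - sigma^2/2 + sigma * Z) with Z ~ N(0, 1) and a unit step;
#  this is an assumption about the simulator, not stated in this file.)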
# Parameters relating to reward function and action space
reward_kappa: 0.01
reward_clip_min: -200 # reward clipping: roughly the reward magnitude if the stock price moves 2 or 3 sigmas up or down
reward_clip_max: 200
action_min: -100
action_max: 100
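# (reward_kappa is presumably a risk-aversion weight in a mean-variance style hedging reward,
#  e.g. reward = dPnL - (kappa / 2) * dPnL^2; this is an assumption, not confirmed here.)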
# Parameters relating to training
data_reuse_num_episodes: 10000
total_training_timesteps: 12500000
learning_rate: 0.001
# DDPG model hyperparameters -- these should be iterated over (on the command-line) using multirun
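# Example sweep (the entry-point name and values are illustrative only):
#   python train.py --multirun batch_size=32,64 learning_rate=0.0001,0.001 net_arch_size=32,64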
batch_size: 32
gamma: 0.99
tau: 0.005
gradient_steps: -1
max_grad_norm: 0.5
net_arch_length: 5
net_arch_size: 32
train_freq: 1
learning_starts: 500
net_arch: [32, 32, 32, 32, 32]
OUstd: 50
OUtheta: 0.00
OUdt: 0.3
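# The OU* values presumably parameterize Ornstein-Uhlenbeck exploration noise for DDPG
# (e.g. stable-baselines3's OrnsteinUhlenbeckActionNoise takes sigma, theta and dt);
# with theta = 0 the noise reduces to a scaled random walk.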