hydra_config.yaml
defaults:
  - override hydra/launcher: submitit_slurm
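# The submitit_slurm launcher (hydra-submitit-launcher plugin) runs each job in a
# multirun sweep as its own SLURM batch job, using the per-job resources below.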
hydra:
  launcher:
    timeout_min: 900
    mem_gb: 6
    cpus_per_task: 4
  sweep:
    dir: /scratch/wf541/multirun/${now:%Y-%m-%d}/${now:%H:%M:%S}
    subdir: ${hydra.job.override_dirname}
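# ${hydra.job.override_dirname} names each sweep subdirectory after the job's
# command-line overrides, so every hyperparameter combination gets its own folder.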
model: DDPG
# Parameters relating to stock and option prices and quantities
init_stock_holdings: 0 # give the algorithm no hint and see whether it can find the hedge on its own
init_option_holdings: 100
init_wealth: 0 # no initial wealth; stock purchases are financed by borrowing
gbm_mu: 0.02 # we assume 5 rebalances per day, which amounts to 0.1, or a 10% annual return
gbm_sigma: 0.09 # we assume 5 rebalances, which comes to roughly 20% annual volatility
gbm_r: 0.0 # interest is negligible over such a short horizon
episode_length: 50
num_out_of_sample_path: 10000
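# (Presumably each episode simulates a GBM price path one rebalance at a time,
#  i.e. S_{t+1} = S_t * exp(mu - sigma^2/2 + sigma * Z) with Z ~ N(0, 1) and a unit step;
#  this is an assumption about the simulator, not stated in this file.)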
# Parameters relating to reward function and action space
reward_kappa: 0.01
reward_clip_min: -200 # reward clipping: roughly the reward magnitude if the stock price moves 2 or 3 sigmas up or down
reward_clip_max: 200
action_min: -100
action_max: 100
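# (reward_kappa is presumably a risk-aversion weight in a mean-variance style hedging reward,
#  e.g. reward = dPnL - (kappa / 2) * dPnL^2; this is an assumption, not confirmed here.)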
# Parameters relating to training
data_reuse_num_episodes: 10000
total_training_timesteps: 12500000
learning_rate: 0.001
# DDPG model hyperparameters -- these should be iterated over (on the command-line) using multirun
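# Example sweep (the entry-point name and values are illustrative only):
#   python train.py --multirun batch_size=32,64 learning_rate=0.0001,0.001 net_arch_size=32,64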
batch_size: 32
gamma: 0.99
tau: 0.005
gradient_steps: -1
max_grad_norm: 0.5
net_arch_length: 5
net_arch_size: 32
train_freq: 1
learning_starts: 500
net_arch: [32, 32, 32, 32, 32]
OUstd: 50
OUtheta: 0.00
OUdt: 0.3
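# The OU* values presumably parameterize Ornstein-Uhlenbeck exploration noise for DDPG
# (e.g. stable-baselines3's OrnsteinUhlenbeckActionNoise takes sigma, theta and dt);
# with theta = 0 the noise reduces to a scaled random walk.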