Skip to content

Commit ccfd4a6

Browse files
committed
Add more hyperparameters
1 parent b219482 commit ccfd4a6

File tree

2 files changed

+67
-1
lines changed

2 files changed

+67
-1
lines changed

hyperparams/crossq.yml

+66
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,38 @@
1+
MountainCarContinuous-v0:
2+
n_timesteps: !!float 50000
3+
policy: 'MlpPolicy'
4+
learning_rate: !!float 7e-4
5+
buffer_size: 50000
6+
train_freq: 32
7+
gradient_steps: 32
8+
gamma: 0.9999
9+
learning_starts: 100
10+
use_sde: True
11+
policy_delay: 2
12+
policy_kwargs: "dict(use_expln=True, log_std_init=-1, net_arch=[64, 64])"
13+
14+
Pendulum-v1:
15+
n_timesteps: 20000
16+
policy: 'MlpPolicy'
17+
policy_delay: 2
18+
policy_kwargs: "dict(net_arch=[256, 256])"
19+
20+
21+
LunarLanderContinuous-v2:
22+
n_timesteps: !!float 2e5
23+
policy: 'MlpPolicy'
24+
buffer_size: 1000000
25+
learning_starts: 10000
26+
27+
28+
BipedalWalker-v3:
29+
n_timesteps: !!float 2e5
30+
policy: 'MlpPolicy'
31+
buffer_size: 300000
32+
gamma: 0.98
33+
learning_starts: 10000
34+
policy_kwargs: "dict(net_arch=dict(pi=[256, 256], qf=[1024, 1024]))"
35+
1 | 36
# === Mujoco Envs ===
2 | 37

3 | 38
HalfCheetah-v4: &mujoco-defaults
@@ -23,3 +58,34 @@ Humanoid-v4:
23 | 58

24 | 59
HumanoidStandup-v4:
25 | 60
<<: *mujoco-defaults
61+
62+
Swimmer-v4:
63+
<<: *mujoco-defaults
64+
gamma: 0.999
65+
66+
# Tuned for SAC; these values still need to be checked with CrossQ
67+
HalfCheetahBulletEnv-v0: &pybullet-defaults
68+
n_timesteps: !!float 1e6
69+
policy: 'MlpPolicy'
70+
learning_rate: !!float 7.3e-4
71+
buffer_size: 300000
72+
batch_size: 256
73+
ent_coef: 'auto'
74+
gamma: 0.98
75+
train_freq: 8
76+
gradient_steps: 8
77+
learning_starts: 10000
78+
use_sde: True
79+
policy_kwargs: "dict(use_expln=True, log_std_init=-3)"
80+
81+
# Tuned (presumably for SAC, like the pybullet defaults this entry inherits; not yet verified with CrossQ)
82+
AntBulletEnv-v0:
83+
<<: *pybullet-defaults
84+
85+
HopperBulletEnv-v0:
86+
<<: *pybullet-defaults
87+
learning_rate: lin_7.3e-4
88+
89+
Walker2DBulletEnv-v0:
90+
<<: *pybullet-defaults
91+
learning_rate: lin_7.3e-4

0 commit comments

Comments
 (0)