@@ -18,11 +18,11 @@ def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]:
18
18
batch_size = trial .suggest_categorical ("batch_size" , [8 , 16 , 32 , 64 , 128 , 256 , 512 ])
19
19
n_steps = trial .suggest_categorical ("n_steps" , [8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 ])
20
20
gamma = trial .suggest_categorical ("gamma" , [0.9 , 0.95 , 0.98 , 0.99 , 0.995 , 0.999 , 0.9999 ])
21
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
21
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
22
22
lr_schedule = "constant"
23
23
# Uncomment to enable learning rate schedule
24
24
# lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
25
- ent_coef = trial .suggest_loguniform ("ent_coef" , 0.00000001 , 0.1 )
25
+ ent_coef = trial .suggest_float ("ent_coef" , 0.00000001 , 0.1 , log = True )
26
26
clip_range = trial .suggest_categorical ("clip_range" , [0.1 , 0.2 , 0.3 , 0.4 ])
27
27
n_epochs = trial .suggest_categorical ("n_epochs" , [1 , 5 , 10 , 20 ])
28
28
gae_lambda = trial .suggest_categorical ("gae_lambda" , [0.8 , 0.9 , 0.92 , 0.95 , 0.98 , 0.99 , 1.0 ])
@@ -86,7 +86,7 @@ def sample_trpo_params(trial: optuna.Trial) -> Dict[str, Any]:
86
86
batch_size = trial .suggest_categorical ("batch_size" , [8 , 16 , 32 , 64 , 128 , 256 , 512 ])
87
87
n_steps = trial .suggest_categorical ("n_steps" , [8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 ])
88
88
gamma = trial .suggest_categorical ("gamma" , [0.9 , 0.95 , 0.98 , 0.99 , 0.995 , 0.999 , 0.9999 ])
89
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
89
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
90
90
lr_schedule = "constant"
91
91
# Uncomment to enable learning rate schedule
92
92
# lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
@@ -159,8 +159,8 @@ def sample_a2c_params(trial: optuna.Trial) -> Dict[str, Any]:
159
159
gae_lambda = trial .suggest_categorical ("gae_lambda" , [0.8 , 0.9 , 0.92 , 0.95 , 0.98 , 0.99 , 1.0 ])
160
160
n_steps = trial .suggest_categorical ("n_steps" , [8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 ])
161
161
lr_schedule = trial .suggest_categorical ("lr_schedule" , ["linear" , "constant" ])
162
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
163
- ent_coef = trial .suggest_loguniform ("ent_coef" , 0.00000001 , 0.1 )
162
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
163
+ ent_coef = trial .suggest_float ("ent_coef" , 0.00000001 , 0.1 , log = True )
164
164
vf_coef = trial .suggest_uniform ("vf_coef" , 0 , 1 )
165
165
# Uncomment for gSDE (continuous actions)
166
166
# log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
@@ -216,7 +216,7 @@ def sample_sac_params(trial: optuna.Trial) -> Dict[str, Any]:
216
216
:return:
217
217
"""
218
218
gamma = trial .suggest_categorical ("gamma" , [0.9 , 0.95 , 0.98 , 0.99 , 0.995 , 0.999 , 0.9999 ])
219
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
219
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
220
220
batch_size = trial .suggest_categorical ("batch_size" , [16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 ])
221
221
buffer_size = trial .suggest_categorical ("buffer_size" , [int (1e4 ), int (1e5 ), int (1e6 )])
222
222
learning_starts = trial .suggest_categorical ("learning_starts" , [0 , 1000 , 10000 , 20000 ])
@@ -277,7 +277,7 @@ def sample_td3_params(trial: optuna.Trial) -> Dict[str, Any]:
277
277
:return:
278
278
"""
279
279
gamma = trial .suggest_categorical ("gamma" , [0.9 , 0.95 , 0.98 , 0.99 , 0.995 , 0.999 , 0.9999 ])
280
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
280
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
281
281
batch_size = trial .suggest_categorical ("batch_size" , [16 , 32 , 64 , 100 , 128 , 256 , 512 , 1024 , 2048 ])
282
282
buffer_size = trial .suggest_categorical ("buffer_size" , [int (1e4 ), int (1e5 ), int (1e6 )])
283
283
# Polyak coeff
@@ -335,7 +335,7 @@ def sample_ddpg_params(trial: optuna.Trial) -> Dict[str, Any]:
335
335
:return:
336
336
"""
337
337
gamma = trial .suggest_categorical ("gamma" , [0.9 , 0.95 , 0.98 , 0.99 , 0.995 , 0.999 , 0.9999 ])
338
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
338
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
339
339
batch_size = trial .suggest_categorical ("batch_size" , [16 , 32 , 64 , 100 , 128 , 256 , 512 , 1024 , 2048 ])
340
340
buffer_size = trial .suggest_categorical ("buffer_size" , [int (1e4 ), int (1e5 ), int (1e6 )])
341
341
# Polyak coeff
@@ -391,7 +391,7 @@ def sample_dqn_params(trial: optuna.Trial) -> Dict[str, Any]:
391
391
:return:
392
392
"""
393
393
gamma = trial .suggest_categorical ("gamma" , [0.9 , 0.95 , 0.98 , 0.99 , 0.995 , 0.999 , 0.9999 ])
394
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
394
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
395
395
batch_size = trial .suggest_categorical ("batch_size" , [16 , 32 , 64 , 100 , 128 , 256 , 512 ])
396
396
buffer_size = trial .suggest_categorical ("buffer_size" , [int (1e4 ), int (5e4 ), int (1e5 ), int (1e6 )])
397
397
exploration_final_eps = trial .suggest_uniform ("exploration_final_eps" , 0 , 0.2 )
@@ -489,7 +489,7 @@ def sample_ars_params(trial: optuna.Trial) -> Dict[str, Any]:
489
489
# n_eval_episodes = trial.suggest_categorical("n_eval_episodes", [1, 2])
490
490
n_delta = trial .suggest_categorical ("n_delta" , [4 , 8 , 6 , 32 , 64 ])
491
491
# learning_rate = trial.suggest_categorical("learning_rate", [0.01, 0.02, 0.025, 0.03])
492
- learning_rate = trial .suggest_loguniform ("learning_rate" , 1e-5 , 1 )
492
+ learning_rate = trial .suggest_float ("learning_rate" , 1e-5 , 1 , log = True )
493
493
delta_std = trial .suggest_categorical ("delta_std" , [0.01 , 0.02 , 0.025 , 0.03 , 0.05 , 0.1 , 0.2 , 0.3 ])
494
494
top_frac_size = trial .suggest_categorical ("top_frac_size" , [0.1 , 0.2 , 0.3 , 0.5 , 0.8 , 0.9 , 1.0 ])
495
495
zero_policy = trial .suggest_categorical ("zero_policy" , [True , False ])
0 commit comments