From 325bd19365f5b2e1e15a7405f3e153da741ba06c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com>
Date: Wed, 30 Mar 2022 15:39:41 +0200
Subject: [PATCH 1/3] Add --n-models parser argument

---
 train.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/train.py b/train.py
index f38eba33a..8d36c65de 100644
--- a/train.py
+++ b/train.py
@@ -64,6 +64,7 @@
         type=int,
         default=10,
     )
+    parser.add_argument("--n-models", help="Number of models to train for each set of sampled hyperparameters.", type=int, default=1)
     parser.add_argument(
         "-optimize", "--optimize-hyperparameters", action="store_true", default=False, help="Run hyperparameters search"
     )

From cfd58579eae2a58044f328a95bd2968b6d68c1a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com>
Date: Sun, 17 Apr 2022 19:43:59 +0200
Subject: [PATCH 2/3] n_models in args and ExpManager attribute

---
 train.py             | 1 +
 utils/exp_manager.py | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/train.py b/train.py
index beddd60b4..93e6bd49c 100644
--- a/train.py
+++ b/train.py
@@ -202,6 +202,7 @@
         args.storage,
         args.study_name,
         args.n_trials,
+        args.n_models,
         args.n_jobs,
         args.sampler,
         args.pruner,
diff --git a/utils/exp_manager.py b/utils/exp_manager.py
index 1b6725831..1f0c9716f 100644
--- a/utils/exp_manager.py
+++ b/utils/exp_manager.py
@@ -73,6 +73,7 @@ def __init__(
         storage: Optional[str] = None,
         study_name: Optional[str] = None,
         n_trials: int = 1,
+        n_models: int = 1,
         n_jobs: int = 1,
         sampler: str = "tpe",
         pruner: str = "median",
@@ -133,6 +134,8 @@ def __init__(
         self.no_optim_plots = no_optim_plots
         # maximum number of trials for finding the best hyperparams
         self.n_trials = n_trials
+        # number of models trained per trial, the reported result is their median score
+        self.n_models = n_models
         # number of parallel jobs when doing hyperparameter search
         self.n_jobs = n_jobs
         self.sampler = sampler

From 521124e34dde9de3618fa721cfc550fa66d73329 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com>
Date: Sun, 1 May 2022 12:10:25 +0200
Subject: [PATCH 3/3] Multiple models without pruner

---
 utils/exp_manager.py | 113 ++++++++++++++++++++++++++----------------------
 1 file changed, 62 insertions(+), 51 deletions(-)

diff --git a/utils/exp_manager.py b/utils/exp_manager.py
index 5f25d830f..55f02433a 100644
--- a/utils/exp_manager.py
+++ b/utils/exp_manager.py
@@ -140,6 +140,7 @@ def __init__(
         self.n_jobs = n_jobs
         self.sampler = sampler
         self.pruner = pruner
+        assert not (self.n_models > 1 and self.pruner != "none"), "Pruner is not currently supported for multiple models"
         self.n_startup_trials = n_startup_trials
         self.n_evaluations = n_evaluations
         self.deterministic_eval = not self.is_atari(self.env_id)
@@ -652,15 +653,18 @@ def objective(self, trial: optuna.Trial) -> float:
         if self.verbose >= 2:
             trial_verbosity = self.verbose
 
-        model = ALGOS[self.algo](
-            env=env,
-            tensorboard_log=None,
-            # We do not seed the trial
-            seed=None,
-            verbose=trial_verbosity,
-            device=self.device,
-            **kwargs,
-        )
+        models = [
+            ALGOS[self.algo](
+                env=env,
+                tensorboard_log=None,
+                # We do not seed the trial
+                seed=None,
+                verbose=trial_verbosity if model_idx == 0 else 0,
+                device=self.device,
+                **kwargs,
+            )
+            for model_idx in range(self.n_models)
+        ]
 
         eval_env = self.create_envs(n_envs=self.n_eval_envs, eval_env=True)
 
@@ -671,51 +675,58 @@ def objective(self, trial: optuna.Trial) -> float:
         path = None
         if self.optimization_log_path is not None:
             path = os.path.join(self.optimization_log_path, f"trial_{str(trial.number)}")
-        callbacks = get_callback_list({"callback": self.specified_callbacks})
-        eval_callback = TrialEvalCallback(
-            eval_env,
-            trial,
-            best_model_save_path=path,
-            log_path=path,
-            n_eval_episodes=self.n_eval_episodes,
-            eval_freq=optuna_eval_freq,
-            deterministic=self.deterministic_eval,
-        )
-        callbacks.append(eval_callback)
-        learn_kwargs = {}
-        # Special case for ARS
-        if self.algo == "ars" and self.n_envs > 1:
-            learn_kwargs["async_eval"] = AsyncEval(
-                [lambda: self.create_envs(n_envs=1, no_log=True) for _ in range(self.n_envs)], model.policy
+        rewards = np.zeros(self.n_models)
+        for model_idx, model in enumerate(models):
+            callbacks = get_callback_list({"callback": self.specified_callbacks})
+            eval_callback = TrialEvalCallback(
+                eval_env,
+                trial,
+                best_model_save_path=path,
+                log_path=path,
+                n_eval_episodes=self.n_eval_episodes,
+                eval_freq=optuna_eval_freq,
+                deterministic=self.deterministic_eval,
             )
+            callbacks.append(eval_callback)
 
-        try:
-            model.learn(self.n_timesteps, callback=callbacks, **learn_kwargs)
-            # Free memory
-            model.env.close()
-            eval_env.close()
-        except (AssertionError, ValueError) as e:
-            # Sometimes, random hyperparams can generate NaN
-            # Free memory
-            model.env.close()
-            eval_env.close()
-            # Prune hyperparams that generate NaNs
-            print(e)
-            print("============")
-            print("Sampled hyperparams:")
-            pprint(sampled_hyperparams)
-            raise optuna.exceptions.TrialPruned()
-        is_pruned = eval_callback.is_pruned
-        reward = eval_callback.last_mean_reward
-
-        del model.env, eval_env
-        del model
-
-        if is_pruned:
-            raise optuna.exceptions.TrialPruned()
-
-        return reward
+            learn_kwargs = {}
+            # Special case for ARS
+            if self.algo == "ars" and self.n_envs > 1:
+                learn_kwargs["async_eval"] = AsyncEval(
+                    [lambda: self.create_envs(n_envs=1, no_log=True) for _ in range(self.n_envs)], model.policy
+                )
+
+            try:
+                model.learn(self.n_timesteps, callback=callbacks, **learn_kwargs)
+                # Free memory
+                model.env.close()
+            except (AssertionError, ValueError) as e:
+                # Sometimes, random hyperparams can generate NaN
+                # Free memory
+                model.env.close()
+                eval_env.close()
+                # Prune hyperparams that generate NaNs
+                print(e)
+                print("============")
+                print("Sampled hyperparams:")
+                pprint(sampled_hyperparams)
+                raise optuna.exceptions.TrialPruned()
+            is_pruned = eval_callback.is_pruned
+            rewards[model_idx] = eval_callback.last_mean_reward
+
+            del model.env
+            del model
+
+            if is_pruned:
+                eval_env.close()
+                del eval_env
+                raise optuna.exceptions.TrialPruned()
+
+        eval_env.close()
+        del eval_env
+
+        return np.median(rewards)
 
     def hyperparameters_optimization(self) -> None:
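
Usage note: with this series applied, a hyperparameter search can ask each Optuna trial to train several models on the same sampled hyperparameters and report the median of their final evaluation rewards. A sketch of such an invocation, assuming the --algo and --env flags of the surrounding train.py CLI (they are not touched by these patches); --pruner none matches the assertion added in PATCH 3/3:

    python train.py --algo ppo --env CartPole-v1 -optimize --n-trials 100 --n-models 3 --pruner none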
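
For context, the core pattern of PATCH 3/3 can be shown outside ExperimentManager as a minimal, self-contained Optuna sketch: one trial trains N_MODELS models with a single set of sampled hyperparameters and returns the median of their scores, so a single lucky or unlucky run does not decide the trial value. The noisy scoring function below is a purely illustrative stand-in for RL training; only the median-over-several-models structure mirrors the patch.

    import numpy as np
    import optuna

    N_MODELS = 3  # counterpart of the new --n-models argument


    def train_and_evaluate(learning_rate: float, rng: np.random.Generator) -> float:
        # Stand-in for training and evaluating one model: a noisy score
        # that peaks around learning_rate ~= 1e-3.
        return -((np.log10(learning_rate) + 3.0) ** 2) + rng.normal(scale=0.5)


    def objective(trial: optuna.Trial) -> float:
        learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
        rng = np.random.default_rng(trial.number)
        # Train several "models" with the same hyperparameters and report
        # the median, mirroring the patched objective.
        scores = [train_and_evaluate(learning_rate, rng) for _ in range(N_MODELS)]
        return float(np.median(scores))


    # Pruning stays disabled, matching the assertion added in PATCH 3/3.
    study = optuna.create_study(direction="maximize", pruner=optuna.pruners.NopPruner())
    study.optimize(objective, n_trials=20)
    print(study.best_params)

Using the median rather than the mean keeps one diverged run from dragging the trial value arbitrarily far, which is presumably the motivation for np.median in the patch.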