ppo_default_config_597d2de3.txt

"observation_space": null,
      "action_space": null,
      "env_task_fn": null,
      "render_env": false,
      "clip_rewards": null,
      "normalize_actions": true,
      "clip_actions": false,
      "disable_env_checking": false,
      "_is_atari": null,
      "auto_wrap_old_gym_envs": true,
      "action_mask_key": "action_mask",
      "env_runner_cls": null,
      "num_envs_per_worker": 1,
      "sample_collector": {
        "_type": "CLOUDPICKLE_FALLBACK",
        "value": "80059551000000000000008c357261792e726c6c69622e6576616c756174696f6e2e636f6c6c6563746f72732e73696d706c655f6c6973745f636f6c6c6563746f72948c1353696d706c654c697374436f6c6c6563746f729493942e"
      },
      "sample_async": false,
      "enable_connectors": true,
      "update_worker_filter_stats": true,
      "use_worker_filter_stats": true,
      "rollout_fragment_length": "auto",
      "batch_mode": "truncate_episodes",
      "remote_worker_envs": false,
      "remote_env_batch_wait_ms": 0,
      "validate_workers_after_construction": true,
      "preprocessor_pref": "deepmind",
      "observation_filter": "NoFilter",
      "compress_observations": false,
      "enable_tf1_exec_eagerly": false,
      "sampler_perf_stats_ema_coef": null,
      "gamma": 1,
      "lr": 5e-06,
      "grad_clip": null,
      "grad_clip_by": "global_norm",
      "train_batch_size": 2048,
      "model": {
        "_disable_preprocessor_api": false,
        "_disable_action_flattening": false,
        "fcnet_hiddens": [
          256,
          256
        ],
        "fcnet_activation": "tanh",
        "conv_filters": null,
        "conv_activation": "relu",
        "post_fcnet_hiddens": [],
        "post_fcnet_activation": "relu",
        "free_log_std": false,
        "no_final_linear": false,
        "vf_share_layers": false,
        "use_lstm": false,
        "max_seq_len": 20,
        "lstm_cell_size": 256,
        "lstm_use_prev_action": false,
        "lstm_use_prev_reward": false,
        "_time_major": false,
        "use_attention": false,
        "attention_num_transformer_units": 1,
        "attention_dim": 64,
        "attention_num_heads": 1,
        "attention_head_dim": 32,
        "attention_memory_inference": 50,
        "attention_memory_training": 50,
        "attention_position_wise_mlp_dim": 32,
        "attention_init_gru_gate_bias": 2.0,
        "attention_use_n_prev_actions": 0,
        "attention_use_n_prev_rewards": 0,
        "framestack": true,
        "dim": 84,
        "grayscale": false,
        "zero_mean": true,
        "custom_model": null,
        "custom_model_config": {},
        "custom_action_dist": null,
        "custom_preprocessor": null,
        "encoder_latent_dim": null,
        "always_check_shapes": false,
        "lstm_use_prev_action_reward": -1,
        "_use_default_native_models": -1
      },
      "optimizer": {},
      "max_requests_in_flight_per_sampler_worker": 2,
      "_learner_class": null,
      "_enable_learner_api": true,
      "explore": true,
      "exploration_config": {},
      "algorithm_config_overrides_per_module": {},
      "policy_map_capacity": 100,
      "policy_mapping_fn": {
        "_type": "CLOUDPICKLE_FALLBACK",
        "value": "80059557000000000000008c257261792e726c6c69622e616c676f726974686d732e616c676f726974686d5f636f6e666967948c29416c676f726974686d436f6e6669672e44454641554c545f504f4c4943595f4d415050494e475f464e9493942e"
      },
      "policies_to_train": null,
      "policy_states_are_swappable": false,
      "observation_fn": null,
      "count_steps_by": "env_steps",
      "input_config": {},
      "actions_in_input_normalized": false,
      "postprocess_inputs": false,
      "shuffle_buffer_size": 0,
      "output": null,
      "output_config": {},
      "output_compress_columns": [
        "obs",
        "new_obs"
      ],
      "output_max_file_size": 67108864,
      "offline_sampling": false,
      "evaluation_interval": null,
      "evaluation_duration": 10,
      "evaluation_duration_unit": "episodes",
      "evaluation_sample_timeout_s": 180.0,
      "evaluation_parallel_to_training": false,
      "evaluation_config": null,
      "off_policy_estimation_methods": {},
      "ope_split_batch_by_episode": true,
      "evaluation_num_workers": 0,
      "always_attach_evaluation_results": false,
      "enable_async_evaluation": false,
      "in_evaluation": false,
      "sync_filters_on_rollout_workers_timeout_s": 60.0,
      "keep_per_episode_custom_metrics": false,
      "metrics_episode_collection_timeout_s": 60.0,
      "metrics_num_episodes_for_smoothing": 100,
      "min_time_s_per_iteration": null,
      "min_train_timesteps_per_iteration": 0,
      "min_sample_timesteps_per_iteration": 0,
      "export_native_model_files": false,
      "checkpoint_trainable_policies_only": false,
      "logger_creator": null,
      "logger_config": null,
      "log_level": "WARN",
      "log_sys_usage": true,
      "fake_sampler": false,
      "seed": null,
      "ignore_worker_failures": false,
      "recreate_failed_workers": false,
      "max_num_worker_restarts": 1000,
      "delay_between_worker_restarts_s": 60.0,
      "restart_failed_sub_environments": false,
      "num_consecutive_worker_failures_tolerance": 100,
      "worker_health_probe_timeout_s": 60,
      "worker_restore_timeout_s": 1800,
      "rl_module_spec": {
        "_type": "CLOUDPICKLE_FALLBACK",
        "value": "80059536010000000000008c227261792e726c6c69622e636f72652e726c5f6d6f64756c652e726c5f6d6f64756c65948c1753696e676c654167656e74524c4d6f64756c65537065639493942981947d94288c0c6d6f64756c655f636c617373948c327261792e726c6c69622e616c676f726974686d732e70706f2e746f7263682e70706f5f746f7263685f726c5f6d6f64756c65948c1050504f546f726368524c4d6f64756c659493948c116f62736572766174696f6e5f7370616365944e8c0c616374696f6e5f7370616365944e8c116d6f64656c5f636f6e6669675f64696374944e8c0d636174616c6f675f636c617373948c247261792e726c6c69622e616c676f726974686d732e70706f2e70706f5f636174616c6f67948c0a50504f436174616c6f679493948c0f6c6f61645f73746174655f70617468944e75622e"
      },
      "_enable_rl_module_api": true,
      "_AlgorithmConfig__prior_exploration_config": {
        "type": "StochasticSampling"
      },
      "_tf_policy_handles_more_than_one_loss": false,
      "_disable_preprocessor_api": false,
      "_disable_action_flattening": false,
      "_disable_execution_plan_api": true,
      "_disable_initialize_loss_from_dummy_batch": false,
      "simple_optimizer": false,
      "policy_map_cache": -1,
      "worker_cls": -1,
      "synchronize_filters": -1,
      "replay_sequence_length": null,
      "lr_schedule": null,
      "use_critic": true,
      "use_gae": true,
      "use_kl_loss": true,
      "kl_coeff": 0.2,
      "kl_target": 0.01,
      "sgd_minibatch_size": 128,
      "num_sgd_iter": 30,
      "shuffle_sequences": true,
      "vf_loss_coeff": 1.0,
      "entropy_coeff": 0.0,
      "entropy_coeff_schedule": null,
      "clip_param": 0.3,
      "vf_clip_param": 10.0,
      "vf_share_layers": -1,
      "__stdout_file__": null,
      "__stderr_file__": null,
      "lambda": 1.0,
      "input": "sampler",
      "policies": {
        "default_policy": [
          null,
          null,
          null,
          null
        ]
      },