-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patha2c.py
50 lines (44 loc) · 1.52 KB
/
a2c.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import ray
from ray import tune
from experiments import EXPERIMENT_DIR
from experiments.trainable import Loop
from pandemonium.implementations.a2c import create_horde
# Experiment label; passed as `name=` to `tune.run` below.
EXPERIMENT_NAME = 'AC'
# Timestep budget; used as the `timesteps_total` stopping threshold.
total_steps = 10 ** 5
if __name__ == "__main__":
    # Bring Ray up before handing the experiment over to Tune.
    ray.init(local_mode=False)

    # Stopping criterion given to Tune, keyed on `timesteps_total`.
    stop_criteria = {"timesteps_total": total_steps}

    # Configuration passed to the `Loop` trainable. Each `tune.grid_search`
    # entry lists a single value, so only one combination is specified;
    # add values to a list to sweep over that hyperparameter.
    trial_config = {
        "env": "MiniGrid-EmptyEnv6x6-ImgOnly-v0",
        'encoder': 'image',
        "rollout_fragment_length": 16,  # batch size for exp collector
        "policy_name": 'VPG',
        "policy_cfg": {'entropy_coefficient': tune.grid_search([0.01])},
        'gamma': tune.grid_search([0.9]),
        'trace_decay': tune.grid_search([0.5]),
        "horde_fn": create_horde,
        # --- Evaluation (currently disabled) ---
        # "evaluation_interval": 100,  # per training iteration
        # "custom_eval_function": eval_fn,
        # "evaluation_num_episodes": 1,
        # "evaluation_config": {
        #     "env_config": {},
        # },
        # HACK to get the evaluation through
        # "model": {
        #     'conv_filters': [
        #         [8, [2, 2], 1],
        #         [16, [2, 2], 1],
        #         [32, [2, 2], 1],
        #     ],
        #     'fcnet_hiddens': [256]
        # }
    }

    # Launch the experiment; the value returned by `tune.run` is kept in
    # `analysis` for later inspection.
    analysis = tune.run(
        Loop,
        name=EXPERIMENT_NAME,
        local_dir=EXPERIMENT_DIR,
        # num_samples=1,
        # checkpoint_freq=1000,  # in training iterations
        verbose=1,
        stop=stop_criteria,
        config=trial_config,
    )