env(zjow): fix evogym replay video problem (#527)
* fix video save

* fix video save

* fix gym repo

* Add carrier config

* minor change
zjowowen authored Oct 31, 2022
1 parent f5f219b commit 38241ff
Showing 8 changed files with 220 additions and 179 deletions.
14 changes: 7 additions & 7 deletions dizoo/evogym/config/bridgewalker_ddpg_config.py
@@ -6,17 +6,17 @@
         env_id='BridgeWalker-v0',
         robot='speed_bot',
         robot_dir='../envs',
-        collector_env_num=1,
-        evaluator_env_num=1,
-        n_evaluator_episode=1,
-        stop_value=1,
-        manager=dict(shared_memory=False, ),
+        collector_env_num=8,
+        evaluator_env_num=8,
+        n_evaluator_episode=8,
+        stop_value=10,
+        manager=dict(shared_memory=True, ),
         # The path to save the game replay
-        replay_path='./evogym_walker_ddpg_seed0/video',
+        # replay_path='./evogym_walker_ddpg_seed0/video',
     ),
     policy=dict(
         cuda=True,
-        load_path="./evogym_walker_ddpg_seed0/ckpt/ckpt_best.pth.tar",
+        # load_path="./evogym_walker_ddpg_seed0/ckpt/ckpt_best.pth.tar",
         random_collect_size=1000,
         model=dict(
             obs_shape=59,
65 changes: 65 additions & 0 deletions dizoo/evogym/config/carrier_ppo_config.py
@@ -0,0 +1,65 @@
from easydict import EasyDict

carry_ppo_config = dict(
    exp_name='evogym_carrier_ppo_seed1',
    env=dict(
        env_id='Carrier-v0',
        robot='carry_bot',
        robot_dir='./dizoo/evogym/envs',
        collector_env_num=8,
        evaluator_env_num=8,
        n_evaluator_episode=8,
        stop_value=10,
        manager=dict(shared_memory=True, ),
        # The path to save the game replay
        # replay_path='./evogym_carry_ppo_seed0/video',
    ),
    policy=dict(
        cuda=True,
        recompute_adv=True,
        # load_path="./evogym_carry_ppo_seed0/ckpt/ckpt_best.pth.tar",
        model=dict(
            obs_shape=70,
            action_shape=12,
            action_space='continuous',
        ),
        action_space='continuous',
        learn=dict(
            epoch_per_collect=10,
            batch_size=256,
            learning_rate=3e-3,
            value_weight=0.5,
            entropy_weight=0.01,
            clip_ratio=0.2,
            adv_norm=True,
            value_norm=True,
        ),
        collect=dict(
            n_sample=2048,
            gae_lambda=0.97,
        ),
        eval=dict(evaluator=dict(eval_freq=5000, )),
    )
)
carry_ppo_config = EasyDict(carry_ppo_config)
main_config = carry_ppo_config

carry_ppo_create_config = dict(
    env=dict(
        type='evogym',
        import_names=['dizoo.evogym.envs.evogym_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(
        type='ppo',
        import_names=['ding.policy.ppo'],
    ),
    replay_buffer=dict(type='naive', ),
)
carry_ppo_create_config = EasyDict(carry_ppo_create_config)
create_config = carry_ppo_create_config

if __name__ == "__main__":
    # or you can enter `ding -m serial -c evogym_carry_ppo_config.py -s 0 --env-step 1e7`
    from ding.entry import serial_pipeline_onpolicy
    serial_pipeline_onpolicy((main_config, create_config), seed=0)
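
The new config ships with `replay_path` and `load_path` commented out; they only matter when a trained checkpoint is evaluated and replay videos are recorded. A minimal sketch of re-enabling them at runtime, following the same pattern as the walker evaluation entry further below (both paths are illustrative placeholders, not part of this commit):

# Sketch: point the carrier config at a trained checkpoint and a video folder
# before handing it to an evaluation entry (see walker_ppo_eval.py below).
# Both paths are illustrative placeholders, not values from this commit.
from dizoo.evogym.config.carrier_ppo_config import main_config

main_config.policy.load_path = './evogym_carrier_ppo_seed1/ckpt/ckpt_best.pth.tar'
main_config.env.replay_path = './evogym_carrier_ppo_seed1/video'
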
16 changes: 8 additions & 8 deletions dizoo/evogym/config/walker_ddpg_config.py
@@ -5,18 +5,18 @@
     env=dict(
         env_id='Walker-v0',
         robot='speed_bot',
-        robot_dir='../envs',
-        collector_env_num=1,
-        evaluator_env_num=1,
-        n_evaluator_episode=1,
-        stop_value=-0.5,
-        manager=dict(shared_memory=False, ),
+        robot_dir='./dizoo/evogym/envs',
+        collector_env_num=8,
+        evaluator_env_num=8,
+        n_evaluator_episode=8,
+        stop_value=10,
+        manager=dict(shared_memory=True, ),
         # The path to save the game replay
-        replay_path='./evogym_walker_ddpg_seed0/video',
+        # replay_path='./evogym_walker_ddpg_seed0/video',
     ),
     policy=dict(
         cuda=True,
-        load_path="./evogym_walker_ddpg_seed0/ckpt/ckpt_best.pth.tar",
+        # load_path="./evogym_walker_ddpg_seed0/ckpt/ckpt_best.pth.tar",
         random_collect_size=1000,
         model=dict(
             obs_shape=58,
70 changes: 0 additions & 70 deletions dizoo/evogym/config/walker_ddpg_eval_config.py

This file was deleted.

65 changes: 65 additions & 0 deletions dizoo/evogym/config/walker_ppo_config.py
@@ -0,0 +1,65 @@
from easydict import EasyDict

walker_ppo_config = dict(
    exp_name='evogym_walker_ppo_seed0',
    env=dict(
        env_id='Walker-v0',
        robot='speed_bot',
        robot_dir='./dizoo/evogym/envs',
        collector_env_num=1,
        evaluator_env_num=1,
        n_evaluator_episode=1,
        stop_value=10,
        manager=dict(shared_memory=True, ),
        # The path to save the game replay
        # replay_path='./evogym_walker_ppo_seed0/video',
    ),
    policy=dict(
        cuda=True,
        recompute_adv=True,
        # load_path="./evogym_walker_ppo_seed0/ckpt/ckpt_best.pth.tar",
        model=dict(
            obs_shape=58,
            action_shape=10,
            action_space='continuous',
        ),
        action_space='continuous',
        learn=dict(
            epoch_per_collect=10,
            batch_size=256,
            learning_rate=3e-4,
            value_weight=0.5,
            entropy_weight=0.0,
            clip_ratio=0.2,
            adv_norm=True,
            value_norm=True,
        ),
        collect=dict(
            n_sample=2048,
            gae_lambda=0.97,
        ),
        eval=dict(evaluator=dict(eval_freq=5000, )),
    )
)
walker_ppo_config = EasyDict(walker_ppo_config)
main_config = walker_ppo_config

walker_ppo_create_config = dict(
    env=dict(
        type='evogym',
        import_names=['dizoo.evogym.envs.evogym_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(
        type='ppo',
        import_names=['ding.policy.ppo'],
    ),
    replay_buffer=dict(type='naive', ),
)
walker_ppo_create_config = EasyDict(walker_ppo_create_config)
create_config = walker_ppo_create_config

if __name__ == "__main__":
    # or you can enter `ding -m serial -c evogym_walker_ppo_config.py -s 0 --env-step 1e7`
    from ding.entry import serial_pipeline_onpolicy
    serial_pipeline_onpolicy((main_config, create_config), seed=0)
57 changes: 57 additions & 0 deletions dizoo/evogym/entry/walker_ppo_eval.py
@@ -0,0 +1,57 @@
import os
import gym
import torch
from tensorboardX import SummaryWriter
from easydict import EasyDict
from functools import partial

from ding.config import compile_config
from ding.worker import BaseLearner, SampleSerialCollector, InteractionSerialEvaluator, AdvancedReplayBuffer
from ding.envs import BaseEnvManager
from ding.envs import get_vec_env_setting, create_env_manager
from ding.policy import PPOPolicy
from ding.utils import set_pkg_seed

from dizoo.evogym.config.walker_ppo_config import main_config, create_config


def main(cfg, create_cfg, seed=0):
    cfg = compile_config(
        cfg,
        BaseEnvManager,
        PPOPolicy,
        BaseLearner,
        SampleSerialCollector,
        InteractionSerialEvaluator,
        AdvancedReplayBuffer,
        create_cfg=create_cfg,
        save_cfg=True
    )

    create_cfg.policy.type = create_cfg.policy.type + '_command'
    env_fn = None
    cfg = compile_config(cfg, seed=seed, env=env_fn, auto=True, create_cfg=create_cfg, save_cfg=True)
    # Create main components: env, policy
    env_fn, collector_env_cfg, evaluator_env_cfg = get_vec_env_setting(cfg.env)
    evaluator_env = create_env_manager(cfg.env.manager, [partial(env_fn, cfg=c) for c in evaluator_env_cfg])

    evaluator_env.enable_save_replay(cfg.env.replay_path)

    # Set random seed for all package and instance
    evaluator_env.seed(seed, dynamic_seed=False)
    set_pkg_seed(seed, use_cuda=cfg.policy.cuda)

    # Set up RL Policy
    policy = PPOPolicy(cfg.policy)
    policy.eval_mode.load_state_dict(torch.load(cfg.policy.load_path, map_location='cpu'))

    # evaluate
    tb_logger = SummaryWriter(os.path.join('./{}/log/'.format(cfg.exp_name), 'serial'))
    evaluator = InteractionSerialEvaluator(
        cfg.policy.eval.evaluator, evaluator_env, policy.eval_mode, tb_logger, exp_name=cfg.exp_name
    )
    evaluator.eval()


if __name__ == "__main__":
    main(main_config, create_config, seed=0)
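
This entry reads `cfg.policy.load_path` (the checkpoint to evaluate) and `cfg.env.replay_path` (where `enable_save_replay` writes the videos), and both are commented out in `walker_ppo_config.py`, so they must be set before `main` runs. A minimal usage sketch, with placeholder paths that are assumptions rather than part of the commit:

# Sketch: evaluate a trained walker policy and record replay videos.
# The checkpoint and video paths below are placeholders; point them at real
# artifacts produced by a prior training run.
from dizoo.evogym.config.walker_ppo_config import main_config, create_config
from dizoo.evogym.entry.walker_ppo_eval import main

main_config.policy.load_path = './evogym_walker_ppo_seed0/ckpt/ckpt_best.pth.tar'
main_config.env.replay_path = './evogym_walker_ppo_seed0/video'
main(main_config, create_config, seed=0)
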
29 changes: 18 additions & 11 deletions dizoo/evogym/envs/evogym_env.py
@@ -1,18 +1,19 @@
 from typing import Any, Union, List, Optional
+import os
+import time
 import copy
 import numpy as np
+from easydict import EasyDict
 import gym
+import evogym.envs
+from evogym import WorldObject, sample_robot
 from .viewer import DingEvoViewer
+from evogym.sim import EvoSim
-import os
-from easydict import EasyDict
 
 from ding.envs import BaseEnv, BaseEnvTimestep, FinalEvalRewardEnv
 from ding.envs.common.common_function import affine_transform
 from ding.torch_utils import to_ndarray, to_list
 from ding.utils import ENV_REGISTRY
 
-import evogym.envs
-from evogym import WorldObject, sample_robot
-from evogym.sim import EvoSim
 
 @ENV_REGISTRY.register('evogym')
 class EvoGymEnv(BaseEnv):
@@ -59,11 +60,17 @@ def reset(self) -> np.ndarray:
             self._env.seed(self._seed)
         if self._replay_path is not None:
             gym.logger.set_level(gym.logger.DEBUG)
-            # use our own 'viewer' to make 'render' compatible with gym
-            self._env.default_viewer = DingEvoViewer(EvoSim(self._env.world))
-            self._env.__class__.render = self._env.default_viewer.render
-            self._env.metadata['render.modes'] = 'rgb_array'  # make render mode compatible with gym
-            self._env = gym.wrappers.RecordVideo(self._env, './videos/' + str('time()') + '/')  # time()
+            # make render mode compatible with gym
+            if gym.version.VERSION > '0.22.0':
+                self._env.metadata.update({'render_modes': ["rgb_array"]})
+            else:
+                self._env.metadata.update({'render.modes': ["rgb_array"]})
+            self._env = gym.wrappers.RecordVideo(
+                self._env,
+                video_folder=self._replay_path,
+                episode_trigger=lambda episode_id: True,
+                name_prefix='rl-video-{}-{}'.format(id(self), time.time())
+            )
         obs = self._env.reset()
         obs = to_ndarray(obs).astype('float32')
         return obs
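
The key change is that frame capture is now delegated to `gym.wrappers.RecordVideo`: whenever `replay_path` is set, every evaluation episode is written to that folder (the `episode_trigger` always returns True), and the `name_prefix` built from `id(self)` and `time.time()` keeps files from different env instances from overwriting each other. A self-contained sketch of the same wrapper on a stock gym task; CartPole is only a stand-in here, and an older gym release (roughly 0.21-0.24, matching the pre-0.26 step/reset API used in this file) with moviepy installed is assumed:

# Standalone sketch of the RecordVideo pattern used above: one video file per
# episode, with a unique prefix so concurrent env instances do not collide.
# Assumes gym <= 0.24-ish (4-tuple step API) with moviepy installed;
# CartPole-v1 is just a placeholder environment.
import time
import gym

env = gym.make('CartPole-v1')
env = gym.wrappers.RecordVideo(
    env,
    video_folder='./videos',                  # plays the role of replay_path
    episode_trigger=lambda episode_id: True,  # record every episode
    name_prefix='rl-video-{}-{}'.format(id(env), time.time()),
)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()  # finalize and flush the recorded video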