Requesting guidance on training and testing in a Tetris environment #265 #267

Open · wants to merge 2 commits into base: main
58 changes: 58 additions & 0 deletions train_tetris.ipynb
@@ -0,0 +1,58 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    import botris\n",
    "except ImportError:\n",
    "    # botris-interface, the Tetris environment, is not installed yet\n",
    "    print(\"botris-interface was not found, attempting install...\")\n",
    "    !pip install botris-interface==0.1.21\n",
    "\n",
    "try:\n",
    "    import lzero, ding\n",
    "except ImportError:\n",
    "    # LightZero, the training repository, is not installed yet\n",
    "    print(\"LightZero was not found, attempting install from the current directory...\")\n",
    "    !pip install -e ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from zoo.botris.config.botris_efficientzero_config import main_config, create_config, max_env_step\n",
    "from lzero.entry import train_muzero\n",
    "\n",
    "train_muzero([main_config, create_config], seed=0, model_path=main_config.policy.model_path, max_env_step=max_env_step)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
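The first cell leans on IPython's `!pip` magic, which only works inside a notebook. A slightly more defensive variant for plain-Python contexts, sketched under the assumption that `importlib.util.find_spec` locating the `botris` module is equivalent to the `import botris` probe above:

import importlib.util
import subprocess
import sys

# Install botris-interface only if the botris package is genuinely absent.
if importlib.util.find_spec("botris") is None:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "botris-interface==0.1.21"])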
Empty file added zoo/botris/__init__.py
Empty file.
Empty file added zoo/botris/config/__init__.py
Empty file.
90 changes: 90 additions & 0 deletions zoo/botris/config/botris_5move_efficientzero_config.py
@@ -0,0 +1,90 @@
from easydict import EasyDict
from zoo.botris.envs.modals import OBSERVATION_SPACE_SIZE
from zoo.botris.envs.botris_5move_env import ACTION_SPACE_SIZE

# ==============================================================
# begin of the most frequently changed config specified by the user
# ==============================================================
env_id = 'botris-5move'
collector_env_num = 8
n_episode = 8
evaluator_env_num = 4
num_simulations = 50
update_per_collect = None
batch_size = 64
max_env_step = int(5e6)
reanalyze_ratio = 0.
replay_ratio = 0.25
max_episode_len = 500
# ==============================================================
# end of the most frequently changed config specified by the user
# ==============================================================
botris_efficientzero_config = dict(
    exp_name=f'data_ez/botris_5move_efficientzero_ns{num_simulations}_upc{update_per_collect}_rer{reanalyze_ratio}_seed0',
    env=dict(
        max_episode_steps=max_episode_len,
        env_id=env_id,
        obs_type='dict_encoded_board',
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
        n_evaluator_episode=evaluator_env_num,
        manager=dict(shared_memory=False, ),
        max_score=None,
    ),
    policy=dict(
        model=dict(
            observation_shape=OBSERVATION_SPACE_SIZE,
            action_space_size=ACTION_SPACE_SIZE,
            model_type='mlp',
            lstm_hidden_size=256,
            latent_state_dim=256,
            discrete_action_encoding_type='one_hot',
            norm_type='BN',
            self_supervised_learning_loss=True,
        ),
        # (str) The path of the pretrained model. If None, the model will be initialized by the default model.
        model_path=None,
        cuda=True,
        device='cuda',
        env_type='not_board_games',
        action_type='fixed_action_space',
        game_segment_length=50,
        update_per_collect=update_per_collect,
        batch_size=batch_size,
        optim_type='Adam',
        lr_piecewise_constant_decay=True,
        learning_rate=0.003,
        num_simulations=num_simulations,
        reanalyze_ratio=reanalyze_ratio,
        n_episode=n_episode,
        eval_freq=int(2e2),
        replay_buffer_size=int(1e6),  # the size/capacity of the replay buffer, in terms of transitions.
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
        target_update_freq=100,
        use_priority=False,
        ssl_loss_weight=2,
    ),
)

botris_efficientzero_config = EasyDict(botris_efficientzero_config)
main_config = botris_efficientzero_config

botris_efficientzero_create_config = dict(
    env=dict(
        type='botris-5move',
        import_names=['zoo.botris.envs.botris_5move_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(
        type='efficientzero',
        import_names=['lzero.policy.efficientzero'],
    ),
)
botris_efficientzero_create_config = EasyDict(botris_efficientzero_create_config)
create_config = botris_efficientzero_create_config

if __name__ == "__main__":
    from lzero.entry import train_muzero

    train_muzero([main_config, create_config], seed=0, model_path=main_config.policy.model_path, max_env_step=max_env_step)
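Training from this config can be followed by a quick evaluation. A minimal sketch, assuming LightZero's `eval_muzero` entry point accepts `model_path` and `num_episodes_each_seed` keywords; the checkpoint path is hypothetical and merely follows the run's `exp_name`:

from zoo.botris.config.botris_5move_efficientzero_config import main_config, create_config
from lzero.entry import eval_muzero

# Hypothetical checkpoint from a prior run of this config (path is illustrative).
ckpt = './data_ez/botris_5move_efficientzero_ns50_upcNone_rer0.0_seed0/ckpt/ckpt_best.pth.tar'
eval_muzero([main_config, create_config], seed=0, num_episodes_each_seed=5, model_path=ckpt)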
100 changes: 100 additions & 0 deletions zoo/botris/config/botris_alphazero_sp_mode_config.py
@@ -0,0 +1,100 @@
from easydict import EasyDict
from zoo.botris.envs.modals import ACTION_SPACE_SIZE, ENCODED_INPUT_SHAPE

# ==============================================================
# begin of the most frequently changed config specified by the user
# ==============================================================
collector_env_num = 32
n_episode = 32
evaluator_env_num = 5
num_simulations = 50
update_per_collect = 50
batch_size = 256
max_env_step = int(5e5)
mcts_ctree = True
# ==============================================================
# end of the most frequently changed config specified by the user
# ==============================================================
botris_alphazero_config = dict(
    exp_name=f'data_az_ctree/botris_alphazero_sp-mode_ns{num_simulations}_upc{update_per_collect}_seed0',
    env=dict(
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
        n_evaluator_episode=evaluator_env_num,
        manager=dict(shared_memory=False, ),
        # ==============================================================
        # for the creation of simulation env
        render_mode=None,
        replay_path=None,
        alphazero_mcts_ctree=mcts_ctree,
        # ==============================================================
    ),
    policy=dict(
        mcts_ctree=mcts_ctree,
        # ==============================================================
        # for the creation of simulation env
        simulation_env_id='botris-versus',
        simulation_env_config_type='self_play',
        # ==============================================================
        torch_compile=False,
        tensor_float_32=False,
        model=dict(
            observation_shape=ENCODED_INPUT_SHAPE,
            action_space_size=ACTION_SPACE_SIZE,
        ),
        cuda=True,
        update_per_collect=update_per_collect,
        batch_size=batch_size,
        optim_type='Adam',
        lr_piecewise_constant_decay=False,
        learning_rate=0.003,
        manual_temperature_decay=True,
        grad_clip_value=0.5,
        value_weight=1.0,
        entropy_weight=0.0,
        n_episode=n_episode,
        eval_freq=int(2e3),
        mcts=dict(num_simulations=num_simulations),
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
    ),
)

botris_alphazero_config = EasyDict(botris_alphazero_config)
main_config = botris_alphazero_config

botris_alphazero_create_config = dict(
    env=dict(
        type='botris-versus',
        import_names=['zoo.botris.envs.botris_versus_lightzero_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(
        type='alphazero',
        import_names=['lzero.policy.alphazero'],
    ),
    collector=dict(
        type='episode_alphazero',
        import_names=['lzero.worker.alphazero_collector'],
    ),
    evaluator=dict(
        type='alphazero',
        import_names=['lzero.worker.alphazero_evaluator'],
    ),
)
botris_alphazero_create_config = EasyDict(botris_alphazero_create_config)
create_config = botris_alphazero_create_config

if __name__ == '__main__':
    if main_config.policy.tensor_float_32:
        import torch

        # The flag below controls whether to allow TF32 on matmul. This flag defaults to False
        # in PyTorch 1.12 and later.
        torch.backends.cuda.matmul.allow_tf32 = True
        # The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True.
        torch.backends.cudnn.allow_tf32 = True

    from lzero.entry import train_alphazero
    train_alphazero([main_config, create_config], seed=0, max_env_step=max_env_step)
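The TF32 flags in the `__main__` guard only take effect when `tensor_float_32` is flipped on. A launch sketch with that override applied from user code, mirroring the guard above:

from zoo.botris.config.botris_alphazero_sp_mode_config import main_config, create_config, max_env_step
from lzero.entry import train_alphazero
import torch

main_config.policy.tensor_float_32 = True
# Allow TF32 on matmul (off by default since PyTorch 1.12) and on cuDNN (on by default).
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

train_alphazero([main_config, create_config], seed=0, max_env_step=max_env_step)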
89 changes: 89 additions & 0 deletions zoo/botris/config/botris_efficientzero_config.py
@@ -0,0 +1,89 @@
from easydict import EasyDict
from zoo.botris.envs.modals import ACTION_SPACE_SIZE, OBSERVATION_SPACE_SIZE

# ==============================================================
# begin of the most frequently changed config specified by the user
# ==============================================================
env_id = 'botris'
collector_env_num = 8
n_episode = 8
evaluator_env_num = 4
num_simulations = 50
update_per_collect = None
batch_size = 256
max_env_step = int(5e7)
reanalyze_ratio = 0.
replay_ratio = 0.25
# ==============================================================
# end of the most frequently changed config specified by the user
# ==============================================================

botris_efficientzero_config = dict(
    exp_name=f'data_ez/botris_efficientzero_ns{num_simulations}_upc{update_per_collect}_rer{reanalyze_ratio}_seed0',
    env=dict(
        # note: this caps a single episode at the full training budget, effectively leaving episodes uncapped.
        max_episode_steps=max_env_step,
        env_id=env_id,
        obs_type='dict_encoded_board',
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
        n_evaluator_episode=evaluator_env_num,
        manager=dict(shared_memory=False, ),
        max_score=None,
    ),
    policy=dict(
        model=dict(
            observation_shape=OBSERVATION_SPACE_SIZE,
            action_space_size=ACTION_SPACE_SIZE,
            model_type='mlp',
            lstm_hidden_size=256,
            latent_state_dim=256,
            discrete_action_encoding_type='one_hot',
            norm_type='BN',
            self_supervised_learning_loss=True,
        ),
        # (str) The path of the pretrained model. If None, the model will be initialized by the default model.
        model_path=None,
        cuda=True,
        device='cuda',
        env_type='not_board_games',
        action_type='varied_action_space',
        game_segment_length=50,
        update_per_collect=update_per_collect,
        batch_size=batch_size,
        optim_type='Adam',
        lr_piecewise_constant_decay=True,
        learning_rate=0.003,
        num_simulations=num_simulations,
        reanalyze_ratio=reanalyze_ratio,
        n_episode=n_episode,
        eval_freq=int(2e2),
        replay_buffer_size=int(1e6),  # the size/capacity of the replay buffer, in terms of transitions.
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
        target_update_freq=100,
        use_priority=False,
        ssl_loss_weight=2,
    ),
)

botris_efficientzero_config = EasyDict(botris_efficientzero_config)
main_config = botris_efficientzero_config

botris_efficientzero_create_config = dict(
    env=dict(
        type='botris',
        import_names=['zoo.botris.envs.botris_lightzero_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(
        type='efficientzero',
        import_names=['lzero.policy.efficientzero'],
    ),
)
botris_efficientzero_create_config = EasyDict(botris_efficientzero_create_config)
create_config = botris_efficientzero_create_config

if __name__ == "__main__":
    from lzero.entry import train_muzero

    train_muzero([main_config, create_config], seed=0, model_path=main_config.policy.model_path, max_env_step=max_env_step)
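Since `model_path` is threaded through to `train_muzero`, resuming from an earlier run only requires pointing it at a saved checkpoint before the entry call. A sketch with a hypothetical path (DI-engine convention places checkpoints under `<exp_name>/ckpt/`):

from zoo.botris.config.botris_efficientzero_config import main_config, create_config, max_env_step
from lzero.entry import train_muzero

# Hypothetical checkpoint path from an earlier run of this config.
main_config.policy.model_path = './data_ez/botris_efficientzero_ns50_upcNone_rer0.0_seed0/ckpt/ckpt_best.pth.tar'
train_muzero([main_config, create_config], seed=0, model_path=main_config.policy.model_path, max_env_step=max_env_step)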