Bug Description
When training Unitree Go2 on terrain, the value of base_lin_vel sometimes becomes abnormally large.
It tends to occur especially when the robot falls over or tilts, and since it did not happen in previous versions, I suspect that recent terrain-related changes introduced the issue.
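For reference, a small check like the sketch below (not part of the attached scripts; the 50 m/s threshold and the helper name are arbitrary assumptions) can be called from Go2Env.step() right after self.base_lin_vel is updated, to flag the environments where the value explodes:

import torch

def check_base_lin_vel(base_lin_vel: torch.Tensor, max_speed: float = 50.0):
    # Flag environments whose base linear velocity looks non-physical.
    # max_speed = 50 m/s is an arbitrary threshold, far above any plausible Go2 speed.
    speed = base_lin_vel.norm(dim=-1)  # (num_envs,)
    bad = (speed > max_speed).nonzero(as_tuple=False).reshape((-1,))
    if len(bad) > 0:
        print(f"abnormal base_lin_vel in envs {bad.tolist()}: {speed[bad].tolist()}")
    return bad

# usage inside step(), after self.base_lin_vel is updated:
# check_base_lin_vel(self.base_lin_vel)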
Steps to Reproduce
Running the scripts below (go2_train.py and go2_env.py) reproduces the error.
I simply:
- changed the termination threshold from 10 to 150 degrees
- replaced the plane with a terrain
- increased the number of visualized robots
- added arrows to visualize each robot's base_lin_vel and the given commands
The issue occurs regardless of the number of visualized robots and with or without the arrows.
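The relevant edits relative to the stock Genesis Go2 example are condensed below as a quick-reference sketch (the full scripts follow):

# go2_train.py: relax the termination thresholds (stock example uses 10 degrees)
"termination_if_roll_greater_than": 150,  # degree
"termination_if_pitch_greater_than": 150,

# go2_env.py: replace the flat plane with a Terrain morph (height_field is all zeros here)
# self.scene.add_entity(gs.morphs.URDF(file="urdf/plane/plane.urdf", fixed=True))
self.scene.add_entity(
    morph=gs.morphs.Terrain(
        pos=(-5.0, -5.0, 0.0),
        horizontal_scale=0.25,
        vertical_scale=0.005,
        height_field=np.zeros([40, 40]),
        name="example",
    ),
)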
----------------------------- #go2_train.py ----------------------------------
import argparse
import os
import pickle
import shutil
from importlib import metadata
try:
try:
if metadata.version("rsl-rl"):
raise ImportError
except metadata.PackageNotFoundError:
if metadata.version("rsl-rl-lib") != "2.2.4":
raise ImportError
except (metadata.PackageNotFoundError, ImportError) as e:
raise ImportError("Please uninstall 'rsl_rl' and install 'rsl-rl-lib==2.2.4'.") from e
from rsl_rl.runners import OnPolicyRunner
import genesis as gs
from go2_env import Go2Env
def get_train_cfg(exp_name, max_iterations):
train_cfg_dict = {
"algorithm": {
"class_name": "PPO",
"clip_param": 0.2,
"desired_kl": 0.01,
"entropy_coef": 0.01,
"gamma": 0.99,
"lam": 0.95,
"learning_rate": 0.001,
"max_grad_norm": 1.0,
"num_learning_epochs": 5,
"num_mini_batches": 4,
"schedule": "adaptive",
"use_clipped_value_loss": True,
"value_loss_coef": 1.0,
},
"init_member_classes": {},
"policy": {
"activation": "elu",
"actor_hidden_dims": [512, 256, 128],
"critic_hidden_dims": [512, 256, 128],
"init_noise_std": 1.0,
"class_name": "ActorCritic",
},
"runner": {
"checkpoint": -1,
"experiment_name": exp_name,
"load_run": -1,
"log_interval": 1,
"max_iterations": max_iterations,
"record_interval": -1,
"resume": False,
"resume_path": None,
"run_name": "",
},
"runner_class_name": "OnPolicyRunner",
"num_steps_per_env": 24,
"save_interval": 100,
"empirical_normalization": None,
"seed": 1,
}
return train_cfg_dict
def get_cfgs():
env_cfg = {
"num_actions": 12,
# joint/link names
"default_joint_angles": { # [rad]
"FL_hip_joint": 0.0,
"FR_hip_joint": 0.0,
"RL_hip_joint": 0.0,
"RR_hip_joint": 0.0,
"FL_thigh_joint": 0.8,
"FR_thigh_joint": 0.8,
"RL_thigh_joint": 1.0,
"RR_thigh_joint": 1.0,
"FL_calf_joint": -1.5,
"FR_calf_joint": -1.5,
"RL_calf_joint": -1.5,
"RR_calf_joint": -1.5,
},
"joint_names": [
"FR_hip_joint",
"FR_thigh_joint",
"FR_calf_joint",
"FL_hip_joint",
"FL_thigh_joint",
"FL_calf_joint",
"RR_hip_joint",
"RR_thigh_joint",
"RR_calf_joint",
"RL_hip_joint",
"RL_thigh_joint",
"RL_calf_joint",
],
# PD
"kp": 20.0,
"kd": 0.5,
# termination
"termination_if_roll_greater_than": 150, # degree
"termination_if_pitch_greater_than": 150,
# base pose
"base_init_pos": [0.0, 0.0, 0.42],
"base_init_quat": [1.0, 0.0, 0.0, 0.0],
"episode_length_s": 20.0,
"resampling_time_s": 4.0,
"action_scale": 0.25,
"simulate_action_latency": True,
"clip_actions": 100.0,
}
obs_cfg = {
"num_obs": 45,
"obs_scales": {
"lin_vel": 2.0,
"ang_vel": 0.25,
"dof_pos": 1.0,
"dof_vel": 0.05,
},
}
reward_cfg = {
"tracking_sigma": 0.25,
"base_height_target": 0.3,
"feet_height_target": 0.075,
"reward_scales": {
"tracking_lin_vel": 1.0,
"tracking_ang_vel": 0.2,
"lin_vel_z": -1.0,
"base_height": -50.0,
"action_rate": -0.005,
"similar_to_default": -0.1,
},
}
command_cfg = {
"num_commands": 3,
"lin_vel_x_range": [0.5, 0.5],
"lin_vel_y_range": [0, 0],
"ang_vel_range": [0, 0],
}
return env_cfg, obs_cfg, reward_cfg, command_cfg
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--exp_name", type=str, default="go2-walking")
parser.add_argument("-B", "--num_envs", type=int, default=200)
parser.add_argument("--max_iterations", type=int, default=1000)
args = parser.parse_args()
gs.init(logging_level="warning")
log_dir = f"logs/{args.exp_name}"
env_cfg, obs_cfg, reward_cfg, command_cfg = get_cfgs()
train_cfg = get_train_cfg(args.exp_name, args.max_iterations)
if os.path.exists(log_dir):
shutil.rmtree(log_dir)
os.makedirs(log_dir, exist_ok=True)
pickle.dump(
[env_cfg, obs_cfg, reward_cfg, command_cfg, train_cfg],
open(f"{log_dir}/cfgs.pkl", "wb"),
)
env = Go2Env(
num_envs=args.num_envs, env_cfg=env_cfg, obs_cfg=obs_cfg, reward_cfg=reward_cfg, command_cfg=command_cfg, show_viewer=True
)
runner = OnPolicyRunner(env, train_cfg, log_dir, device=gs.device)
runner.learn(num_learning_iterations=args.max_iterations, init_at_random_ep_len=True)
if __name__ == "__main__":
main()
"""
# training
python examples/locomotion/go2_train.py
"""
----------------------- #go2_env.py -----------------------------------------------
import torch
import math
import genesis as gs
from genesis.utils.geom import quat_to_xyz, transform_by_quat, inv_quat, transform_quat_by_quat
import numpy as np
def gs_rand_float(lower, upper, shape, device):
return (upper - lower) * torch.rand(size=shape, device=device) + lower
class Go2Env:
def __init__(self, num_envs, env_cfg, obs_cfg, reward_cfg, command_cfg, show_viewer=False):
self.num_envs = num_envs
self.num_obs = obs_cfg["num_obs"]
self.num_privileged_obs = None
self.num_actions = env_cfg["num_actions"]
self.num_commands = command_cfg["num_commands"]
self.device = gs.device
self.simulate_action_latency = True # there is a 1 step latency on real robot
self.dt = 0.02 # control frequency on real robot is 50hz
self.max_episode_length = math.ceil(env_cfg["episode_length_s"] / self.dt)
self.env_cfg = env_cfg
self.obs_cfg = obs_cfg
self.reward_cfg = reward_cfg
self.command_cfg = command_cfg
self.obs_scales = obs_cfg["obs_scales"]
self.reward_scales = reward_cfg["reward_scales"]
visualized_number = 3
self.rendered_envs_idx = list(range(visualized_number))
# create scene
self.scene = gs.Scene(
sim_options=gs.options.SimOptions(dt=self.dt, substeps=2),
viewer_options=gs.options.ViewerOptions(
max_FPS=int(0.5 / self.dt),
camera_pos=(2.0, 0.0, 2.5),
camera_lookat=(0.0, 0.0, 0.5),
camera_fov=40,
),
vis_options=gs.options.VisOptions(rendered_envs_idx=self.rendered_envs_idx),
rigid_options=gs.options.RigidOptions(
dt=self.dt,
constraint_solver=gs.constraint_solver.Newton,
enable_collision=True,
enable_joint_limit=True,
),
show_viewer=show_viewer,
)
# add terrain (plane entity is commented out)
# self.scene.add_entity(gs.morphs.URDF(file="urdf/plane/plane.urdf", fixed=True))
horizontal_scale = 0.25
vertical_scale = 0.005
height_field = np.zeros([40, 40])
heights_range = np.arange(-10, 20, 10)
# height_field[5:35, 5:35] = np.random.choice(heights_range, (30, 30))
self.scene.add_entity(
    morph=gs.morphs.Terrain(
pos=(-5.0, -5.0, 0.0),
horizontal_scale=horizontal_scale,
vertical_scale=vertical_scale,
height_field=height_field,
name="example",
# from_stored=True,
),
)
# add robot
self.base_init_pos = torch.tensor(self.env_cfg["base_init_pos"], device=gs.device)
self.base_init_quat = torch.tensor(self.env_cfg["base_init_quat"], device=gs.device)
self.inv_base_init_quat = inv_quat(self.base_init_quat)
self.robot = self.scene.add_entity(
gs.morphs.URDF(
file="urdf/go2/urdf/go2.urdf",
pos=self.base_init_pos.cpu().numpy(),
quat=self.base_init_quat.cpu().numpy(),
),
)
# build
self.scene.build(n_envs=num_envs)
# names to indices
self.motors_dof_idx = [self.robot.get_joint(name).dof_start for name in self.env_cfg["joint_names"]]
# PD control parameters
self.robot.set_dofs_kp([self.env_cfg["kp"]] * self.num_actions, self.motors_dof_idx)
self.robot.set_dofs_kv([self.env_cfg["kd"]] * self.num_actions, self.motors_dof_idx)
# prepare reward functions and multiply reward scales by dt
self.reward_functions, self.episode_sums = dict(), dict()
for name in self.reward_scales.keys():
self.reward_scales[name] *= self.dt
self.reward_functions[name] = getattr(self, "_reward_" + name)
self.episode_sums[name] = torch.zeros((self.num_envs,), device=gs.device, dtype=gs.tc_float)
# initialize buffers
self.base_lin_vel = torch.zeros((self.num_envs, 3), device=gs.device, dtype=gs.tc_float)
self.base_ang_vel = torch.zeros((self.num_envs, 3), device=gs.device, dtype=gs.tc_float)
self.projected_gravity = torch.zeros((self.num_envs, 3), device=gs.device, dtype=gs.tc_float)
self.global_gravity = torch.tensor([0.0, 0.0, -1.0], device=gs.device, dtype=gs.tc_float).repeat(
self.num_envs, 1
)
self.obs_buf = torch.zeros((self.num_envs, self.num_obs), device=gs.device, dtype=gs.tc_float)
self.rew_buf = torch.zeros((self.num_envs,), device=gs.device, dtype=gs.tc_float)
self.reset_buf = torch.ones((self.num_envs,), device=gs.device, dtype=gs.tc_int)
self.episode_length_buf = torch.zeros((self.num_envs,), device=gs.device, dtype=gs.tc_int)
self.commands = torch.zeros((self.num_envs, self.num_commands), device=gs.device, dtype=gs.tc_float)
self.commands_scale = torch.tensor(
[self.obs_scales["lin_vel"], self.obs_scales["lin_vel"], self.obs_scales["ang_vel"]],
device=gs.device,
dtype=gs.tc_float,
)
self.actions = torch.zeros((self.num_envs, self.num_actions), device=gs.device, dtype=gs.tc_float)
self.last_actions = torch.zeros_like(self.actions)
self.dof_pos = torch.zeros_like(self.actions)
self.dof_vel = torch.zeros_like(self.actions)
self.last_dof_vel = torch.zeros_like(self.actions)
self.base_pos = torch.zeros((self.num_envs, 3), device=gs.device, dtype=gs.tc_float)
self.base_quat = torch.zeros((self.num_envs, 4), device=gs.device, dtype=gs.tc_float)
self.default_dof_pos = torch.tensor(
[self.env_cfg["default_joint_angles"][name] for name in self.env_cfg["joint_names"]],
device=gs.device,
dtype=gs.tc_float,
)
self.extras = dict() # extra information for logging
self.extras["observations"] = dict()
def _resample_commands(self, envs_idx):
self.commands[envs_idx, 0] = gs_rand_float(*self.command_cfg["lin_vel_x_range"], (len(envs_idx),), gs.device)
self.commands[envs_idx, 1] = gs_rand_float(*self.command_cfg["lin_vel_y_range"], (len(envs_idx),), gs.device)
self.commands[envs_idx, 2] = gs_rand_float(*self.command_cfg["ang_vel_range"], (len(envs_idx),), gs.device)
def step(self, actions):
self.actions = torch.clip(actions, -self.env_cfg["clip_actions"], self.env_cfg["clip_actions"])
exec_actions = self.last_actions if self.simulate_action_latency else self.actions
target_dof_pos = exec_actions * self.env_cfg["action_scale"] + self.default_dof_pos
self.robot.control_dofs_position(target_dof_pos, self.motors_dof_idx)
self.scene.step()
self._draw_debug_vis()
# update buffers
self.episode_length_buf += 1
self.base_pos[:] = self.robot.get_pos()
self.base_quat[:] = self.robot.get_quat()
self.base_euler = quat_to_xyz(
transform_quat_by_quat(torch.ones_like(self.base_quat) * self.inv_base_init_quat, self.base_quat),
rpy=True,
degrees=True,
)
inv_base_quat = inv_quat(self.base_quat)
self.base_lin_vel[:] = transform_by_quat(self.robot.get_vel(), inv_base_quat)
self.base_ang_vel[:] = transform_by_quat(self.robot.get_ang(), inv_base_quat)
self.projected_gravity = transform_by_quat(self.global_gravity, inv_base_quat)
self.dof_pos[:] = self.robot.get_dofs_position(self.motors_dof_idx)
self.dof_vel[:] = self.robot.get_dofs_velocity(self.motors_dof_idx)
# resample commands
envs_idx = (
(self.episode_length_buf % int(self.env_cfg["resampling_time_s"] / self.dt) == 0)
.nonzero(as_tuple=False)
.reshape((-1,))
)
self._resample_commands(envs_idx)
# check termination and reset
self.reset_buf = self.episode_length_buf > self.max_episode_length
self.reset_buf |= torch.abs(self.base_euler[:, 1]) > self.env_cfg["termination_if_pitch_greater_than"]
self.reset_buf |= torch.abs(self.base_euler[:, 0]) > self.env_cfg["termination_if_roll_greater_than"]
time_out_idx = (self.episode_length_buf > self.max_episode_length).nonzero(as_tuple=False).reshape((-1,))
self.extras["time_outs"] = torch.zeros_like(self.reset_buf, device=gs.device, dtype=gs.tc_float)
self.extras["time_outs"][time_out_idx] = 1.0
self.reset_idx(self.reset_buf.nonzero(as_tuple=False).reshape((-1,)))
# compute reward
self.rew_buf[:] = 0.0
for name, reward_func in self.reward_functions.items():
rew = reward_func() * self.reward_scales[name]
self.rew_buf += rew
self.episode_sums[name] += rew
# compute observations
self.obs_buf = torch.cat(
[
self.base_ang_vel * self.obs_scales["ang_vel"], # 3
self.projected_gravity, # 3
self.commands * self.commands_scale, # 3
(self.dof_pos - self.default_dof_pos) * self.obs_scales["dof_pos"], # 12
self.dof_vel * self.obs_scales["dof_vel"], # 12
self.actions, # 12
],
axis=-1,
)
self.last_actions[:] = self.actions[:]
self.last_dof_vel[:] = self.dof_vel[:]
self.extras["observations"]["critic"] = self.obs_buf
return self.obs_buf, self.rew_buf, self.reset_buf, self.extras
def _draw_debug_vis(self):
self.scene.clear_debug_objects()
VEL_LENGTH_SCALE = 0.3
VEL_RADIUS = 0.05
# Draw for every environment that is being shown in the viewer
for env_idx in self.rendered_envs_idx:
# ─── origin slightly above the robot base ─────────────────────
origin = self.base_pos[env_idx].clone().cpu()
origin[2] += 0.2
# ─── BLUE arrow: current base-frame linear velocity ───────────
vel_body = self.base_lin_vel[env_idx].unsqueeze(0) # (1,3)
vel_world = transform_by_quat(
vel_body,
self.base_quat[env_idx].unsqueeze(0)
)[0].cpu()
self.scene.draw_debug_arrow(
pos = origin,
vec = vel_world * VEL_LENGTH_SCALE,
radius= VEL_RADIUS,
color = (0.0, 0.0, 1.0, 0.8)
)
# ─── GREEN arrow: commanded velocity (rotated to world frame) ─
cmd_body = torch.tensor(
[*self.commands[env_idx, :2], 0.0],
device=self.device, dtype=gs.tc_float
).unsqueeze(0)
cmd_world = transform_by_quat(
cmd_body,
self.base_quat[env_idx].unsqueeze(0)
)[0].cpu()
self.scene.draw_debug_arrow(
pos = origin,
vec = cmd_world * VEL_LENGTH_SCALE,
radius= VEL_RADIUS,
color = (0.0, 1.0, 0.0, 0.8)
)
def get_observations(self):
self.extras["observations"]["critic"] = self.obs_buf
return self.obs_buf, self.extras
def get_privileged_observations(self):
return None
def reset_idx(self, envs_idx):
if len(envs_idx) == 0:
return
# reset dofs
self.dof_pos[envs_idx] = self.default_dof_pos
self.dof_vel[envs_idx] = 0.0
self.robot.set_dofs_position(
position=self.dof_pos[envs_idx],
dofs_idx_local=self.motors_dof_idx,
zero_velocity=True,
envs_idx=envs_idx,
)
# reset base
self.base_pos[envs_idx] = self.base_init_pos
self.base_quat[envs_idx] = self.base_init_quat.reshape(1, -1)
self.robot.set_pos(self.base_pos[envs_idx], zero_velocity=False, envs_idx=envs_idx)
self.robot.set_quat(self.base_quat[envs_idx], zero_velocity=False, envs_idx=envs_idx)
self.base_lin_vel[envs_idx] = 0
self.base_ang_vel[envs_idx] = 0
self.robot.zero_all_dofs_velocity(envs_idx)
# reset buffers
self.last_actions[envs_idx] = 0.0
self.last_dof_vel[envs_idx] = 0.0
self.episode_length_buf[envs_idx] = 0
self.reset_buf[envs_idx] = True
# fill extras
self.extras["episode"] = {}
for key in self.episode_sums.keys():
self.extras["episode"]["rew_" + key] = (
torch.mean(self.episode_sums[key][envs_idx]).item() / self.env_cfg["episode_length_s"]
)
self.episode_sums[key][envs_idx] = 0.0
self._resample_commands(envs_idx)
def reset(self):
self.reset_buf[:] = True
self.reset_idx(torch.arange(self.num_envs, device=gs.device))
return self.obs_buf, None
# ------------ reward functions----------------
def _reward_tracking_lin_vel(self):
# Tracking of linear velocity commands (xy axes)
lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1)
return torch.exp(-lin_vel_error / self.reward_cfg["tracking_sigma"])
def _reward_tracking_ang_vel(self):
# Tracking of angular velocity commands (yaw)
ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
return torch.exp(-ang_vel_error / self.reward_cfg["tracking_sigma"])
def _reward_lin_vel_z(self):
# Penalize z axis base linear velocity
return torch.square(self.base_lin_vel[:, 2])
def _reward_action_rate(self):
# Penalize changes in actions
return torch.sum(torch.square(self.last_actions - self.actions), dim=1)
def _reward_similar_to_default(self):
# Penalize joint poses far away from default pose
return torch.sum(torch.abs(self.dof_pos - self.default_dof_pos), dim=1)
def _reward_base_height(self):
# Penalize base height away from target
return torch.square(self.base_pos[:, 2] - self.reward_cfg["base_height_target"])
Expected Behavior
base_lin_vel should remain stable and physically plausible, even when the robot tilts or falls over on the terrain.
Screenshots/Videos
genesis_issue.mp4
Relevant log output
Environment
- OS: Ubuntu 22.04
- GPU/CPU: RTX 4090
- GPU-driver version: 550.120
- CUDA / CUDA-toolkit version: 12.4
Release version or Commit ID
Bug confirmed on: the latest version
Bug not confirmed on: 382cf4c
Additional Context
No response