Skip to content

Commit 51bd245

Browse files
authored
Merge pull request #671 from maiyetum95/master
add test_2d_owsc_python case for deep reinforcement learning training
2 parents 2e62fa1 + 46c8cc5 commit 51bd245

20 files changed

+1444
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
### Python, for ${Python3_EXECUTABLE}
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
### Pybind11
find_package(pybind11 CONFIG REQUIRED)

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/cmake) # main (top) cmake dir

set(CMAKE_VERBOSE_MAKEFILE on)

# Name the project after the containing folder.
STRING(REGEX REPLACE ".*/(.*)" "\\1" CURRENT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR})
PROJECT("${CURRENT_FOLDER}")

SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
SET(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin/")
SET(BUILD_INPUT_PATH "${EXECUTABLE_OUTPUT_PATH}/input")
SET(BUILD_RELOAD_PATH "${EXECUTABLE_OUTPUT_PATH}/reload")
SET(BUILD_BIND_PATH "${EXECUTABLE_OUTPUT_PATH}/bind")
SET(BUILD_DRL_PATH "${EXECUTABLE_OUTPUT_PATH}/drl")

# Stage the regression-test input data next to the binary.
# (A redundant execute_process(... make_directory ...) duplicate of this
# MAKE_DIRECTORY call was removed — one creation is sufficient.)
file(MAKE_DIRECTORY ${BUILD_INPUT_PATH})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/regression_test_tool/ DESTINATION ${BUILD_INPUT_PATH})

# Stage the pybind test driver.
file(MAKE_DIRECTORY ${BUILD_BIND_PATH})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/pybind_tool/
     DESTINATION ${BUILD_BIND_PATH})

# Stage the deep-reinforcement-learning training scripts.
file(MAKE_DIRECTORY ${BUILD_DRL_PATH})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/deep_reinforcement_learning_tool/
     DESTINATION ${BUILD_DRL_PATH})

# Build every source in this directory into one Python extension module.
aux_source_directory(. DIR_SRCS)
pybind11_add_module(${PROJECT_NAME} ${DIR_SRCS})
set_target_properties(${PROJECT_NAME} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}")
target_link_libraries(${PROJECT_NAME} PRIVATE sphinxsys_2d)

# CTest entry: run the pybind regression driver and check its success banner.
add_test(NAME ${PROJECT_NAME} COMMAND ${Python3_EXECUTABLE} "${EXECUTABLE_OUTPUT_PATH}/bind/pybind_test.py")
set_tests_properties(${PROJECT_NAME} PROPERTIES WORKING_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}"
PASS_REGULAR_EXPRESSION "The result of TotalViscousForceFromFluid is correct based on the dynamic time warping regression test!")
41+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include "custom_io_environment.h"
#include "sph_system.h"
namespace fs = std::filesystem;

namespace SPH
{
//=================================================================================================//
/**
 * Construct an IOEnvironment whose output folder is unique to the
 * (parallel environment, episode) pair, so concurrent DRL training runs
 * do not write into each other's directories.
 *
 * @param sph_system          the owning SPH system
 * @param delete_output       when true, any stale contents of the folder are removed
 * @param parallel_env_number id of the parallel simulation environment
 * @param episode_number      current training episode
 */
CustomIOEnvironment::CustomIOEnvironment(SPHSystem &sph_system, bool delete_output, int parallel_env_number, int episode_number)
    : IOEnvironment(sph_system, delete_output)
{
    // Append environment and episode ids to the output_folder_ set by the base class.
    output_folder_ += "_env_" + std::to_string(parallel_env_number) + "_episode_" + std::to_string(episode_number);

    // Wipe stale contents first when requested (the original created the folder,
    // then immediately removed and re-created it — redundant work).
    if (delete_output && fs::exists(output_folder_))
    {
        fs::remove_all(output_folder_);
    }

    // Ensure the folder exists; create_directories also creates missing parents,
    // where create_directory would fail.
    if (!fs::exists(output_folder_))
    {
        fs::create_directories(output_folder_);
    }
}
//=================================================================================================//
} // namespace SPH
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#ifndef CUSTOM_IO_ENVIRONMENT_H
#define CUSTOM_IO_ENVIRONMENT_H

#include "io_environment.h"
#include "sph_system.h"

namespace SPH
{
/**
 * @class CustomIOEnvironment
 * @brief IOEnvironment variant whose output folder name is suffixed with the
 *        parallel-environment id and episode number, so parallel deep
 *        reinforcement learning runs write to separate directories.
 */
class CustomIOEnvironment : public IOEnvironment
{
public:
    // Constructor with an additional environment_number parameter
    // (and the episode number) appended to the output folder path.
    CustomIOEnvironment(SPHSystem &sph_system, bool delete_output, int parallel_env_number, int episode_number);
};
} // namespace SPH
#endif // CUSTOM_IO_ENVIRONMENT_H
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#ifndef CUSTOM_IO_OBSERVATION_H
#define CUSTOM_IO_OBSERVATION_H

#include "io_observation.h"

namespace SPH
{
/**
 * @class ExtendedReducedQuantityRecording
 * @brief Thin extension of ReducedQuantityRecording that exposes the reduced
 *        quantity directly to callers (e.g. the Python bindings) instead of
 *        only writing it to the recording output.
 */
template <class LocalReduceMethodType>
class ExtendedReducedQuantityRecording : public ReducedQuantityRecording<LocalReduceMethodType>
{
public:
    // Inherit constructors from the base class
    using ReducedQuantityRecording<LocalReduceMethodType>::ReducedQuantityRecording;

    // Function to directly return the result of reduce_method_.exec()
    // (the base class's reduction operation) without touching the output files.
    typename LocalReduceMethodType::ReturnType getReducedQuantity()
    {
        return this->reduce_method_.exec();
    }
};

} // namespace SPH
#endif // CUSTOM_IO_OBSERVATION_H
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#ifndef CUSTOM_IO_SIMBODY_H
#define CUSTOM_IO_SIMBODY_H

#include "io_simbody.h"
#include <SimTKsimbody.h>

namespace SPH {
/**
 * @class WriteSimBodyPinDataExtended
 * @brief Extended class to write total force acting on a solid body and expose
 *        the pin joint's angle and angle rate to Python.
 */
class WriteSimBodyPinDataExtended : public WriteSimBodyPinData
{
public:
    WriteSimBodyPinDataExtended(SPHSystem &sph_system, SimTK::RungeKuttaMersonIntegrator &integ,
                                SimTK::MobilizedBody::Pin &pinbody)
        : WriteSimBodyPinData(sph_system, integ, pinbody) {} // stray ';' after body removed

    /// Current pin angle read from the integrator's state.
    /// @param iteration_step unused; kept for interface compatibility with callers.
    Real getAngleToPython([[maybe_unused]] size_t iteration_step = 0)
    {
        const SimTK::State &state = integ_.getState();
        return mobody_.getAngle(state);
    }

    /// Current pin angle rate read from the integrator's state.
    /// @param iteration_step unused; kept for interface compatibility with callers.
    Real getAngleRateToPython([[maybe_unused]] size_t iteration_step = 0)
    {
        const SimTK::State &state = integ_.getState();
        return mobody_.getRate(state);
    }
};
} // namespace SPH
#endif // CUSTOM_IO_SIMBODY_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from gymnasium.envs.registration import register

# Register the OWSC wave-energy-converter environment with Gymnasium so it
# can be instantiated via gymnasium.make("OWSC-v0").
register(
    id="OWSC-v0",
    entry_point="gym_env_owsc.envs:OWSCEnv",
    # Default constructor kwargs; parallel_envs selects which parallel
    # simulation instance this environment drives.
    kwargs={'parallel_envs': 0},
    # NOTE(review): OWSCEnv.step() sets done after 100 action steps, so these
    # 500-step/500-reward limits are never the binding cutoff — confirm intended.
    max_episode_steps=500,
    reward_threshold=500.0,
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from gym_env_owsc.envs.owsc import OWSCEnv
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import sys
2+
import math
3+
import numpy as np
4+
import gymnasium as gym
5+
from gymnasium import spaces
6+
# add dynamic link library or shared object to python env
7+
sys.path.append('/path/to/SPHinXsys/case/lib/dynamic link library or shared object')
8+
import test_2d_owsc_python as test_2d
9+
10+
11+
class OWSCEnv(gym.Env):
    """Gymnasium environment wrapping the SPHinXsys 2D OWSC simulation.

    The action is a single continuous value in [-1, 1] scaled to a change of
    the flap's damping coefficient; the observation is a 16-dimensional vector
    of wave and flap states; the reward is the energy absorbed during the
    action step, minus a penalty when the action is clipped at the bounds.
    """
    # metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def __init__(self, render_mode=None, parallel_envs=0):
        # Initialize environment parameters
        self.parallel_envs = parallel_envs  # Identifier for parallel simulation environments
        self.episode = 1  # Current episode number
        self.time_per_action = 0.1  # Time interval per action step
        self.low_action = -1.0  # Minimum action value
        self.max_action = 1.0  # Maximum action value
        self.update_per_action = 10  # The action's effect is applied in smaller iterations within one action time step
        self.low_obs = -10.0  # Minimum observation value
        self.high_obs = 10.0  # Maximum observation value
        self.obs_numbers = 16  # Number of observation variables

        # Define action and observation spaces for Gym
        low_action = np.array([self.low_action]).astype(np.float32)
        high_action = np.array([self.max_action]).astype(np.float32)
        low_obs = np.full(self.obs_numbers, self.low_obs).astype(np.float32)
        high_obs = np.full(self.obs_numbers, self.high_obs).astype(np.float32)

        self.action_space = spaces.Box(low_action, high_action)  # Continuous action space
        self.observation_space = spaces.Box(low_obs, high_obs)  # Continuous observation space

    def _fill_observation(self):
        """Refresh self.observation from the simulation; return it as float32.

        Extracted helper: this block was previously duplicated verbatim in
        reset() and step().
        """
        for i in range(0, 2):
            self.observation[i] = self.owsc.get_wave_height(i)
            self.observation[i + 2] = self.owsc.get_wave_velocity(i, 0)
            self.observation[i + 4] = self.owsc.get_wave_velocity(i, 1)
            self.observation[i + 6] = self.owsc.get_wave_velocity_on_flap(i, 0)
            self.observation[i + 8] = self.owsc.get_wave_velocity_on_flap(i, 1)
            self.observation[i + 10] = self.owsc.get_flap_position(i, 0)
            self.observation[i + 12] = self.owsc.get_flap_position(i, 1)
        self.observation[14] = self.owsc.get_flap_angle()
        self.observation[15] = self.owsc.get_flap_angle_rate()
        return self.observation.astype(np.float32)

    # Reset the environment at the beginning of each episode
    def reset(self, seed=None, options=None):
        """Start a new episode: launch a fresh simulation, return (obs, info)."""
        super().reset(seed=seed)

        # Initialize the OWSC simulation with the given episode and environment setup
        self.owsc = test_2d.owsc_from_sph_cpp(self.parallel_envs, self.episode)
        self.action_time_steps = 0  # Track the number of action steps
        self.action_time = 0.5  # Initialize action time
        self.damping_coefficient = 50  # Set damping coefficient for the environment
        self.total_reward_per_episode = 0.0  # Track total reward in each episode

        # Start the simulation with the given action time and damping coefficient
        self.owsc.run_case(self.action_time, self.damping_coefficient)

        # Initialize observation array and fill it from the simulation
        self.observation = np.zeros(self.obs_numbers)
        self._get_obs = self._fill_observation()

        return self._get_obs, {}

    def step(self, action):
        """Advance the simulation by one action step and return the 5-tuple
        (obs, reward, done, truncated, info)."""
        self.action_time_steps += 1
        # Apply the action to change the damping coefficient
        self.damping_change = 5.0 * action[0]
        # Penalty for actions that push the damping coefficient out of bounds
        # (renamed from the original's misspelled 'penality_0')
        penalty_0 = 0.0
        # Ensure the damping coefficient stays within valid bounds
        if self.damping_coefficient + self.damping_change < 0.01:
            self.damping_change = 0.01 - self.damping_coefficient
            penalty_0 = - 1.0
        if self.damping_coefficient + self.damping_change > 100:
            self.damping_change = 100 - self.damping_coefficient
            penalty_0 = - 1.0

        reward_0 = 0.0
        for i in range(self.update_per_action):
            self.flap_angle_rate_previous = self.owsc.get_flap_angle_rate()
            self.damping_coefficient += self.damping_change / self.update_per_action
            self.action_time += self.time_per_action / self.update_per_action
            self.owsc.run_case(self.action_time, self.damping_coefficient)
            self.flap_angle_rate_now = self.owsc.get_flap_angle_rate()
            # Reward: damping * (mean flap angular rate)^2, integrated over the sub-step
            reward_0 += self.damping_coefficient * math.pow(0.5 * (self.flap_angle_rate_now + self.flap_angle_rate_previous), 2) * self.time_per_action / self.update_per_action
        # Add any penalties to the reward
        reward = reward_0 + penalty_0
        self.total_reward_per_episode += reward

        # Update observations from the OWSC simulation
        self._get_obs = self._fill_observation()

        # Log action and reward information to files
        with open(f'action_env{self.parallel_envs}_epi{self.episode}.txt', 'a') as file:
            file.write(f'action_time: {self.action_time} action: {self.damping_coefficient}\n')

        with open(f'reward_env{self.parallel_envs}_epi{self.episode}.txt', 'a') as file:
            file.write(f'action_time: {self.action_time} reward: {reward}\n')

        # Episode ends after 100 action steps (the original comment said 200,
        # but the condition below terminates when action_time_steps reaches 100)
        if self.action_time_steps > 99:
            done = True
            with open(f'reward_env{self.parallel_envs}.txt', 'a') as file:
                file.write(f'episode: {self.episode} total_reward: {self.total_reward_per_episode}\n')
            self.episode += 1
        else:
            done = False

        # Return the updated observation, reward, done flag, truncated flag, and info
        return self._get_obs, reward, done, False, {}

    # Render method (optional, no rendering in this case)
    def render(self):
        return 0

    # Additional render frame logic (not implemented)
    def _render_frame(self):
        return 0

    # Close the environment and cleanup (optional)
    def close(self):
        return 0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from setuptools import setup

# Minimal packaging script so the Gymnasium environment package
# (gym_env_owsc) can be installed, e.g. with `pip install -e .`.
setup(
    name="gym_env_owsc",
    version="1.0",
    # Runtime dependencies of the custom OWSC training environment.
    install_requires=["gymnasium>=0.27.1", "pygame>=2.3.0"],
)

0 commit comments

Comments
 (0)