import sys
import math
import numpy as np
import gymnasium as gym
from gymnasium import spaces

# Add the directory containing the compiled SPHinXsys Python module
# (dynamic link library / shared object) to the Python path.
sys.path.append('/path/to/SPHinXsys/case/lib/dynamic link library or shared object')
import test_2d_owsc_python as test_2d


class OWSCEnv(gym.Env):
    """Custom Gymnasium environment for the SPHinXsys OWSC case (no rendering)."""
    # metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def __init__(self, render_mode=None, parallel_envs=0):
        # Initialize environment parameters
        self.parallel_envs = parallel_envs  # Identifier for parallel simulation environments
        self.episode = 1  # Current episode number
        self.time_per_action = 0.1  # Physical time simulated per action step
        self.low_action = -1.0  # Minimum action value
        self.max_action = 1.0  # Maximum action value
        self.update_per_action = 10  # Each action is applied over this many sub-steps within one action interval
        self.low_obs = -10.0  # Minimum observation value
        self.high_obs = 10.0  # Maximum observation value
        self.obs_numbers = 16  # Number of observation variables

        # Define action and observation spaces for Gym
        low_action = np.array([self.low_action]).astype(np.float32)
        high_action = np.array([self.max_action]).astype(np.float32)
        low_obs = np.full(self.obs_numbers, self.low_obs).astype(np.float32)
        high_obs = np.full(self.obs_numbers, self.high_obs).astype(np.float32)

        self.action_space = spaces.Box(low_action, high_action)  # Continuous action space
        self.observation_space = spaces.Box(low_obs, high_obs)  # Continuous observation space

    # Reset the environment at the beginning of each episode
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)

        # Initialize the OWSC simulation with the given episode and environment setup
        self.owsc = test_2d.owsc_from_sph_cpp(self.parallel_envs, self.episode)
        self.action_time_steps = 0  # Track the number of action steps
        self.action_time = 0.5  # Initialize action time
        self.damping_coefficient = 50  # Set damping coefficient for the environment
        self.total_reward_per_episode = 0.0  # Track total reward in each episode

        # Start the simulation with the given action time and damping coefficient
        self.owsc.run_case(self.action_time, self.damping_coefficient)

        # Initialize observation array with zero values
        self.observation = np.zeros(self.obs_numbers)
        # Fill the observation array with values from the OWSC simulation
        for i in range(2):
            self.observation[i] = self.owsc.get_wave_height(i)
            self.observation[i + 2] = self.owsc.get_wave_velocity(i, 0)
            self.observation[i + 4] = self.owsc.get_wave_velocity(i, 1)
            self.observation[i + 6] = self.owsc.get_wave_velocity_on_flap(i, 0)
            self.observation[i + 8] = self.owsc.get_wave_velocity_on_flap(i, 1)
            self.observation[i + 10] = self.owsc.get_flap_position(i, 0)
            self.observation[i + 12] = self.owsc.get_flap_position(i, 1)
        self.observation[14] = self.owsc.get_flap_angle()
        self.observation[15] = self.owsc.get_flap_angle_rate()

        self._get_obs = self.observation.astype(np.float32)

        return self._get_obs, {}

    def step(self, action):
        self.action_time_steps += 1
        # Apply the action as a change to the damping coefficient
        self.damping_change = 5.0 * action[0]
        # Penalty for actions that would push the damping coefficient out of bounds
        penalty_0 = 0.0
        # Ensure the damping coefficient stays within valid bounds
        if self.damping_coefficient + self.damping_change < 0.01:
            self.damping_change = 0.01 - self.damping_coefficient
            penalty_0 = -1.0
        if self.damping_coefficient + self.damping_change > 100:
            self.damping_change = 100 - self.damping_coefficient
            penalty_0 = -1.0

        reward_0 = 0.0
        for _ in range(self.update_per_action):
            self.flap_angle_rate_previous = self.owsc.get_flap_angle_rate()
            self.damping_coefficient += self.damping_change / self.update_per_action
            self.action_time += self.time_per_action / self.update_per_action
            self.owsc.run_case(self.action_time, self.damping_coefficient)
            self.flap_angle_rate_now = self.owsc.get_flap_angle_rate()
            # Reward: energy absorbed by the damper over the sub-step,
            # c * omega^2 * dt, with omega taken as the average of the
            # flap angle rate before and after the sub-step
            reward_0 += self.damping_coefficient * math.pow(0.5 * (self.flap_angle_rate_now + self.flap_angle_rate_previous), 2) * self.time_per_action / self.update_per_action
        # Add any penalties to the reward
        reward = reward_0 + penalty_0
        self.total_reward_per_episode += reward

        # Update observations from the OWSC simulation
        for i in range(2):
            self.observation[i] = self.owsc.get_wave_height(i)
            self.observation[i + 2] = self.owsc.get_wave_velocity(i, 0)
            self.observation[i + 4] = self.owsc.get_wave_velocity(i, 1)
            self.observation[i + 6] = self.owsc.get_wave_velocity_on_flap(i, 0)
            self.observation[i + 8] = self.owsc.get_wave_velocity_on_flap(i, 1)
            self.observation[i + 10] = self.owsc.get_flap_position(i, 0)
            self.observation[i + 12] = self.owsc.get_flap_position(i, 1)
        self.observation[14] = self.owsc.get_flap_angle()
        self.observation[15] = self.owsc.get_flap_angle_rate()

        self._get_obs = self.observation.astype(np.float32)

        # Log action and reward information to files
        with open(f'action_env{self.parallel_envs}_epi{self.episode}.txt', 'a') as file:
            file.write(f'action_time: {self.action_time} action: {self.damping_coefficient}\n')

        with open(f'reward_env{self.parallel_envs}_epi{self.episode}.txt', 'a') as file:
            file.write(f'action_time: {self.action_time} reward: {reward}\n')

        # End the episode after 100 action steps
        if self.action_time_steps > 99:
            done = True
            with open(f'reward_env{self.parallel_envs}.txt', 'a') as file:
                file.write(f'episode: {self.episode} total_reward: {self.total_reward_per_episode}\n')
            self.episode += 1
        else:
            done = False

        # Return the observation, reward, terminated flag, truncated flag (always False), and info dict
        return self._get_obs, reward, done, False, {}

    # Render method (optional, no rendering in this case)
    def render(self):
        return 0

    # Additional render frame logic (not implemented)
    def _render_frame(self):
        return 0

    # Close the environment and clean up (optional)
    def close(self):
        return 0
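

# Minimal usage sketch (not part of the original file): it assumes the compiled
# test_2d_owsc_python module is importable from the path appended above, and it
# simply drives the environment with random actions to check that reset() and
# step() run end to end before hooking the environment up to an RL library.
if __name__ == "__main__":
    env = OWSCEnv(parallel_envs=0)
    obs, info = env.reset()
    for _ in range(5):
        action = env.action_space.sample()  # random damping adjustment in [-1, 1]
        obs, reward, terminated, truncated, info = env.step(action)
        print(f'reward: {reward:.4f} damping: {env.damping_coefficient:.2f}')
        if terminated or truncated:
            obs, info = env.reset()
    env.close()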