-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
44 lines (35 loc) · 1.32 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gym
import time
from linear_RBFs import LinearRBFs
# Set configuration parameters
render = True  # Render the environment on every time step
save_trained_parameters = True  # Call agent.save_params() at the end of each episode
max_num_of_episodes = 200
max_time_steps_episode = 1000  # Hard cap on steps when the env never reports `done`

# Create environment
env = gym.make('Continuous-CartPole-COACH-v1')

# Initialize agent
agent = LinearRBFs(load_trained_parameters=False)

# Reward accumulated during the episode currently being run
reward = 0

try:
    # Iterate over the maximum number of episodes
    for i_episode in range(max_num_of_episodes):
        observation = env.reset()  # Resetting the environment yields the first observation
        agent.new_episode()  # Reset episode variables

        print('Starting episode number', i_episode)

        # Iterate over the time steps of the current episode
        for t in range(max_time_steps_episode):
            if render:
                env.render()  # Make the environment visible

            action = agent.action(observation)

            # The second value returned by this environment is a pair:
            # h[0] is the human feedback, h[1] is the reward value.
            observation, h, done, info = env.step(action)
            reward += h[1]

            if h[0] != 0:  # Only update the agent when feedback was given
                agent.update(h[0], observation)

            if done:  # The environment reported the end of the episode
                break

        # End-of-episode bookkeeping. It runs whether the episode ended through
        # `done` or by exhausting max_time_steps_episode, so the reward of a
        # timed-out episode is reported and never leaks into the next one.
        if save_trained_parameters:
            agent.save_params()
        print('episode reward:', reward)
        reward = 0
        time.sleep(1)  # Pause for one second before the next episode starts
finally:
    # Release the environment (and its render window) even if training is
    # interrupted or an error is raised.
    env.close()