I am trying to set up a simple BC (behavioral cloning) experiment using RL-Coach and a custom Gym environment. Based on the idea of this, I am trying to predict the target variable from f_1, f_2 and f_3. The Gym environment I am using is:
import random

import numpy as np
import pandas as pd
import gym
from gym import spaces
from sklearn.preprocessing import MinMaxScaler


class TestEnvOne(gym.Env):
    def __init__(self, max_time):
        super(TestEnvOne, self).__init__()
        self.max_time = max_time

        # Build a toy dataset: the target is a lagged combination of the three features.
        f_1 = np.sin(np.arange(self.max_time))
        f_2 = np.cos(np.arange(self.max_time))
        f_3 = np.tan(np.arange(self.max_time))
        target = np.roll(f_1, 1) + np.roll(f_2, 2) + np.roll(f_3, 3)
        df = pd.DataFrame({'target': target, 'f_1': f_1, 'f_2': f_2, 'f_3': f_3})
        list_col = ['target', 'f_1', 'f_2', 'f_3']
        df[list_col] = MinMaxScaler().fit_transform(df[list_col])
        self.df = df

        self.start_step = 0
        self.current_step = 0

        # Actions: a single continuous value in [0, 1].
        self.action_space = spaces.Box(low=np.array([0]), high=np.array([1]), dtype=np.float32)
        # Observations: the three features as 'measurements', the target as 'desired_goal'.
        self.observation_space = gym.spaces.dict.Dict(
            {'measurements': spaces.Box(low=0.0, high=1.1, shape=(3,), dtype=np.float32),
             'desired_goal': spaces.Box(low=np.array([0]), high=np.array([1]), dtype=np.float32)
             })
        self.reward_range = (-1, 1)

    def _next_observation(self):
        measurements = np.array([
            self.df.loc[self.current_step, 'f_1'],
            self.df.loc[self.current_step, 'f_2'],
            self.df.loc[self.current_step, 'f_3']
        ])
        frame = {'desired_goal': self.df.loc[self.current_step, 'target'].reshape(-1, 1),
                 'measurements': measurements
                 }
        return frame

    def step(self, action):
        self.current_step += 1
        # Wrap around once the end of the data frame is reached.
        if self.current_step >= len(self.df.loc[:, 'target'].values):
            self.current_step = 0
        obs = self._next_observation()
        reward = (obs['desired_goal'] - action)[0][0]
        # The episode ends after a full pass over the data.
        done = (self.current_step == self.start_step)
        returning_value = {'measurements': obs['measurements'], 'desired_goal': obs['desired_goal']}
        return returning_value, reward, done, {}

    def reset(self):
        # Set the current step to a random point within the data frame
        self.start_step = random.randint(0, len(self.df.loc[:, 'target'].values) - 1)
        self.current_step = self.start_step
        return self._next_observation()

    def render(self, mode='human', close=False):
        # Render the environment to the screen
        print(f'Step: {self.current_step}')
        print(f'Target: {self.df.loc[self.current_step, "target"]}')

    def seed(self, seed=None):
        self.seed_value = seed
        return [seed]
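Before wiring it into Coach, a quick standalone smoke test (a hypothetical snippet, not part of my actual run) confirms the environment resets and steps as expected:

# Hypothetical smoke test: exercise the environment without Coach.
env = TestEnvOne(max_time=1000)
obs = env.reset()
for _ in range(5):
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    print(obs['measurements'], float(reward), done)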
The preset I am using is based on the Doom Basic BC preset, along the following lines:
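(A minimal sketch of such a preset; the schedule values, the dataset path, and the my_envs.test_env module path are placeholders, not necessarily the exact contents of PruebaPresetBC.py.)

from rl_coach.agents.bc_agent import BCAgentParameters
from rl_coach.base_parameters import VisualizationParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters

# Graph scheduling (placeholder values)
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(2000)
schedule_params.steps_between_evaluation_periods = TrainingSteps(500)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(0)

# Agent: behavioral cloning, trained from a pre-recorded demonstration dataset.
agent_params = BCAgentParameters()
agent_params.memory.load_memory_from_file_path = 'datasets/my_dataset.p'  # placeholder path

# Environment: the custom Gym env above; 'my_envs.test_env' is a placeholder module path.
env_params = GymVectorEnvironment(level='my_envs.test_env:TestEnvOne')
env_params.additional_simulator_parameters = {'max_time': 1000}

graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=VisualizationParameters())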
I run it from the command line with coach -p presets/PruebaPresetBC.py, and I am getting an exception in the improve phase:
Traceback (most recent call last):
  File "/home/meteo/coach_env/bin/coach", line 8, in <module>
    sys.exit(main())
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 777, in main
    launcher.launch()
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 226, in launch
    self.run_graph_manager(graph_manager, args)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 612, in run_graph_manager
    self.start_single_threaded(task_parameters, graph_manager, args)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 674, in start_single_threaded
    start_graph(graph_manager=graph_manager, task_parameters=task_parameters)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 88, in start_graph
    graph_manager.improve()
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/graph_managers/graph_manager.py", line 547, in improve
    self.train_and_act(self.steps_between_evaluation_periods)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/graph_managers/graph_manager.py", line 481, in train_and_act
    self.act(EnvironmentSteps(1))
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/graph_managers/graph_manager.py", line 447, in act
    result = self.top_level_manager.step(None)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/level_manager.py", line 245, in step
    action_info = acting_agent.act()
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/agents/agent.py", line 851, in act
    action = self.choose_action(curr_state)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/agents/imitation_agent.py", line 43, in choose_action
    prediction = self.networks['main'].online_network.predict(self.prepare_batch_for_inference(curr_state, 'main'))
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/architectures/tensorflow_components/architecture.py", line 547, in predict
    output = self.sess.run(outputs, feed_dict)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
    run_metadata_ptr)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1149, in _run
    str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1, 3) for Tensor 'main_level/agent/main/online/network_0/measurements/measurements:0', which has shape '(?, 0)'
I think it is trying to feed a measurement (which is three values) into a zero-sized network input, but I don't know where this zero shape comes from. Any ideas? Thanks
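As a sanity check (a hypothetical snippet, not output from the actual run), printing the spaces the environment declares shows whether the (3,)-shaped measurements space is correct on the Gym side:

# Hypothetical check: confirm what the environment itself declares.
env = TestEnvOne(max_time=1000)
print(env.observation_space.spaces['measurements'].shape)  # expected: (3,)
print(env.observation_space.spaces['desired_goal'].shape)  # expected: (1,)
obs = env.reset()
print(np.asarray(obs['measurements']).shape)               # expected: (3,)

If the environment itself reports (3,), the empty '(?, 0)' placeholder presumably comes from how the preset's network maps the 'measurements' observation, not from the environment definition.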