
Variable Horizon in seals/CartPole  #56

Open
@lcotetur

Description

from imitation.algorithms.adversarial.airl import AIRL
from imitation.rewards.reward_nets import BasicShapedRewardNet
from imitation.util.networks import RunningNorm
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv

import gym
import seals  # registers the seals/CartPole-v0 environment

learners_rewards_after_training = []
learners_rewards_before_training = []

# 8 parallel copies of the fixed-horizon CartPole environment
venv = DummyVecEnv([lambda: gym.make("seals/CartPole-v0")] * 8)

learner = PPO(
    env=venv,
    policy=MlpPolicy,
    batch_size=64,
    ent_coef=0.0,
    learning_rate=0.0003,
    n_epochs=10,
)
reward_net = BasicShapedRewardNet(
    venv.observation_space, venv.action_space, normalize_input_layer=RunningNorm
)
airl_trainer = AIRL(
    demonstrations=rollouts,  # expert rollouts, as in the demo notebook (see sketch below)
    demo_batch_size=1024,
    gen_replay_buffer_capacity=2048,
    n_disc_updates_per_round=4,
    venv=venv,
    gen_algo=learner,
    reward_net=reward_net,
)

for i in range(10):
    # Evaluate the learner before this round of AIRL training.
    learner_rewards_before_training, _ = evaluate_policy(
        learner, venv, 100, return_episode_rewards=True
    )
    learners_rewards_before_training.append(learner_rewards_before_training)

    airl_trainer.train(20000)  # Note: set to 300000 for better results

    # Evaluate the learner after this round of AIRL training.
    learner_rewards_after_training, _ = evaluate_policy(
        learner, venv, 100, return_episode_rewards=True
    )
    learners_rewards_after_training.append(learner_rewards_after_training)
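
Note: `rollouts` above is the set of expert demonstrations collected earlier in the demo notebook. A minimal sketch of that step, assuming an expert PPO policy and the `imitation.data.rollout` helpers (exact arguments may differ between imitation versions):

from imitation.data import rollout
from imitation.data.wrappers import RolloutInfoWrapper

# Hypothetical expert: a PPO policy trained on seals/CartPole-v0 beforehand.
expert = PPO(env=venv, policy=MlpPolicy)
expert.learn(100_000)

# Wrap the envs so complete episodes are recorded for demonstration collection.
rollout_venv = DummyVecEnv(
    [lambda: RolloutInfoWrapper(gym.make("seals/CartPole-v0"))] * 8
)
rollouts = rollout.rollout(
    expert,
    rollout_venv,
    rollout.make_sample_until(min_timesteps=None, min_episodes=60),
)

With that in place, running the training loop above produces: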
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_16872\944136942.py in <module>
     41 
     42 
---> 43     airl_trainer.train(20000)  # Note: set to 300000 for better results
     44     learner_rewards_after_training, _ = evaluate_policy(
     45         learner, venv, 100, return_episode_rewards=True

c:\users\stephane\documents\imitation\src\imitation\algorithms\adversarial\common.py in train(self, total_timesteps, callback)
    416         )
    417         for r in tqdm.tqdm(range(0, n_rounds), desc="round"):
--> 418             self.train_gen(self.gen_train_timesteps)
    419             for _ in range(self.n_disc_updates_per_round):
    420                 with networks.training(self.reward_train):

c:\users\stephane\documents\imitation\src\imitation\algorithms\adversarial\common.py in train_gen(self, total_timesteps, learn_kwargs)
    385 
    386         gen_trajs, ep_lens = self.venv_buffering.pop_trajectories()
--> 387         self._check_fixed_horizon(ep_lens)
    388         gen_samples = rollout.flatten_trajectories_with_rew(gen_trajs)
    389         self._gen_replay_buffer.store(gen_samples)

c:\users\stephane\documents\imitation\src\imitation\algorithms\base.py in _check_fixed_horizon(self, horizons)
     89         if len(horizons) > 1:
     90             raise ValueError(
---> 91                 f"Episodes of different length detected: {horizons}. "
     92                 "Variable horizon environments are discouraged -- "
     93                 "termination conditions leak information about reward. See"

ValueError: Episodes of different length detected: {548, 500}. Variable horizon environments are discouraged -- termination conditions leak information about reward. Seehttps://imitation.readthedocs.io/en/latest/guide/variable_horizon.html for more information. If you are SURE you want to run imitation on a variable horizon task, then please pass in the flag: `allow_variable_horizon=True`.

This happens when running the demo from https://github.com/HumanCompatibleAI/imitation/blob/master/examples/4_train_airl.ipynb with a for loop around the training steps: it produces episodes of different horizons.
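
If variable-horizon episodes were actually intended, the error message suggests a stopgap: construct the trainer with the flag it names (the docs discourage this, since termination conditions can leak information about the reward). A sketch, reusing the objects defined above:

airl_trainer = AIRL(
    demonstrations=rollouts,
    demo_batch_size=1024,
    gen_replay_buffer_capacity=2048,
    n_disc_updates_per_round=4,
    venv=venv,
    gen_algo=learner,
    reward_net=reward_net,
    allow_variable_horizon=True,  # flag suggested by the ValueError above
)

This is only a workaround, though; seals/CartPole-v0 is designed to be fixed-horizon, which is why the mixed lengths {548, 500} look like a bug.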
