
Commit 34a88c9

Move tests
1 parent 8256842 commit 34a88c9

2 files changed: +248, -252 lines changed
Lines changed: 248 additions & 0 deletions
@@ -0,0 +1,248 @@
"""This script creates a test which fails when
saving/resuming a model is unsuccessful."""

import tempfile

import numpy as np
import pytest
import torch
from torch.nn import functional as F

from garage.envs import GymEnv, normalize
from garage.experiment import deterministic, SnapshotConfig
from garage.replay_buffer import PathBuffer
from garage.sampler import FragmentWorker, LocalSampler
from garage.torch import set_gpu_mode
from garage.torch.algos import SAC
from garage.torch.policies import TanhGaussianMLPPolicy
from garage.torch.q_functions import ContinuousMLPQFunction
from garage.trainer import Trainer


@pytest.mark.mujoco
def test_torch_cpu_resume_cpu():
    """Test saving on CPU and resuming on CPU."""
    with tempfile.TemporaryDirectory() as temp_dir:
        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
                                         snapshot_mode='last',
                                         snapshot_gap=1)
        env = normalize(
            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
        deterministic.set_seed(0)
        policy = TanhGaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=[32, 32],
            hidden_nonlinearity=torch.nn.ReLU,
            output_nonlinearity=None,
            min_std=np.exp(-20.),
            max_std=np.exp(2.),
        )

        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)

        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)
        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
        trainer = Trainer(snapshot_config=snapshot_config)
        sampler = LocalSampler(agents=policy,
                               envs=env,
                               max_episode_length=env.spec.max_episode_length,
                               worker_class=FragmentWorker)
        sac = SAC(env_spec=env.spec,
                  policy=policy,
                  qf1=qf1,
                  qf2=qf2,
                  sampler=sampler,
                  gradient_steps_per_itr=100,
                  replay_buffer=replay_buffer,
                  min_buffer_size=1e3,
                  target_update_tau=5e-3,
                  discount=0.99,
                  buffer_batch_size=64,
                  reward_scale=1.,
                  steps_per_epoch=2)
        sac.has_lambda = lambda x: x + 1
        trainer.setup(sac, env)
        set_gpu_mode(False)
        sac.to()
        trainer.setup(algo=sac, env=env)
        trainer.train(n_epochs=10, batch_size=100)
        trainer = Trainer(snapshot_config)
        trainer.restore(temp_dir)
        trainer.resume(n_epochs=20)


@pytest.mark.gpu
@pytest.mark.mujoco
def test_torch_cpu_resume_gpu():
    """Test saving on CPU and resuming on GPU."""
    with tempfile.TemporaryDirectory() as temp_dir:
        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
                                         snapshot_mode='last',
                                         snapshot_gap=1)
        env = normalize(
            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
        deterministic.set_seed(0)
        policy = TanhGaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=[32, 32],
            hidden_nonlinearity=torch.nn.ReLU,
            output_nonlinearity=None,
            min_std=np.exp(-20.),
            max_std=np.exp(2.),
        )

        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)

        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)
        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
        trainer = Trainer(snapshot_config=snapshot_config)
        sampler = LocalSampler(agents=policy,
                               envs=env,
                               max_episode_length=env.spec.max_episode_length,
                               worker_class=FragmentWorker)
        sac = SAC(env_spec=env.spec,
                  policy=policy,
                  qf1=qf1,
                  qf2=qf2,
                  sampler=sampler,
                  gradient_steps_per_itr=100,
                  replay_buffer=replay_buffer,
                  min_buffer_size=1e3,
                  target_update_tau=5e-3,
                  discount=0.99,
                  buffer_batch_size=64,
                  reward_scale=1.,
                  steps_per_epoch=2)
        sac.has_lambda = lambda x: x + 1
        trainer.setup(sac, env)
        set_gpu_mode(False)
        sac.to()
        trainer.setup(algo=sac, env=env)
        trainer.train(n_epochs=10, batch_size=100)
        trainer = Trainer(snapshot_config)
        set_gpu_mode(True)
        trainer.restore(temp_dir)
        trainer.resume(n_epochs=20)


@pytest.mark.gpu
@pytest.mark.mujoco
def test_torch_gpu_resume_cpu():
    """Test saving on GPU and resuming on CPU."""
    with tempfile.TemporaryDirectory() as temp_dir:
        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
                                         snapshot_mode='last',
                                         snapshot_gap=1)
        env = normalize(
            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
        deterministic.set_seed(0)
        policy = TanhGaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=[32, 32],
            hidden_nonlinearity=torch.nn.ReLU,
            output_nonlinearity=None,
            min_std=np.exp(-20.),
            max_std=np.exp(2.),
        )

        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)

        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)
        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
        trainer = Trainer(snapshot_config=snapshot_config)
        sampler = LocalSampler(agents=policy,
                               envs=env,
                               max_episode_length=env.spec.max_episode_length,
                               worker_class=FragmentWorker)
        sac = SAC(env_spec=env.spec,
                  policy=policy,
                  qf1=qf1,
                  qf2=qf2,
                  sampler=sampler,
                  gradient_steps_per_itr=100,
                  replay_buffer=replay_buffer,
                  min_buffer_size=1e3,
                  target_update_tau=5e-3,
                  discount=0.99,
                  buffer_batch_size=64,
                  reward_scale=1.,
                  steps_per_epoch=2)
        sac.has_lambda = lambda x: x + 1
        trainer.setup(sac, env)
        set_gpu_mode(True)
        sac.to()
        trainer.setup(algo=sac, env=env)
        trainer.train(n_epochs=10, batch_size=100)
        set_gpu_mode(False)
        trainer = Trainer(snapshot_config)
        trainer.restore(temp_dir)
        trainer.resume(n_epochs=20)


@pytest.mark.gpu
@pytest.mark.mujoco
def test_torch_gpu_resume_gpu():
    """Test saving on GPU and resuming on GPU."""
    with tempfile.TemporaryDirectory() as temp_dir:
        snapshot_config = SnapshotConfig(snapshot_dir=temp_dir,
                                         snapshot_mode='last',
                                         snapshot_gap=1)
        env = normalize(
            GymEnv('InvertedDoublePendulum-v2', max_episode_length=100))
        deterministic.set_seed(0)
        policy = TanhGaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=[32, 32],
            hidden_nonlinearity=torch.nn.ReLU,
            output_nonlinearity=None,
            min_std=np.exp(-20.),
            max_std=np.exp(2.),
        )

        qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)

        qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                     hidden_sizes=[32, 32],
                                     hidden_nonlinearity=F.relu)
        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
        trainer = Trainer(snapshot_config=snapshot_config)
        sampler = LocalSampler(agents=policy,
                               envs=env,
                               max_episode_length=env.spec.max_episode_length,
                               worker_class=FragmentWorker)
        sac = SAC(env_spec=env.spec,
                  policy=policy,
                  qf1=qf1,
                  qf2=qf2,
                  sampler=sampler,
                  gradient_steps_per_itr=100,
                  replay_buffer=replay_buffer,
                  min_buffer_size=1e3,
                  target_update_tau=5e-3,
                  discount=0.99,
                  buffer_batch_size=64,
                  reward_scale=1.,
                  steps_per_epoch=2)
        sac.has_lambda = lambda x: x + 1
        trainer.setup(sac, env)
        set_gpu_mode(True)
        sac.to()
        trainer.setup(algo=sac, env=env)
        trainer.train(n_epochs=10, batch_size=100)
        trainer = Trainer(snapshot_config)
        trainer.restore(temp_dir)
        trainer.resume(n_epochs=20)
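
All four tests are gated behind pytest markers ('mujoco', plus 'gpu' for the CUDA cases). Below is a minimal usage sketch, not part of the commit, for running only the CPU-only case programmatically; pytest.main and the '-m' marker expression are standard pytest, and the script assumes the markers are registered in the project's pytest configuration and that it is launched from the repository root.

# Usage sketch (illustrative, not part of this commit): run only tests marked
# 'mujoco' but not 'gpu', i.e. the CPU save/resume case.
import pytest

if __name__ == '__main__':
    # '-m' filters by marker expression; '-q' keeps the output terse.
    raise SystemExit(pytest.main(['-m', 'mujoco and not gpu', '-q']))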
