@@ -35,7 +35,7 @@
 # - how to build an environment in TorchRL, including transforms (e.g. data
 #   normalization, frame concatenation, resizing and turning to grayscale)
 #   and parallel execution. Unlike what we did in the
-#   `DDPG tutorial <https://pytorch.org/rl/tutorials/coding_ddpg.html>`_, we
+#   :ref:`DDPG tutorial <coding_ddpg>`, we
 #   will normalize the pixels and not the state vector.
 # - how to design a :class:`~torchrl.modules.QValueActor` object, i.e. an actor
 #   that estimates the action values and picks up the action with the highest
@@ -46,7 +46,7 @@
 # - and finally how to evaluate your model.
 #
 # **Prerequisites**: We encourage you to get familiar with torchrl through the
-# `PPO tutorial <https://pytorch.org/rl/tutorials/coding_ppo.html>`_ first.
+# :ref:`PPO tutorial <coding_ppo>` first.
 #
 # DQN
 # ---
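The environment transforms and the :class:`~torchrl.modules.QValueActor` mentioned in the docstring above can be summarized in a short sketch. The environment name, transform hyper-parameters and network sizes below are illustrative placeholders, not the tutorial's exact configuration, for example::

    import torch
    from torchrl.envs import (
        CatFrames,
        Compose,
        GrayScale,
        ObservationNorm,
        Resize,
        ToTensorImage,
        TransformedEnv,
    )
    from torchrl.envs.libs.gym import GymEnv
    from torchrl.modules import ConvNet, MLP, QValueActor

    # Pixel-based environment with the transforms listed in the text; a
    # ParallelEnv could wrap the base env for parallel execution.
    env = TransformedEnv(
        GymEnv("CartPole-v1", from_pixels=True, pixels_only=True),
        Compose(
            ToTensorImage(),   # uint8 HWC images -> float CHW tensors in [0, 1]
            GrayScale(),
            Resize(84, 84),
            CatFrames(N=4, dim=-3, in_keys=["pixels"]),  # stack 4 frames on the channel dim
            # Pixel normalization; loc/scale are placeholders here, in practice
            # they would be computed from rollouts with ObservationNorm.init_stats.
            ObservationNorm(loc=0.0, scale=1.0, in_keys=["pixels"], standard_normal=True),
        ),
    )

    # QValueActor wraps a network that outputs one value per action and picks
    # the action with the highest estimated value.
    value_net = torch.nn.Sequential(
        ConvNet(num_cells=[32, 64, 64], kernel_sizes=[8, 4, 3], strides=[4, 2, 1]),
        MLP(out_features=env.action_spec.space.n, num_cells=[512]),
    )
    actor = QValueActor(value_net, in_keys=["pixels"], spec=env.action_spec)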
@@ -393,8 +393,8 @@ def get_replay_buffer(buffer_size, n_optim, batch_size):
 # Data collector
 # ~~~~~~~~~~~~~~
 #
-# As in `PPO <https://pytorch.org/rl/tutorials/coding_ppo.html>`_ and
-# `DDPG <https://pytorch.org/rl/tutorials/coding_ddpg.html>`_, we will be using
+# As in :ref:`PPO <coding_ppo>` and
+# :ref:`DDPG <coding_ddpg>`, we will be using
 # a data collector as a dataloader in the outer loop.
 #
 # We choose the following configuration: we will be running a series of
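As a rough sketch of the collector-as-dataloader pattern described in this hunk, reusing the hypothetical ``env`` and ``actor`` from the previous sketch (the frame counts are illustrative)::

    from torchrl.collectors import SyncDataCollector

    collector = SyncDataCollector(
        env,                    # or an env factory / ParallelEnv instance
        actor,                  # policy used to gather data
        frames_per_batch=128,   # size of each batch yielded by the collector
        total_frames=500_000,   # stop after this many environment frames
    )

    # The collector behaves like a dataloader in the outer training loop: each
    # iteration yields a TensorDict of frames_per_batch transitions. In the
    # tutorial the collector is handed to the Trainer; a manual loop would be:
    for data in collector:
        pass  # extend the replay buffer, run optimization steps, update the policy
    collector.shutdown()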
@@ -691,7 +691,7 @@ def get_loss_module(actor, gamma):
 # In this case, a location must be explicitly passed (). This method gives
 # more control over the location of the hook but it also requires more
 # understanding of the Trainer mechanism.
-# Check the `trainer documentation <https://pytorch.org/rl/reference/trainers.html>`_
+# Check the :ref:`trainer documentation <ref_trainers>`
 # for a detailed description of the trainer hooks.
 #
 trainer.register_op("post_optim", target_net_updater.step)
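The ``register_op`` call above attaches the target-network update after each optimization step. Any callable can be hooked at a named entry point of the training loop in the same way; the sketch below registers a hypothetical logging function at the ``"pre_steps_log"`` hook, assuming the default ``("next", "reward")`` key layout of collected batches::

    def log_batch_reward(batch):
        # Log hooks receive the freshly collected batch and may return a dict
        # of scalars for the trainer's logger.
        return {"batch_reward": batch.get(("next", "reward")).mean().item()}

    trainer.register_op("pre_steps_log", log_batch_reward)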
@@ -768,7 +768,7 @@ def print_csv_files_in_folder(folder_path):
 # - A prioritized replay buffer could also be used. This will give a
 #   higher priority to samples that have the worst value accuracy.
 #   Learn more on the
-#   `replay buffer section <https://pytorch.org/rl/reference/data.html#composable-replay-buffers>`_
+#   :ref:`replay buffer section <ref_buffers>`
 #   of the documentation.
 # - A distributional loss (see :class:`~torchrl.objectives.DistributionalDQNLoss`
 #   for more information).
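For the prioritized-replay suggestion in the first bullet above, a possible drop-in replacement for a plain buffer could look as follows. The buffer size, ``alpha`` and ``beta`` values are illustrative, and the priority update assumes the loss writes a ``"td_error"`` entry into the sampled tensordict, as :class:`~torchrl.objectives.DQNLoss` does::

    from torchrl.data import LazyMemmapStorage, TensorDictPrioritizedReplayBuffer

    replay_buffer = TensorDictPrioritizedReplayBuffer(
        alpha=0.7,                   # how strongly priorities skew sampling
        beta=0.5,                    # importance-sampling correction
        storage=LazyMemmapStorage(100_000),
        priority_key="td_error",     # entry used as the sampling priority
        batch_size=128,
    )

    # Inside the optimization loop, after the loss has been computed on a sample:
    #     sample = replay_buffer.sample()
    #     loss_vals = loss_module(sample)   # writes "td_error" into `sample`
    #     replay_buffer.update_tensordict_priority(sample)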