@@ -35,7 +35,7 @@
 # - how to build an environment in TorchRL, including transforms (e.g. data
 #   normalization, frame concatenation, resizing and turning to grayscale)
 #   and parallel execution. Unlike what we did in the
-#   `DDPG tutorial <https://pytorch.org/rl/tutorials/coding_ddpg.html>`_, we
+#   :ref:`DDPG tutorial <coding_ddpg>`, we
 #   will normalize the pixels and not the state vector.
 # - how to design a :class:`~torchrl.modules.QValueActor` object, i.e. an actor
 #   that estimates the action values and picks up the action with the highest
@@ -46,7 +46,7 @@
 # - and finally how to evaluate your model.
 #
 # **Prerequisites**: We encourage you to get familiar with torchrl through the
-# `PPO tutorial <https://pytorch.org/rl/tutorials/coding_ppo.html>`_ first.
+# :ref:`PPO tutorial <coding_ppo>` first.
 #
 # DQN
 # ---
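The environment transforms and the :class:`~torchrl.modules.QValueActor` mentioned in the docstring above can be summarized in a short sketch. The environment name, transform hyper-parameters and network sizes below are illustrative placeholders, not the tutorial's exact configuration, for example::

    import torch
    from torchrl.envs import (
        CatFrames,
        Compose,
        GrayScale,
        ObservationNorm,
        Resize,
        ToTensorImage,
        TransformedEnv,
    )
    from torchrl.envs.libs.gym import GymEnv
    from torchrl.modules import ConvNet, MLP, QValueActor

    # Pixel-based environment with the transforms listed in the text; a
    # ParallelEnv could wrap the base env for parallel execution.
    env = TransformedEnv(
        GymEnv("CartPole-v1", from_pixels=True, pixels_only=True),
        Compose(
            ToTensorImage(),   # uint8 HWC images -> float CHW tensors in [0, 1]
            GrayScale(),
            Resize(84, 84),
            CatFrames(N=4, dim=-3, in_keys=["pixels"]),  # stack 4 frames on the channel dim
            # Pixel normalization; loc/scale are placeholders here, in practice
            # they would be computed from rollouts with ObservationNorm.init_stats.
            ObservationNorm(loc=0.0, scale=1.0, in_keys=["pixels"], standard_normal=True),
        ),
    )

    # QValueActor wraps a network that outputs one value per action and picks
    # the action with the highest estimated value.
    value_net = torch.nn.Sequential(
        ConvNet(num_cells=[32, 64, 64], kernel_sizes=[8, 4, 3], strides=[4, 2, 1]),
        MLP(out_features=env.action_spec.space.n, num_cells=[512]),
    )
    actor = QValueActor(value_net, in_keys=["pixels"], spec=env.action_spec)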
@@ -393,8 +393,8 @@ def get_replay_buffer(buffer_size, n_optim, batch_size):
 # Data collector
 # ~~~~~~~~~~~~~~
 #
-# As in `PPO <https://pytorch.org/rl/tutorials/coding_ppo.html>`_ and
-# `DDPG <https://pytorch.org/rl/tutorials/coding_ddpg.html>`_, we will be using
+# As in :ref:`PPO <coding_ppo>` and
+# :ref:`DDPG <coding_ddpg>`, we will be using
 # a data collector as a dataloader in the outer loop.
 #
 # We choose the following configuration: we will be running a series of
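As a rough sketch of the collector-as-dataloader pattern described in this hunk, reusing the hypothetical ``env`` and ``actor`` from the previous sketch (the frame counts are illustrative)::

    from torchrl.collectors import SyncDataCollector

    collector = SyncDataCollector(
        env,                    # or an env factory / ParallelEnv instance
        actor,                  # policy used to gather data
        frames_per_batch=128,   # size of each batch yielded by the collector
        total_frames=500_000,   # stop after this many environment frames
    )

    # The collector behaves like a dataloader in the outer training loop: each
    # iteration yields a TensorDict of frames_per_batch transitions. In the
    # tutorial the collector is handed to the Trainer; a manual loop would be:
    for data in collector:
        pass  # extend the replay buffer, run optimization steps, update the policy
    collector.shutdown()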
@@ -691,7 +691,7 @@ def get_loss_module(actor, gamma):
 # In this case, a location must be explicitly passed (). This method gives
 # more control over the location of the hook but it also requires more
 # understanding of the Trainer mechanism.
-# Check the `trainer documentation <https://pytorch.org/rl/reference/trainers.html>`_
+# Check the :ref:`trainer documentation <ref_trainers>`
 # for a detailed description of the trainer hooks.
 #
 trainer.register_op("post_optim", target_net_updater.step)
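The ``register_op`` call above attaches the target-network update after each optimization step. Any callable can be hooked at a named entry point of the training loop in the same way; the sketch below registers a hypothetical logging function at the ``"pre_steps_log"`` hook, assuming the default ``("next", "reward")`` key layout of collected batches::

    def log_batch_reward(batch):
        # Log hooks receive the freshly collected batch and may return a dict
        # of scalars for the trainer's logger.
        return {"batch_reward": batch.get(("next", "reward")).mean().item()}

    trainer.register_op("pre_steps_log", log_batch_reward)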
@@ -768,7 +768,7 @@ def print_csv_files_in_folder(folder_path):
 # - A prioritized replay buffer could also be used. This will give a
 #   higher priority to samples that have the worst value accuracy.
 #   Learn more on the
-#   `replay buffer section <https://pytorch.org/rl/reference/data.html#composable-replay-buffers>`_
+#   :ref:`replay buffer section <ref_buffers>`
 #   of the documentation.
 # - A distributional loss (see :class:`~torchrl.objectives.DistributionalDQNLoss`
 #   for more information).
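For the prioritized-replay suggestion in the first bullet above, a possible drop-in replacement for a plain buffer could look as follows. The buffer size, ``alpha`` and ``beta`` values are illustrative, and the priority update assumes the loss writes a ``"td_error"`` entry into the sampled tensordict, as :class:`~torchrl.objectives.DQNLoss` does::

    from torchrl.data import LazyMemmapStorage, TensorDictPrioritizedReplayBuffer

    replay_buffer = TensorDictPrioritizedReplayBuffer(
        alpha=0.7,                   # how strongly priorities skew sampling
        beta=0.5,                    # importance-sampling correction
        storage=LazyMemmapStorage(100_000),
        priority_key="td_error",     # entry used as the sampling priority
        batch_size=128,
    )

    # Inside the optimization loop, after the loss has been computed on a sample:
    #     sample = replay_buffer.sample()
    #     loss_vals = loss_module(sample)   # writes "td_error" into `sample`
    #     replay_buffer.update_tensordict_priority(sample)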