From 950daac2030cc8017c882dfb57b3b0a0895617c3 Mon Sep 17 00:00:00 2001
From: qbarthelemy <q.barthelemy@gmail.com>
Date: Fri, 3 Jan 2025 13:57:10 +0100
Subject: [PATCH] improve documentation rendering

---
 ot/gromov/_lowrank.py      |  14 ++--
 ot/gromov/_partial.py      |  14 ++--
 ot/lp/solver_1d.py         |  32 ++++-----
 ot/plot.py                 |  12 ++--
 ot/solvers.py              |  30 ++++----
 ot/unbalanced/_sinkhorn.py | 136 +++++++++++++++++++------------------
 ot/utils.py                |   4 +-
 7 files changed, 124 insertions(+), 118 deletions(-)

diff --git a/ot/gromov/_lowrank.py b/ot/gromov/_lowrank.py
index 82ff98da4..b7f5c49ed 100644
--- a/ot/gromov/_lowrank.py
+++ b/ot/gromov/_lowrank.py
@@ -92,14 +92,14 @@ def lowrank_gromov_wasserstein_samples(
 
     where :
 
-    - :math: `A` is the (`dim_a`, `dim_a`) square pairwise cost matrix of the source domain.
-    - :math: `B` is the (`dim_a`, `dim_a`) square pairwise cost matrix of the target domain.
-    - :math: `\mathcal{Q}_{A,B}` is quadratic objective function of the Gromov Wasserstein plan.
-    - :math: `Q` and `R` are the low-rank matrix decomposition of the Gromov-Wasserstein plan.
-    - :math: `g` is the weight vector for the low-rank decomposition of the Gromov-Wasserstein plan.
+    - :math:`A` is the (`dim_a`, `dim_a`) square pairwise cost matrix of the source domain.
+    - :math:`B` is the (`dim_b`, `dim_b`) square pairwise cost matrix of the target domain.
+    - :math:`\mathcal{Q}_{A,B}` is the quadratic objective function of the Gromov-Wasserstein plan.
+    - :math:`Q` and :math:`R` are the low-rank matrix decomposition of the Gromov-Wasserstein plan.
+    - :math:`g` is the weight vector for the low-rank decomposition of the Gromov-Wasserstein plan.
     - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (histograms, both sum to 1).
-    - :math: `r` is the rank of the Gromov-Wasserstein plan.
-    - :math: `\mathcal{C(a,b,r)}` are the low-rank couplings of the OT problem.
+    - :math:`r` is the rank of the Gromov-Wasserstein plan.
+    - :math:`\mathcal{C}(a,b,r)` are the low-rank couplings of the OT problem.
     - :math:`H((Q,R,g))` is the values of the three respective entropies evaluated for each term.
 
 
diff --git a/ot/gromov/_partial.py b/ot/gromov/_partial.py
index c6837f1d3..dd2a86aa0 100644
--- a/ot/gromov/_partial.py
+++ b/ot/gromov/_partial.py
@@ -1002,18 +1002,18 @@ def solve_partial_gromov_linesearch(
     Parameters
     ----------
 
-    G : array-like, shape(ns,nt)
+    G : array-like, shape (ns, nt)
         The transport map at a given iteration of the FW
-    deltaG : array-like (ns,nt)
+    deltaG : array-like, shape (ns, nt)
         Difference between the optimal map `Gc` found by linearization in the
         FW algorithm and the value at a given iteration
     cost_G : float
         Value of the cost at `G`
-    df_G : array-like (ns,nt)
+    df_G : array-like, shape (ns, nt)
         Gradient of the GW cost at `G`
-    df_Gc : array-like (ns,nt)
+    df_Gc : array-like, shape (ns, nt)
         Gradient of the GW cost at `Gc`
-    M : array-like (ns,nt)
+    M : array-like, shape (ns, nt)
         Cost matrix between the features.
     reg : float
         Regularization parameter.
@@ -1032,7 +1032,7 @@ def solve_partial_gromov_linesearch(
         nb of function call. Useless here
     cost_G : float
         The value of the cost for the next iteration
-    df_G : array-like (ns,nt)
+    df_G : array-like, shape (ns, nt)
         Updated gradient of the GW cost
 
     References
@@ -1173,7 +1173,7 @@ def entropic_partial_gromov_wasserstein(
 
     Returns
     -------
-    :math: `gamma` : (dim_a, dim_b) ndarray
+    gamma : ndarray, shape (dim_a, dim_b)
         Optimal transportation matrix for the given parameters
     log : dict
         log dictionary returned only if `log` is `True`
diff --git a/ot/lp/solver_1d.py b/ot/lp/solver_1d.py
index e8af20c3c..c308549f8 100644
--- a/ot/lp/solver_1d.py
+++ b/ot/lp/solver_1d.py
@@ -160,7 +160,7 @@ def emd_1d(
     where :
 
     - d is the metric
-    - x_a and x_b are the samples
+    - :math:`x_a` and :math:`x_b` are the samples
     - a and b are the sample weights
 
     This implementation only supports metrics
@@ -170,13 +170,13 @@ def emd_1d(
 
     Parameters
     ----------
-    x_a : (ns,) or (ns, 1) ndarray, float64
+    x_a : ndarray of float64, shape (ns,) or (ns, 1)
         Source dirac locations (on the real line)
-    x_b : (nt,) or (ns, 1) ndarray, float64
+    x_b : ndarray of float64, shape (nt,) or (nt, 1)
         Target dirac locations (on the real line)
-    a : (ns,) ndarray, float64, optional
+    a : ndarray of float64, shape (ns,), optional
         Source histogram (default is uniform weight)
-    b : (nt,) ndarray, float64, optional
+    b : ndarray of float64, shape (nt,), optional
         Target histogram (default is uniform weight)
     metric: str, optional (default='sqeuclidean')
         Metric to be used. Only works with either of the strings
@@ -184,7 +184,7 @@ def emd_1d(
     p: float, optional (default=1.0)
          The p-norm to apply for if metric='minkowski'
     dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
+        If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
         Otherwise returns a sparse representation using scipy's `coo_matrix`
         format. Due to implementation details, this function runs faster when
         `'sqeuclidean'`, `'minkowski'`, `'cityblock'`,  or `'euclidean'` metrics
@@ -198,7 +198,7 @@ def emd_1d(
 
     Returns
     -------
-    gamma: (ns, nt) ndarray
+    gamma : ndarray, shape (ns, nt)
         Optimal transportation matrix for the given parameters
     log: dict
         If input log is True, a dictionary containing the cost
@@ -318,7 +318,7 @@ def emd2_1d(
     where :
 
     - d is the metric
-    - x_a and x_b are the samples
+    - :math:`x_a` and :math:`x_b` are the samples
     - a and b are the sample weights
 
     This implementation only supports metrics
@@ -328,13 +328,13 @@ def emd2_1d(
 
     Parameters
     ----------
-    x_a : (ns,) or (ns, 1) ndarray, float64
+    x_a : ndarray of float64, shape (ns,) or (ns, 1)
         Source dirac locations (on the real line)
-    x_b : (nt,) or (ns, 1) ndarray, float64
+    x_b : ndarray of float64, shape (nt,) or (nt, 1)
         Target dirac locations (on the real line)
-    a : (ns,) ndarray, float64, optional
+    a : ndarray of float64, shape (ns,), optional
         Source histogram (default is uniform weight)
-    b : (nt,) ndarray, float64, optional
+    b : ndarray of float64, shape (nt,), optional
         Target histogram (default is uniform weight)
     metric: str, optional (default='sqeuclidean')
         Metric to be used. Only works with either of the strings
@@ -342,7 +342,7 @@ def emd2_1d(
     p: float, optional (default=1.0)
          The p-norm to apply for if metric='minkowski'
     dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
+        If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
         Otherwise returns a sparse representation using scipy's `coo_matrix`
         format. Only used if log is set to True. Due to implementation details,
         this function runs faster when dense is set to False.
@@ -405,9 +405,9 @@ def roll_cols(M, shifts):
 
     Parameters
     ----------
-    M : (nr, nc) ndarray
+    M : ndarray, shape (nr, nc)
         Matrix to shift
-    shifts: int or (nr,) ndarray
+    shifts : int or ndarray, shape (nr,)
 
     Returns
     -------
@@ -1046,7 +1046,7 @@ def semidiscrete_wasserstein2_unif_circle(u_values, u_weights=None):
 
     Parameters
     ----------
-    u_values: ndarray, shape (n, ...)
+    u_values : ndarray, shape (n, ...)
         Samples
     u_weights : ndarray, shape (n, ...), optional
         samples weights in the source domain
diff --git a/ot/plot.py b/ot/plot.py
index 88fbc0856..1505235c8 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -32,7 +32,7 @@ def plot1D_mat(
     r"""Plot matrix :math:`\mathbf{M}` with the source and target 1D distributions.
 
     Creates a subplot with the source distribution :math:`\mathbf{a}` and target
-    distribution :math:`\mathbf{b}`t.
+    distribution :math:`\mathbf{b}`.
     In 'yx' mode (default), the source is on the left and
     the target on the top, and in 'xy' mode, source on the bottom (upside
     down) and the target on the left.
@@ -69,8 +69,9 @@ def plot1D_mat(
     ax2 : target plot ax
     ax3 : coupling plot ax
 
-    .. seealso::
-        :func:`rescale_for_imshow_plot`
+    See Also
+    --------
+    :func:`rescale_for_imshow_plot`
     """
     assert plot_style in ["yx", "xy"], "plot_style should be 'yx' or 'xy'"
     na, nb = M.shape
@@ -188,8 +189,9 @@ def rescale_for_imshow_plot(x, y, n, m=None, a_y=None, b_y=None):
     yr : ndarray, shape (nx,)
         Rescaled y values (due to slicing, may have less elements than y)
 
-    .. seealso::
-        :func:`plot1D_mat`
+    See Also
+    --------
+    :func:`plot1D_mat`
 
     """
     # slice over the y values that are in the y range
diff --git a/ot/solvers.py b/ot/solvers.py
index 80f366354..ff513ed0d 100644
--- a/ot/solvers.py
+++ b/ot/solvers.py
@@ -79,7 +79,7 @@ def solve(
 
     Parameters
     ----------
-    M : array_like, shape (dim_a, dim_b)
+    M : array-like, shape (dim_a, dim_b)
         Loss matrix
     a : array-like, shape (dim_a,), optional
         Samples weights in the source domain (default is uniform)
@@ -88,10 +88,10 @@ def solve(
     reg : float, optional
         Regularization weight :math:`\lambda_r`, by default None (no reg., exact
         OT)
-    c : array-like (dim_a, dim_b), optional (default=None)
+    c : array-like, shape (dim_a, dim_b), optional (default=None)
         Reference measure for the regularization.
         If None, then use :math:`\mathbf{c} = \mathbf{a} \mathbf{b}^T`.
-        If :math:`\texttt{reg_type}='entropy'`, then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
+        If :math:`\texttt{reg_type}=`'entropy', then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
     reg_type : str, optional
         Type of regularization :math:`R`  either "KL", "L2", "entropy",
         by default "KL". a tuple of functions can be provided for general
@@ -116,9 +116,9 @@ def solve(
         Number of OMP threads for exact OT solver, by default 1
     max_iter : int, optional
         Maximum number of iterations, by default None (default values in each solvers)
-    plan_init : array_like, shape (dim_a, dim_b), optional
+    plan_init : array-like, shape (dim_a, dim_b), optional
         Initialization of the OT plan for iterative methods, by default None
-    potentials_init : (array_like(dim_a,),array_like(dim_b,)), optional
+    potentials_init : tuple of array-like, shapes (dim_a,) and (dim_b,), optional
         Initialization of the OT dual potentials for iterative methods, by default None
     tol : _type_, optional
         Tolerance for solution precision, by default None (default values in each solvers)
@@ -628,11 +628,11 @@ def solve_gromov(
 
     Parameters
     ----------
-    Ca : array_like, shape (dim_a, dim_a)
+    Ca : array-like, shape (dim_a, dim_a)
         Cost matrix in the source domain
-    Cb : array_like, shape (dim_b, dim_b)
+    Cb : array-like, shape (dim_b, dim_b)
         Cost matrix in the target domain
-    M : array_like, shape (dim_a, dim_b), optional
+    M : array-like, shape (dim_a, dim_b), optional
         Linear cost matrix for Fused Gromov-Wasserstein (default is None).
     a : array-like, shape (dim_a,), optional
         Samples weights in the source domain (default is uniform)
@@ -669,7 +669,7 @@ def solve_gromov(
     max_iter : int, optional
         Maximum number of iterations, by default None (default values in each
         solvers)
-    plan_init : array_like, shape (dim_a, dim_b), optional
+    plan_init : array-like, shape (dim_a, dim_b), optional
         Initialization of the OT plan for iterative methods, by default None
     tol : float, optional
         Tolerance for solution precision, by default None (default values in
@@ -1342,10 +1342,10 @@ def solve_sample(
     reg : float, optional
         Regularization weight :math:`\lambda_r`, by default None (no reg., exact
         OT)
-    c : array-like (dim_a, dim_b), optional (default=None)
+    c : array-like, shape (dim_a, dim_b), optional (default=None)
         Reference measure for the regularization.
         If None, then use :math:`\mathbf{c} = \mathbf{a} \mathbf{b}^T`.
-        If :math:`\texttt{reg_type}='entropy'`, then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
+        If :math:`\texttt{reg_type}=`'entropy', then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
     reg_type : str, optional
         Type of regularization :math:`R`  either "KL", "L2", "entropy", by default "KL"
     unbalanced : float or indexable object of length 1 or 2
@@ -1374,13 +1374,13 @@ def solve_sample(
         Number of OMP threads for exact OT solver, by default 1
     max_iter : int, optional
         Maximum number of iteration, by default None (default values in each solvers)
-    plan_init : array_like, shape (dim_a, dim_b), optional
+    plan_init : array-like, shape (dim_a, dim_b), optional
         Initialization of the OT plan for iterative methods, by default None
     rank : int, optional
         Rank of the OT matrix for lazy solers (method='factored'), by default 100
     scaling : float, optional
         Scaling factor for the epsilon scaling lazy solvers (method='geomloss'), by default 0.95
-    potentials_init : (array_like(dim_a,),array_like(dim_b,)), optional
+    potentials_init : tuple of array-like, shapes (dim_a,) and (dim_b,), optional
         Initialization of the OT dual potentials for iterative methods, by default None
     tol : _type_, optional
         Tolerance for solution precision, by default None (default values in each solvers)
@@ -1511,7 +1511,7 @@ def solve_sample(
     .. math::
         \min_{\mathbf{T}\geq 0} \quad \sum_{i,j} T_{i,j}M_{i,j} + \lambda_u U(\mathbf{T}\mathbf{1},\mathbf{a}) + \lambda_u U(\mathbf{T}^T\mathbf{1},\mathbf{b})
 
-        with  M_{i,j} = d(x_i,y_j)
+        \text{with} \ M_{i,j} = d(x_i,y_j)
 
     can be solved with the following code:
 
@@ -1530,7 +1530,7 @@ def solve_sample(
     .. math::
         \min_{\mathbf{T}\geq 0} \quad \sum_{i,j} T_{i,j}M_{i,j} + \lambda_r R(\mathbf{T}) + \lambda_u U(\mathbf{T}\mathbf{1},\mathbf{a}) + \lambda_u U(\mathbf{T}^T\mathbf{1},\mathbf{b})
 
-        with  M_{i,j} = d(x_i,y_j)
+        \text{with} \ M_{i,j} = d(x_i,y_j)
 
     can be solved with the following code:
 
diff --git a/ot/unbalanced/_sinkhorn.py b/ot/unbalanced/_sinkhorn.py
index 6ee23ea95..fbb2f8757 100644
--- a/ot/unbalanced/_sinkhorn.py
+++ b/ot/unbalanced/_sinkhorn.py
@@ -65,16 +65,16 @@ def sinkhorn_unbalanced(
 
     Parameters
     ----------
-    a : array-like (dim_a,)
+    a : array-like, shape (dim_a,)
         Unnormalized histogram of dimension `dim_a`
         If `a` is an empty list or array ([]),
         then `a` is set to uniform distribution.
-    b : array-like (dim_b,)
+    b : array-like, shape (dim_b,)
         One or multiple unnormalized histograms of dimension `dim_b`.
         If `b` is an empty list or array ([]),
         then `b` is set to uniform distribution.
         If many, compute all the OT costs :math:`(\mathbf{a}, \mathbf{b}_i)_i`
-    M : array-like (dim_a, dim_b)
+    M : array-like, shape (dim_a, dim_b)
         loss matrix
     reg : float
         Entropy regularization term > 0
@@ -93,15 +93,16 @@ def sinkhorn_unbalanced(
         'sinkhorn_reg_scaling', see those function for specific parameters
     reg_type : string, optional
         Regularizer term. Can take two values:
-        + Negative entropy: 'entropy':
-        :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
-        This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
-        + Kullback-Leibler divergence (default): 'kl':
-        :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
-    c : array-like (dim_a, dim_b), optional (default=None)
+
+        - Negative entropy: 'entropy':
+          :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
+          This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
+        - Kullback-Leibler divergence (default): 'kl':
+          :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
+    c : array-like, shape (dim_a, dim_b), optional (default=None)
         Reference measure for the regularization.
         If None, then use :math:`\mathbf{c} = \mathbf{a} \mathbf{b}^T`.
-        If :math:`\texttt{reg_type}='entropy'`, then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
+        If :math:`\texttt{reg_type}=`'entropy', then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
     warmstart: tuple of arrays, shape (dim_a, dim_b), optional
         Initialization of dual potentials. If provided, the dual potentials should be given
         (that is the logarithm of the `u`, `v` sinkhorn scaling vectors).
@@ -118,12 +119,12 @@ def sinkhorn_unbalanced(
     Returns
     -------
     if n_hists == 1:
-        - gamma : (dim_a, dim_b) array-like
+        - gamma : array-like, shape (dim_a, dim_b)
             Optimal transportation matrix for the given parameters
         - log : dict
             log dictionary returned only if `log` is `True`
     else:
-        - ot_distance : (n_hists,) array-like
+        - ot_distance : array-like, shape (n_hists,)
             the OT distance between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
         - log : dict
             log dictionary returned only if `log` is `True`
@@ -295,16 +296,16 @@ def sinkhorn_unbalanced2(
 
     Parameters
     ----------
-    a : array-like (dim_a,)
+    a : array-like, shape (dim_a,)
         Unnormalized histogram of dimension `dim_a`
         If `a` is an empty list or array ([]),
         then `a` is set to uniform distribution.
-    b : array-like (dim_b,)
+    b : array-like, shape (dim_b,)
         One or multiple unnormalized histograms of dimension `dim_b`.
         If `b` is an empty list or array ([]),
         then `b` is set to uniform distribution.
         If many, compute all the OT costs :math:`(\mathbf{a}, \mathbf{b}_i)_i`
-    M : array-like (dim_a, dim_b)
+    M : array-like, shape (dim_a, dim_b)
         loss matrix
     reg : float
         Entropy regularization term > 0
@@ -323,15 +324,16 @@ def sinkhorn_unbalanced2(
         'sinkhorn_reg_scaling', see those function for specific parameters
     reg_type : string, optional
         Regularizer term. Can take two values:
-        + Negative entropy: 'entropy':
-        :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
-        This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
-        + Kullback-Leibler divergence: 'kl':
-        :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
-    c : array-like (dim_a, dim_b), optional (default=None)
+
+        - Negative entropy: 'entropy':
+          :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
+          This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
+        - Kullback-Leibler divergence: 'kl':
+          :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
+    c : array-like, shape (dim_a, dim_b), optional (default=None)
         Reference measure for the regularization.
         If None, then use :math:`\mathbf{c} = \mathbf{a} \mathbf{b}^T`.
-        If :math:`\texttt{reg_type}='entropy'`, then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
+        If :math:`\texttt{reg_type}=`'entropy', then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
     warmstart: tuple of arrays, shape (dim_a, dim_b), optional
         Initialization of dual potentials. If provided, the dual potentials should be given
         (that is the logarithm of the u,v sinkhorn scaling vectors).
@@ -350,7 +352,7 @@ def sinkhorn_unbalanced2(
 
     Returns
     -------
-    ot_cost : (n_hists,) array-like
+    ot_cost : array-like, shape (n_hists,)
         the OT cost between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
     log : dict
         log dictionary returned only if `log` is `True`
@@ -604,16 +606,16 @@ def sinkhorn_knopp_unbalanced(
 
     Parameters
     ----------
-    a : array-like (dim_a,)
+    a : array-like, shape (dim_a,)
         Unnormalized histogram of dimension `dim_a`
         If `a` is an empty list or array ([]),
         then `a` is set to uniform distribution.
-    b : array-like (dim_b,)
+    b : array-like, shape (dim_b,)
         One or multiple unnormalized histograms of dimension `dim_b`.
         If `b` is an empty list or array ([]),
         then `b` is set to uniform distribution.
         If many, compute all the OT costs :math:`(\mathbf{a}, \mathbf{b}_i)_i`
-    M : array-like (dim_a, dim_b)
+    M : array-like, shape (dim_a, dim_b)
         loss matrix
     reg : float
         Entropy regularization term > 0
@@ -629,15 +631,16 @@ def sinkhorn_knopp_unbalanced(
         it must have the same backend as input arrays `(a, b, M)`.
     reg_type : string, optional
         Regularizer term. Can take two values:
-        + Negative entropy: 'entropy':
-        :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
-        This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
-        + Kullback-Leibler divergence: 'kl':
-        :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
-    c : array-like (dim_a, dim_b), optional (default=None)
+
+        - Negative entropy: 'entropy':
+          :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
+          This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
+        - Kullback-Leibler divergence: 'kl':
+          :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
+    c : array-like, shape (dim_a, dim_b), optional (default=None)
         Reference measure for the regularization.
         If None, then use :math:`\mathbf{c} = \mathbf{a} \mathbf{b}^T`.
-        If :math:`\texttt{reg_type}='entropy'`, then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
+        If :math:`\texttt{reg_type}=`'entropy', then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
     warmstart: tuple of arrays, shape (dim_a, dim_b), optional
         Initialization of dual potentials. If provided, the dual potentials should be given
         (that is the logarithm of the `u`, `v` sinkhorn scaling vectors).
@@ -654,12 +657,12 @@ def sinkhorn_knopp_unbalanced(
     Returns
     -------
     if n_hists == 1:
-        - gamma : (dim_a, dim_b) array-like
+        - gamma : array-like, shape (dim_a, dim_b)
             Optimal transportation matrix for the given parameters
         - log : dict
             log dictionary returned only if `log` is `True`
     else:
-        - ot_cost : (n_hists,) array-like
+        - ot_cost : array-like, shape (n_hists,)
             the OT cost between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
         - log : dict
             log dictionary returned only if `log` is `True`
@@ -857,16 +860,16 @@ def sinkhorn_stabilized_unbalanced(
 
     Parameters
     ----------
-    a : array-like (dim_a,)
+    a : array-like, shape (dim_a,)
         Unnormalized histogram of dimension `dim_a`
         If `a` is an empty list or array ([]),
         then `a` is set to uniform distribution.
-    b : array-like (dim_b,)
+    b : array-like, shape (dim_b,)
         One or multiple unnormalized histograms of dimension `dim_b`.
         If `b` is an empty list or array ([]),
         then `b` is set to uniform distribution.
         If many, compute all the OT costs :math:`(\mathbf{a}, \mathbf{b}_i)_i`
-    M : array-like (dim_a, dim_b)
+    M : array-like, shape (dim_a, dim_b)
         loss matrix
     reg : float
         Entropy regularization term > 0
@@ -885,15 +888,16 @@ def sinkhorn_stabilized_unbalanced(
         'sinkhorn_reg_scaling', see those function for specific parameters
     reg_type : string, optional
         Regularizer term. Can take two values:
-        + Negative entropy: 'entropy':
-        :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
-        This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
-        + Kullback-Leibler divergence: 'kl':
-        :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
-    c : array-like (dim_a, dim_b), optional (default=None)
+
+        - Negative entropy: 'entropy':
+          :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`.
+          This is equivalent (up to a constant) to :math:`\Omega(\gamma) = \text{KL}(\gamma, 1_{dim_a} 1_{dim_b}^T)`.
+        - Kullback-Leibler divergence: 'kl':
+          :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
+    c : array-like, shape (dim_a, dim_b), optional (default=None)
         Reference measure for the regularization.
         If None, then use :math:`\mathbf{c} = \mathbf{a} \mathbf{b}^T`.
-        If :math:`\texttt{reg_type}='entropy'`, then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
+        If :math:`\texttt{reg_type}=`'entropy', then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
     warmstart: tuple of arrays, shape (dim_a, dim_b), optional
         Initialization of dual potentials. If provided, the dual potentials should be given
         (that is the logarithm of the `u`, `v` sinkhorn scaling vectors).
@@ -916,12 +920,12 @@ def sinkhorn_stabilized_unbalanced(
     Returns
     -------
     if n_hists == 1:
-        - gamma : (dim_a, dim_b) array-like
+        - gamma : array-like, shape (dim_a, dim_b)
             Optimal transportation matrix for the given parameters
         - log : dict
             log dictionary returned only if `log` is `True`
     else:
-        - ot_cost : (n_hists,) array-like
+        - ot_cost : array-like, shape (n_hists,)
             the OT cost between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
         - log : dict
             log dictionary returned only if `log` is `True`
@@ -1151,12 +1155,12 @@ def sinkhorn_unbalanced_translation_invariant(
 
     Parameters
     ----------
-    a : array-like (dim_a,)
+    a : array-like, shape (dim_a,)
         Unnormalized histogram of dimension `dim_a`
-    b : array-like (dim_b,) or array-like (dim_b, n_hists)
+    b : array-like, shape (dim_b,) or (dim_b, n_hists)
         One or multiple unnormalized histograms of dimension `dim_b`
         If many, compute all the OT distances (a, b_i)
-    M : array-like (dim_a, dim_b)
+    M : array-like, shape (dim_a, dim_b)
         loss matrix
     reg : float
         Entropy regularization term > 0
@@ -1174,10 +1178,10 @@ def sinkhorn_unbalanced_translation_invariant(
         :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j} \log(\gamma_{i,j}) - \sum_{i,j} \gamma_{i,j}`, or
         'kl' (Kullback-Leibler)
         :math:`\Omega(\gamma) = \text{KL}(\gamma, \mathbf{a} \mathbf{b}^T)`.
-    c : array-like (dim_a, dim_b), optional (default=None)
+    c : array-like, shape (dim_a, dim_b), optional (default=None)
         Reference measure for the regularization.
         If None, then use :math:`\mathbf{c} = \mathbf{a} \mathbf{b}^T`.
-        If :math:`\texttt{reg_type}='entropy'`, then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
+        If :math:`\texttt{reg_type}=`'entropy', then :math:`\mathbf{c} = 1_{dim_a} 1_{dim_b}^T`.
     warmstart: tuple of arrays, shape (dim_a, dim_b), optional
         Initialization of dual potentials. If provided, the dual potentials should be given
         (that is the logarithm of the u,v sinkhorn scaling vectors).
@@ -1194,12 +1198,12 @@ def sinkhorn_unbalanced_translation_invariant(
     Returns
     -------
     if n_hists == 1:
-        - gamma : (dim_a, dim_b) array-like
+        - gamma : array-like, shape (dim_a, dim_b)
             Optimal transportation matrix for the given parameters
         - log : dict
             log dictionary returned only if `log` is `True`
     else:
-        - ot_distance : (n_hists,) array-like
+        - ot_distance : array-like, shape (n_hists,)
             the OT distance between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
         - log : dict
             log dictionary returned only if `log` is `True`
@@ -1423,9 +1427,9 @@ def barycenter_unbalanced_stabilized(
 
     Parameters
     ----------
-    A : array-like (dim, n_hists)
+    A : array-like, shape (dim, n_hists)
         `n_hists` training distributions :math:`\mathbf{a}_i` of dimension `dim`
-    M : array-like (dim, dim)
+    M : array-like, shape (dim, dim)
         ground metric matrix for OT.
     reg : float
         Entropy regularization term > 0
@@ -1433,7 +1437,7 @@ def barycenter_unbalanced_stabilized(
         Marginal relaxation term > 0
     tau : float
         Stabilization threshold for log domain absorption.
-    weights : array-like (n_hists,) optional
+    weights : array-like, shape (n_hists,), optional
         Weight of each distribution (barycentric coordinates)
         If None, uniform weights are used.
     numItermax : int, optional
@@ -1448,7 +1452,7 @@ def barycenter_unbalanced_stabilized(
 
     Returns
     -------
-    a : (dim,) array-like
+    a : array-like, shape (dim,)
         Unbalanced Wasserstein barycenter
     log : dict
         log dictionary return only if :math:`log==True` in parameters
@@ -1586,15 +1590,15 @@ def barycenter_unbalanced_sinkhorn(
 
     Parameters
     ----------
-    A : array-like (dim, n_hists)
+    A : array-like, shape (dim, n_hists)
         `n_hists` training distributions :math:`\mathbf{a}_i` of dimension `dim`
-    M : array-like (dim, dim)
+    M : array-like, shape (dim, dim)
         ground metric matrix for OT.
     reg : float
         Entropy regularization term > 0
     reg_m: float
         Marginal relaxation term > 0
-    weights : array-like (n_hists,) optional
+    weights : array-like, shape (n_hists,), optional
         Weight of each distribution (barycentric coordinates)
         If None, uniform weights are used.
     numItermax : int, optional
@@ -1609,7 +1613,7 @@ def barycenter_unbalanced_sinkhorn(
 
     Returns
     -------
-    a : (dim,) array-like
+    a : array-like, shape (dim,)
         Unbalanced Wasserstein barycenter
     log : dict
         log dictionary return only if :math:`log==True` in parameters
@@ -1731,15 +1735,15 @@ def barycenter_unbalanced(
 
     Parameters
     ----------
-    A : array-like (dim, n_hists)
+    A : array-like, shape (dim, n_hists)
         `n_hists` training distributions :math:`\mathbf{a}_i` of dimension `dim`
-    M : array-like (dim, dim)
+    M : array-like, shape (dim, dim)
         ground metric matrix for OT.
     reg : float
         Entropy regularization term > 0
     reg_m: float
         Marginal relaxation term > 0
-    weights : array-like (n_hists,) optional
+    weights : array-like, shape (n_hists,), optional
         Weight of each distribution (barycentric coordinates)
         If None, uniform weights are used.
     numItermax : int, optional
@@ -1754,7 +1758,7 @@ def barycenter_unbalanced(
 
     Returns
     -------
-    a : (dim,) array-like
+    a : array-like, shape (dim,)
         Unbalanced Wasserstein barycenter
     log : dict
         log dictionary return only if log==True in parameters
diff --git a/ot/utils.py b/ot/utils.py
index a2d328484..a3fc9297c 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -228,12 +228,12 @@ def unif(n, type_as=None):
     ----------
     n : int
         number of bins in the histogram
-    type_as : array_like
+    type_as : array-like
         array of the same type of the expected output (numpy/pytorch/jax)
 
     Returns
     -------
-    h : array_like (`n`,)
+    h : array-like, shape (n,)
         histogram of length `n` such that :math:`\forall i, \mathbf{h}_i = \frac{1}{n}`
     """
     if type_as is None: