@@ -55,7 +55,23 @@ def _get_weights(self, class_idx, scores):
 
 
 class GradCAM(_GradCAM):
-    """Implements a class activation map extractor as described in https://arxiv.org/pdf/1710.11063.pdf
+    """Implements a class activation map extractor as described in `"Grad-CAM: Visual Explanations from Deep Networks
+    via Gradient-based Localization" <https://arxiv.org/pdf/1610.02391.pdf>`_.
+
+    The localization map is computed as follows:
+
+    .. math::
+        L^{(c)}_{Grad-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+
+    with the coefficient :math:`w_k^{(c)}` being defined as:
+
+    .. math::
+        w_k^{(c)} = \\frac{1}{H \\cdot W} \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W
+        \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}
+
+    where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at
+    position :math:`(x, y)`,
+    and :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax.
 
     Example::
         >>> from torchvision.models import resnet18
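The Grad-CAM weights described in the docstring above reduce to a spatial average of the gradients followed by a ReLU-gated weighted sum of the activation maps. As a quick illustration only (not the extractor's internal code), here is a minimal sketch assuming `activations` and `grads` are the `(K, H, W)` tensors captured by forward/backward hooks on the last convolutional layer for a single image:

```python
import torch

def grad_cam_map(activations: torch.Tensor, grads: torch.Tensor) -> torch.Tensor:
    """Minimal sketch of the Grad-CAM formula from the docstring above.

    Both tensors are assumed to have shape (K, H, W) for a single image.
    """
    # w_k^(c): spatial average of dY^(c)/dA_k over (i, j)
    weights = grads.flatten(1).mean(-1)  # shape (K,)
    # L^(c) = ReLU(sum_k w_k^(c) * A_k)
    return torch.relu((weights[:, None, None] * activations).sum(0))  # shape (H, W)
```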
@@ -86,7 +102,32 @@ def _get_weights(self, class_idx, scores):
 
 
 class GradCAMpp(_GradCAM):
-    """Implements a class activation map extractor as described in https://arxiv.org/pdf/1710.11063.pdf
+    """Implements a class activation map extractor as described in `"Grad-CAM++: Improved Visual Explanations for
+    Deep Convolutional Networks" <https://arxiv.org/pdf/1710.11063.pdf>`_.
+
+    The localization map is computed as follows:
+
+    .. math::
+        L^{(c)}_{Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y)
+
+    with the coefficient :math:`w_k^{(c)}` being defined as:
+
+    .. math::
+        w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot
+        ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big)
+
+    where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at
+    position :math:`(x, y)`,
+    :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax,
+    and :math:`\\alpha_k^{(c)}(i, j)` is defined as:
+
+    .. math::
+        \\alpha_k^{(c)}(i, j) = \\frac{1}{\\sum\\limits_{i, j} \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}}
+        = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot
+        \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot
+        \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}}
+
+    if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`.
 
     Example::
         >>> from torchvision.models import resnet18
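For Grad-CAM++, the coefficients :math:`\alpha_k^{(c)}(i, j)` above involve second- and third-order derivatives. A common shortcut (valid when the class score is passed through an exponential) is to approximate them with powers of the first-order gradient; the sketch below follows that approximation and only illustrates the formula, it is not the library's implementation. `activations` and `grads` again stand for `(K, H, W)` tensors captured by hooks:

```python
import torch

def grad_campp_map(activations: torch.Tensor, grads: torch.Tensor) -> torch.Tensor:
    """Illustrative sketch of the Grad-CAM++ weighting, with higher-order
    derivatives approximated by powers of the first-order gradient."""
    grad_2 = grads.pow(2)  # stands in for d2Y/dA^2
    grad_3 = grads.pow(3)  # stands in for d3Y/dA^3
    # alpha_k(i, j) = grad_2 / (2 * grad_2 + sum_{a,b} A_k(a, b) * grad_3)
    spatial_sum = activations.flatten(1).sum(-1)[:, None, None]  # shape (K, 1, 1)
    alpha = grad_2 / (2 * grad_2 + spatial_sum * grad_3 + 1e-8)  # epsilon avoids 0/0
    # w_k = sum_{i,j} alpha_k(i, j) * ReLU(dY/dA_k(i, j))
    weights = (alpha * torch.relu(grads)).flatten(1).sum(-1)  # shape (K,)
    # L^(c) = sum_k w_k * A_k (no final ReLU in the formula above)
    return (weights[:, None, None] * activations).sum(0)  # shape (H, W)
```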
@@ -122,8 +163,41 @@ def _get_weights(self, class_idx, scores):
 
 
 class SmoothGradCAMpp(_GradCAM):
-    """Implements a class activation map extractor as described in https://arxiv.org/pdf/1908.01224.pdf
-    with a personal correction to the paper (alpha coefficient numerator)
+    """Implements a class activation map extractor as described in `"Smooth Grad-CAM++: An Enhanced Inference Level
+    Visualization Technique for Deep Convolutional Neural Network Models" <https://arxiv.org/pdf/1908.01224.pdf>`_
+    with a personal correction to the paper (alpha coefficient numerator).
+
+    The localization map is computed as follows:
+
+    .. math::
+        L^{(c)}_{Smooth Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y)
+
+    with the coefficient :math:`w_k^{(c)}` being defined as:
+
+    .. math::
+        w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot
+        ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big)
+
+    where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at
+    position :math:`(x, y)`,
+    :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax,
+    and :math:`\\alpha_k^{(c)}(i, j)` is defined as:
+
+    .. math::
+        \\alpha_k^{(c)}(i, j)
+        = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot
+        \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot
+        \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}}
+        = \\frac{\\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j)}{
+        \\frac{2}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j) + \\sum\\limits_{a,b} A_k (a,b) \\cdot
+        \\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 3)}_k(i, j)}
+
+    if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`. Here :math:`D^{(c, p)}_k(i, j)`
+    refers to the p-th partial derivative of the class score of class :math:`c` with respect to the activation in
+    layer :math:`k` at position :math:`(i, j)`, and :math:`n` is the number of samples used to get the gradient estimate.
+
+    Please note the difference in the numerator of :math:`\\alpha_k^{(c)}(i, j)`,
+    which is actually :math:`\\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 1)}_k(i,j)` in the paper.
 
     Example::
         >>> from torchvision.models import resnet18
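The Smooth Grad-CAM++ docstring replaces the higher-order terms with averaged estimates :math:`D^{(c, p)}_k(i, j)` taken over :math:`n` noise-perturbed copies of the input. Below is a hedged sketch of that smoothing step only; `model_backward` is a hypothetical helper standing in for the extractor's hook machinery, returning the activations and first-order gradients for one input, and the p-th order terms are approximated by powers of the first-order gradient:

```python
import torch

def smoothed_terms(x: torch.Tensor, n: int = 4, std: float = 0.3):
    """Estimate the averaged gradient terms used in the alpha expression above.

    D^(c, p) is approximated by the p-th power of the first-order gradient,
    averaged over n noisy samples (SmoothGrad-style).
    """
    acc2, acc3, acts = 0.0, 0.0, None
    for _ in range(n):
        noisy = x + std * torch.randn_like(x)    # perturb the input
        acts, grads = model_backward(noisy)      # hypothetical helper: (A_k, dY/dA_k)
        acc2 = acc2 + grads.pow(2)
        acc3 = acc3 + grads.pow(3)
    # Averages plugged into alpha_k^(c)(i, j) as defined in the docstring above
    return acts, acc2 / n, acc3 / n
```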