@@ -55,7 +55,23 @@ def _get_weights(self, class_idx, scores):
 
 
 class GradCAM(_GradCAM):
-    """Implements a class activation map extractor as described in https://arxiv.org/pdf/1710.11063.pdf
+    """Implements a class activation map extractor as described in `"Grad-CAM: Visual Explanations from Deep Networks
+    via Gradient-based Localization" <https://arxiv.org/pdf/1610.02391.pdf>`_.
+
+    The localization map is computed as follows:
+
+    .. math::
+        L^{(c)}_{Grad-CAM}(x, y) = ReLU\\Big(\\sum\\limits_k w_k^{(c)} A_k(x, y)\\Big)
+
+    with the coefficient :math:`w_k^{(c)}` being defined as:
+
+    .. math::
+        w_k^{(c)} = \\frac{1}{H \\cdot W} \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W
+        \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}
+
+    where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at
+    position :math:`(x, y)`,
+    and :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax.
 
     Example::
         >>> from torchvision.models import resnet18
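The Grad-CAM weights described in the docstring above reduce to a spatial average of the gradients followed by a ReLU-gated weighted sum of the activation maps. As a quick illustration only (not the extractor's internal code), here is a minimal sketch assuming `activations` and `grads` are the `(K, H, W)` tensors captured by forward/backward hooks on the last convolutional layer for a single image:

```python
import torch

def grad_cam_map(activations: torch.Tensor, grads: torch.Tensor) -> torch.Tensor:
    """Minimal sketch of the Grad-CAM formula from the docstring above.

    Both tensors are assumed to have shape (K, H, W) for a single image.
    """
    # w_k^(c): spatial average of dY^(c)/dA_k over (i, j)
    weights = grads.flatten(1).mean(-1)  # shape (K,)
    # L^(c) = ReLU(sum_k w_k^(c) * A_k)
    return torch.relu((weights[:, None, None] * activations).sum(0))  # shape (H, W)
```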
@@ -86,7 +102,32 @@ def _get_weights(self, class_idx, scores):
 
 
 class GradCAMpp(_GradCAM):
-    """Implements a class activation map extractor as described in https://arxiv.org/pdf/1710.11063.pdf
+    """Implements a class activation map extractor as described in `"Grad-CAM++: Improved Visual Explanations for
+    Deep Convolutional Networks" <https://arxiv.org/pdf/1710.11063.pdf>`_.
+
+    The localization map is computed as follows:
+
+    .. math::
+        L^{(c)}_{Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y)
+
+    with the coefficient :math:`w_k^{(c)}` being defined as:
+
+    .. math::
+        w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot
+        ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big)
+
+    where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at
+    position :math:`(x, y)`,
+    :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax,
+    and :math:`\\alpha_k^{(c)}(i, j)` is defined as:
+
+    .. math::
+        \\alpha_k^{(c)}(i, j) = \\frac{1}{\\sum\\limits_{i, j} \\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}}
+        = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot
+        \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot
+        \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}}
+
+    if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`.
 
     Example::
         >>> from torchvision.models import resnet18
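For Grad-CAM++, the coefficients :math:`\alpha_k^{(c)}(i, j)` above involve second- and third-order derivatives. A common shortcut (valid when the class score is passed through an exponential) is to approximate them with powers of the first-order gradient; the sketch below follows that approximation and only illustrates the formula, it is not the library's implementation. `activations` and `grads` again stand for `(K, H, W)` tensors captured by hooks:

```python
import torch

def grad_campp_map(activations: torch.Tensor, grads: torch.Tensor) -> torch.Tensor:
    """Illustrative sketch of the Grad-CAM++ weighting, with higher-order
    derivatives approximated by powers of the first-order gradient."""
    grad_2 = grads.pow(2)  # stands in for d2Y/dA^2
    grad_3 = grads.pow(3)  # stands in for d3Y/dA^3
    # alpha_k(i, j) = grad_2 / (2 * grad_2 + sum_{a,b} A_k(a, b) * grad_3)
    spatial_sum = activations.flatten(1).sum(-1)[:, None, None]  # shape (K, 1, 1)
    alpha = grad_2 / (2 * grad_2 + spatial_sum * grad_3 + 1e-8)  # epsilon avoids 0/0
    # w_k = sum_{i,j} alpha_k(i, j) * ReLU(dY/dA_k(i, j))
    weights = (alpha * torch.relu(grads)).flatten(1).sum(-1)  # shape (K,)
    # L^(c) = sum_k w_k * A_k (no final ReLU in the formula above)
    return (weights[:, None, None] * activations).sum(0)  # shape (H, W)
```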
@@ -122,8 +163,41 @@ def _get_weights(self, class_idx, scores):
 
 
 class SmoothGradCAMpp(_GradCAM):
-    """Implements a class activation map extractor as described in https://arxiv.org/pdf/1908.01224.pdf
-    with a personal correction to the paper (alpha coefficient numerator)
+    """Implements a class activation map extractor as described in `"Smooth Grad-CAM++: An Enhanced Inference Level
+    Visualization Technique for Deep Convolutional Neural Network Models" <https://arxiv.org/pdf/1908.01224.pdf>`_
+    with a personal correction to the paper (alpha coefficient numerator).
+
+    The localization map is computed as follows:
+
+    .. math::
+        L^{(c)}_{Smooth Grad-CAM++}(x, y) = \\sum\\limits_k w_k^{(c)} A_k(x, y)
+
+    with the coefficient :math:`w_k^{(c)}` being defined as:
+
+    .. math::
+        w_k^{(c)} = \\sum\\limits_{i=1}^H \\sum\\limits_{j=1}^W \\alpha_k^{(c)}(i, j) \\cdot
+        ReLU\\Big(\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)}\\Big)
+
+    where :math:`A_k(x, y)` is the activation of node :math:`k` in the last convolutional layer of the model at
+    position :math:`(x, y)`,
+    :math:`Y^{(c)}` is the model output score for class :math:`c` before softmax,
+    and :math:`\\alpha_k^{(c)}(i, j)` is defined as:
+
+    .. math::
+        \\alpha_k^{(c)}(i, j)
+        = \\frac{\\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2}}{2 \\cdot
+        \\frac{\\partial^2 Y^{(c)}}{(\\partial A_k(i,j))^2} + \\sum\\limits_{a,b} A_k (a,b) \\cdot
+        \\frac{\\partial^3 Y^{(c)}}{(\\partial A_k(i,j))^3}}
+        = \\frac{\\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j)}{
+        \\frac{2}{n} \\sum\\limits_{m=1}^n D^{(c, 2)}_k(i, j) + \\sum\\limits_{a,b} A_k (a,b) \\cdot
+        \\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 3)}_k(i, j)}
+
+    if :math:`\\frac{\\partial Y^{(c)}}{\\partial A_k(i, j)} = 1` else :math:`0`. Here :math:`D^{(c, p)}_k(i, j)`
+    refers to the p-th partial derivative of the class score of class :math:`c` with respect to the activation in
+    layer :math:`k` at position :math:`(i, j)`, and :math:`n` is the number of samples used to get the gradient estimate.
+
+    Please note the difference in the numerator of :math:`\\alpha_k^{(c)}(i, j)`,
+    which is actually :math:`\\frac{1}{n} \\sum\\limits_{m=1}^n D^{(c, 1)}_k(i,j)` in the paper.
 
     Example::
         >>> from torchvision.models import resnet18
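The Smooth Grad-CAM++ docstring replaces the higher-order terms with averaged estimates :math:`D^{(c, p)}_k(i, j)` taken over :math:`n` noise-perturbed copies of the input. Below is a hedged sketch of that smoothing step only; `model_backward` is a hypothetical helper standing in for the extractor's hook machinery, returning the activations and first-order gradients for one input, and the p-th order terms are approximated by powers of the first-order gradient:

```python
import torch

def smoothed_terms(x: torch.Tensor, n: int = 4, std: float = 0.3):
    """Estimate the averaged gradient terms used in the alpha expression above.

    D^(c, p) is approximated by the p-th power of the first-order gradient,
    averaged over n noisy samples (SmoothGrad-style).
    """
    acc2, acc3, acts = 0.0, 0.0, None
    for _ in range(n):
        noisy = x + std * torch.randn_like(x)    # perturb the input
        acts, grads = model_backward(noisy)      # hypothetical helper: (A_k, dY/dA_k)
        acc2 = acc2 + grads.pow(2)
        acc3 = acc3 + grads.pow(3)
    # Averages plugged into alpha_k^(c)(i, j) as defined in the docstring above
    return acts, acc2 / n, acc3 / n
```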