From 59aef83328cbc1df07f6d234720837b8f0e05adc Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 22:43:34 +0200 Subject: [PATCH 01/72] Attempt to compute all base and grad-cam class operations using torch and not numpy :scientist: --- pytorch_grad_cam/base_cam.py | 32 ++++++++++++++++---------------- pytorch_grad_cam/grad_cam.py | 2 +- setup.py | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index 7ee192971..640cec851 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -36,7 +36,7 @@ def get_cam_weights(self, target_layers: List[torch.nn.Module], targets: List[torch.nn.Module], activations: torch.Tensor, - grads: torch.Tensor) -> np.ndarray: + grads: torch.Tensor) -> torch.Tensor: raise Exception("Not Implemented") def get_cam_image(self, @@ -45,7 +45,7 @@ def get_cam_image(self, targets: List[torch.nn.Module], activations: torch.Tensor, grads: torch.Tensor, - eigen_smooth: bool = False) -> np.ndarray: + eigen_smooth: bool = False) -> torch.Tensor: weights = self.get_cam_weights(input_tensor, target_layer, @@ -62,7 +62,7 @@ def get_cam_image(self, def forward(self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], - eigen_smooth: bool = False) -> np.ndarray: + eigen_smooth: bool = False) -> torch.Tensor: if self.cuda: input_tensor = input_tensor.cuda() @@ -73,7 +73,7 @@ def forward(self, outputs = self.activations_and_grads(input_tensor) if targets is None: - target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1) + target_categories = torch.argmax(outputs.data, axis=-1) targets = [ClassifierOutputTarget( category) for category in target_categories] @@ -106,10 +106,10 @@ def compute_cam_per_layer( self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], - eigen_smooth: bool) -> np.ndarray: - activations_list = [a.cpu().data.numpy() + eigen_smooth: bool) -> torch.Tensor: + activations_list = 
[a.data for a in self.activations_and_grads.activations] - grads_list = [g.cpu().data.numpy() + grads_list = [g.data for g in self.activations_and_grads.gradients] target_size = self.get_target_width_height(input_tensor) @@ -130,7 +130,7 @@ def compute_cam_per_layer( layer_activations, layer_grads, eigen_smooth) - cam = np.maximum(cam, 0) + cam = torch.maximum(cam, 0) scaled = scale_cam_image(cam, target_size) cam_per_target_layer.append(scaled[:, None, :]) @@ -138,16 +138,16 @@ def compute_cam_per_layer( def aggregate_multi_layers( self, - cam_per_target_layer: np.ndarray) -> np.ndarray: - cam_per_target_layer = np.concatenate(cam_per_target_layer, axis=1) - cam_per_target_layer = np.maximum(cam_per_target_layer, 0) - result = np.mean(cam_per_target_layer, axis=1) + cam_per_target_layer: torch.Tensor) -> torch.Tensor: + cam_per_target_layer = torch.concatenate(cam_per_target_layer, axis=1) + cam_per_target_layer = torch.maximum(cam_per_target_layer, 0) + result = torch.mean(cam_per_target_layer, axis=1) return scale_cam_image(result) def forward_augmentation_smoothing(self, input_tensor: torch.Tensor, targets: List[torch.nn.Module], - eigen_smooth: bool = False) -> np.ndarray: + eigen_smooth: bool = False) -> torch.Tensor: transforms = tta.Compose( [ tta.HorizontalFlip(), @@ -167,18 +167,18 @@ def forward_augmentation_smoothing(self, cam = transform.deaugment_mask(cam) # Back to numpy float32, HxW - cam = cam.numpy() + # cam = cam.numpy() cam = cam[:, 0, :, :] cams.append(cam) - cam = np.mean(np.float32(cams), axis=0) + cam = torch.mean(torch.float32(cams), axis=0) return cam def __call__(self, input_tensor: torch.Tensor, targets: List[torch.nn.Module] = None, aug_smooth: bool = False, - eigen_smooth: bool = False) -> np.ndarray: + eigen_smooth: bool = False) -> torch.Tensor: # Smooth the CAM result with test time augmentation if aug_smooth is True: diff --git a/pytorch_grad_cam/grad_cam.py b/pytorch_grad_cam/grad_cam.py index 025bf45dd..1b9c93b5f 100644 --- 
a/pytorch_grad_cam/grad_cam.py +++ b/pytorch_grad_cam/grad_cam.py @@ -19,4 +19,4 @@ def get_cam_weights(self, target_category, activations, grads): - return np.mean(grads, axis=(2, 3)) + return torch.mean(grads, axis=(2, 3)) diff --git a/setup.py b/setup.py index 1d8ace600..ea87b563d 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( name='grad-cam', - version='1.4.6', + version='1.4.7', author='Jacob Gildenblat', author_email='jacob.gildenblat@gmail.com', description='Many Class Activation Map methods implemented in Pytorch for classification, segmentation, object detection and more', From 8bf752483962624be24f7ae8e2ee2facbf0746be Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 22:54:48 +0200 Subject: [PATCH 02/72] Bump other version :cop: --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 203e6a636..dceb4f5bc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = grad-cam -version = 1.1.0 +version = 1.4.7 author = Jacob Gildenblat author_email = jacob.gildenblat@gmail.com description = Many Class Activation Map methods implemented in Pytorch. 
Including Grad-CAM, Grad-CAM++, Score-CAM, Ablation-CAM and XGrad-CAM @@ -16,4 +16,4 @@ classifiers = [options] packages = find: -python_requires = >=3.6 \ No newline at end of file +python_requires = >=3.6 From fa8c8d7e80b8ab93703eaadca1594529b6a2770c Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 22:57:42 +0200 Subject: [PATCH 03/72] Fix import to use torch over Numpy :cop: --- pytorch_grad_cam/grad_cam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_grad_cam/grad_cam.py b/pytorch_grad_cam/grad_cam.py index 1b9c93b5f..efb66e76e 100644 --- a/pytorch_grad_cam/grad_cam.py +++ b/pytorch_grad_cam/grad_cam.py @@ -1,4 +1,4 @@ -import numpy as np +import torch from pytorch_grad_cam.base_cam import BaseCAM From ce52619809fc4c186ca1feb57a23dd65c33c9e5a Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:02:49 +0200 Subject: [PATCH 04/72] Convert max pos 2 to a tensor :cop: --- pytorch_grad_cam/base_cam.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index 640cec851..d81adee50 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -130,7 +130,7 @@ def compute_cam_per_layer( layer_activations, layer_grads, eigen_smooth) - cam = torch.maximum(cam, 0) + cam = torch.maximum(cam, torch.tensor(0)) scaled = scale_cam_image(cam, target_size) cam_per_target_layer.append(scaled[:, None, :]) @@ -140,7 +140,7 @@ def aggregate_multi_layers( self, cam_per_target_layer: torch.Tensor) -> torch.Tensor: cam_per_target_layer = torch.concatenate(cam_per_target_layer, axis=1) - cam_per_target_layer = torch.maximum(cam_per_target_layer, 0) + cam_per_target_layer = torch.maximum(cam_per_target_layer, torch.tensor(0)) result = torch.mean(cam_per_target_layer, axis=1) return scale_cam_image(result) From 73f720bd5ab34d4d441cf293b4eca95eb3b3ff01 Mon Sep 17 00:00:00 2001 From: trex22 
<contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:07:09 +0200 Subject: [PATCH 05/72] Begin to find and migrate more numpy calls to torch calls. Also fix some minor bugs :cop: --- pytorch_grad_cam/base_cam.py | 4 ++-- pytorch_grad_cam/utils/image.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index d81adee50..b9f3c3e63 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -169,9 +169,9 @@ def forward_augmentation_smoothing(self, # Back to numpy float32, HxW # cam = cam.numpy() cam = cam[:, 0, :, :] - cams.append(cam) + cams.append(cam) # TODO: Handle this for torch tensors - cam = torch.mean(torch.float32(cams), axis=0) + cam = torch.mean(cams.to(torch.float32), axis=0) return cam def __call__(self, diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 34d92ba6f..57c4a4f18 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -160,12 +160,12 @@ def show_factorization_on_image(img: np.ndarray, def scale_cam_image(cam, target_size=None): result = [] for img in cam: - img = img - np.min(img) - img = img / (1e-7 + np.max(img)) + img = img - torch.min(img) + img = img / (1e-7 + torch.max(img)) if target_size is not None: - img = cv2.resize(img, target_size) + img = cv2.resize(img, target_size) # TODO: Change this to handle torch tensors via a convert result.append(img) - result = np.float32(result) + result = result.to(torch.float32) return result From f96e21fb54c11e94f43a1af2148821e5f46eb711 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:11:05 +0200 Subject: [PATCH 06/72] Make use of one strategy for resizing tensors over the cv2 call :cop: --- pytorch_grad_cam/utils/image.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 57c4a4f18..025db23be 100644 --- 
a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -4,6 +4,7 @@ import cv2 import numpy as np import torch +import torchvision.transforms.functional as F from torchvision.transforms import Compose, Normalize, ToTensor from typing import List, Dict import math @@ -163,7 +164,7 @@ def scale_cam_image(cam, target_size=None): img = img - torch.min(img) img = img / (1e-7 + torch.max(img)) if target_size is not None: - img = cv2.resize(img, target_size) # TODO: Change this to handle torch tensors via a convert + img = F.resize(img, target_size) # TODO: Investigate better resizing techniques - Keeping defaults for now result.append(img) result = result.to(torch.float32) From ef3dcf5f960cace552ff8bd43e43d6dc2e2f1524 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:16:14 +0200 Subject: [PATCH 07/72] Go back to CPU for cv2 resizing :cop: --- pytorch_grad_cam/utils/image.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 025db23be..9b22b638a 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -164,7 +164,16 @@ def scale_cam_image(cam, target_size=None): img = img - torch.min(img) img = img / (1e-7 + torch.max(img)) if target_size is not None: - img = F.resize(img, target_size) # TODO: Investigate better resizing techniques - Keeping defaults for now + # There seem to be many different ways to resize a torch tensor + # with varying results + # TODO: Investigate these + # For now going to convert to cpu numpy and back just to get + # the crude experiment working - and then begin to tune and refine + # Possible way: + # img = F.resize(img, target_size) # TODO: Investigate better resizing techniques - Keeping defaults for now + + # Convert to numpy + img = torch.tensor(cv2.resize(img.cpu().numpy(), target_size)) result.append(img) result = result.to(torch.float32) From 
bf9b9dbc6b7bc5aa5ef591e947696c267d6d3b00 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:19:50 +0200 Subject: [PATCH 08/72] Attempt to get the image scaling function working with torch :cop: --- pytorch_grad_cam/utils/image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 9b22b638a..918214b20 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -175,7 +175,7 @@ def scale_cam_image(cam, target_size=None): # Convert to numpy img = torch.tensor(cv2.resize(img.cpu().numpy(), target_size)) result.append(img) - result = result.to(torch.float32) + result = torch.tensor(np.array(result)).to(torch.float32) # TODO: Optimise this to use pre-initialised torch tensor return result From df9b0358fb7d0ab9ac5a81ffd9651c4a930281f4 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:30:23 +0200 Subject: [PATCH 09/72] Use torch tensor only on the list of numpy arrays :cop: --- pytorch_grad_cam/utils/image.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 918214b20..883150ae2 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -173,9 +173,10 @@ def scale_cam_image(cam, target_size=None): # img = F.resize(img, target_size) # TODO: Investigate better resizing techniques - Keeping defaults for now # Convert to numpy - img = torch.tensor(cv2.resize(img.cpu().numpy(), target_size)) + # img = torch.tensor(cv2.resize(img.cpu().numpy(), target_size)) + img = cv2.resize(img.cpu().numpy(), target_size) result.append(img) - result = torch.tensor(np.array(result)).to(torch.float32) # TODO: Optimise this to use pre-initialised torch tensor + result = torch.tensor(np.array(result).astype('float32')) # TODO: Optimise this to use pre-initialised torch tensor return result 
From fe6cb924fce1d9770d451baf572ae936602595e3 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:32:58 +0200 Subject: [PATCH 10/72] Use the correct torch function :cop: --- pytorch_grad_cam/base_cam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index b9f3c3e63..547f001e4 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -139,7 +139,7 @@ def compute_cam_per_layer( def aggregate_multi_layers( self, cam_per_target_layer: torch.Tensor) -> torch.Tensor: - cam_per_target_layer = torch.concatenate(cam_per_target_layer, axis=1) + cam_per_target_layer = torch.cat(cam_per_target_layer, axis=1) cam_per_target_layer = torch.maximum(cam_per_target_layer, torch.tensor(0)) result = torch.mean(cam_per_target_layer, axis=1) return scale_cam_image(result) From f58e88cd5c50c5508af31e4633cb852f5fd707d3 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:43:49 +0200 Subject: [PATCH 11/72] Attempt to fix torch resizing :cop: --- pytorch_grad_cam/utils/image.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 883150ae2..5d7681a44 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -4,8 +4,7 @@ import cv2 import numpy as np import torch -import torchvision.transforms.functional as F -from torchvision.transforms import Compose, Normalize, ToTensor +from torchvision.transforms import Compose, Normalize, ToTensor, Resize from typing import List, Dict import math @@ -159,26 +158,22 @@ def show_factorization_on_image(img: np.ndarray, def scale_cam_image(cam, target_size=None): - result = [] - for img in cam: + if target_size is not None: + result = torch.zeros([cam.shape[0], target_size[0], target_size[1]]) + else: + result = torch.zeros(cam.shape) + + 
for i in range(cam.shape[0]): + img = cam[i] img = img - torch.min(img) img = img / (1e-7 + torch.max(img)) + if target_size is not None: - # There seem to be many different ways to resize a torch tensor - # with varying results - # TODO: Investigate these - # For now going to convert to cpu numpy and back just to get - # the crude experiment working - and then begin to tune and refine - # Possible way: - # img = F.resize(img, target_size) # TODO: Investigate better resizing techniques - Keeping defaults for now - - # Convert to numpy - # img = torch.tensor(cv2.resize(img.cpu().numpy(), target_size)) - img = cv2.resize(img.cpu().numpy(), target_size) - result.append(img) - result = torch.tensor(np.array(result).astype('float32')) # TODO: Optimise this to use pre-initialised torch tensor + img = Resize(img, target_size) - return result + result[i] = img + + return result.to(torch.float32) def scale_accross_batch_and_channels(tensor, target_size): From 594bb0c771a594c4b36e738ba10f49a74e5683ec Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:52:36 +0200 Subject: [PATCH 12/72] Use a transpose for the experiment. Investigate a proper resize later :cop: --- pytorch_grad_cam/utils/image.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 5d7681a44..e571d7470 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -169,7 +169,9 @@ def scale_cam_image(cam, target_size=None): img = img / (1e-7 + torch.max(img)) if target_size is not None: - img = Resize(img, target_size) + # transform = Resize(target_size) + # img = Resize(size = target_size)(img) + img = img.T # Swap axes around for now. 
TODO: Investigate a better solution result[i] = img From f4739c2d17afa4f4ed10a92d7d0f479edcb0b84b Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:56:30 +0200 Subject: [PATCH 13/72] Remove the resize for now :cop: --- pytorch_grad_cam/utils/image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index e571d7470..2d7d416e0 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -168,10 +168,10 @@ def scale_cam_image(cam, target_size=None): img = img - torch.min(img) img = img / (1e-7 + torch.max(img)) - if target_size is not None: + # if target_size is not None: # transform = Resize(target_size) # img = Resize(size = target_size)(img) - img = img.T # Swap axes around for now. TODO: Investigate a better solution + result[i] = img From 995550e53348a9a4456978b4cecb48d3870b9682 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sat, 5 Nov 2022 23:59:28 +0200 Subject: [PATCH 14/72] Disable the scaling function from changing dimensions (for now) --- pytorch_grad_cam/utils/image.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 2d7d416e0..8b83deb56 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -158,10 +158,16 @@ def show_factorization_on_image(img: np.ndarray, def scale_cam_image(cam, target_size=None): - if target_size is not None: - result = torch.zeros([cam.shape[0], target_size[0], target_size[1]]) - else: - result = torch.zeros(cam.shape) + # Disabled the target_size scaling for now + # It appears to swap the axes dimensions and needs further work for the + # proof of concept + + # if target_size is not None: + # result = torch.zeros([cam.shape[0], target_size[0], target_size[1]]) + # else: + # result = torch.zeros(cam.shape) + + result = 
torch.zeros(cam.shape) for i in range(cam.shape[0]): img = cam[i] From 02b94515511a7b87ec0d1f30375925a0915f32d9 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:17:18 +0200 Subject: [PATCH 15/72] Create a simple benchmark :cop: --- benchmarks/torch_benchmark.py | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 benchmarks/torch_benchmark.py diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py new file mode 100644 index 000000000..8595c6a69 --- /dev/null +++ b/benchmarks/torch_benchmark.py @@ -0,0 +1,40 @@ +import argparse +import cv2 +import numpy as np +import torch + +from pytorch_grad_cam import GradCAM, \ + ScoreCAM, \ + GradCAMPlusPlus, \ + AblationCAM, \ + XGradCAM, \ + EigenCAM, \ + EigenGradCAM, \ + LayerCAM, \ + FullGrad + +import torchvision # You may need to install separately +from torchvision import models + +from torch.profiler import profile, record_function, ProfilerActivity + +model = models.resnet50() +random_tensor = torch.rand((256, 60, 3)) # TODO: Use real data? 
+ +# Test with numpy v1.4.6 (master) +# Test with torch v1.4.7 (wip) + +# Run on CPU with profiler (save the profile to print later) +dev = torch.device('cpu') +model.to(dev) + +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + +print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15)) +breakpoint() # For now as I write this + +# Run on CUDA with profiler (save the profile to print later) + +# Run on CPU x100 (get min, max, and avg times) + +# Run on CUDA x100 From cc557d1d2974a8cc44b50fd44f9a4de9c204f26e Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:20:08 +0200 Subject: [PATCH 16/72] Add in basic GradCAM :cop: --- benchmarks/torch_benchmark.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 8595c6a69..7634b4b1b 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -26,10 +26,12 @@ # Run on CPU with profiler (save the profile to print later) dev = torch.device('cpu') +use_cuda = False model.to(dev) +target_layers = [model.blocks[-1].norm1] with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - + GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15)) breakpoint() # For now as I write this From 71f51d017ef9f196880fc74d8d30d08a8d9d9ea0 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:23:55 +0200 Subject: [PATCH 17/72] Continue to write a simple GradCAM :cop: --- benchmarks/torch_benchmark.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 7634b4b1b..5aafc8b1a 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -21,17 +21,29 @@ model 
= models.resnet50() random_tensor = torch.rand((256, 60, 3)) # TODO: Use real data? +# TODOs: # Test with numpy v1.4.6 (master) # Test with torch v1.4.7 (wip) +# Test other CAMs besides GradCAM # Run on CPU with profiler (save the profile to print later) dev = torch.device('cpu') use_cuda = False + model.to(dev) -target_layers = [model.blocks[-1].norm1] +random_tensor.to(dev) + +# Some defaults I use in research code +target_layers = [model.fc] +batch_size = 8 +targets = None # [ClassifierOutputTarget(None)] +# Profile the CPU call with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) + cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) + cam_function.batch_size = batch_size + heatmap = cam_function(input_tensor=input_tensor, targets=targets) + print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15)) breakpoint() # For now as I write this From 06efbc4fb1decabf932ff4f8d1cea9548e1fe974 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:24:58 +0200 Subject: [PATCH 18/72] Properly name the variable :cop: --- benchmarks/torch_benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 5aafc8b1a..97683bce3 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -19,7 +19,7 @@ from torch.profiler import profile, record_function, ProfilerActivity model = models.resnet50() -random_tensor = torch.rand((256, 60, 3)) # TODO: Use real data? +input_tensor = torch.rand((256, 60, 3)) # TODO: Use real data? 
# TODOs: # Test with numpy v1.4.6 (master) @@ -31,7 +31,7 @@ use_cuda = False model.to(dev) -random_tensor.to(dev) +input_tensor.to(dev) # Some defaults I use in research code target_layers = [model.fc] From f2578d7d1395745c43937b3033f9dcfa385a6c1d Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:25:47 +0200 Subject: [PATCH 19/72] Fix the tensor stack :cop: --- benchmarks/torch_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 97683bce3..82c648549 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -19,7 +19,7 @@ from torch.profiler import profile, record_function, ProfilerActivity model = models.resnet50() -input_tensor = torch.rand((256, 60, 3)) # TODO: Use real data? +input_tensor = torch.rand((1, 256, 60, 3)) # TODO: Use real data? # TODOs: # Test with numpy v1.4.6 (master) From a4d2750f06bac92dd6283f9a77cdfceba1c21a11 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:28:04 +0200 Subject: [PATCH 20/72] Fix the dimensions needed for Resnet :cop: --- benchmarks/torch_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 82c648549..129d3157b 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -19,7 +19,7 @@ from torch.profiler import profile, record_function, ProfilerActivity model = models.resnet50() -input_tensor = torch.rand((1, 256, 60, 3)) # TODO: Use real data? +input_tensor = torch.rand((1, 3, 256, 60)) # TODO: Use real data? 
# TODOs: # Test with numpy v1.4.6 (master) From 705812289b20fd24116a3068395a598f2eb8d457 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:32:55 +0200 Subject: [PATCH 21/72] Change target layer :cop: --- benchmarks/torch_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 129d3157b..38b5334ac 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -34,7 +34,7 @@ input_tensor.to(dev) # Some defaults I use in research code -target_layers = [model.fc] +target_layers = [model.layer4] batch_size = 8 targets = None # [ClassifierOutputTarget(None)] From 1a77f74a5867021780b12d971ba627fe5a122b17 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:36:00 +0200 Subject: [PATCH 22/72] Add in cuda profiling :cop: --- benchmarks/torch_benchmark.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 38b5334ac..ea9abb57b 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -2,6 +2,7 @@ import cv2 import numpy as np import torch +import time from pytorch_grad_cam import GradCAM, \ ScoreCAM, \ @@ -44,11 +45,25 @@ cam_function.batch_size = batch_size heatmap = cam_function(input_tensor=input_tensor, targets=targets) -print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15)) -breakpoint() # For now as I write this +cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) +dev = torch.device('cuda') +use_cuda = True + +model.to(dev) +input_tensor.to(dev) + +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) + 
cam_function.batch_size = batch_size + heatmap = cam_function(input_tensor=input_tensor, targets=targets) + +cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + +breakpoint() # Run on CPU x100 (get min, max, and avg times) + # Run on CUDA x100 From f4b759ad12b66d4fcece2d38f9fdcb297d96c4ab Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:40:16 +0200 Subject: [PATCH 23/72] Create the large loop :cop: --- benchmarks/torch_benchmark.py | 43 +++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index ea9abb57b..4d4a6166e 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -19,13 +19,15 @@ from torch.profiler import profile, record_function, ProfilerActivity +number_of_inputs = 1000 model = models.resnet50() -input_tensor = torch.rand((1, 3, 256, 60)) # TODO: Use real data? +input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) # TODO: Use real data? # TODOs: # Test with numpy v1.4.6 (master) # Test with torch v1.4.7 (wip) # Test other CAMs besides GradCAM +# Nice output # Run on CPU with profiler (save the profile to print later) dev = torch.device('cpu') @@ -61,9 +63,42 @@ cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) -breakpoint() +# Run on CPU x1000 (get min, max, and avg times) +cpu_min_time = 10000000000000 +cpu_max_time = 0 +cpu_sum_of_times = 0 + +for i in range(number_of_inputs): + start_time = time.time() + + input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) # TODO: Use real data? 
+ + dev = torch.device('cpu') + use_cuda = False -# Run on CPU x100 (get min, max, and avg times) + model.to(dev) + input_tensor.to(dev) + # Some defaults I use in research code + target_layers = [model.layer4] + batch_size = 8 + targets = None # [ClassifierOutputTarget(None)] -# Run on CUDA x100 + cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) + cam_function.batch_size = batch_size + heatmap = cam_function(input_tensor=input_tensor, targets=targets) + + end_time = time.time() + time_difference = end_time - start_time + + cpu_sum_of_times += time_difference + + if time_difference > cpu_max_time: + cpu_max_time = time_difference + + if time_difference < cpu_min_time: + cpu_min_time = time_difference + +cpu_avg_time = cpu_sum_of_times / number_of_inputs +breakpoint() +# Run on CUDA x1000 From 15ca2becdb16c7c67f4c32abc174285d25fbc6aa Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:47:57 +0200 Subject: [PATCH 24/72] Refactor code to share some algorithm :cop: --- benchmarks/torch_benchmark.py | 109 +++++++++++++++------------------- 1 file changed, 48 insertions(+), 61 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 4d4a6166e..7d4c78a7a 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -19,86 +19,73 @@ from torch.profiler import profile, record_function, ProfilerActivity -number_of_inputs = 1000 -model = models.resnet50() -input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) # TODO: Use real data? 
+def run_gradcam(model, number_of_inputs, use_cuda=False): + min_time = 10000000000000 + max_time = 0 + sum_of_times = 0 -# TODOs: -# Test with numpy v1.4.6 (master) -# Test with torch v1.4.7 (wip) -# Test other CAMs besides GradCAM -# Nice output + dev = torch.device('cpu') + if use_cuda: + dev = torch.device('cuda:0') -# Run on CPU with profiler (save the profile to print later) -dev = torch.device('cpu') -use_cuda = False + # TODO: Use real data? + # TODO: Configurable dimensions? -model.to(dev) -input_tensor.to(dev) + # Some defaults I use in research code + input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) + batch_size = 8 + targets = None # [ClassifierOutputTarget(None)] -# Some defaults I use in research code -target_layers = [model.layer4] -batch_size = 8 -targets = None # [ClassifierOutputTarget(None)] + model.to(dev) + target_layers = [model.layer4] # Last CNN layer of ResNet50 -# Profile the CPU call -with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) cam_function.batch_size = batch_size - heatmap = cam_function(input_tensor=input_tensor, targets=targets) - -cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) - -# Run on CUDA with profiler (save the profile to print later) -dev = torch.device('cuda') -use_cuda = True -model.to(dev) -input_tensor.to(dev) + for i in range(number_of_inputs): + start_time = time.time() -with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) - cam_function.batch_size = batch_size - heatmap = cam_function(input_tensor=input_tensor, targets=targets) + # Actual code to benchmark + input_image = input_tensor[i].to(dev) + heatmap = cam_function(input_tensor=input_image, targets=targets) -cuda_profile = 
prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + end_time = time.time() + time_difference = end_time - start_time -# Run on CPU x1000 (get min, max, and avg times) -cpu_min_time = 10000000000000 -cpu_max_time = 0 -cpu_sum_of_times = 0 + sum_of_times += time_difference -for i in range(number_of_inputs): - start_time = time.time() + if time_difference > max_time: + max_time = time_difference - input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) # TODO: Use real data? + if time_difference < min_time: + min_time = time_difference - dev = torch.device('cpu') - use_cuda = False + avg_time = sum_of_times / number_of_inputs + return [min_time, max_time, avg_time] - model.to(dev) - input_tensor.to(dev) - - # Some defaults I use in research code - target_layers = [model.layer4] - batch_size = 8 - targets = None # [ClassifierOutputTarget(None)] +number_of_inputs = 1000 +model = models.resnet50() - cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) - cam_function.batch_size = batch_size - heatmap = cam_function(input_tensor=input_tensor, targets=targets) +# TODOs: +# Test with numpy v1.4.6 (master) +# Test with torch v1.4.7 (wip) +# Test other CAMs besides GradCAM +# Nice output - end_time = time.time() - time_difference = end_time - start_time +# Run on CPU with profiler (save the profile to print later) +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = run_gradcam(model, number_of_inputs, use_cuda=False) +cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) - cpu_sum_of_times += time_difference +# Run on CUDA with profiler (save the profile to print later) +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, 
use_cuda=True) +cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) - if time_difference > cpu_max_time: - cpu_max_time = time_difference +# Run on CPU x1000 (get min, max, and avg times) +cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, use_cuda=False) - if time_difference < cpu_min_time: - cpu_min_time = time_difference +# Run on CUDA x1000 +cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, use_cuda=True) -cpu_avg_time = cpu_sum_of_times / number_of_inputs breakpoint() -# Run on CUDA x1000 From fb1b50d3e280b47a1ff843a7872f6a13119531e0 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:49:21 +0200 Subject: [PATCH 25/72] Fix batching :cop: --- benchmarks/torch_benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 7d4c78a7a..7235b00cd 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -42,11 +42,11 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) cam_function.batch_size = batch_size - for i in range(number_of_inputs): + for i in range(0, number_of_inputs, batch_size): start_time = time.time() # Actual code to benchmark - input_image = input_tensor[i].to(dev) + input_image = input_tensor[i:i+batch_size].to(dev) heatmap = cam_function(input_tensor=input_image, targets=targets) end_time = time.time() From 62c17096270fc15b67e9e8f28d1021b9871aca87 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 11:53:49 +0200 Subject: [PATCH 26/72] Add in proper output :cop: --- benchmarks/torch_benchmark.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 7235b00cd..f78f28b1e 
100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -66,6 +66,8 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): number_of_inputs = 1000 model = models.resnet50() +print(f'Benchmarking GradCAM using {number_of_inputs} images for ResNet50...') + # TODOs: # Test with numpy v1.4.6 (master) # Test with torch v1.4.7 (wip) @@ -73,19 +75,46 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): # Nice output # Run on CPU with profiler (save the profile to print later) +print('Profile list of images on CPU...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = run_gradcam(model, number_of_inputs, use_cuda=False) cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) +print('Profile list of images on Cuda...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, use_cuda=True) cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CPU x1000 (get min, max, and avg times) +print('Run list of images on CPU...') cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, use_cuda=False) # Run on CUDA x1000 +print('Run list of images on Cuda...') cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, use_cuda=True) -breakpoint() +print('Complete!') + +print('==============================================================================\n\n') +print('CPU Profile:\n') +print(cpu_profile) + +print('==============================================================================\n\n') +print('Cuda Profile:\n') +print(cuda_profile) + 
+print('==============================================================================\n\n') +print('CPU Timing (No Profiler):\n') +print(f'Min time: {cpu_min_time}\n') +print(f'Max time: {cpu_max_time}\n') +print(f'Avg time: {cpu_avg_time}\n') + +print('==============================================================================\n\n') +print('Cuda Timing (No Profiler):\n') +print(f'Min time: {cuda_min_time}\n') +print(f'Max time: {cuda_max_time}\n') +print(f'Avg time: {cuda_avg_time}\n') + +print('==============================================================================\n\n') +print('Done!') From 045d200e651195ab7e3cf40e5b07550ef311f34d Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 12:00:49 +0200 Subject: [PATCH 27/72] Add in loading bar :cop: --- benchmarks/torch_benchmark.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index f78f28b1e..4aeef745f 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -3,6 +3,7 @@ import numpy as np import torch import time +import tqdm from pytorch_grad_cam import GradCAM, \ ScoreCAM, \ @@ -42,6 +43,8 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) cam_function.batch_size = batch_size + pbar = tqdm.tqdm(total=number_of_inputs) + for i in range(0, number_of_inputs, batch_size): start_time = time.time() @@ -60,6 +63,8 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): if time_difference < min_time: min_time = time_difference + pbar.update(batch_size) + avg_time = sum_of_times / number_of_inputs return [min_time, max_time, avg_time] From 20d7ebd5dc3d3ed8cf1ec8d881d1acfdfe836e94 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 12:01:02 +0200 Subject: [PATCH 28/72] Reduce to 100 images :cop: --- benchmarks/torch_benchmark.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 4aeef745f..39015f611 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -68,7 +68,7 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): avg_time = sum_of_times / number_of_inputs return [min_time, max_time, avg_time] -number_of_inputs = 1000 +number_of_inputs = 100 model = models.resnet50() print(f'Benchmarking GradCAM using {number_of_inputs} images for ResNet50...') From d9dbc85727fead2cdb55a63dfbec728474e3abed Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 21:34:16 +0200 Subject: [PATCH 29/72] Attempt using a bigger batchsize :cop: --- benchmarks/torch_benchmark.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 39015f611..3e054f63a 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -20,7 +20,7 @@ from torch.profiler import profile, record_function, ProfilerActivity -def run_gradcam(model, number_of_inputs, use_cuda=False): +def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): min_time = 10000000000000 max_time = 0 sum_of_times = 0 @@ -34,7 +34,6 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): # Some defaults I use in research code input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) - batch_size = 8 targets = None # [ClassifierOutputTarget(None)] model.to(dev) @@ -82,22 +81,22 @@ def run_gradcam(model, number_of_inputs, use_cuda=False): # Run on CPU with profiler (save the profile to print later) print('Profile list of images on CPU...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = run_gradcam(model, number_of_inputs, use_cuda=False) + cpu_profile_min_time, cpu_profile_max_time, 
cpu_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=False) cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) print('Profile list of images on Cuda...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, use_cuda=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=True) cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CPU x1000 (get min, max, and avg times) print('Run list of images on CPU...') -cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, use_cuda=False) +cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=False) # Run on CUDA x1000 print('Run list of images on Cuda...') -cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, use_cuda=True) +cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=True) print('Complete!') From 2756d71c5791afcedeace35e5ae767ada931e33d Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Sun, 6 Nov 2022 22:22:34 +0200 Subject: [PATCH 30/72] Bump batch_size :cop: --- benchmarks/torch_benchmark.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 3e054f63a..2b93f4e57 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -20,6 +20,11 @@ from torch.profiler import profile, record_function, ProfilerActivity +number_of_inputs = 1000 +model = models.resnet50() + +print(f'Benchmarking GradCAM using {number_of_inputs} 
images for ResNet50...') + def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): min_time = 10000000000000 max_time = 0 @@ -67,11 +72,6 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): avg_time = sum_of_times / number_of_inputs return [min_time, max_time, avg_time] -number_of_inputs = 100 -model = models.resnet50() - -print(f'Benchmarking GradCAM using {number_of_inputs} images for ResNet50...') - # TODOs: # Test with numpy v1.4.6 (master) # Test with torch v1.4.7 (wip) @@ -81,22 +81,22 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): # Run on CPU with profiler (save the profile to print later) print('Profile list of images on CPU...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=False) + cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) print('Profile list of images on Cuda...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CPU x1000 (get min, max, and avg times) print('Run list of images on CPU...') -cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=False) +cpu_min_time, cpu_max_time, cpu_avg_time = 
run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) # Run on CUDA x1000 print('Run list of images on Cuda...') -cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=32, use_cuda=True) +cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) print('Complete!') From 4200b99038f68db34c3377fedefe2b518a53f6c4 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Tue, 8 Nov 2022 18:11:09 +0200 Subject: [PATCH 31/72] Add workflow test :cop: --- benchmarks/torch_benchmark.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 2b93f4e57..fb2fb285e 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -25,7 +25,7 @@ print(f'Benchmarking GradCAM using {number_of_inputs} images for ResNet50...') -def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): +def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=False): min_time = 10000000000000 max_time = 0 sum_of_times = 0 @@ -56,6 +56,11 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): input_image = input_tensor[i:i+batch_size].to(dev) heatmap = cam_function(input_tensor=input_image, targets=targets) + if workflow_test: + # Create a binary map + threshold_plot = torch.where(heatmap > 0.5, 1, 0) + output_image = input_image * threshold_plot + end_time = time.time() time_difference = end_time - start_time @@ -90,6 +95,12 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) +# Run on CUDA with extra workflow +print('Profile list of images on Cuda and then 
run workflow...') +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + # Run on CPU x1000 (get min, max, and avg times) print('Run list of images on CPU...') cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) @@ -98,6 +109,10 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): print('Run list of images on Cuda...') cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) +# Run Workflow +print('Run list of images on Cuda with a workflow...') +workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + print('Complete!') print('==============================================================================\n\n') @@ -108,6 +123,10 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): print('Cuda Profile:\n') print(cuda_profile) +print('==============================================================================\n\n') +print('Workflow Cuda Profile:\n') +print(work_flow_cuda_profile) + print('==============================================================================\n\n') print('CPU Timing (No Profiler):\n') print(f'Min time: {cpu_min_time}\n') @@ -120,5 +139,11 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False): print(f'Max time: {cuda_max_time}\n') print(f'Avg time: {cuda_avg_time}\n') +print('==============================================================================\n\n') +print('Workflow Cuda Timing (No Profiler):\n') +print(f'Min time: {workflow_cuda_min_time}\n') 
+print(f'Max time: {workflow_cuda_max_time}\n') +print(f'Avg time: {workflow_cuda_avg_time}\n') + print('==============================================================================\n\n') print('Done!') From 1489ea3170f9ca00781d377e6effb1e9fb386a95 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Tue, 8 Nov 2022 18:14:04 +0200 Subject: [PATCH 32/72] Fix tensor issue in 1.4.6 :cop: --- benchmarks/torch_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index fb2fb285e..034e99fd0 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -58,7 +58,7 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ if workflow_test: # Create a binary map - threshold_plot = torch.where(heatmap > 0.5, 1, 0) + threshold_plot = torch.where(torch.tensor(heatmap) > 0.5, 1, 0) output_image = input_image * threshold_plot end_time = time.time() From 8833ee1df54c3218461dab0e1063296c8a1a4e5c Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Tue, 8 Nov 2022 18:17:24 +0200 Subject: [PATCH 33/72] Add inner loop :cop: --- benchmarks/torch_benchmark.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 034e99fd0..c8836713d 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -57,9 +57,10 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ heatmap = cam_function(input_tensor=input_image, targets=targets) if workflow_test: - # Create a binary map - threshold_plot = torch.where(torch.tensor(heatmap) > 0.5, 1, 0) - output_image = input_image * threshold_plot + for j in range(batch_size): + # Create a binary map + threshold_plot = torch.where(torch.tensor(heatmap[j]) > 0.5, 1, 0) + output_image = input_image * threshold_plot end_time = time.time() time_difference = 
end_time - start_time From d492c36262ec2fbf19b76dc30515ee23f3a3c405 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Tue, 8 Nov 2022 18:19:12 +0200 Subject: [PATCH 34/72] Force cuda device :cop: --- benchmarks/torch_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index c8836713d..0012eaf77 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -59,7 +59,7 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ if workflow_test: for j in range(batch_size): # Create a binary map - threshold_plot = torch.where(torch.tensor(heatmap[j]) > 0.5, 1, 0) + threshold_plot = torch.where(torch.tensor(heatmap[j]).to(torch.device('cuda:0')) > 0.5, 1, 0) output_image = input_image * threshold_plot end_time = time.time() From 5bbdf8f153fc338fbfc544e45a8c8f1e1efc56fc Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Tue, 8 Nov 2022 18:21:01 +0200 Subject: [PATCH 35/72] Fix loop range :cop: --- benchmarks/torch_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 0012eaf77..531c20703 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -57,7 +57,7 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ heatmap = cam_function(input_tensor=input_image, targets=targets) if workflow_test: - for j in range(batch_size): + for j in range(heatmap.shape[0]): # Create a binary map threshold_plot = torch.where(torch.tensor(heatmap[j]).to(torch.device('cuda:0')) > 0.5, 1, 0) output_image = input_image * threshold_plot From b8a8a4669cde9bd8302d32c6dc3cab3a769f307f Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 10 Nov 2022 15:23:16 +0200 Subject: [PATCH 36/72] Make use of the tensor resize transform :cop: --- 
pytorch_grad_cam/utils/image.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 8b83deb56..4aba1bf24 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -174,10 +174,8 @@ def scale_cam_image(cam, target_size=None): img = img - torch.min(img) img = img / (1e-7 + torch.max(img)) - # if target_size is not None: - # transform = Resize(target_size) - # img = Resize(size = target_size)(img) - + if target_size is not None: + img = img.resize_(target_size) result[i] = img From 77b19da5ee6e78ddc658b04162924e4865fa21b3 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 10 Nov 2022 15:31:24 +0200 Subject: [PATCH 37/72] Add in a different model to benchmark too :cop: --- benchmarks/torch_benchmark.py | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 531c20703..1eb3adfa6 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -15,6 +15,8 @@ LayerCAM, \ FullGrad +from torch import nn + import torchvision # You may need to install separately from torchvision import models @@ -25,6 +27,43 @@ print(f'Benchmarking GradCAM using {number_of_inputs} images for ResNet50...') +# Simple model to test +class SimpleCNN(nn.Module): + def __init__(self): + super(SimpleCNN, self).__init__() + + # Grad-CAM interface + self.target_layer = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) + self.target_layers = [self.target_layer] + + self.cnn_stack = nn.Sequential( + nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1), + nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(inplace=True), + self.target_layer, + nn.ReLU(inplace=True), + nn.MaxPool2d((2, 2)), + nn.Flatten(), + nn.Linear(122880, 10), + nn.Linear(10, 1) + ) + + def forward(self, x): + logits = self.cnn_stack(x) + logits = 
F.normalize(logits, dim = 0) + + return logits + +def xavier_uniform_init(layer): + if type(layer) == nn.Linear or type(layer) == nn.Conv2d: + gain = nn.init.calculate_gain('relu') + + if layer.bias is not None: + nn.init.zeros_(layer.bias) + + nn.init.xavier_uniform_(layer.weight, gain=gain) + +# Code to run benchmark def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=False): min_time = 10000000000000 max_time = 0 @@ -102,6 +141,12 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) +# Run on CUDA with extra workflow +print('Profile list of images on Cuda and then run workflow with a simple CNN...') +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +simple_work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + # Run on CPU x1000 (get min, max, and avg times) print('Run list of images on CPU...') cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) @@ -114,6 +159,11 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ print('Run list of images on Cuda with a workflow...') workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +print('Run list of images on Cuda with a workflow using simple CNN...') +model = SimpleCNN() +model.apply(xavier_uniform_init) # Randomise more weights +simple_workflow_cuda_min_time, 
simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + print('Complete!') print('==============================================================================\n\n') @@ -128,6 +178,10 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ print('Workflow Cuda Profile:\n') print(work_flow_cuda_profile) +print('==============================================================================\n\n') +print('Simple Workflow Cuda Profile:\n') +print(simple_work_flow_cuda_profile) + print('==============================================================================\n\n') print('CPU Timing (No Profiler):\n') print(f'Min time: {cpu_min_time}\n') @@ -146,5 +200,11 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ print(f'Max time: {workflow_cuda_max_time}\n') print(f'Avg time: {workflow_cuda_avg_time}\n') +print('==============================================================================\n\n') +print('Simple Workflow Cuda Timing (No Profiler):\n') +print(f'Min time: {workflow_cuda_min_time}\n') +print(f'Max time: {workflow_cuda_max_time}\n') +print(f'Avg time: {workflow_cuda_avg_time}\n') + print('==============================================================================\n\n') print('Done!') From 85f196badd5f1db6510192a2881ed0841bf24198 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 10 Nov 2022 15:36:35 +0200 Subject: [PATCH 38/72] handle the tensor list size :cop: --- pytorch_grad_cam/utils/image.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 4aba1bf24..9a6714d17 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -162,12 +162,10 @@ def scale_cam_image(cam, target_size=None): # It appears to swap the axes dimensions and needs further work for the 
# proof of concept - # if target_size is not None: - # result = torch.zeros([cam.shape[0], target_size[0], target_size[1]]) - # else: - # result = torch.zeros(cam.shape) - - result = torch.zeros(cam.shape) + if target_size is not None: + result = torch.zeros([cam.shape[0], target_size[0], target_size[1]]) + else: + result = torch.zeros(cam.shape) for i in range(cam.shape[0]): img = cam[i] From a56647d29334866b7dcbd2ab54110082a9e2e588 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 10 Nov 2022 15:43:45 +0200 Subject: [PATCH 39/72] Correct the dimensions in the resize :cop: --- benchmarks/torch_benchmark.py | 1 + pytorch_grad_cam/utils/image.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 1eb3adfa6..47c5db03e 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -35,6 +35,7 @@ def __init__(self): # Grad-CAM interface self.target_layer = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) self.target_layers = [self.target_layer] + self.layer4 = self.target_layer self.cnn_stack = nn.Sequential( nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1), diff --git a/pytorch_grad_cam/utils/image.py b/pytorch_grad_cam/utils/image.py index 9a6714d17..fd9a6f3a3 100644 --- a/pytorch_grad_cam/utils/image.py +++ b/pytorch_grad_cam/utils/image.py @@ -163,7 +163,7 @@ def scale_cam_image(cam, target_size=None): # proof of concept if target_size is not None: - result = torch.zeros([cam.shape[0], target_size[0], target_size[1]]) + result = torch.zeros([cam.shape[0], target_size[1], target_size[0]]) else: result = torch.zeros(cam.shape) @@ -173,7 +173,7 @@ def scale_cam_image(cam, target_size=None): img = img / (1e-7 + torch.max(img)) if target_size is not None: - img = img.resize_(target_size) + img = img.resize_(target_size).T result[i] = img From 922d2d31964a3852013cf75ff0d445737b0d6546 Mon Sep 17 00:00:00 2001 From: trex22 
<contact@jasonchalom.com> Date: Thu, 10 Nov 2022 15:47:48 +0200 Subject: [PATCH 40/72] Update using the correct models in the benchmark :cop: --- benchmarks/torch_benchmark.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 47c5db03e..7fbb77bc1 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -16,6 +16,7 @@ FullGrad from torch import nn +import torch.nn.functional as F import torchvision # You may need to install separately from torchvision import models @@ -144,10 +145,13 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ # Run on CUDA with extra workflow print('Profile list of images on Cuda and then run workflow with a simple CNN...') +model = SimpleCNN() +model.apply(xavier_uniform_init) # Randomise more weights with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) simple_work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) +model = models.resnet50() # Run on CPU x1000 (get min, max, and avg times) print('Run list of images on CPU...') cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) From 5dbc8bcd71cf819c859ac08bb0b26a683aa0e9f1 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 10 Nov 2022 15:50:54 +0200 Subject: [PATCH 41/72] Fix output :cop: --- benchmarks/torch_benchmark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 7fbb77bc1..15187eded 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -207,9 +207,9 @@ def run_gradcam(model, number_of_inputs, batch_size=8, 
use_cuda=False, workflow_ print('==============================================================================\n\n') print('Simple Workflow Cuda Timing (No Profiler):\n') -print(f'Min time: {workflow_cuda_min_time}\n') -print(f'Max time: {workflow_cuda_max_time}\n') -print(f'Avg time: {workflow_cuda_avg_time}\n') +print(f'Min time: {simple_workflow_cuda_min_time}\n') +print(f'Max time: {simple_workflow_cuda_max_time}\n') +print(f'Avg time: {simple_workflow_cuda_avg_time}\n') print('==============================================================================\n\n') print('Done!') From 20ab49fd79dc91cdb8601d89d704f59a7db319c7 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 13:56:01 +0200 Subject: [PATCH 42/72] Improve benchmarking and make a functions file to store reusable components :cop: --- benchmarks/benchmark_functions.py | 107 ++++++++++++++++++++++++++++ benchmarks/models_benchmark.py | 53 ++++++++++++++ benchmarks/torch_benchmark.py | 113 ++++-------------------------- 3 files changed, 172 insertions(+), 101 deletions(-) create mode 100644 benchmarks/benchmark_functions.py create mode 100644 benchmarks/models_benchmark.py diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py new file mode 100644 index 000000000..f9ab50d04 --- /dev/null +++ b/benchmarks/benchmark_functions.py @@ -0,0 +1,107 @@ +import argparse +import cv2 +import numpy as np +import torch +import time +import tqdm + +from pytorch_grad_cam import GradCAM + +from torch import nn +import torch.nn.functional as F + +import torchvision # You may need to install separately +from torchvision import models + +from torch.profiler import profile, record_function, ProfilerActivity + +# Simple model to test +class SimpleCNN(nn.Module): + def __init__(self): + super(SimpleCNN, self).__init__() + + # Grad-CAM interface + self.target_layer = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) + self.target_layers = [self.target_layer] + 
self.layer4 = self.target_layer + + self.cnn_stack = nn.Sequential( + nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1), + nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(inplace=True), + self.target_layer, + nn.ReLU(inplace=True), + nn.MaxPool2d((2, 2)), + nn.Flatten(), + nn.Linear(122880, 10), + nn.Linear(10, 1) + ) + + def forward(self, x): + logits = self.cnn_stack(x) + logits = F.normalize(logits, dim = 0) + + return logits + +def xavier_uniform_init(layer): + if type(layer) == nn.Linear or type(layer) == nn.Conv2d: + gain = nn.init.calculate_gain('relu') + + if layer.bias is not None: + nn.init.zeros_(layer.bias) + + nn.init.xavier_uniform_(layer.weight, gain=gain) + +# Code to run benchmark +def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False): + min_time = 10000000000000 + max_time = 0 + sum_of_times = 0 + + dev = torch.device('cpu') + if use_cuda: + dev = torch.device('cuda:0') + + # TODO: Use real data? + # TODO: Configurable dimensions? 
+ + # Some defaults I use in research code + input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) + targets = None # [ClassifierOutputTarget(None)] + + model.to(dev) + target_layers = [model.layer4] # Last CNN layer of ResNet50 + + cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) + cam_function.batch_size = batch_size + + pbar = tqdm.tqdm(total=number_of_inputs) + + for i in range(0, number_of_inputs, batch_size): + start_time = time.time() + + # Actual code to benchmark + input_image = input_tensor[i:i+batch_size].to(dev) + heatmap = cam_function(input_tensor=input_image, targets=targets) + + if workflow_test: + for j in range(heatmap.shape[0]): + # Create a binary map + threshold_plot = torch.where(torch.tensor(heatmap[j]).to(torch.device('cuda:0')) > 0.5, 1, 0) + output_image = input_image * threshold_plot + + end_time = time.time() + time_difference = end_time - start_time + + sum_of_times += time_difference + + if time_difference > max_time: + max_time = time_difference + + if time_difference < min_time: + min_time = time_difference + + pbar.update(batch_size) + + avg_time = sum_of_times / number_of_inputs + return [min_time, max_time, avg_time] diff --git a/benchmarks/models_benchmark.py b/benchmarks/models_benchmark.py new file mode 100644 index 000000000..561af7f22 --- /dev/null +++ b/benchmarks/models_benchmark.py @@ -0,0 +1,53 @@ +import argparse +import cv2 +import numpy as np +import torch +import time +import tqdm + +from pytorch_grad_cam import GradCAM + +from torch import nn +import torch.nn.functional as F + +import torchvision # You may need to install separately +from torchvision import models + +from torch.profiler import profile, record_function, ProfilerActivity + +import benchmark_functions + +number_of_inputs = 1000 + +print(f'Benchmarking GradCAM using {number_of_inputs} images for multiple models...') + +models_to_benchmark = [ + ["SimpleCNN", benchmark_functions.SimpleCNN()], + ["resnet18", 
models.resnet18()], + ["resnet34", models.resnet34()], + ["resnet50", models.resnet50()], + ["alexnet", models.alexnet()], + ["vgg16", models.vgg16()], + ["googlenet", models.googlenet()], + ["mobilenet_v2", models.mobilenet_v2()], + ["densenet161", models.densenet161()] +] + +for model_name, model in tqdm.tqdm(models_to_benchmark): + print('==============================================================================\n\n') + print(f'Simple Workflow for model #{model_name}:\n') + + model.apply(xavier_uniform_init) # Randomise more weights + cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True) + cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True) + + print(f'Cuda Min time: {cuda_min_time}\n') + print(f'Cuda Max time: {cuda_max_time}\n') + print(f'Cuda Avg time: {cuda_avg_time}\n\n') + print(f'CPU Min time: {cpu_min_time}\n') + print(f'CPU Max time: {cpu_max_time}\n') + print(f'CPU Avg time: {cpu_avg_time}\n') + + +print('==============================================================================\n\n') +print('Done!') diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 15187eded..1528a1fbf 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -23,102 +23,13 @@ from torch.profiler import profile, record_function, ProfilerActivity +import benchmark_functions + number_of_inputs = 1000 model = models.resnet50() print(f'Benchmarking GradCAM using {number_of_inputs} images for ResNet50...') -# Simple model to test -class SimpleCNN(nn.Module): - def __init__(self): - super(SimpleCNN, self).__init__() - - # Grad-CAM interface - self.target_layer = nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1) - self.target_layers = [self.target_layer] - self.layer4 = self.target_layer - - self.cnn_stack = nn.Sequential( - 
nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1), - nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), - nn.ReLU(inplace=True), - self.target_layer, - nn.ReLU(inplace=True), - nn.MaxPool2d((2, 2)), - nn.Flatten(), - nn.Linear(122880, 10), - nn.Linear(10, 1) - ) - - def forward(self, x): - logits = self.cnn_stack(x) - logits = F.normalize(logits, dim = 0) - - return logits - -def xavier_uniform_init(layer): - if type(layer) == nn.Linear or type(layer) == nn.Conv2d: - gain = nn.init.calculate_gain('relu') - - if layer.bias is not None: - nn.init.zeros_(layer.bias) - - nn.init.xavier_uniform_(layer.weight, gain=gain) - -# Code to run benchmark -def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=False): - min_time = 10000000000000 - max_time = 0 - sum_of_times = 0 - - dev = torch.device('cpu') - if use_cuda: - dev = torch.device('cuda:0') - - # TODO: Use real data? - # TODO: Configurable dimensions? - - # Some defaults I use in research code - input_tensor = torch.rand((number_of_inputs, 3, 256, 60)) - targets = None # [ClassifierOutputTarget(None)] - - model.to(dev) - target_layers = [model.layer4] # Last CNN layer of ResNet50 - - cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) - cam_function.batch_size = batch_size - - pbar = tqdm.tqdm(total=number_of_inputs) - - for i in range(0, number_of_inputs, batch_size): - start_time = time.time() - - # Actual code to benchmark - input_image = input_tensor[i:i+batch_size].to(dev) - heatmap = cam_function(input_tensor=input_image, targets=targets) - - if workflow_test: - for j in range(heatmap.shape[0]): - # Create a binary map - threshold_plot = torch.where(torch.tensor(heatmap[j]).to(torch.device('cuda:0')) > 0.5, 1, 0) - output_image = input_image * threshold_plot - - end_time = time.time() - time_difference = end_time - start_time - - sum_of_times += time_difference - - if time_difference > max_time: - max_time = time_difference - - if 
time_difference < min_time: - min_time = time_difference - - pbar.update(batch_size) - - avg_time = sum_of_times / number_of_inputs - return [min_time, max_time, avg_time] - # TODOs: # Test with numpy v1.4.6 (master) # Test with torch v1.4.7 (wip) @@ -128,46 +39,46 @@ def run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_ # Run on CPU with profiler (save the profile to print later) print('Profile list of images on CPU...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) + cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) print('Profile list of images on Cuda...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with extra workflow print('Profile list of images on Cuda and then run workflow...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, 
number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with extra workflow print('Profile list of images on Cuda and then run workflow with a simple CNN...') -model = SimpleCNN() +model = benchmark_functions.SimpleCNN() model.apply(xavier_uniform_init) # Randomise more weights with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) simple_work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) model = models.resnet50() # Run on CPU x1000 (get min, max, and avg times) print('Run list of images on CPU...') -cpu_min_time, cpu_max_time, cpu_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) +cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) # Run on CUDA x1000 print('Run list of images on Cuda...') -cuda_min_time, cuda_max_time, cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) +cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) # Run Workflow print('Run list of images on Cuda with a workflow...') -workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, 
workflow_test=True) print('Run list of images on Cuda with a workflow using simple CNN...') -model = SimpleCNN() +model = benchmark_functions.SimpleCNN() model.apply(xavier_uniform_init) # Randomise more weights -simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time = run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) print('Complete!') From 3eceb8439bd94caff7491bb399d33639e05f27f1 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 13:59:36 +0200 Subject: [PATCH 43/72] Make use of shared functions :cop: --- benchmarks/models_benchmark.py | 2 +- benchmarks/torch_benchmark.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/models_benchmark.py b/benchmarks/models_benchmark.py index 561af7f22..a97887f26 100644 --- a/benchmarks/models_benchmark.py +++ b/benchmarks/models_benchmark.py @@ -37,7 +37,7 @@ print('==============================================================================\n\n') print(f'Simple Workflow for model #{model_name}:\n') - model.apply(xavier_uniform_init) # Randomise more weights + model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True) cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True) diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index 1528a1fbf..a7c58ffbf 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -57,7 +57,7 @@ # Run on CUDA with extra workflow print('Profile list of 
images on Cuda and then run workflow with a simple CNN...') model = benchmark_functions.SimpleCNN() -model.apply(xavier_uniform_init) # Randomise more weights +model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) simple_work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) @@ -77,7 +77,7 @@ print('Run list of images on Cuda with a workflow using simple CNN...') model = benchmark_functions.SimpleCNN() -model.apply(xavier_uniform_init) # Randomise more weights +model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) print('Complete!') From 901391e359debd443087efe0136d15bb8ffcc3a6 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 14:01:20 +0200 Subject: [PATCH 44/72] Attempt to fix device memory issues :cop: --- benchmarks/benchmark_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index f9ab50d04..2b1f4997d 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -87,7 +87,7 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ if workflow_test: for j in range(heatmap.shape[0]): # Create a binary map - threshold_plot = torch.where(torch.tensor(heatmap[j]).to(torch.device('cuda:0')) > 0.5, 1, 0) + threshold_plot = torch.where(torch.tensor(heatmap[j]).to(torch.device('cuda:0')) > 0.5, 1, 0).to(dev) output_image = 
input_image * threshold_plot end_time = time.time() From 2748c5ca361ce846953f366b15f620df69419cec Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 14:18:10 +0200 Subject: [PATCH 45/72] Select the last CNN model as the GradCAM taregt layer :scientist: --- benchmarks/benchmark_functions.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 2b1f4997d..2c9f7f60c 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -36,6 +36,7 @@ def __init__(self): nn.Linear(122880, 10), nn.Linear(10, 1) ) + self.features = slef.cnn_stack def forward(self, x): logits = self.cnn_stack(x) @@ -52,6 +53,13 @@ def xavier_uniform_init(layer): nn.init.xavier_uniform_(layer.weight, gain=gain) +def last_cnn_layer(features): + for feature in features: + if isinstance(feature, nn.Conv2d): + return feature + + return None + # Code to run benchmark def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False): min_time = 10000000000000 @@ -70,7 +78,7 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ targets = None # [ClassifierOutputTarget(None)] model.to(dev) - target_layers = [model.layer4] # Last CNN layer of ResNet50 + target_layers = [last_cnn_layer(model.features)] # Last CNN layer of ResNet50 cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) cam_function.batch_size = batch_size From b77aa5b0fcd372373f0c40d51adbcfe5b429bd04 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 14:18:43 +0200 Subject: [PATCH 46/72] Fix spelling miskate :cop: --- benchmarks/benchmark_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 2c9f7f60c..776a4c990 100644 --- a/benchmarks/benchmark_functions.py +++ 
b/benchmarks/benchmark_functions.py @@ -36,7 +36,7 @@ def __init__(self): nn.Linear(122880, 10), nn.Linear(10, 1) ) - self.features = slef.cnn_stack + self.features = self.cnn_stack def forward(self, x): logits = self.cnn_stack(x) From 65f1b1fbd8d6493d5daed0ddc1031009abf7ddfa Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 14:25:40 +0200 Subject: [PATCH 47/72] Attempt another way to iterate through model params :cop: --- benchmarks/benchmark_functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 776a4c990..097e65423 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -53,9 +53,9 @@ def xavier_uniform_init(layer): nn.init.xavier_uniform_(layer.weight, gain=gain) -def last_cnn_layer(features): - for feature in features: - if isinstance(feature, nn.Conv2d): +def last_cnn_layer(model): + for name, param in model.named_parameters(): + if isinstance(param, nn.Conv2d): return feature return None @@ -78,7 +78,7 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ targets = None # [ClassifierOutputTarget(None)] model.to(dev) - target_layers = [last_cnn_layer(model.features)] # Last CNN layer of ResNet50 + target_layers = [last_cnn_layer(model)] # Last CNN layer of ResNet50 cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) cam_function.batch_size = batch_size From 9c274bec8f7a462314fbfad9b454a98ebdcc27dc Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 21:59:46 +0200 Subject: [PATCH 48/72] Handle multiple models :cop: --- benchmarks/benchmark_functions.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 097e65423..30855ce4b 100644 --- a/benchmarks/benchmark_functions.py +++ 
b/benchmarks/benchmark_functions.py @@ -54,7 +54,13 @@ def xavier_uniform_init(layer): nn.init.xavier_uniform_(layer.weight, gain=gain) def last_cnn_layer(model): - for name, param in model.named_parameters(): + if hasattr(model, 'layer4'): + return model.layer4 + + if hasattr(model, 'conv3'): + return model.conv3 + + for param in model.features: if isinstance(param, nn.Conv2d): return feature From eaaf0a945e46181cbcbb4c92e712c899c899e992 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 22:03:26 +0200 Subject: [PATCH 49/72] Fix feature bug :cop: --- benchmarks/benchmark_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 30855ce4b..5804bbbe4 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -60,8 +60,8 @@ def last_cnn_layer(model): if hasattr(model, 'conv3'): return model.conv3 - for param in model.features: - if isinstance(param, nn.Conv2d): + for feature in model.features: + if isinstance(feature, nn.Conv2d): return feature return None From dc5db2edc744a0247228e20f25244dfaa35105b0 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 22:05:11 +0200 Subject: [PATCH 50/72] Cleanup progress :cop: --- benchmarks/benchmark_functions.py | 5 +++-- benchmarks/models_benchmark.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 5804bbbe4..85f4ed6f1 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -67,7 +67,7 @@ def last_cnn_layer(model): return None # Code to run benchmark -def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False): +def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True): min_time = 10000000000000 max_time = 0 
sum_of_times = 0 @@ -115,7 +115,8 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ if time_difference < min_time: min_time = time_difference - pbar.update(batch_size) + if progress_bar: + pbar.update(batch_size) avg_time = sum_of_times / number_of_inputs return [min_time, max_time, avg_time] diff --git a/benchmarks/models_benchmark.py b/benchmarks/models_benchmark.py index a97887f26..08be44206 100644 --- a/benchmarks/models_benchmark.py +++ b/benchmarks/models_benchmark.py @@ -38,8 +38,8 @@ print(f'Simple Workflow for model #{model_name}:\n') model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights - cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True) - cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True) + cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True, progress_bar=False) + cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True, progress_bar=False) print(f'Cuda Min time: {cuda_min_time}\n') print(f'Cuda Max time: {cuda_max_time}\n') From c75bbef2bfd320fca2fa0a0885b439552a9fcaa1 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 18 Nov 2022 23:14:17 +0200 Subject: [PATCH 51/72] Add in method benchmark :cop: --- benchmarks/benchmark_functions.py | 4 +- benchmarks/methods_benchmark.py | 64 +++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 benchmarks/methods_benchmark.py diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 85f4ed6f1..d06060d3e 100644 --- a/benchmarks/benchmark_functions.py +++ 
b/benchmarks/benchmark_functions.py @@ -67,7 +67,7 @@ def last_cnn_layer(model): return None # Code to run benchmark -def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True): +def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True, method=GradCAM): min_time = 10000000000000 max_time = 0 sum_of_times = 0 @@ -86,7 +86,7 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ model.to(dev) target_layers = [last_cnn_layer(model)] # Last CNN layer of ResNet50 - cam_function = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda) + cam_function = method(model=model, target_layers=target_layers, use_cuda=use_cuda) cam_function.batch_size = batch_size pbar = tqdm.tqdm(total=number_of_inputs) diff --git a/benchmarks/methods_benchmark.py b/benchmarks/methods_benchmark.py new file mode 100644 index 000000000..8c52229b5 --- /dev/null +++ b/benchmarks/methods_benchmark.py @@ -0,0 +1,64 @@ +import argparse +import cv2 +import numpy as np +import torch +import time +import tqdm + +from pytorch_grad_cam import GradCAM, \ + ScoreCAM, \ + GradCAMPlusPlus, \ + AblationCAM, \ + XGradCAM, \ + EigenCAM, \ + EigenGradCAM, \ + LayerCAM, \ + FullGrad + +from torch import nn +import torch.nn.functional as F + +import torchvision # You may need to install separately +from torchvision import models + +from torch.profiler import profile, record_function, ProfilerActivity + +import benchmark_functions + +number_of_inputs = 1000 + +print(f'Benchmarking GradCAM using {number_of_inputs} images for multiple models...') + +methods_to_benchmark = [ + ['GradCAM', GradCAM], + ['ScoreCAM', ScoreCAM], + ['GradCAMPlusPlus', GradCAMPlusPlus], + ['AblationCAM', AblationCAM], + ['XGradCAM', XGradCAM], + ['EigenCAM', EigenCAM], + ['EigenGradCAM', EigenGradCAM], + ['LayerCAM', LayerCAM], + ['FullGrad', FullGrad] +] + +model = benchmark_functions.SimpleCNN() +# 
model = models.resnet18() + +model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights + +for method_name, method in tqdm.tqdm(methods_to_benchmark): + print('==============================================================================\n\n') + print(f'Simple Workflow for method #{method_name}:\n') + + cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True, progress_bar=False, method=method) + cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True, progress_bar=False, method=method) + + print(f'Cuda Min time: {cuda_min_time}\n') + print(f'Cuda Max time: {cuda_max_time}\n') + print(f'Cuda Avg time: {cuda_avg_time}\n\n') + print(f'CPU Min time: {cpu_min_time}\n') + print(f'CPU Max time: {cpu_max_time}\n') + print(f'CPU Avg time: {cpu_avg_time}\n') + +print('==============================================================================\n\n') +print('Done!') From 915b99f14ef572de9d9dde188a5e7922acd0b0e3 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 17 Feb 2023 22:36:01 +0200 Subject: [PATCH 52/72] Patch in cuda device support :cop: --- pytorch_grad_cam/base_cam.py | 13 +++++++++++-- pytorch_grad_cam/fullgrad_cam.py | 3 ++- pytorch_grad_cam/grad_cam.py | 3 ++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index 547f001e4..62cbca659 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -13,14 +13,21 @@ def __init__(self, model: torch.nn.Module, target_layers: List[torch.nn.Module], use_cuda: bool = False, + cuda_device: None, reshape_transform: Callable = None, compute_input_gradient: bool = False, uses_gradients: bool = True) -> None: self.model = model.eval() self.target_layers = target_layers + self.cuda = use_cuda - if self.cuda: 
+ self.cuda_device = cuda_device + + if self.cuda_device and self.cuda: + self.model.to(self.cuda_device) + elif self.cuda: self.model = model.cuda() + self.reshape_transform = reshape_transform self.compute_input_gradient = compute_input_gradient self.uses_gradients = uses_gradients @@ -64,7 +71,9 @@ def forward(self, targets: List[torch.nn.Module], eigen_smooth: bool = False) -> torch.Tensor: - if self.cuda: + if self.cuda_device and self.cuda: + input_tensor = input_tensor.to(self.cuda_device) + elif self.cuda: input_tensor = input_tensor.cuda() if self.compute_input_gradient: diff --git a/pytorch_grad_cam/fullgrad_cam.py b/pytorch_grad_cam/fullgrad_cam.py index 1a2685eff..f1e289094 100644 --- a/pytorch_grad_cam/fullgrad_cam.py +++ b/pytorch_grad_cam/fullgrad_cam.py @@ -9,7 +9,7 @@ class FullGrad(BaseCAM): - def __init__(self, model, target_layers, use_cuda=False, + def __init__(self, model, target_layers, use_cuda=False, cuda_device: None, reshape_transform=None): if len(target_layers) > 0: print( @@ -28,6 +28,7 @@ def layer_with_2D_bias(layer): model, target_layers, use_cuda, + cuda_device, reshape_transform, compute_input_gradient=True) self.bias_data = [self.get_bias_data( diff --git a/pytorch_grad_cam/grad_cam.py b/pytorch_grad_cam/grad_cam.py index efb66e76e..718481484 100644 --- a/pytorch_grad_cam/grad_cam.py +++ b/pytorch_grad_cam/grad_cam.py @@ -3,7 +3,7 @@ class GradCAM(BaseCAM): - def __init__(self, model, target_layers, use_cuda=False, + def __init__(self, model, target_layers, use_cuda=False, cuda_device: None, reshape_transform=None): super( GradCAM, @@ -11,6 +11,7 @@ def __init__(self, model, target_layers, use_cuda=False, model, target_layers, use_cuda, + cuda_device, reshape_transform) def get_cam_weights(self, From 0305eecc9fd34ea44108f3e5f1e03a29151db976 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 17 Feb 2023 22:43:35 +0200 Subject: [PATCH 53/72] Fix cuda device call :cop: --- pytorch_grad_cam/fullgrad_cam.py | 2 
+- pytorch_grad_cam/grad_cam.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_grad_cam/fullgrad_cam.py b/pytorch_grad_cam/fullgrad_cam.py index f1e289094..d2b438a4c 100644 --- a/pytorch_grad_cam/fullgrad_cam.py +++ b/pytorch_grad_cam/fullgrad_cam.py @@ -9,7 +9,7 @@ class FullGrad(BaseCAM): - def __init__(self, model, target_layers, use_cuda=False, cuda_device: None, + def __init__(self, model, target_layers, use_cuda=False, cuda_device=None, reshape_transform=None): if len(target_layers) > 0: print( diff --git a/pytorch_grad_cam/grad_cam.py b/pytorch_grad_cam/grad_cam.py index 718481484..2a48bdd8b 100644 --- a/pytorch_grad_cam/grad_cam.py +++ b/pytorch_grad_cam/grad_cam.py @@ -3,7 +3,7 @@ class GradCAM(BaseCAM): - def __init__(self, model, target_layers, use_cuda=False, cuda_device: None, + def __init__(self, model, target_layers, use_cuda=False, cuda_device=None, reshape_transform=None): super( GradCAM, From 0da12a0ae047f0a69acb43ccd91bc3507dbb0573 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Fri, 17 Feb 2023 22:46:17 +0200 Subject: [PATCH 54/72] Fix cuda device call :cop: --- pytorch_grad_cam/base_cam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index 62cbca659..055111fbc 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -13,7 +13,7 @@ def __init__(self, model: torch.nn.Module, target_layers: List[torch.nn.Module], use_cuda: bool = False, - cuda_device: None, + cuda_device = None, reshape_transform: Callable = None, compute_input_gradient: bool = False, uses_gradients: bool = True) -> None: From 82f71e728e9670307480801e428ac01dd4a94165 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 11:48:46 +0200 Subject: [PATCH 55/72] Work on a single image benchmark :cop: --- benchmarks/single_image_benchmark.py | 122 +++++++++++++++++++++++++++ 1 file changed, 122 
insertions(+) create mode 100644 benchmarks/single_image_benchmark.py diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py new file mode 100644 index 000000000..cb9568ffb --- /dev/null +++ b/benchmarks/single_image_benchmark.py @@ -0,0 +1,122 @@ +import argparse +import cv2 +import numpy as np +import torch +import time +import tqdm + +from pytorch_grad_cam import GradCAM, \ + ScoreCAM, \ + GradCAMPlusPlus, \ + AblationCAM, \ + XGradCAM, \ + EigenCAM, \ + EigenGradCAM, \ + LayerCAM, \ + FullGrad + +from torch import nn +import torch.nn.functional as F + +import torchvision # You may need to install separately +from torchvision import models + +from torch.profiler import profile, record_function, ProfilerActivity + +import benchmark_functions + +number_of_inputs = 1 +model = models.resnet50() + +# TODO: Load image + +print(f'Benchmarking GradCAM using {number_of_inputs} image for ResNet50...') + +# Run on CPU with profiler (save the profile to print later) +print('Profile list of images on CPU...') +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) +cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + +# Run on CUDA with profiler (save the profile to print later) +print('Profile list of images on Cuda...') +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) +cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + +# Run on CUDA with extra workflow +print('Profile list of images on Cuda and then run workflow...') +with 
profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + +# Run on CUDA with extra workflow +print('Profile list of images on Cuda and then run workflow with a simple CNN...') +model = benchmark_functions.SimpleCNN() +model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights +with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +simple_work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) + +model = models.resnet50() +# Run on CPU x1000 (get min, max, and avg times) +print('Run list of images on CPU...') +cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) + +# Run on CUDA x1000 +print('Run list of images on Cuda...') +cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) + +# Run Workflow +print('Run list of images on Cuda with a workflow...') +workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + +print('Run list of images on Cuda with a workflow using simple CNN...') +model = benchmark_functions.SimpleCNN() +model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights +simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time = 
benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + +print('Complete!') + +print('==============================================================================\n\n') +print('CPU Profile:\n') +print(cpu_profile) + +print('==============================================================================\n\n') +print('Cuda Profile:\n') +print(cuda_profile) + +print('==============================================================================\n\n') +print('Workflow Cuda Profile:\n') +print(work_flow_cuda_profile) + +print('==============================================================================\n\n') +print('Simple Workflow Cuda Profile:\n') +print(simple_work_flow_cuda_profile) + +print('==============================================================================\n\n') +print('CPU Timing (No Profiler):\n') +print(f'Min time: {cpu_min_time}\n') +print(f'Max time: {cpu_max_time}\n') +print(f'Avg time: {cpu_avg_time}\n') + +print('==============================================================================\n\n') +print('Cuda Timing (No Profiler):\n') +print(f'Min time: {cuda_min_time}\n') +print(f'Max time: {cuda_max_time}\n') +print(f'Avg time: {cuda_avg_time}\n') + +print('==============================================================================\n\n') +print('Workflow Cuda Timing (No Profiler):\n') +print(f'Min time: {workflow_cuda_min_time}\n') +print(f'Max time: {workflow_cuda_max_time}\n') +print(f'Avg time: {workflow_cuda_avg_time}\n') + +print('==============================================================================\n\n') +print('Simple Workflow Cuda Timing (No Profiler):\n') +print(f'Min time: {simple_workflow_cuda_min_time}\n') +print(f'Max time: {simple_workflow_cuda_max_time}\n') +print(f'Avg time: {simple_workflow_cuda_avg_time}\n') + +print('==============================================================================\n\n') +print('Done!') From 
199815c849aa30d43667732f6b08dc768681a8c3 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 11:54:27 +0200 Subject: [PATCH 56/72] Disable cpu benchmarking :cop: --- benchmarks/single_image_benchmark.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index cb9568ffb..1f9f870ee 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -33,10 +33,10 @@ print(f'Benchmarking GradCAM using {number_of_inputs} image for ResNet50...') # Run on CPU with profiler (save the profile to print later) -print('Profile list of images on CPU...') -with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) -cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) +# print('Profile list of images on CPU...') +# with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: +# cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) +# cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) print('Profile list of images on Cuda...') @@ -60,8 +60,8 @@ model = models.resnet50() # Run on CPU x1000 (get min, max, and avg times) -print('Run list of images on CPU...') -cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) +# print('Run list of images on CPU...') +# cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, 
use_cuda=False) # Run on CUDA x1000 print('Run list of images on Cuda...') @@ -78,9 +78,9 @@ print('Complete!') -print('==============================================================================\n\n') -print('CPU Profile:\n') -print(cpu_profile) +# print('==============================================================================\n\n') +# print('CPU Profile:\n') +# print(cpu_profile) print('==============================================================================\n\n') print('Cuda Profile:\n') @@ -94,11 +94,11 @@ print('Simple Workflow Cuda Profile:\n') print(simple_work_flow_cuda_profile) -print('==============================================================================\n\n') -print('CPU Timing (No Profiler):\n') -print(f'Min time: {cpu_min_time}\n') -print(f'Max time: {cpu_max_time}\n') -print(f'Avg time: {cpu_avg_time}\n') +# print('==============================================================================\n\n') +# print('CPU Timing (No Profiler):\n') +# print(f'Min time: {cpu_min_time}\n') +# print(f'Max time: {cpu_max_time}\n') +# print(f'Avg time: {cpu_avg_time}\n') print('==============================================================================\n\n') print('Cuda Timing (No Profiler):\n') From cf020cfe8e24e421e7f95754b17b90e36299c9e5 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:11:32 +0200 Subject: [PATCH 57/72] output the resultant image and allow inputting a image :cop: --- benchmarks/benchmark_functions.py | 9 ++++++--- benchmarks/methods_benchmark.py | 4 ++-- benchmarks/models_benchmark.py | 4 ++-- benchmarks/single_image_benchmark.py | 16 ++++++++-------- benchmarks/torch_benchmark.py | 16 ++++++++-------- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index d06060d3e..ce9ed05e3 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -67,7 +67,7 @@ def 
last_cnn_layer(model): return None # Code to run benchmark -def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True, method=GradCAM): +def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True, method=GradCAM, input_image=None): min_time = 10000000000000 max_time = 0 sum_of_times = 0 @@ -95,7 +95,10 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ start_time = time.time() # Actual code to benchmark - input_image = input_tensor[i:i+batch_size].to(dev) + if input_image is None: + input_image = input_tensor[i:i+batch_size] + input_image = input_image.to(dev) + heatmap = cam_function(input_tensor=input_image, targets=targets) if workflow_test: @@ -119,4 +122,4 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ pbar.update(batch_size) avg_time = sum_of_times / number_of_inputs - return [min_time, max_time, avg_time] + return [min_time, max_time, avg_time, output_image] diff --git a/benchmarks/methods_benchmark.py b/benchmarks/methods_benchmark.py index 8c52229b5..5660e3a79 100644 --- a/benchmarks/methods_benchmark.py +++ b/benchmarks/methods_benchmark.py @@ -50,8 +50,8 @@ print('==============================================================================\n\n') print(f'Simple Workflow for method #{method_name}:\n') - cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True, progress_bar=False, method=method) - cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True, progress_bar=False, method=method) + cpu_min_time, cpu_max_time, cpu_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True, progress_bar=False, method=method) + cuda_min_time, 
cuda_max_time, cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True, progress_bar=False, method=method) print(f'Cuda Min time: {cuda_min_time}\n') print(f'Cuda Max time: {cuda_max_time}\n') diff --git a/benchmarks/models_benchmark.py b/benchmarks/models_benchmark.py index 08be44206..9c6fdfa89 100644 --- a/benchmarks/models_benchmark.py +++ b/benchmarks/models_benchmark.py @@ -38,8 +38,8 @@ print(f'Simple Workflow for model #{model_name}:\n') model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights - cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True, progress_bar=False) - cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True, progress_bar=False) + cpu_min_time, cpu_max_time, cpu_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=False, workflow_test=True, progress_bar=False) + cuda_min_time, cuda_max_time, cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=8, use_cuda=True, workflow_test=True, progress_bar=False) print(f'Cuda Min time: {cuda_min_time}\n') print(f'Cuda Max time: {cuda_max_time}\n') diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index 1f9f870ee..ca86da669 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -35,19 +35,19 @@ # Run on CPU with profiler (save the profile to print later) # print('Profile list of images on CPU...') # with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: -# cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, 
use_cuda=False) +# cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) # cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) print('Profile list of images on Cuda...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with extra workflow print('Profile list of images on Cuda and then run workflow...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with extra workflow @@ -55,26 +55,26 @@ model = benchmark_functions.SimpleCNN() model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, 
workflow_test=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) simple_work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) model = models.resnet50() # Run on CPU x1000 (get min, max, and avg times) # print('Run list of images on CPU...') -# cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) +# cpu_min_time, cpu_max_time, cpu_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) # Run on CUDA x1000 print('Run list of images on Cuda...') -cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) +cuda_min_time, cuda_max_time, cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) # Run Workflow print('Run list of images on Cuda with a workflow...') -workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) print('Run list of images on Cuda with a workflow using simple CNN...') model = benchmark_functions.SimpleCNN() model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights -simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time, 
_output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) print('Complete!') diff --git a/benchmarks/torch_benchmark.py b/benchmarks/torch_benchmark.py index a7c58ffbf..793f099c5 100644 --- a/benchmarks/torch_benchmark.py +++ b/benchmarks/torch_benchmark.py @@ -39,19 +39,19 @@ # Run on CPU with profiler (save the profile to print later) print('Profile list of images on CPU...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) + cpu_profile_min_time, cpu_profile_max_time, cpu_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) cpu_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with profiler (save the profile to print later) print('Profile list of images on Cuda...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with extra workflow print('Profile list of images on Cuda and then run workflow...') with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + cuda_profile_min_time, 
cuda_profile_max_time, cuda_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) # Run on CUDA with extra workflow @@ -59,26 +59,26 @@ model = benchmark_functions.SimpleCNN() model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights with profile(activities=[ProfilerActivity.CPU], profile_memory=True, record_shapes=True) as prof: - cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) + cuda_profile_min_time, cuda_profile_max_time, cuda_profile_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) simple_work_flow_cuda_profile = prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=15) model = models.resnet50() # Run on CPU x1000 (get min, max, and avg times) print('Run list of images on CPU...') -cpu_min_time, cpu_max_time, cpu_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) +cpu_min_time, cpu_max_time, cpu_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=False) # Run on CUDA x1000 print('Run list of images on Cuda...') -cuda_min_time, cuda_max_time, cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) +cuda_min_time, cuda_max_time, cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True) # Run Workflow print('Run list of images on Cuda with a workflow...') -workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) 
+workflow_cuda_min_time, workflow_cuda_max_time, workflow_cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) print('Run list of images on Cuda with a workflow using simple CNN...') model = benchmark_functions.SimpleCNN() model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights -simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) print('Complete!') From 9513c866ee474bea61232277a7f5f5118f124706 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:18:59 +0200 Subject: [PATCH 58/72] Allow for output saving for a snaity check :cop: --- benchmarks/benchmark_functions.py | 7 +++++-- benchmarks/single_image_benchmark.py | 9 ++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index ce9ed05e3..661f15896 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -66,6 +66,9 @@ def last_cnn_layer(model): return None +def save_image(image, path): + return torchvision.utils.save_image(tensor: image, fp: path) + # Code to run benchmark def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True, method=GradCAM, input_image=None): min_time = 10000000000000 @@ -86,7 +89,7 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ model.to(dev) target_layers = [last_cnn_layer(model)] # Last CNN layer of ResNet50 - cam_function = method(model=model, target_layers=target_layers, 
use_cuda=use_cuda) + cam_function = method(model=model, target_layers=target_layers, cuda_device=dev, use_cuda=use_cuda) cam_function.batch_size = batch_size pbar = tqdm.tqdm(total=number_of_inputs) @@ -122,4 +125,4 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ pbar.update(batch_size) avg_time = sum_of_times / number_of_inputs - return [min_time, max_time, avg_time, output_image] + return [min_time, max_time, avg_time, [threshold_plot, output_image]] diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index ca86da669..c7c3959cc 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -74,7 +74,7 @@ print('Run list of images on Cuda with a workflow using simple CNN...') model = benchmark_functions.SimpleCNN() model.apply(benchmark_functions.xavier_uniform_init) # Randomise more weights -simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time, _output_image = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) +simple_workflow_cuda_min_time, simple_workflow_cuda_max_time, simple_workflow_cuda_avg_time, output = benchmark_functions.run_gradcam(model, number_of_inputs, batch_size=64, use_cuda=True, workflow_test=True) print('Complete!') @@ -118,5 +118,12 @@ print(f'Max time: {simple_workflow_cuda_max_time}\n') print(f'Avg time: {simple_workflow_cuda_avg_time}\n') +print('==============================================================================\n\n') +print('Output the resultant heat-map') +threshold_plot, output_image = output + +benchmark_functions.save_image(threshold_plot, '~/threshold.png') +benchmark_functions.save_image(output_image, '~/output_image.png') + print('==============================================================================\n\n') print('Done!') From 186c14bdffedd92e1e643ffb525c0f0d2489ff2a Mon Sep 17 00:00:00 2001 From: trex22 
<contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:19:43 +0200 Subject: [PATCH 59/72] Allow for output saving for a snaity check :cop: --- benchmarks/benchmark_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 661f15896..77ba3e297 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -67,7 +67,7 @@ def last_cnn_layer(model): return None def save_image(image, path): - return torchvision.utils.save_image(tensor: image, fp: path) + return torchvision.utils.save_image(image, path) # Code to run benchmark def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_test=False, progress_bar=True, method=GradCAM, input_image=None): From b6d220230d9dac964b5b5e54f13fcb19ca17d7e1 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:27:54 +0200 Subject: [PATCH 60/72] Allow for output saving for a snaity check :cop: --- benchmarks/benchmark_functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmarks/benchmark_functions.py b/benchmarks/benchmark_functions.py index 77ba3e297..4ebca6111 100644 --- a/benchmarks/benchmark_functions.py +++ b/benchmarks/benchmark_functions.py @@ -97,6 +97,9 @@ def run_gradcam(model, number_of_inputs, batch_size=1, use_cuda=False, workflow_ for i in range(0, number_of_inputs, batch_size): start_time = time.time() + threshold_plot = torch.rand((number_of_inputs, 3, 256, 60)) + output_image = torch.rand((number_of_inputs, 3, 256, 60)) + # Actual code to benchmark if input_image is None: input_image = input_tensor[i:i+batch_size] From 84a0689fb3cfecbae606eaac23b0959eb7551442 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:28:47 +0200 Subject: [PATCH 61/72] Allow for output saving for a snaity check :cop: --- benchmarks/single_image_benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index c7c3959cc..450e3afed 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -122,8 +122,8 @@ print('Output the resultant heat-map') threshold_plot, output_image = output -benchmark_functions.save_image(threshold_plot, '~/threshold.png') -benchmark_functions.save_image(output_image, '~/output_image.png') +benchmark_functions.save_image(threshold_plot.to("cpu", torch.uint8), '~/threshold.png') +benchmark_functions.save_image(output_image.to("cpu", torch.uint8), '~/output_image.png') print('==============================================================================\n\n') print('Done!') From 18f8d8e6a6b2c83529577c7711e25cca9d994753 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:36:56 +0200 Subject: [PATCH 62/72] Open image :cop: --- benchmarks/single_image_benchmark.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index 450e3afed..da56a0618 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -28,7 +28,9 @@ number_of_inputs = 1 model = models.resnet50() -# TODO: Load image +# Just hard-coding a path for now +image_path = '~/image.jpg' +input_tensor = torch.read_image(image_path) print(f'Benchmarking GradCAM using {number_of_inputs} image for ResNet50...') From 8eaf1b772c7767e431176fa782d9bfdb54aa0c14 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:38:14 +0200 Subject: [PATCH 63/72] Open image :cop: --- benchmarks/single_image_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index da56a0618..21b81faf9 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -30,7 +30,7 @@ # 
Just hard-coding a path for now image_path = '~/image.jpg' -input_tensor = torch.read_image(image_path) +input_tensor = torchvision.io.read_image(image_path) print(f'Benchmarking GradCAM using {number_of_inputs} image for ResNet50...') From b8054654b9c2475b5ff93fd920e342e5fbfd472c Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:48:59 +0200 Subject: [PATCH 64/72] Change to simple model :cop: --- benchmarks/single_image_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index 21b81faf9..bb5d54df2 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -26,7 +26,7 @@ import benchmark_functions number_of_inputs = 1 -model = models.resnet50() +model = benchmark_functions.SimpleCNN() # Just hard-coding a path for now image_path = '~/image.jpg' From 7393e77030ad9b2236aca1484d6d0bf2924a9e1c Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:50:56 +0200 Subject: [PATCH 65/72] try with trained weights :cop: --- benchmarks/single_image_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index bb5d54df2..09f0a9e09 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -26,7 +26,7 @@ import benchmark_functions number_of_inputs = 1 -model = benchmark_functions.SimpleCNN() +model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2) # Just hard-coding a path for now image_path = '~/image.jpg' From 193c9f29d8fa3735b44e7a057c2b91d83f35d26d Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Thu, 9 Mar 2023 12:52:08 +0200 Subject: [PATCH 66/72] try with trained weights :cop: --- benchmarks/single_image_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/benchmarks/single_image_benchmark.py b/benchmarks/single_image_benchmark.py index 09f0a9e09..2d8368442 100644 --- a/benchmarks/single_image_benchmark.py +++ b/benchmarks/single_image_benchmark.py @@ -26,7 +26,7 @@ import benchmark_functions number_of_inputs = 1 -model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2) +model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2) # Just hard-coding a path for now image_path = '~/image.jpg' From a3d327b5953edbfdc64ff3d000fdb1dd4d26131f Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Mon, 17 Apr 2023 15:00:42 +0200 Subject: [PATCH 67/72] Add in cuda device support for gradients and activations :cop: --- pytorch_grad_cam/activations_and_gradients.py | 19 ++++++++++++++++--- pytorch_grad_cam/base_cam.py | 5 ++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pytorch_grad_cam/activations_and_gradients.py b/pytorch_grad_cam/activations_and_gradients.py index 0c2071e59..957c976e1 100644 --- a/pytorch_grad_cam/activations_and_gradients.py +++ b/pytorch_grad_cam/activations_and_gradients.py @@ -2,12 +2,16 @@ class ActivationsAndGradients: """ Class for extracting activations and registering gradients from targetted intermediate layers """ - def __init__(self, model, target_layers, reshape_transform): + def __init__(self, model, target_layers, reshape_transform, use_cuda: bool = False, cuda_device = None): self.model = model self.gradients = [] self.activations = [] self.reshape_transform = reshape_transform self.handles = [] + + self.use_cuda = use_cuda + self.cuda_device = cuda_device + for target_layer in target_layers: self.handles.append( target_layer.register_forward_hook(self.save_activation)) @@ -21,7 +25,11 @@ def save_activation(self, module, input, output): if self.reshape_transform is not None: activation = self.reshape_transform(activation) - self.activations.append(activation.cpu().detach()) + + if self.use_cuda: + 
self.activations.append(activation.to(self.cuda_device)) + else: + self.activations.append(activation.cpu().detach()) def save_gradient(self, module, input, output): if not hasattr(output, "requires_grad") or not output.requires_grad: @@ -32,13 +40,18 @@ def save_gradient(self, module, input, output): def _store_grad(grad): if self.reshape_transform is not None: grad = self.reshape_transform(grad) - self.gradients = [grad.cpu().detach()] + self.gradients + + if self.use_cuda: + self.gradients = [grad.to(self.cuda_device)] + self.gradients + else: + self.gradients = [grad.cpu().detach()] + self.gradients output.register_hook(_store_grad) def __call__(self, x): self.gradients = [] self.activations = [] + return self.model(x) def release(self): diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index 055111fbc..5c45be2a8 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -32,7 +32,7 @@ def __init__(self, self.compute_input_gradient = compute_input_gradient self.uses_gradients = uses_gradients self.activations_and_grads = ActivationsAndGradients( - self.model, target_layers, reshape_transform) + self.model, target_layers, reshape_transfor, use_cuda = use_cuda, cuda_device = cuda_device) """ Get a vector of weights for every channel in the target layer. 
Methods that return weights channels, @@ -126,8 +126,10 @@ def compute_cam_per_layer( # Loop over the saliency image from every layer for i in range(len(self.target_layers)): target_layer = self.target_layers[i] + layer_activations = None layer_grads = None + if i < len(activations_list): layer_activations = activations_list[i] if i < len(grads_list): @@ -139,6 +141,7 @@ def compute_cam_per_layer( layer_activations, layer_grads, eigen_smooth) + cam = torch.maximum(cam, torch.tensor(0)) scaled = scale_cam_image(cam, target_size) cam_per_target_layer.append(scaled[:, None, :]) From 15de5ed4c34dfc42d14a038fdb48b26886e35ba0 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Mon, 17 Apr 2023 15:11:52 +0200 Subject: [PATCH 68/72] Fix typo :cop: --- pytorch_grad_cam/base_cam.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index 5c45be2a8..73553def0 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -31,8 +31,9 @@ def __init__(self, self.reshape_transform = reshape_transform self.compute_input_gradient = compute_input_gradient self.uses_gradients = uses_gradients + self.activations_and_grads = ActivationsAndGradients( - self.model, target_layers, reshape_transfor, use_cuda = use_cuda, cuda_device = cuda_device) + self.model, target_layers, reshape_transform, use_cuda = use_cuda, cuda_device = cuda_device) """ Get a vector of weights for every channel in the target layer. 
Methods that return weights channels, From 9e3859948ee49b49fd09d89e04822469adf1a0e9 Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Mon, 17 Apr 2023 17:32:06 +0200 Subject: [PATCH 69/72] Attempt to force a different FakeTensorMode :scientist: --- pytorch_grad_cam/activations_and_gradients.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pytorch_grad_cam/activations_and_gradients.py b/pytorch_grad_cam/activations_and_gradients.py index 957c976e1..0cf9129a1 100644 --- a/pytorch_grad_cam/activations_and_gradients.py +++ b/pytorch_grad_cam/activations_and_gradients.py @@ -1,3 +1,12 @@ +# from torch._subclasses import fake_tensor + +# from torch._subclasses.fake_tensor import ( +# FakeTensor, +# FakeTensorMode, +# FakeTensorConverter) + +from torch._subclasses.fake_tensor import FakeTensorMode + class ActivationsAndGradients: """ Class for extracting activations and registering gradients from targetted intermediate layers """ @@ -12,13 +21,14 @@ def __init__(self, model, target_layers, reshape_transform, use_cuda: bool = Fal self.use_cuda = use_cuda self.cuda_device = cuda_device - for target_layer in target_layers: - self.handles.append( - target_layer.register_forward_hook(self.save_activation)) - # Because of https://github.com/pytorch/pytorch/issues/61519, - # we don't use backward hook to record gradients.
+ self.handles.append( + target_layer.register_forward_hook(self.save_gradient)) def save_activation(self, module, input, output): activation = output From ed64d06d201fe7dd94ac16882786808c20c50dda Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Mon, 17 Apr 2023 19:23:42 +0200 Subject: [PATCH 70/72] Attempt to patch issue with pytorch 2.0 :cop: --- pytorch_grad_cam/activations_and_gradients.py | 24 ++++++------------- pytorch_grad_cam/base_cam.py | 4 ++-- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/pytorch_grad_cam/activations_and_gradients.py b/pytorch_grad_cam/activations_and_gradients.py index 0cf9129a1..957c976e1 100644 --- a/pytorch_grad_cam/activations_and_gradients.py +++ b/pytorch_grad_cam/activations_and_gradients.py @@ -1,12 +1,3 @@ -# from torch._subclasses import fake_tensor - -# from torch._subclasses.fake_tensor import ( -# FakeTensor, -# FakeTensorMode, -# FakeTensorConverter) - -from torch._subclasses.fake_tensor import FakeTensorMode - class ActivationsAndGradients: """ Class for extracting activations and registering gradients from targetted intermediate layers """ @@ -21,14 +12,13 @@ def __init__(self, model, target_layers, reshape_transform, use_cuda: bool = Fal self.use_cuda = use_cuda self.cuda_device = cuda_device - with FakeTensorMode(allow_non_fake_inputs=True): - for target_layer in target_layers: - self.handles.append( - target_layer.register_forward_hook(self.save_activation)) - # Because of https://github.com/pytorch/pytorch/issues/61519, - # we don't use backward hook to record gradients. - self.handles.append( - target_layer.register_forward_hook(self.save_gradient)) + for target_layer in target_layers: + self.handles.append( + target_layer.register_forward_hook(self.save_activation)) + # Because of https://github.com/pytorch/pytorch/issues/61519, + # we don't use backward hook to record gradients. 
+ self.handles.append( + target_layer.register_forward_hook(self.save_gradient)) def save_activation(self, module, input, output): activation = output diff --git a/pytorch_grad_cam/base_cam.py b/pytorch_grad_cam/base_cam.py index 73553def0..73ee0b47d 100644 --- a/pytorch_grad_cam/base_cam.py +++ b/pytorch_grad_cam/base_cam.py @@ -6,7 +6,7 @@ from pytorch_grad_cam.utils.svd_on_activations import get_2d_projection from pytorch_grad_cam.utils.image import scale_cam_image from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget - +from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode class BaseCAM: def __init__(self, @@ -143,7 +143,7 @@ def compute_cam_per_layer( layer_grads, eigen_smooth) - cam = torch.maximum(cam, torch.tensor(0)) + with FakeTensorMode(allow_non_fake_inputs=True): cam = torch.maximum(cam.cpu(), torch.tensor(0)) scaled = scale_cam_image(cam, target_size) cam_per_target_layer.append(scaled[:, None, :]) From 273bd81ecdebda2618177cc7f1a7fed7372d28aa Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Wed, 26 Apr 2023 18:20:00 +0200 Subject: [PATCH 71/72] Make more meaningful changes extracted from another branch :cop: --- pytorch_grad_cam/utils/svd_on_activations.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_grad_cam/utils/svd_on_activations.py b/pytorch_grad_cam/utils/svd_on_activations.py index a406aeea8..2370cbc0d 100644 --- a/pytorch_grad_cam/utils/svd_on_activations.py +++ b/pytorch_grad_cam/utils/svd_on_activations.py @@ -1,9 +1,9 @@ -import numpy as np +import torch def get_2d_projection(activation_batch): # TBD: use pytorch batch svd implementation - activation_batch[np.isnan(activation_batch)] = 0 + activation_batch[torch.isnan(activation_batch)] = 0 projections = [] for activations in activation_batch: reshaped_activations = (activations).reshape( @@ -12,8 +12,8 @@ def get_2d_projection(activation_batch): # Otherwise the image returned is negative 
reshaped_activations = reshaped_activations - \ reshaped_activations.mean(axis=0) - U, S, VT = np.linalg.svd(reshaped_activations, full_matrices=True) + U, S, VT = torch.linalg.svd(reshaped_activations, full_matrices=True) projection = reshaped_activations @ VT[0, :] projection = projection.reshape(activations.shape[1:]) projections.append(projection) - return np.float32(projections) + return torch.tensor(projections).to(torch.float32) From 9ed4c9be91c562374d30b5a4c12d85dbfd05118f Mon Sep 17 00:00:00 2001 From: trex22 <contact@jasonchalom.com> Date: Wed, 26 Apr 2023 18:21:09 +0200 Subject: [PATCH 72/72] Remove TODO :cop: --- pytorch_grad_cam/utils/svd_on_activations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_grad_cam/utils/svd_on_activations.py b/pytorch_grad_cam/utils/svd_on_activations.py index 2370cbc0d..91bfab0e1 100644 --- a/pytorch_grad_cam/utils/svd_on_activations.py +++ b/pytorch_grad_cam/utils/svd_on_activations.py @@ -2,7 +2,6 @@ def get_2d_projection(activation_batch): - # TBD: use pytorch batch svd implementation activation_batch[torch.isnan(activation_batch)] = 0 projections = [] for activations in activation_batch: