Speed up ConvOffset2D by caching the coordinate grid and reusing it across forward passes

oeway · oeway · commit 571b20a20c40 · 2017-04-05T16:02:51.000+02:00
diff --git a/tests/test_deform_conv.py b/tests/test_deform_conv.py
@@ -52,10 +52,10 @@ def test_th_batch_map_offsets_grad():
     offsets = (np.random.random((4, 100, 100, 2)) * 2)
 
     input = Variable(torch.from_numpy(input), requires_grad=True)
-    offsets = Variable(torch.from_numpy(offsets), requires_grad=False)
+    offsets = Variable(torch.from_numpy(offsets), requires_grad=True)
 
     th_mapped_vals = th_batch_map_offsets(input, offsets)
     e = torch.from_numpy(np.random.random((4, 100, 100)))
     th_mapped_vals.backward(e)
-    grad = input.grad
-    assert not np.allclose(grad.data.numpy(), 0)
+    assert not np.allclose(input.grad.data.numpy(), 0)
+    assert not np.allclose(offsets.grad.data.numpy(), 0)
diff --git a/torch_deform_conv/deform_conv.py b/torch_deform_conv/deform_conv.py
@@ -138,7 +138,21 @@ def sp_batch_map_offsets(input, offsets):
     return mapped_vals
 
 
-def th_batch_map_offsets(input, offsets, order=1):
+def th_generate_grid(batch_size, input_size, dtype, cuda):  # build the constant index grid that offsets are added to
+    grid = np.meshgrid(  # row and column index arrays, each (input_size, input_size)
+        range(input_size), range(input_size), indexing='ij'
+    )
+    grid = np.stack(grid, axis=-1)  # pair the indices: (input_size, input_size, 2)
+    grid = grid.reshape(-1, 2)  # flatten to one (row, col) pair per position
+
+    grid = np_repeat_2d(grid, batch_size)  # presumably one copy of the grid per batch entry -- confirm against np_repeat_2d
+    grid = torch.from_numpy(grid).type(dtype)  # dtype is a tensor type string, as returned by tensor.type()
+    if cuda:
+        grid = grid.cuda()  # move to the GPU when requested
+    return Variable(grid, requires_grad=False)  # constant: no gradient is tracked for the grid
+
+
+def th_batch_map_offsets(input, offsets, grid=None, order=1):
     """Batch map offsets into input
     Parameters
     ---------
@@ -148,23 +162,14 @@ def th_batch_map_offsets(input, offsets, order=1):
     -------
     torch.Tensor. shape = (b, s, s)
     """
-    input_shape = input.size()
-    batch_size = input_shape[0]
-    input_size = input_shape[1]
+    batch_size = input.size(0)
+    input_size = input.size(1)
 
     offsets = offsets.view(batch_size, -1, 2)
-    grid = np.meshgrid(
-        range(input_size), range(input_size), indexing='ij'
-    )
-    grid = np.stack(grid, axis=-1)
-    grid = grid.reshape(-1, 2)
+    if grid is None:
+        grid = th_generate_grid(batch_size, input_size, offsets.data.type(), offsets.data.is_cuda)
 
-    grid = np_repeat_2d(grid, batch_size)
-    grid = torch.from_numpy(grid).type(offsets.data.type())
-    if offsets.is_cuda:
-        grid = grid.cuda()
-
-    coords = offsets.add(Variable(grid, requires_grad=False))
+    coords = offsets + grid
 
     mapped_vals = th_batch_map_coordinates(input, coords)
     return mapped_vals
diff --git a/torch_deform_conv/layers.py b/torch_deform_conv/layers.py
@@ -4,7 +4,7 @@
 import torch.nn as nn
 
 import numpy as np
-from torch_deform_conv.deform_conv import th_batch_map_offsets
+from torch_deform_conv.deform_conv import th_batch_map_offsets, th_generate_grid
 
 
 class ConvOffset2D(nn.Conv2d):
@@ -29,6 +29,7 @@ def __init__(self, filters, init_normal_stddev=0.01, **kwargs):
             Pass to superclass. See Con2d layer in pytorch
         """
         self.filters = filters
+        self._grid_param = None
         super(ConvOffset2D, self).__init__(self.filters, self.filters*2, 3, padding=1, bias=False, **kwargs)
         self.weight.data.copy_(self._init_weights(self.weight, init_normal_stddev))
 
@@ -44,13 +45,23 @@ def forward(self, x):
         x = self._to_bc_h_w(x, x_shape)
 
         # X_offset: (b*c, h, w)
-        x_offset = th_batch_map_offsets(x, offsets)
+        x_offset = th_batch_map_offsets(x, offsets, grid=self._get_grid(self,x))
 
         # x_offset: (b, h, w, c)
         x_offset = self._to_b_c_h_w(x_offset, x_shape)
 
         return x_offset
 
+    @staticmethod  # NOTE(review): static, yet takes the instance explicitly; forward calls self._get_grid(self, x)
+    def _get_grid(self, x):  # return the grid for x, rebuilding it only when x's size, type or device changes
+        batch_size, input_size= x.size(0), x.size(1)
+        dtype, cuda = x.data.type(), x.data.is_cuda
+        if self._grid_param == (batch_size, input_size, dtype, cuda):  # same key as the cached grid: reuse it
+            return self._grid
+        self._grid_param = (batch_size, input_size, dtype, cuda)  # remember the key for the grid built below
+        self._grid = th_generate_grid(batch_size, input_size, dtype, cuda)
+        return self._grid
+    
     @staticmethod
     def _init_weights(weights, std):
         fan_out = weights.size(0)