Working precompute_img_features.py script

wtzmx · Nov 8, 2017 · c487470 · c487470
1 parent ee2999f
commit c487470
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 16 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,9 +1,11 @@
 *~
 data
 build
-img_features
+*.tsv
 sim_imgs
 *.so
 *kdev4*
 *.caffemodel
 *.caffemodel.h5
+*.pyc
+*.out
diff --git a/img_features/.gitkeep b/img_features/.gitkeep
diff --git a/scripts/precompute_img_features.py b/scripts/precompute_img_features.py
@@ -18,14 +18,19 @@
 
 #caffe_root = '../'  # your caffe build
 #sys.path.insert(0, caffe_root + 'python')
-#import caffe
+import caffe
+
+from timer import Timer
 
 
 TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w','image_h', 'vfov', 'features']
-BATCH_SIZE = 36             # All the discretized views from one viewpoint
+VIEWPOINT_SIZE = 36 # Number of discretized views from one viewpoint
+FEATURE_SIZE = 2048
+BATCH_SIZE = 4  # Must evenly divide VIEWPOINT_SIZE - batch size 4 equals 11GB memory
 GPU_ID = 0
 PROTO = 'models/ResNet-152-deploy.prototxt'
-MODEL = ''                  # You need to download this, see README.md
+MODEL = 'models/ResNet-152-model.caffemodel'  # You need to download this, see README.md
+#MODEL = 'models/resnet152_places365.caffemodel'
 OUTFILE = 'img_features/ResNet-152-imagenet.tsv'
 GRAPHS = 'connectivity/'
 
@@ -59,8 +64,7 @@ def transform_img(im):
     return blob
 
 
-if __name__ == "__main__":
-
+def build_tsv():
     # Set up the simulator
     sim = MatterSim.Simulator()
     sim.setCameraResolution(WIDTH, HEIGHT)
@@ -75,14 +79,19 @@ def transform_img(im):
     net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH)
 
     count = 0
+    t_render = Timer()
+    t_net = Timer()
     with open(OUTFILE, 'wb') as tsvfile:
         writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES)          
 
         # Loop all the viewpoints in the simulator
         viewpointIds = load_viewpointids()
         for scanId,viewpointId in viewpointIds:
+            t_render.tic()
             # Loop all discretized views from this location
-            for ix in range(BATCH_SIZE):
+            blobs = []
+            features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32)
+            for ix in range(VIEWPOINT_SIZE):
                 if ix == 0:
                     sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
                 elif ix % 12 == 0:
@@ -92,25 +101,61 @@ def transform_img(im):
 
                 state = sim.getState()
                 assert state.viewIndex == ix
+
+                # Transform and save generated image
+                blobs.append(transform_img(state.rgb))
+
+            t_render.toc()
+            t_net.tic()
+            # Run as many forward passes as necessary
+            assert VIEWPOINT_SIZE % BATCH_SIZE == 0
+            forward_passes = VIEWPOINT_SIZE / BATCH_SIZE            
+            ix = 0
+            for f in range(forward_passes):
+                for n in range(BATCH_SIZE):
+                    # Copy image blob to the net
+                    net.blobs['data'].data[n, :, :, :] = blobs[ix]
+                    ix += 1
+                # Forward pass
+                output = net.forward()
+                features[f*BATCH_SIZE:(f+1)*BATCH_SIZE, :] = net.blobs['pool5'].data[:,:,0,0]
 
-                # Copy generated image to the net
-                blob = transform_img(state.rgb)
-                net.blobs['data'].data[ix, :, :, :] = blob
-
-            # Forward pass
-            output = net.forward()
-            pool5 = net.blobs['pool5'].data
             writer.writerow({
                 'scanId': scanId,
                 'viewpointId': viewpointId,
                 'image_w': WIDTH,
                 'image_h': HEIGHT,
                 'vfov' : VFOV,
-                'features': base64.b64encode(pool5)
+                'features': base64.b64encode(features)
             })
             count += 1
+            t_net.toc()
             if count % 100 == 0:
-                print 'Processed %d / %d viewpoints' % (count,len(viewpointIds))
+                print 'Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
+                  (count,len(viewpointIds), t_render.average_time, t_net.average_time, 
+                  (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600)
+
+
+def read_tsv(infile):
+    # Verify we can read a tsv
+    in_data = []
+    with open(infile, "r+b") as tsv_in_file:
+        reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames = TSV_FIELDNAMES)
+        for item in reader:
+            item['image_h'] = int(item['image_h'])
+            item['image_w'] = int(item['image_w'])   
+            item['vfov'] = int(item['vfov'])   
+            item['features'] = np.frombuffer(base64.decodestring(item['features']), 
+                    dtype=np.float32).reshape((VIEWPOINT_SIZE, FEATURE_SIZE))
+            in_data.append(item)
+    return in_data
+
+
+if __name__ == "__main__":
+
+    build_tsv()
+    #data = read_tsv(OUTFILE)
+    #print data[0]
 
 
 
diff --git a/scripts/timer.py b/scripts/timer.py
@@ -0,0 +1,32 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import time
+
+class Timer(object):
+    """A simple timer."""
+    def __init__(self):
+        self.total_time = 0.
+        self.calls = 0
+        self.start_time = 0.
+        self.diff = 0.
+        self.average_time = 0.
+
+    def tic(self):
+        # using time.time instead of time.clock because time.clock
+        # does not normalize for multithreading
+        self.start_time = time.time()
+
+    def toc(self, average=True):
+        self.diff = time.time() - self.start_time
+        self.total_time += self.diff
+        self.calls += 1
+        self.average_time = self.total_time / self.calls
+        if average:
+            return self.average_time
+        else:
+            return self.diff