Upgrade docker to ubuntu18.04, upgrade code to python 3, fixes peteanderson80#46
wtzmx · Oct 13, 2019 · cb196b6 · cb196b6
1 parent e4aa7fd
commit cb196b6
Show file tree

Hide file tree

Showing 13 changed files with 116 additions and 131 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,13 +1,13 @@
 # Matterport3DSimulator
-# Requires nvidia gpu with driver 384.xx or higher
+# Requires nvidia gpu with driver 396.37 or higher
 
 
-FROM nvidia/cudagl:9.0-devel-ubuntu16.04
+FROM nvidia/cudagl:9.2-devel-ubuntu18.04
 
 # Install a few libraries to support both EGL and OSMESA options
-RUN apt-get update && apt-get install -y wget doxygen curl libjsoncpp-dev libepoxy-dev libglm-dev libosmesa6 libosmesa6-dev libglew-dev libopencv-dev python-opencv python-setuptools python-dev
-RUN easy_install pip
-RUN pip install torch torchvision pandas networkx
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y wget doxygen curl libjsoncpp-dev libepoxy-dev libglm-dev libosmesa6 libosmesa6-dev libglew-dev libopencv-dev python-opencv python3-setuptools python3-dev python3-pip
+RUN pip3 install opencv-python==4.1.0.25 torch==1.1.0 torchvision==0.3.0 numpy==1.13.3 pandas==0.24.1 networkx==2.2
 
 #install latest cmake
 ADD https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.sh /cmake-3.12.2-Linux-x86_64.sh
@@ -16,3 +16,4 @@ RUN sh /cmake-3.12.2-Linux-x86_64.sh --prefix=/opt/cmake --skip-license
 RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
 RUN cmake --version
 
+ENV PYTHONPATH=/root/mount/Matterport3DSimulator/build
diff --git a/README.md b/README.md
@@ -55,8 +55,7 @@ We recommend using our [Dockerfile](Dockerfile) to install the simulator. The si
 
 ### Prerequisites
 
-- Ubuntu 16.04
-- Nvidia GPU with driver >= 384
+- Nvidia GPU with driver >= 396.37
 - Install [docker](https://docs.docker.com/engine/installation/)
 - Install [nvidia-docker2.0](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0))
 - Note: CUDA / CuDNN toolkits do not need to be installed (these are provided by the docker image)
@@ -111,12 +110,12 @@ Depth skyboxes are generated from the `undistorted_depth_images` using a simple
 
 Build the docker image:
 ```
-docker build -t mattersim .
+docker build -t mattersim:9.2-devel-ubuntu18.04 .
 ```
 
 Run the docker container, mounting both the git repo and the dataset:
 ```
-nvidia-docker run -it --mount type=bind,source=$MATTERPORT_DATA_DIR,target=/root/mount/Matterport3DSimulator/data/v1/scans,readonly --volume `pwd`:/root/mount/Matterport3DSimulator mattersim
+nvidia-docker run -it --mount type=bind,source=$MATTERPORT_DATA_DIR,target=/root/mount/Matterport3DSimulator/data/v1/scans,readonly --volume `pwd`:/root/mount/Matterport3DSimulator mattersim:9.2-devel-ubuntu18.04
 ```
 
 Now (from inside the docker container), build the simulator and run the unit tests:
@@ -149,10 +148,10 @@ The recommended (fast) approach for training agents is using off-screen GPU rend
 
 ### Interactive Demo
 
-To run an interactive demo, build the docker image as described above (`docker build -t mattersim .`), then run the docker container while sharing the host's X server and DISPLAY environment variable with the container:
+To run an interactive demo, build the docker image as described above (`docker build -t mattersim:9.2-devel-ubuntu18.04 .`), then run the docker container while sharing the host's X server and DISPLAY environment variable with the container:
 ```
 xhost +
-nvidia-docker run -it -e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix --mount type=bind,source=$MATTERPORT_DATA_DIR,target=/root/mount/Matterport3DSimulator/data/v1/scans,readonly --volume `pwd`:/root/mount/Matterport3DSimulator mattersim
+nvidia-docker run -it -e DISPLAY -v /tmp/.X11-unix:/tmp/.X11-unix --mount type=bind,source=$MATTERPORT_DATA_DIR,target=/root/mount/Matterport3DSimulator/data/v1/scans,readonly --volume `pwd`:/root/mount/Matterport3DSimulator mattersim:9.2-devel-ubuntu18.04
 cd /root/mount/Matterport3DSimulator
 ```
 

diff --git a/scripts/depth_to_skybox.py b/scripts/depth_to_skybox.py
@@ -1,8 +1,8 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
-''' Script for generating depth skyboxes based on undistorted depth images, 
-    in order to support depth output in the simulator. The current version 
-    assumes that undistorted depth images are aligned to matterport skyboxes, 
+''' Script for generating depth skyboxes based on undistorted depth images,
+    in order to support depth output in the simulator. The current version
+    assumes that undistorted depth images are aligned to matterport skyboxes,
     and uses simple blending. Images are downsized 50%. '''
 
 import os
@@ -11,7 +11,7 @@
 import numpy as np
 from multiprocessing import Pool
 from numpy.linalg import inv,norm
-from StringIO import StringIO
+from io import StringIO
 
 
 # Parameters
@@ -22,8 +22,6 @@
 VISUALIZE_OUTPUT = False
 
 if FILL_HOLES:
-  import sys
-  sys.path.append('build')
   from MatterSim import cbf
 
 # Constants
@@ -117,7 +115,7 @@ def fill_joint_bilateral_filter(rgb, depth):
   depth = (depth.astype(np.float64)/maxDepth)
   depth[depth > 1] = 1
   depth = (depth*255).astype(np.uint8)
-  
+
   # Convert to col major order
   depth = np.asfortranarray(depth)
   intensity = np.asfortranarray(intensity)
@@ -138,7 +136,7 @@ def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES):
   K_skybox = instrinsic_matrix(SKYBOX_WIDTH, SKYBOX_HEIGHT)
 
   pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
-  print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids))
+  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
 
   if visualize:
     cv2.namedWindow('RGB')
@@ -239,7 +237,7 @@ def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES):
 
   if visualize:
     cv2.destroyAllWindows()
-  print 'Completed scan %s' % (scan)
+  print('Completed scan %s' % (scan))
 
 
 
@@ -249,6 +247,3 @@ def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES):
     scans = [scan.strip() for scan in f.readlines()]
     p = Pool(NUM_WORKER_PROCESSES)
     p.map(depth_to_skybox, scans)
-
-
-
diff --git a/scripts/downsize_skybox.py b/scripts/downsize_skybox.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 ''' Script for downsizing skybox images. '''
 
@@ -28,7 +28,7 @@ def downsizeWithMerge(scan):
   # Load pano ids
   intrinsics,_ = camera_parameters(scan)
   pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
-  print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids))
+  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
 
   for pano in pano_ids:
 
@@ -49,7 +49,7 @@ def downsize(scan):
   # Load pano ids
   intrinsics,_ = camera_parameters(scan)
   pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
-  print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids))
+  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
 
   for pano in pano_ids:
 
@@ -68,6 +68,4 @@ def downsize(scan):
   with open('connectivity/scans.txt') as f:
     scans = [scan.strip() for scan in f.readlines()]
     p = Pool(NUM_WORKER_PROCESSES)
-    p.map(downsizeWithMerge, scans)  
-
-
+    p.map(downsizeWithMerge, scans)
diff --git a/scripts/fill_depth.py b/scripts/fill_depth.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 ''' Script for filling missing values in undistorted depth images. '''
 
@@ -24,7 +24,7 @@ def fill_joint_bilateral_filter(scan):
   # Load camera parameters
   intrinsics,_ = camera_parameters(scan)
   pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
-  print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids))
+  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
 
   for pano in pano_ids:
 
@@ -45,7 +45,7 @@ def fill_joint_bilateral_filter(scan):
         depth = (depth*255).astype(np.uint8)
 
         #cv2.imshow('input', cv2.applyColorMap(depth, cv2.COLORMAP_JET))
-        
+
         # Convert to col major order
         depth = np.asfortranarray(depth)
         intensity = np.asfortranarray(intensity)
@@ -54,7 +54,7 @@ def fill_joint_bilateral_filter(scan):
 
         # Fill holes
         cbf(depth, intensity, mask, result)
-  
+
         #cv2.imshow('result', cv2.applyColorMap(result, cv2.COLORMAP_JET))
         #cv2.waitKey(0)
 
@@ -68,5 +68,3 @@ def fill_joint_bilateral_filter(scan):
     scans = [scan.strip() for scan in f.readlines()]
     p = Pool(10)
     p.map(fill_joint_bilateral_filter, scans)
-
-
diff --git a/scripts/precompute_img_features.py b/scripts/precompute_img_features.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 ''' Script to precompute image features using a Caffe ResNet CNN, using 36 discretized views
     at each viewpoint in 30 degree increments, and the provided camera WIDTH, HEIGHT 
@@ -16,11 +16,10 @@
 
 
 # Caffe and MatterSim need to be on the Python path
-sys.path.insert(0, 'build')
 import MatterSim
 
-#caffe_root = '../'  # your caffe build
-#sys.path.insert(0, caffe_root + 'python')
+caffe_root = '../'  # your caffe build
+sys.path.insert(0, caffe_root + 'python')
 import caffe
 
 from timer import Timer
@@ -53,12 +52,13 @@ def load_viewpointids():
                 for item in data:
                     if item['included']:
                         viewpointIds.append((scan, item['image_id']))
-    print 'Loaded %d viewpoints' % len(viewpointIds)
+    print('Loaded %d viewpoints' % len(viewpointIds))
     return viewpointIds
 
 
 def transform_img(im):
     ''' Prep opencv 3 channel image for the network '''
+    im = np.array(im, copy=True)
     im_orig = im.astype(np.float32, copy=True)
     im_orig -= np.array([[[103.1, 115.9, 123.2]]]) # BGR pixel mean
     blob = np.zeros((1, im.shape[0], im.shape[1], 3), dtype=np.float32)
@@ -73,7 +73,8 @@ def build_tsv():
     sim.setCameraResolution(WIDTH, HEIGHT)
     sim.setCameraVFOV(math.radians(VFOV))
     sim.setDiscretizedViewingAngles(True)
-    sim.init()
+    sim.setBatchSize(1)
+    sim.initialize()
 
     # Set up Caffe resnet
     caffe.set_device(GPU_ID)
@@ -84,8 +85,8 @@ def build_tsv():
     count = 0
     t_render = Timer()
     t_net = Timer()
-    with open(OUTFILE, 'wb') as tsvfile:
-        writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES)          
+    with open(OUTFILE, 'wt') as tsvfile:
+        writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES)
 
         # Loop all the viewpoints in the simulator
         viewpointIds = load_viewpointids()
@@ -96,13 +97,13 @@ def build_tsv():
             features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32)
             for ix in range(VIEWPOINT_SIZE):
                 if ix == 0:
-                    sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
+                    sim.newEpisode([scanId], [viewpointId], [0], [math.radians(-30)])
                 elif ix % 12 == 0:
-                    sim.makeAction(0, 1.0, 1.0)
+                    sim.makeAction([0], [1.0], [1.0])
                 else:
-                    sim.makeAction(0, 1.0, 0)
+                    sim.makeAction([0], [1.0], [0])
 
-                state = sim.getState()
+                state = sim.getState()[0]
                 assert state.viewIndex == ix
 
                 # Transform and save generated image
@@ -112,7 +113,7 @@ def build_tsv():
             t_net.tic()
             # Run as many forward passes as necessary
             assert VIEWPOINT_SIZE % BATCH_SIZE == 0
-            forward_passes = VIEWPOINT_SIZE / BATCH_SIZE            
+            forward_passes = VIEWPOINT_SIZE // BATCH_SIZE
             ix = 0
             for f in range(forward_passes):
                 for n in range(BATCH_SIZE):
@@ -122,33 +123,33 @@ def build_tsv():
                 # Forward pass
                 output = net.forward()
                 features[f*BATCH_SIZE:(f+1)*BATCH_SIZE, :] = net.blobs['pool5'].data[:,:,0,0]
-
             writer.writerow({
                 'scanId': scanId,
                 'viewpointId': viewpointId,
                 'image_w': WIDTH,
                 'image_h': HEIGHT,
                 'vfov' : VFOV,
-                'features': base64.b64encode(features)
+                'features': str(base64.b64encode(features), "utf-8")
             })
             count += 1
             t_net.toc()
             if count % 100 == 0:
-                print 'Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
+                print('Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
                   (count,len(viewpointIds), t_render.average_time, t_net.average_time, 
-                  (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600)
+                  (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600))
 
 
 def read_tsv(infile):
     # Verify we can read a tsv
     in_data = []
-    with open(infile, "r+b") as tsv_in_file:
+    with open(infile, "rt") as tsv_in_file:
         reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames = TSV_FIELDNAMES)
         for item in reader:
+            item['scanId'] = item['scanId']
             item['image_h'] = int(item['image_h'])
-            item['image_w'] = int(item['image_w'])   
-            item['vfov'] = int(item['vfov'])   
-            item['features'] = np.frombuffer(base64.decodestring(item['features']), 
+            item['image_w'] = int(item['image_w'])
+            item['vfov'] = int(item['vfov'])
+            item['features'] = np.frombuffer(base64.b64decode(item['features']),
                     dtype=np.float32).reshape((VIEWPOINT_SIZE, FEATURE_SIZE))
             in_data.append(item)
     return in_data
@@ -158,5 +159,5 @@ def read_tsv(infile):
 
     build_tsv()
     data = read_tsv(OUTFILE)
-    print 'Completed %d viewpoints' % len(data)
+    print('Completed %d viewpoints' % len(data))
 
diff --git a/tasks/R2R/README.md b/tasks/R2R/README.md
@@ -42,9 +42,9 @@ For the test set, only the first path_id (starting location) is included. We wil
 
 ## Prerequisites
 
-Python 2, [PyTorch](http://pytorch.org/), [NetworkX](https://networkx.github.io/), [pandas](https://pandas.pydata.org/), etc. These should already be installed in the docker image, or can be installed by running:
+Python 3, [PyTorch](http://pytorch.org/), [NetworkX](https://networkx.github.io/), [pandas](https://pandas.pydata.org/), etc. These should already be installed in the docker image, or can be installed by running:
 ```
-pip install -r tasks/R2R/requirements.txt
+pip3 install -r tasks/R2R/requirements.txt
 ```
 
 
@@ -54,17 +54,17 @@ There is a test server and leaderboard available at [EvalAI](https://evalai.clou
 
 To train the baseline seq2seq model with student-forcing:
 ```
-python tasks/R2R/train.py
+python3 tasks/R2R/train.py
 ```
 
 To run some simple learning free baselines:
 ```
-python tasks/R2R/eval.py
+python3 tasks/R2R/eval.py
 ```
 
 Generate figures from the paper:
 ```
-python tasks/R2R/plot.py
+python3 tasks/R2R/plot.py
 ```
 
 The simple baselines include: