Add R2R dataset
peteanderson80 committed Dec 1, 2017
1 parent 8a4860c commit 9dd7586
Showing 17 changed files with 1,309 additions and 55 deletions.
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2017 Peter Anderson, Philip Roberts, Qi Wu, Damien Teney,
Copyright (c) 2017 Peter Anderson, Philip Roberts, Qi Wu, Damien Teney, Jake Bruce,
Mark Johnson, Niko Sunderhauf, Ian Reid, Stephen Gould, Anton van den Hengel

Permission is hereby granted, free of charge, to any person obtaining a copy
27 changes: 23 additions & 4 deletions README.md
@@ -3,6 +3,8 @@ AI Research Platform for Reinforcement Learning from Real Panoramic Images.

The Matterport3D Simulator enables development of AI **agents that interact with real 3D environments using visual information** (RGB-D images). It is primarily intended for research in deep reinforcement learning, at the intersection of computer vision, natural language processing and robotics.

![Concept](teaser.jpg)

*This is development code for early release. We may make breaking changes, particularly as we look at possible integration with [ParlAI](https://github.com/facebookresearch/ParlAI) and [OpenAI Gym](https://github.com/openai/gym).*

## Features
@@ -13,13 +15,21 @@ The Matterport3D Simulator enables development of AI **agents that interact with
- Supports GPU rendering using OpenGL, as well as off-screen CPU rendering using OSMESA,
- Future releases will include depth data (RGB-D) as well as class and instance object segmentations.

## Cite as
## Reference

The Matterport3D Simulator and the Room-to-Room (R2R) navigation dataset are described in:
- [Vision-and-Language Navigation: Interpreting visually-grounded navigation instructions in real environments](https://arxiv.org/abs/1711.07280).

Todo
If you use the simulator or dataset, please cite our paper:

### Bibtex:
```
todo
@article{mattersim,
title={{Vision-and-Language Navigation}: Interpreting visually-grounded navigation instructions in real environments},
author={Peter Anderson and Qi Wu and Damien Teney and Jake Bruce and Mark Johnson and Niko Sünderhauf and Ian Reid and Stephen Gould and Anton van den Hengel},
journal={arXiv preprint arXiv:1711.07280},
year={2017}
}
```

## Simulator Data
@@ -28,7 +38,7 @@ Matterport3D Simulator is based on densely sampled 360-degree indoor RGB-D image

### Actions

At each viewpoint location, the agent can pan and elevate the camera. The agent can also choose to move between viewpoints. The precise details of the agent's observations and actions are configurable.
At each viewpoint location, the agent can pan and elevate the camera. The agent can also choose to move between viewpoints. The precise details of the agent's observations and actions are described in the paper and defined in `include/MatterSim.hpp`.
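
For orientation, the snippet below sketches how this loop might look through the Python bindings. It is a sketch only: the method names are assumed to mirror the C++ interface committed here (`newEpisode`, `makeAction`, `getState`, `navigableLocations`), the camera-setup call is an assumption, and the scan/viewpoint ids are placeholders. `src/driver/driver.py` remains the reference usage.

```
# A minimal sketch, not the canonical demo -- see src/driver/driver.py for that.
# Method names (setCameraResolution, init, newEpisode, makeAction, getState) are
# assumed to mirror the C++ interface in src/lib/MatterSim.cpp.
import math
import MatterSim

sim = MatterSim.Simulator()
sim.setCameraResolution(640, 480)
sim.init()

# Placeholders: substitute a real Matterport scan id and one of its viewpoint ids
# (see the connectivity files downloaded with the dataset).
sim.newEpisode('<scan_id>', '<viewpoint_id>', 0.0, 0.0)

for _ in range(10):
    state = sim.getState()
    # navigableLocations[0] is the current viewpoint; higher indices are
    # reachable neighbours, so index 1 (if present) moves the agent.
    if len(state.navigableLocations) > 1:
        sim.makeAction(1, math.radians(30), 0.0)   # move and pan 30 degrees
    else:
        sim.makeAction(0, math.radians(30), 0.0)   # just pan in place
```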

## Tasks

@@ -129,6 +139,9 @@ doxygen
```

### Demo

These are very simple demos designed to illustrate the use of the simulator in Python and C++. Use the arrow keys to pan and tilt the camera. In the Python demo, the top row number keys can be used to move to another viewpoint (if any are visible).

Python demo:
```
python src/driver/driver.py
@@ -157,4 +170,10 @@ The Matterport3D dataset, and data derived from it, is released under the [Matte

We would like to thank Matterport for allowing the Matterport3D dataset to be used by the academic community. This project is supported by a Facebook ParlAI Research Award and by the [Australian Centre for Robotic Vision](https://www.roboticvision.org/).

## Contributing

We welcome contributions from the community. All submissions require review, and in most cases they will also require tests.




20 changes: 10 additions & 10 deletions src/lib/MatterSim.cpp
@@ -121,10 +121,10 @@ void Simulator::init() {
ctx = OSMesaCreateContext(OSMESA_RGBA, NULL);
buffer = malloc(width * height * 4 * sizeof(GLubyte));
if (!buffer) {
throw std::runtime_error( "Malloc image buffer failed" );
throw std::runtime_error( "MatterSim: Malloc image buffer failed" );
}
if (!OSMesaMakeCurrent(ctx, buffer, GL_UNSIGNED_BYTE, width, height)) {
throw std::runtime_error( "OSMesaMakeCurrent failed" );
throw std::runtime_error( "MatterSim: OSMesaMakeCurrent failed" );
}
#else
cv::namedWindow("renderwin", cv::WINDOW_OPENGL);
@@ -159,7 +159,7 @@ void Simulator::init() {

// Always check that our framebuffer is ok
if(glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
throw std::runtime_error( "GL_FRAMEBUFFER failure" );
throw std::runtime_error( "MatterSim: GL_FRAMEBUFFER failure" );
}
#endif

@@ -245,7 +245,7 @@ void Simulator::loadLocationGraph() {
auto navGraphFile = navGraphPath + "/" + state->scanId + "_connectivity.json";
std::ifstream ifs(navGraphFile, std::ifstream::in);
if (ifs.fail()){
throw std::invalid_argument( "Could not open navigation graph file: " +
throw std::invalid_argument( "MatterSim: Could not open navigation graph file: " +
navGraphFile + ", is scan id valid?" );
}
ifs >> root;
@@ -325,14 +325,14 @@ void Simulator::loadTexture(int locationId) {
auto zpos = cv::imread(datafolder + viewpointId + "_skybox1_sami.jpg");
auto zneg = cv::imread(datafolder + viewpointId + "_skybox3_sami.jpg");
if (xpos.empty() || xneg.empty() || ypos.empty() || yneg.empty() || zpos.empty() || zneg.empty()) {
throw std::invalid_argument( "Could not open skybox files at: " + datafolder + viewpointId + "_skybox*_sami.jpg");
throw std::invalid_argument( "MatterSim: Could not open skybox files at: " + datafolder + viewpointId + "_skybox*_sami.jpg");
}
cpuLoadTimer.Stop();
gpuLoadTimer.Start();
setupCubeMap(scanLocations[state->scanId][locationId]->cubemap_texture, xpos, xneg, ypos, yneg, zpos, zneg);
gpuLoadTimer.Stop();
if (!glIsTexture(scanLocations[state->scanId][locationId]->cubemap_texture)){
throw std::runtime_error( "loadTexture failed" );
throw std::runtime_error( "MatterSim: loadTexture failed" );
}
}

@@ -402,23 +402,23 @@ void Simulator::newEpisode(const std::string& scanId,
ix++;
if (ix >= scanLocations[state->scanId].size()) ix = 0;
if (ix == start_ix) {
throw std::logic_error( "ScanId: " + scanId + " has no included viewpoints!");
throw std::logic_error( "MatterSim: ScanId: " + scanId + " has no included viewpoints!");
}
}
} else {
// Find index of selected viewpoint
for (int i = 0; i < scanLocations[state->scanId].size(); ++i) {
if (scanLocations[state->scanId][i]->viewpointId == viewpointId) {
if (!scanLocations[state->scanId][i]->included) {
throw std::invalid_argument( "ViewpointId: " +
throw std::invalid_argument( "MatterSim: ViewpointId: " +
viewpointId + ", is excluded from the connectivity graph." );
}
ix = i;
break;
}
}
if (ix < 0) {
throw std::invalid_argument( "Could not find viewpointId: " +
throw std::invalid_argument( "MatterSim: Could not find viewpointId: " +
viewpointId + ", is viewpoint id valid?" );
}
}
@@ -472,7 +472,7 @@ void Simulator::makeAction(int index, double heading, double elevation) {
// move
if (!initialized || index < 0 || index >= state->navigableLocations.size() ){
std::stringstream msg;
msg << "Invalid action index: " << index;
msg << "MatterSim: Invalid action index: " << index;
throw std::domain_error( msg.str() );
}
state->location = state->navigableLocations[index];
62 changes: 61 additions & 1 deletion tasks/R2R/README.md
@@ -1,7 +1,6 @@
# Room-to-Room (R2R) Navigation Task



## Download Data

Data consists of train/val-seen/val-unseen/test splits. There are two validation sets to better understand generalization performance between buildings that are in the training set (val-seen) and unseen buildings. The test set consists entirely of unseen buildings.
@@ -10,3 +9,64 @@ To download, from the top level directory, run:
```
./tasks/R2R/data/download.sh
```

Data is formatted as follows:
```
{
"distance": float,
"scan": str,
"path_id": int,
"path": [str x num_steps],
"heading": float,
"instructions": [str x 3],
}
```
- `distance`: length of the path in meters.
- `scan`: Matterport scan id.
- `path_id`: Unique id for this path.
- `path`: List of viewpoint ids (the first is the start location, the last is the goal location).
- `heading`: Agent's initial heading in radians (elevation is always assumed to be zero).
- `instructions`: Three unique natural language strings describing how to find the goal given the start pose.

For the test set, only the first path_id (starting location) is included. We will provide a test server for scoring uploaded trajectories according to the metrics in the [paper](https://arxiv.org/abs/1711.07280).
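
As a quick sanity check, a few lines of Python are enough to load a split and inspect these fields. The `R2R_train.json` filename is an assumption about what the download script produces; the field names match the format above.

```
import json

# Filename is an assumption -- check what ./tasks/R2R/data/download.sh fetches.
with open('tasks/R2R/data/R2R_train.json') as f:
    data = json.load(f)

item = data[0]
print(item['scan'], item['path_id'], item['distance'])
print('start -> goal:', item['path'][0], '->', item['path'][-1])
for instruction in item['instructions']:  # three independent instructions per path
    print(instruction)
```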

## Directory Structure

- `env.py`: Wraps the simulator and adds language instructions, with several simplifications -- namely discretized heading / elevation and pre-cached image features. This is not intended to be a standard component, or to preclude the use of continuous camera actions, end-to-end training etc. Use the simulator and the data as you see fit, but this can provide a starting point (a small discretization sketch follows this list).
- `utils.py`: Text pre-processing, navigation graph loading etc.
- `eval.py`: Evaluation script.
- `model.py`: PyTorch seq2seq model with attention.
- `agent.py`: Various implementations of an agent.
- `train.py`: Training entrypoint, parameter settings etc.
- `plot.py`: Figures from the arXiv paper.
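
To make the discretization concrete, here is a small illustrative sketch of snapping a continuous heading to fixed bins; the 30-degree bin size is an assumption, and the repository's actual logic lives in `env.py`.

```
import math

def discretize_heading(heading, bins=12):
    # Snap a continuous heading (radians) to the nearest of `bins` equal sectors.
    # 12 bins gives 30-degree increments; the real values are defined in env.py.
    two_pi = 2.0 * math.pi
    step = two_pi / bins
    return (round((heading % two_pi) / step) % bins) * step
```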

## Prerequisites

Python 2, [PyTorch](http://pytorch.org/), [NetworkX](https://networkx.github.io/). Install python dependencies by running:
```
pip install -r tasks/R2R/requirements.txt
```

## Training and Evaluation

To train the seq2seq model with student-forcing:
```
python tasks/R2R/train.py
```

To run some simple baselines:
```
python tasks/R2R/eval.py
```

Generate figures from the paper:
```
python tasks/R2R/plot.py
```

The simple baselines include:
- `ShortestAgent`: Agent that always follows the shortest path to goal (foundation for supervised training).
- `RandomAgent`: Agent that randomly picks a direction, then tries to go straight for 5 viewpoints.
- `StopAgent`: Agent that remains at the starting position.

![Navigation Error](plots/error.png)
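
For context on the plot above, `eval.py` scores a trajectory by navigation error: the shortest-path distance along the navigation graph from the viewpoint where the agent stopped to the goal viewpoint (the paper counts runs with error under 3m as successful). The sketch below shows the idea with NetworkX; the connectivity field names (`image_id`, `pose`, `included`, `unobstructed`) are assumptions about the graph files, and `eval.py` / `utils.py` contain the repository's own implementation.

```
import json
import math
import networkx as nx

def load_nav_graph(connectivity_file):
    # Build a weighted graph over viewpoints from a <scan>_connectivity.json file.
    # Field names here are assumptions; see tasks/R2R/utils.py for the real loader.
    with open(connectivity_file) as f:
        nodes = json.load(f)
    positions = {}
    for i, node in enumerate(nodes):
        if node['included']:
            # Translation is assumed to sit in the last column of a row-major 4x4 pose.
            positions[i] = (node['pose'][3], node['pose'][7], node['pose'][11])
    graph = nx.Graph()
    for i, node in enumerate(nodes):
        if not node['included']:
            continue
        for j, unobstructed in enumerate(node['unobstructed']):
            if unobstructed and nodes[j]['included']:
                weight = math.sqrt(sum((a - b) ** 2
                                       for a, b in zip(positions[i], positions[j])))
                graph.add_edge(node['image_id'], nodes[j]['image_id'], weight=weight)
    return graph

def navigation_error(graph, final_viewpoint, goal_viewpoint):
    # Shortest-path distance in metres from where the agent stopped to the goal.
    return nx.dijkstra_path_length(graph, final_viewpoint, goal_viewpoint)
```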
