facebookresearch
diff --git a/‎.gitignore
+12 b/‎.gitignore
+12
diff --git a/‎CODE_OF_CONDUCT.md
+80 b/‎CODE_OF_CONDUCT.md
+80
diff --git a/‎CONTRIBUTING.md
+32 b/‎CONTRIBUTING.md
+32
diff --git a/‎LICENSE
+21 b/‎LICENSE
+21
diff --git a/‎README.md
+68 b/‎README.md
+68
diff --git a/‎conda_env.yml
+30 b/‎conda_env.yml
+30
diff --git a/‎config.yaml
+59 b/‎config.yaml
+59
@@ -0,0 +1,12 @@
+__pycache__/
+.ipynb_checkpoints/
+exp_local
+exp
+exp_fixed
+exp_hard
+nbs
+code_snapshots
+exp_drqv2_*.py
+dmc_benchmarks.py
+check_sweep.py
+cancel_sweep.py
@@ -0,0 +1,80 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+
+This Code of Conduct also applies outside the project spaces when there is a
+reasonable belief that an individual's behavior may have a negative impact on
+the project or its community.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at <opensource-conduct@fb.com>. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
@@ -0,0 +1,32 @@
+# Contributing to DrQ-v2
+We want to make contributing to this project as easy and transparent as
+possible.
+
+## Pull Requests
+We actively welcome your pull requests.
+
+1. Fork the repo and create your branch from `main`.
+2. If you've added code that should be tested, add tests.
+3. If you've changed APIs, update the documentation.
+4. Ensure the test suite passes.
+5. Make sure your code lints.
+6. If you haven't already, complete the Contributor License Agreement ("CLA").
+
+## Contributor License Agreement ("CLA")
+In order to accept your pull request, we need you to submit a CLA. You only need
+to do this once to work on any of Facebook's open source projects.
+
+Complete your CLA here: <https://code.facebook.com/cla>
+
+## Issues
+We use GitHub issues to track public bugs. Please ensure your description is
+clear and has sufficient instructions to be able to reproduce the issue.
+
+Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
+disclosure of security bugs. In those cases, please go through the process
+outlined on that page and do not file a public issue.
+
+
+## License
+By contributing to DrQ-v2, you agree that your contributions will be licensed
+under the LICENSE file in the root directory of this source tree.
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Facebook, Inc. and its affiliates.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,68 @@
+
+
+# DrQ-v2: Improved Data-Augmented RL Agent
+
+<p align="center">
+  <img width="19.5%" src="https://i.imgur.com/NzY7Pyv.gif">
+  <img width="19.5%" src="https://imgur.com/O5Va3NY.gif">
+  <img width="19.5%" src="https://imgur.com/PCOR9Mm.gif">
+  <img width="19.5%" src="https://imgur.com/H0ab6tz.gif">
+  <img width="19.5%" src="https://imgur.com/sDGgRos.gif">
+  <img width="19.5%" src="https://imgur.com/gj3qo1X.gif">
+  <img width="19.5%" src="https://imgur.com/FFzRwFt.gif">
+  <img width="19.5%" src="https://imgur.com/W5BKyRL.gif">
+  <img width="19.5%" src="https://imgur.com/qwOGfRQ.gif">
+  <img width="19.5%" src="https://imgur.com/Uubf00R.gif">
+ </p>
+
+## Method
+DrQ-v2 is a model-free off-policy algorithm for image-based continuous control. DrQ-v2 builds on [DrQ](https://github.com/denisyarats/drq), an actor-critic approach that uses data augmentation to learn directly from pixels. We introduce several improvements including:
+- Switch the base RL learner from SAC to DDPG.
+- Incorporate n-step returns to estimate TD error.
+- Introduce a decaying schedule for exploration noise.
+- Make implementation 3.5 times faster.
+- Find better hyper-parameters.
+ 
+<p align="center">
+  <img src="https://i.imgur.com/SemY10G.png" width="100%"/>
+</p>
+
+These changes allow us to significantly improve sample efficiency and wall-clock training time on a set of challenging tasks from the [DeepMind Control Suite](https://github.com/deepmind/dm_control) compared to prior methods. Furthermore, DrQ-v2 is able to solve complex humanoid locomotion tasks directly from pixel observations, previously unattained by model-free RL.
+
+<p align="center">
+  <img width="100%" src="https://imgur.com/mrS4fFA.png">
+  <img width="100%" src="https://imgur.com/pPd1ks6.png">
+ </p>
+
+## Citation
+
+If you use this repo in your research, please consider citing the paper as follows:
+```
+@article{yarats2021drqv2,
+  title={Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning},
+  author={Denis Yarats and Rob Fergus and Alessandro Lazaric and Lerrel Pinto},
+  journal={arXiv preprint arXiv:2107.09645},
+  year={2021}
+}
+```
+
+## Instructions
+
+Install dependencies:
+```sh
+conda env create -f conda_env.yml
+conda activate drqv2
+```
+
+Train the agent:
+```sh
+python train.py task=quadruped_walk
+```
+
+Monitor results:
+```sh
+tensorboard --logdir exp_local
+```
+
+## License
+The majority of DrQ-v2 is licensed under the MIT license; however, portions of the project are available under separate license terms: DeepMind is licensed under the Apache 2.0 license.
@@ -0,0 +1,30 @@
+name: drqv2  # conda environment name
+channels:
+  - defaults
+dependencies:
+  - python=3.8
+  - pip=21.1.3
+  - numpy=1.19.2
+  - absl-py=0.13.0
+  - pyparsing=2.4.7
+  - jupyterlab=3.0.14
+  - scikit-image=0.18.1
+  - nvidia::cudatoolkit=11.1  # `channel::package` pulls from the named channel
+  - pytorch::pytorch  # unpinned: resolves to whatever the pytorch channel offers
+  - pytorch::torchvision
+  - pytorch::torchaudio
+  - pip:  # entries below are installed with pip inside the conda environment
+    - termcolor==1.1.0
+    - git+https://github.com/deepmind/dm_control.git  # https: GitHub no longer serves git://
+    - tb-nightly
+    - imageio==2.9.0
+    - imageio-ffmpeg==0.4.4
+    - hydra-core==1.1.0
+    - hydra-submitit-launcher==1.1.5
+    - pandas==1.3.0
+    - ipdb==0.13.9
+    - yapf==0.31.0
+    - mujoco_py==2.0.2.13
+    - sklearn==0.0
+    - matplotlib==3.4.2
+    - opencv-python==4.5.3.56
@@ -0,0 +1,59 @@
+defaults:
+  - override hydra/launcher: submitit_local
+
+# task settings
+task: quadruped_walk
+frame_stack: 3
+action_repeat: 2
+discount: 0.99
+# training settings
+num_train_frames: 1000000
+num_seed_frames: 4000
+# evaluation settings
+eval_every_frames: 10000
+num_eval_episodes: 10
+# snapshot settings
+save_snapshot: false
+# replay buffer settings
+replay_buffer_size: 1000000
+replay_buffer_num_workers: 4
+nstep: 3
+batch_size: 256
+# miscellaneous settings
+seed: 1
+device: cuda
+save_video: true
+save_train_video: false
+use_tb: false
+# experiment name; interpolated into the sweep and launcher paths below
+experiment: exp
+
+agent:
+  _target_: drqv2.DrQV2Agent
+  obs_shape: ???  # mandatory value; to be specified later
+  action_shape: ???  # mandatory value; to be specified later
+  device: ${device}  # interpolates the top-level `device`
+  lr: 1.0e-4  # decimal point kept so YAML 1.1 loaders also parse this as a float
+  critic_target_tau: 0.01
+  update_every_steps: 2
+  use_tb: ${use_tb}  # interpolates the top-level `use_tb`
+  num_expl_steps: 2000
+  hidden_dim: 1024
+  feature_dim: 50
+  stddev_schedule: 'linear(1.0,0.1,500000)'
+  stddev_clip: 0.3
+
+hydra:
+  run:
+    dir: ./exp_local/${now:%Y.%m.%d}/${now:%H%M%S}_${hydra.job.override_dirname}
+  sweep:
+    dir: ./exp/${now:%Y.%m.%d}/${now:%H%M}_${experiment}
+    subdir: ${hydra.job.num}
+  launcher:
+    timeout_min: 4300
+    cpus_per_task: 10
+    gpus_per_node: 1
+    tasks_per_node: 1
+    mem_gb: 160
+    nodes: 1
+    submitit_folder: ./exp/${now:%Y.%m.%d}/${now:%H%M%S}_${experiment}/.slurm  # NOTE(review): %H%M%S here vs %H%M in sweep.dir - confirm intended