autonomousvision
diff --git a/‎.gitignore
+165 b/‎.gitignore
+165
diff --git a/‎README.md
+27-1 b/‎README.md
+27-1
diff --git a/‎main_depth.py
+19-8 b/‎main_depth.py
+19-8
diff --git a/‎scripts/depthsplat_depth_demo.sh
+37 b/‎scripts/depthsplat_depth_demo.sh
+37
diff --git a/‎unimatch/backbone.py
+18-2 b/‎unimatch/backbone.py
+18-2
@@ -0,0 +1,165 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+output/
+pretrained/
@@ -48,6 +48,11 @@ This project is developed based on our previous works:
 - [AANet: Adaptive Aggregation Network for Efficient Stereo Matching, CVPR 2020](https://github.com/haofeixu/aanet)
 
 
+## Updates
+
+- 2025-01-04: Check out [DepthSplat](https://haofeixu.github.io/depthsplat/) for a modern multi-view depth model, which leverages monocular depth ([Depth Anything V2](https://github.com/DepthAnything/Depth-Anything-V2)) to significantly improve the robustness of UniMatch.
+
+- 2025-01-04: The UniMatch depth model served as the foundational backbone of [MVSplat (ECCV 2024, Oral)](https://donydchen.github.io/mvsplat/) for sparse-view feed-forward 3DGS reconstruction.
 
 ## Installation
 
@@ -67,11 +72,22 @@ bash pip_install.sh
 ```
 
 
+To use the [depth models from DepthSplat](https://github.com/cvg/depthsplat/blob/main/MODEL_ZOO.md), create a separate conda environment with newer dependency versions:
+
+```
+conda create -y -n depthsplat-depth python=3.10
+conda activate depthsplat-depth
+pip install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu124
+pip install tensorboard==2.9.1 einops "opencv-python>=4.8.1.78" matplotlib
+```
+
 
 ## Model Zoo
 
 A large number of pretrained models with different speed-accuracy trade-offs for flow, stereo and depth are available at [MODEL_ZOO.md](MODEL_ZOO.md).
 
+Check out [DepthSplat's Model Zoo](https://github.com/cvg/depthsplat/blob/main/MODEL_ZOO.md) for better depth models.
+
 We assume the downloaded weights are located under the `pretrained` directory.
 
 Otherwise, you may need to change the corresponding paths in the scripts.
@@ -82,7 +98,7 @@ Otherwise, you may need to change the corresponding paths in the scripts.
 
 Given an image pair or a video sequence, our code supports generating prediction results of optical flow, disparity and depth.
 
-Please refer to [scripts/gmflow_demo.sh](scripts/gmflow_demo.sh), [scripts/gmstereo_demo.sh](scripts/gmstereo_demo.sh) and [scripts/gmdepth_demo.sh](scripts/gmdepth_demo.sh) for example usages.
+Please refer to [scripts/gmflow_demo.sh](scripts/gmflow_demo.sh), [scripts/gmstereo_demo.sh](scripts/gmstereo_demo.sh), [scripts/gmdepth_demo.sh](scripts/gmdepth_demo.sh) and [scripts/depthsplat_depth_demo.sh](scripts/depthsplat_depth_demo.sh) for example usages.
 
 
 
@@ -142,6 +158,16 @@ This work is a substantial extension of our previous conference paper [GMFlow (C
 }
 ```
 
+Please consider citing [DepthSplat](https://arxiv.org/abs/2410.13862) if DepthSplat's depth model is used in your research.
+
+```
+@article{xu2024depthsplat,
+      title   = {DepthSplat: Connecting Gaussian Splatting and Depth},
+      author  = {Xu, Haofei and Peng, Songyou and Wang, Fangjinhua and Blum, Hermann and Barath, Daniel and Geiger, Andreas and Pollefeys, Marc},
+      journal = {arXiv preprint arXiv:2410.13862},
+      year    = {2024}
+}
+```
 
 
 ## Acknowledgements
 
@@ -6,6 +6,7 @@
 from torch.utils.tensorboard import SummaryWriter
 
 from unimatch.unimatch import UniMatch
+from unimatch.unimatch_depthsplat import UniMatchDepthSplat
 from dataloader.depth.datasets import DemonDataset, ScannetDataset
 from dataloader.depth import augmentation
 from loss.depth_loss import depth_loss_func, depth_grad_loss_func
@@ -85,6 +86,10 @@ def get_args_parser():
     parser.add_argument('--num_reg_refine', default=1, type=int,
                         help='number of additional local regression refinement')
 
+    # depthsplat depth model
+    parser.add_argument('--depthsplat_depth', action='store_true')
+    parser.add_argument('--vit_type', default='vits', type=str, choices=['vits', 'vitb', 'vitl'])
+
     # loss
     parser.add_argument('--depth_loss_weight', default=20, type=float)
     parser.add_argument('--depth_grad_loss_weight', default=20, type=float)
@@ -143,14 +148,20 @@ def main(args):
         setup_for_distributed(args.local_rank == 0)
 
     # model
-    model = UniMatch(feature_channels=args.feature_channels,
-                     num_scales=args.num_scales,
-                     upsample_factor=args.upsample_factor,
-                     num_head=args.num_head,
-                     ffn_dim_expansion=args.ffn_dim_expansion,
-                     num_transformer_layers=args.num_transformer_layers,
-                     reg_refine=args.reg_refine,
-                     task=args.task).to(device)
+    if args.depthsplat_depth:
+        model = UniMatchDepthSplat(num_scales=args.num_scales,
+                                   upsample_factor=args.upsample_factor,
+                                   vit_type=args.vit_type,
+                                   ).to(device)
+    else:
+        model = UniMatch(feature_channels=args.feature_channels,
+                         num_scales=args.num_scales,
+                         upsample_factor=args.upsample_factor,
+                         num_head=args.num_head,
+                         ffn_dim_expansion=args.ffn_dim_expansion,
+                         num_transformer_layers=args.num_transformer_layers,
+                         reg_refine=args.reg_refine,
+                         task=args.task).to(device)
 
     if print_info:
         print(model)
 
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+
+# depthsplat-depth-small
+CUDA_VISIBLE_DEVICES=0 python main_depth.py \
+--inference_dir demo/depth-scannet \
+--output_path output/depthsplat-depth-small \
+--resume pretrained/depthsplat-depth-small-3d79dd5e.pth \
+--depthsplat_depth 
+
+# predict depth for both images
+# --pred_bidir_depth
+
+
+
+# depthsplat-depth-base
+CUDA_VISIBLE_DEVICES=0 python main_depth.py \
+--inference_dir demo/depth-scannet \
+--output_path output/depthsplat-depth-base \
+--resume pretrained/depthsplat-depth-base-f57113bd.pth \
+--depthsplat_depth \
+--vit_type vitb \
+--num_scales 2 \
+--upsample_factor 4
+
+
+
+# depthsplat-depth-large
+CUDA_VISIBLE_DEVICES=0 python main_depth.py \
+--inference_dir demo/depth-scannet \
+--output_path output/depthsplat-depth-large \
+--resume pretrained/depthsplat-depth-large-50d3d7cf.pth \
+--depthsplat_depth \
+--vit_type vitl \
+--num_scales 2 \
+--upsample_factor 4
+
@@ -40,10 +40,12 @@ class CNNEncoder(nn.Module):
     def __init__(self, output_dim=128,
                  norm_layer=nn.InstanceNorm2d,
                  num_output_scales=1,
+                 return_all_scales=False,
                  **kwargs,
                  ):
         super(CNNEncoder, self).__init__()
         self.num_branch = num_output_scales
+        self.return_all_scales = return_all_scales
 
         feature_dims = [64, 96, 128]
 
@@ -56,14 +58,17 @@ def __init__(self, output_dim=128,
         self.layer2 = self._make_layer(feature_dims[1], stride=2, norm_layer=norm_layer)  # 1/4
 
         # highest resolution 1/4 or 1/8
-        stride = 2 if num_output_scales == 1 else 1
+        if return_all_scales:  # depthsplat
+            stride = 2
+        else:
+            stride = 2 if num_output_scales == 1 else 1
         self.layer3 = self._make_layer(feature_dims[2], stride=stride,
                                        norm_layer=norm_layer,
                                        )  # 1/4 or 1/8
 
         self.conv2 = nn.Conv2d(feature_dims[2], output_dim, 1, 1, 0)
 
-        if self.num_branch > 1:
+        if self.num_branch > 1 and not return_all_scales:
             if self.num_branch == 4:
                 strides = (1, 2, 4, 8)
             elif self.num_branch == 3:
@@ -99,16 +104,27 @@ def _make_layer(self, dim, stride=1, dilation=1, norm_layer=nn.InstanceNorm2d):
         return nn.Sequential(*layers)
 
     def forward(self, x):
+        output_all_scales = []
         x = self.conv1(x)
         x = self.norm1(x)
         x = self.relu1(x)
 
         x = self.layer1(x)  # 1/2
+        if self.return_all_scales:
+            output_all_scales.append(x)
+
         x = self.layer2(x)  # 1/4
+        if self.return_all_scales:
+            output_all_scales.append(x)
+
         x = self.layer3(x)  # 1/8 or 1/4
 
         x = self.conv2(x)
 
+        if self.return_all_scales:
+            output_all_scales.append(x)
+            return output_all_scales
+
         if self.num_branch > 1:
             out = self.trident_conv([x] * self.num_branch)  # high to low res
         else: