March updates (#79)

* Add iRaftStereo_RVC. Update README.md * Update README.md
princeton-vl · Mar 18, 2023 · fa8ed9d · fa8ed9d
1 parent 55b9e58
commit fa8ed9d
Show file tree

Hide file tree

Showing 7 changed files with 49 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -18,28 +18,32 @@ Lahav Lipson, Zachary Teed and Jia Deng<br/>
 
 <img src="https://media.giphy.com/media/nYqxbmAdGDgVJ2lQYK/giphy.gif" alt="drawing" width="400"/> <img src="https://media.giphy.com/media/y8hD5SNh1QHc8yCGBv/giphy.gif" alt="drawing" width="400"/>
 
-##  [<img src="https://i.imgur.com/QCojoJk.png" width="40"> You can run RAFT-Stereo + Point-Cloud Visualization in Google Colab](https://colab.research.google.com/drive/1G8WJCQt9y55qxQH6QV6PpPvWEbd393g2?usp=sharing)
+##  [<img src="https://i.imgur.com/QCojoJk.png" width="40"> RAFT-Stereo + Point-Cloud Visualization in Google Colab](https://colab.research.google.com/drive/1G8WJCQt9y55qxQH6QV6PpPvWEbd393g2?usp=sharing)
 
 ## Requirements
-The code has been tested with PyTorch 1.7 and Cuda 10.2.
+The code has been tested with PyTorch 1.7 and CUDA 10.2:
 ```Shell
 conda env create -f environment.yaml
 conda activate raftstereo
 ```
-
+and with PyTorch 1.11 and CUDA 11.3:
+```Shell
+conda env create -f environment_cuda11.yaml
+conda activate raftstereo
+```
 
 
 
 ## Required Data
 To evaluate/train RAFT-stereo, you will need to download the required datasets. 
-* [Sceneflow](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html#:~:text=on%20Academic%20Torrents-,FlyingThings3D,-Driving) (Includes FlyingThings3D, Driving & Monkaa
+* [Sceneflow](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html#:~:text=on%20Academic%20Torrents-,FlyingThings3D,-Driving) (Includes FlyingThings3D, Driving & Monkaa)
 * [Middlebury](https://vision.middlebury.edu/stereo/data/)
 * [ETH3D](https://www.eth3d.net/datasets#low-res-two-view-test-data)
 * [KITTI](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=stereo)
 
 To download the ETH3D and Middlebury test datasets for the [demos](#demos), run 
 ```Shell
-chmod ug+x download_datasets.sh && ./download_datasets.sh
+bash download_datasets.sh
 ```
 
 By default `stereo_datasets.py` will search for the datasets in these locations. You can create symbolic links in the `datasets` folder that point to wherever the datasets were downloaded.
@@ -68,10 +72,23 @@ By default `stereo_datasets.py` will search for the datasets in these locations.
         ├── two_view_testing
 ```
 
+## **(New 03/17/23)**: Robust Vision Challenge 2022
+
+iRaftStereo_RVC ranked 2nd on the [stereo leaderboard](http://www.robustvision.net/leaderboard.php) at the Robust Vision Challenge at ECCV 2022.
+
+To use the model, download and unzip [models.zip](https://www.dropbox.com/s/ftveifyqcomiwaq/models.zip) and run
+```
+python demo.py --restore_ckpt models/iraftstereo_rvc.pth --context_norm instance -l=datasets/ETH3D/two_view_testing/*/im0.png -r=datasets/ETH3D/two_view_testing/*/im1.png
+```
+
+Thank you to [Insta360](https://www.insta360.com/) and Jiang et al. for their excellent work.
+
+See their manuscript for training details: [An Improved RaftStereo Trained with A Mixed Dataset for the Robust Vision Challenge 2022](https://arxiv.org/pdf/2210.12785.pdf)
+
 ## Demos
 Pretrained models can be downloaded by running
 ```Shell
-chmod ug+x download_models.sh && ./download_models.sh
+bash download_models.sh
 ```
 or downloaded from [google drive](https://drive.google.com/drive/folders/1booUFYEXmsdombVuglatP0nZXb5qI89J). We recommend our [Middlebury model](https://drive.google.com/file/d/1m3KoukUmKDoMv-ySOO6vBzYfWLyj9yqd/view?usp=sharing) for in-the-wild images.
 

diff --git a/core/raft_stereo.py b/core/raft_stereo.py
@@ -26,7 +26,7 @@ def __init__(self, args):
 
         context_dims = args.hidden_dims
 
-        self.cnet = MultiBasicEncoder(output_dim=[args.hidden_dims, context_dims], norm_fn="batch", downsample=args.n_downsample)
+        self.cnet = MultiBasicEncoder(output_dim=[args.hidden_dims, context_dims], norm_fn=args.context_norm, downsample=args.n_downsample)
         self.update_block = BasicMultiUpdateBlock(self.args, hidden_dims=args.hidden_dims)
 
         self.context_zqr_convs = nn.ModuleList([nn.Conv2d(context_dims[i], args.hidden_dims[i]*3, 3, padding=3//2) for i in range(self.args.n_gru_layers)])

diff --git a/demo.py b/demo.py
@@ -44,6 +44,8 @@ def demo(args):
             image1, image2 = padder.pad(image1, image2)
 
             _, flow_up = model(image1, image2, iters=args.valid_iters, test_mode=True)
+            flow_up = padder.unpad(flow_up).squeeze()
+
             file_stem = imfile1.split('/')[-2]
             if args.save_numpy:
                 np.save(output_directory / f"{file_stem}.npy", flow_up.cpu().numpy().squeeze())
@@ -67,6 +69,7 @@ def demo(args):
     parser.add_argument('--corr_levels', type=int, default=4, help="number of levels in the correlation pyramid")
     parser.add_argument('--corr_radius', type=int, default=4, help="width of the correlation pyramid")
     parser.add_argument('--n_downsample', type=int, default=2, help="resolution of the disparity field (1/2^K)")
+    parser.add_argument('--context_norm', type=str, default="batch", choices=['group', 'batch', 'instance', 'none'], help="normalization of context encoder")
     parser.add_argument('--slow_fast_gru', action='store_true', help="iterate the low-res GRUs more frequently")
     parser.add_argument('--n_gru_layers', type=int, default=3, help="number of hidden GRU levels")
 

diff --git a/download_models.sh b/download_models.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 mkdir models -p
 cd models
-wget https://www.dropbox.com/s/q4312z8g5znhhkp/models.zip
+wget https://www.dropbox.com/s/ftveifyqcomiwaq/models.zip
 unzip models.zip
 rm models.zip -f
diff --git a/environment_cuda11.yaml b/environment_cuda11.yaml
@@ -0,0 +1,19 @@
+name: raftstereo
+channels:
+  - pytorch
+  - bioconda
+  - defaults
+dependencies:
+  - python=3.7.6
+  - pytorch=1.11
+  - torchvision=0.12  # torchvision release built against PyTorch 1.11 (0.13 requires PyTorch 1.12)
+  - cudatoolkit=11.3
+  - matplotlib
+  - tensorboard
+  - scipy
+  - opencv
+  - tqdm
+  - opt_einsum
+  - imageio
+  - scikit-image
+  - p7zip
diff --git a/evaluate_stereo.py b/evaluate_stereo.py
@@ -203,6 +203,7 @@ def validate_middlebury(model, iters=32, split='F', mixed_prec=False):
     parser.add_argument('--corr_levels', type=int, default=4, help="number of levels in the correlation pyramid")
     parser.add_argument('--corr_radius', type=int, default=4, help="width of the correlation pyramid")
     parser.add_argument('--n_downsample', type=int, default=2, help="resolution of the disparity field (1/2^K)")
+    parser.add_argument('--context_norm', type=str, default="batch", choices=['group', 'batch', 'instance', 'none'], help="normalization of context encoder")
     parser.add_argument('--slow_fast_gru', action='store_true', help="iterate the low-res GRUs more frequently")
     parser.add_argument('--n_gru_layers', type=int, default=3, help="number of hidden GRU levels")
     args = parser.parse_args()

diff --git a/train_stereo.py b/train_stereo.py
@@ -235,6 +235,7 @@ def train(args):
     parser.add_argument('--corr_levels', type=int, default=4, help="number of levels in the correlation pyramid")
     parser.add_argument('--corr_radius', type=int, default=4, help="width of the correlation pyramid")
     parser.add_argument('--n_downsample', type=int, default=2, help="resolution of the disparity field (1/2^K)")
+    parser.add_argument('--context_norm', type=str, default="batch", choices=['group', 'batch', 'instance', 'none'], help="normalization of context encoder")
     parser.add_argument('--slow_fast_gru', action='store_true', help="iterate the low-res GRUs more frequently")
     parser.add_argument('--n_gru_layers', type=int, default=3, help="number of hidden GRU levels")
     parser.add_argument('--hidden_dims', nargs='+', type=int, default=[128]*3, help="hidden state and context dimensions")