diff --git a/README.md b/README.md
index 2e58656be..e86dfac80 100644
--- a/README.md
+++ b/README.md
@@ -155,13 +155,17 @@ mask = mask_utils.decode(annotation["segmentation"])
 
 See [here](https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/mask.py) for more instructions to manipulate masks stored in RLE format.
 
+## Note on Reproducibility with GPU
+
+Fine-tuning is currently not reproducible on GPU. Enabling `torch.use_deterministic_algorithms(True)` does not help, because some PyTorch/CUDA routines have no deterministic implementation, and the `ResizeLongestSide` class in `segment_anything/utils/transforms.py` is suspected to be one source of non-determinism. With the current setting of `torch.use_deterministic_algorithms(False)`, GPU fine-tuning runs are therefore not guaranteed to produce identical results. As a workaround, you can fine-tune on CPU, which is reproducible but significantly slower.
+
 ## License
 
 The model is licensed under the [Apache 2.0 license](LICENSE).
 
 ## Contributing
 
 See [contributing](CONTRIBUTING.md) and the [code of conduct](CODE_OF_CONDUCT.md).
 
 ## Contributors
 
diff --git a/notebooks/onnx_model_example.ipynb b/notebooks/onnx_model_example.ipynb
index ca49c3571..6b5f5daca 100644
--- a/notebooks/onnx_model_example.ipynb
+++ b/notebooks/onnx_model_example.ipynb
@@ -748,6 +748,16 @@
     "plt.axis('off')\n",
     "plt.show()"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d3e8b1b2",
+   "metadata": {},
+   "source": [
+    "## Note on Reproducibility with GPU\n",
+    "\n",
+    "Fine-tuning is currently not reproducible on GPU. Enabling `torch.use_deterministic_algorithms(True)` does not help, because some PyTorch/CUDA routines have no deterministic implementation, and the `ResizeLongestSide` class in `segment_anything/utils/transforms.py` is suspected to be one source of non-determinism. With the current setting of `torch.use_deterministic_algorithms(False)`, GPU fine-tuning runs are therefore not guaranteed to produce identical results. As a workaround, you can fine-tune on CPU, which is reproducible but significantly slower."
+   ]
   }
  ],
 "metadata": {
diff --git a/scripts/amg.py b/scripts/amg.py
index f2dbf676a..ba35736a1 100644
--- a/scripts/amg.py
+++ b/scripts/amg.py
@@ -218,6 +218,9 @@ def main(args: argparse.Namespace) -> None:
             continue
         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
+        # Resize with the deterministic transform before mask generation
+        image = generator.predictor.transform.apply_image_deterministic(image)
+
         masks = generator.generate(image)
 
         base = os.path.basename(t)
diff --git a/segment_anything/utils/transforms.py b/segment_anything/utils/transforms.py
index c08ba1e3d..fc54b133a 100644
--- a/segment_anything/utils/transforms.py
+++ b/segment_anything/utils/transforms.py
@@ -30,6 +30,17 @@ def apply_image(self, image: np.ndarray) -> np.ndarray:
         target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
         return np.array(resize(to_pil_image(image), target_size))
 
+    def apply_image_deterministic(self, image: np.ndarray) -> np.ndarray:
+        """
+        Expects a numpy array with shape HxWxC in uint8 format.
+        Uses torch.nn.functional.interpolate with mode='nearest' for deterministic resizing.
+        """
+        target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
+        image_torch = torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0).float()
+        resized_image_torch = F.interpolate(image_torch, size=target_size, mode='nearest')
+        resized_image = resized_image_torch.squeeze(0).permute(1, 2, 0).byte().numpy()
+        return resized_image
+
     def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
         """
         Expects a numpy array of length 2 in the final dimension. Requires the
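
A minimal sketch of how the new `apply_image_deterministic` method might be exercised next to the existing `apply_image`, assuming the `transforms.py` change above is applied; the input path is a placeholder.

```python
import cv2
from segment_anything.utils.transforms import ResizeLongestSide

# SAM models expect the longest side resized to 1024
transform = ResizeLongestSide(target_length=1024)

image = cv2.imread("image.png")  # placeholder input image
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Existing transform: PIL-based bilinear resize (the path the note above suspects)
resized_default = transform.apply_image(image)

# New transform from this diff: nearest-neighbor resize via F.interpolate
resized_det = transform.apply_image_deterministic(image)

print(resized_default.shape, resized_det.shape)  # both HxWx3 with longest side 1024
```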
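And a minimal sketch of the CPU workaround described in the reproducibility note, assuming a hypothetical `fine_tune(...)` entry point for your own training loop; the seeding helper is illustrative and not part of this repository.

```python
import random

import numpy as np
import torch

def make_deterministic(seed: int = 0) -> None:
    """Seed all RNGs and request deterministic kernels (illustrative helper)."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # On CPU this flag is honoured; on GPU, ops that lack a deterministic
    # implementation will raise an error instead of running non-deterministically.
    torch.use_deterministic_algorithms(True)

make_deterministic(seed=0)
device = torch.device("cpu")  # GPU fine-tuning is currently not reproducible
# fine_tune(sam_model, device=device)  # placeholder for your training loop
```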