From 85eb4f0312ac4068951c13e9363ac0d15467aa77 Mon Sep 17 00:00:00 2001
From: Ryan Dick <ryanjdick3@gmail.com>
Date: Tue, 7 Jan 2025 15:59:50 +0000
Subject: [PATCH] Fix an edge case with model offloading from VRAM to RAM. If a
 GGML-quantized model is offloaded from VRAM inside a torch.inference_mode()
 context manager, the following error is raised:
 'RuntimeError: Cannot set version_counter for inference tensor'.
 The Spandrel image-to-image invocations now use torch.no_grad() instead.

---
 invokeai/app/invocations/spandrel_image_to_image.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/invokeai/app/invocations/spandrel_image_to_image.py b/invokeai/app/invocations/spandrel_image_to_image.py
index 0f34bec7771..e47959ad647 100644
--- a/invokeai/app/invocations/spandrel_image_to_image.py
+++ b/invokeai/app/invocations/spandrel_image_to_image.py
@@ -151,7 +151,7 @@ def upscale_image(
 
         return pil_image
 
-    @torch.inference_mode()
+    @torch.no_grad()
     def invoke(self, context: InvocationContext) -> ImageOutput:
         # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
         # revisit this.
@@ -197,7 +197,7 @@ class SpandrelImageToImageAutoscaleInvocation(SpandrelImageToImageInvocation):
         description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
     )
 
-    @torch.inference_mode()
+    @torch.no_grad()
     def invoke(self, context: InvocationContext) -> ImageOutput:
         # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
         # revisit this.