From 4218e3688bb11105a5bcabf5534fabfa1aa199ac Mon Sep 17 00:00:00 2001
From: samuel100
Date: Mon, 30 Dec 2024 17:15:46 +0000
Subject: [PATCH 1/3] explicitly move quantized model to cuda device

---
 olive/passes/pytorch/gptq.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/olive/passes/pytorch/gptq.py b/olive/passes/pytorch/gptq.py
index f7b23ac7c..8fddd6ce5 100644
--- a/olive/passes/pytorch/gptq.py
+++ b/olive/passes/pytorch/gptq.py
@@ -158,6 +158,10 @@ def _run_for_config(
         model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
         model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
         quantized_model: BaseGPTQForCausalLM = model_class(pytorch_model, False, quantize_config)
+
+        # explicitly move quantized model to CUDA device to avoid the "Expected all tensors to be
+        # on the same device" error in auto-gptq
+        quantized_model.to("cuda")
 
         fields_to_set = {
             "outside_layer_modules": MODEL_OUTSIDE_LAYER_MODULES,

From 307c960d37cdb60937eff667e43fa6329936e52e Mon Sep 17 00:00:00 2001
From: samuel100
Date: Tue, 31 Dec 2024 10:18:14 +0000
Subject: [PATCH 2/3] add gh issue to comment

---
 olive/passes/pytorch/gptq.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/olive/passes/pytorch/gptq.py b/olive/passes/pytorch/gptq.py
index 8fddd6ce5..6d0ea3ce5 100644
--- a/olive/passes/pytorch/gptq.py
+++ b/olive/passes/pytorch/gptq.py
@@ -160,7 +160,8 @@ def _run_for_config(
         quantized_model: BaseGPTQForCausalLM = model_class(pytorch_model, False, quantize_config)
 
         # explicitly move quantized model to CUDA device to avoid the "Expected all tensors to be
-        # on the same device" error in auto-gptq
+        # on the same device" error in auto-gptq.
+        # see https://github.com/AutoGPTQ/AutoGPTQ/issues/729
         quantized_model.to("cuda")
 
         fields_to_set = {

From b474b8bfd95fe8c86c18d2b0c230d7d900c85a1e Mon Sep 17 00:00:00 2001
From: samuel100
Date: Tue, 31 Dec 2024 10:24:56 +0000
Subject: [PATCH 3/3] fixed linting

---
 olive/passes/pytorch/gptq.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/olive/passes/pytorch/gptq.py b/olive/passes/pytorch/gptq.py
index 6d0ea3ce5..45d91ef3f 100644
--- a/olive/passes/pytorch/gptq.py
+++ b/olive/passes/pytorch/gptq.py
@@ -158,10 +158,9 @@ def _run_for_config(
         model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
         model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
         quantized_model: BaseGPTQForCausalLM = model_class(pytorch_model, False, quantize_config)
-
         # explicitly move quantized model to CUDA device to avoid the "Expected all tensors to be
         # on the same device" error in auto-gptq.
-        # see https://github.com/AutoGPTQ/AutoGPTQ/issues/729
+        # see https://github.com/AutoGPTQ/AutoGPTQ/issues/729
         quantized_model.to("cuda")
 
         fields_to_set = {
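
Reviewer note: the failure this series works around can be reproduced outside Olive with auto-gptq alone. A minimal sketch, closely following auto-gptq's quick-start pattern (the model id and calibration sentence are illustrative placeholders, not part of this patch):

    from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
    from transformers import AutoTokenizer

    # small model used in auto-gptq's own examples; any supported causal LM works
    model_id = "facebook/opt-125m"
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    examples = [tokenizer("Calibration sentence for GPTQ quantization.")]

    quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
    model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)

    # Without this explicit move, quantize() can raise "Expected all tensors to
    # be on the same device" (see AutoGPTQ/AutoGPTQ#729); this series applies
    # the same fix inside Olive's GPTQ pass.
    model.to("cuda")

    model.quantize(examples)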