From 4218e3688bb11105a5bcabf5534fabfa1aa199ac Mon Sep 17 00:00:00 2001
From: samuel100
Date: Mon, 30 Dec 2024 17:15:46 +0000
Subject: [PATCH 1/3] explicitly move quantized model to cuda device

---
 olive/passes/pytorch/gptq.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/olive/passes/pytorch/gptq.py b/olive/passes/pytorch/gptq.py
index f7b23ac7c..8fddd6ce5 100644
--- a/olive/passes/pytorch/gptq.py
+++ b/olive/passes/pytorch/gptq.py
@@ -158,6 +158,10 @@ def _run_for_config(
         model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
         model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
         quantized_model: BaseGPTQForCausalLM = model_class(pytorch_model, False, quantize_config)
+
+        # explicitly move quantized model to CUDA device to avoid the "Expected all tensors to be
+        # on the same device" error in auto-gptq
+        quantized_model.to("cuda")
 
         fields_to_set = {
             "outside_layer_modules": MODEL_OUTSIDE_LAYER_MODULES,

From 307c960d37cdb60937eff667e43fa6329936e52e Mon Sep 17 00:00:00 2001
From: samuel100
Date: Tue, 31 Dec 2024 10:18:14 +0000
Subject: [PATCH 2/3] add gh issue to comment

---
 olive/passes/pytorch/gptq.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/olive/passes/pytorch/gptq.py b/olive/passes/pytorch/gptq.py
index 8fddd6ce5..6d0ea3ce5 100644
--- a/olive/passes/pytorch/gptq.py
+++ b/olive/passes/pytorch/gptq.py
@@ -160,7 +160,8 @@ def _run_for_config(
         quantized_model: BaseGPTQForCausalLM = model_class(pytorch_model, False, quantize_config)
 
         # explicitly move quantized model to CUDA device to avoid the "Expected all tensors to be
-        # on the same device" error in auto-gptq
+        # on the same device" error in auto-gptq.
+        # see https://github.com/AutoGPTQ/AutoGPTQ/issues/729
         quantized_model.to("cuda")
 
         fields_to_set = {

From b474b8bfd95fe8c86c18d2b0c230d7d900c85a1e Mon Sep 17 00:00:00 2001
From: samuel100
Date: Tue, 31 Dec 2024 10:24:56 +0000
Subject: [PATCH 3/3] fixed linting

---
 olive/passes/pytorch/gptq.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/olive/passes/pytorch/gptq.py b/olive/passes/pytorch/gptq.py
index 6d0ea3ce5..45d91ef3f 100644
--- a/olive/passes/pytorch/gptq.py
+++ b/olive/passes/pytorch/gptq.py
@@ -158,10 +158,9 @@ def _run_for_config(
         model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
         model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
         quantized_model: BaseGPTQForCausalLM = model_class(pytorch_model, False, quantize_config)
-
         # explicitly move quantized model to CUDA device to avoid the "Expected all tensors to be
         # on the same device" error in auto-gptq.
-        # see https://github.com/AutoGPTQ/AutoGPTQ/issues/729
+        # see https://github.com/AutoGPTQ/AutoGPTQ/issues/729
         quantized_model.to("cuda")
 
         fields_to_set = {
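
Reviewer note: the failure this series works around can be reproduced outside Olive with auto-gptq alone. A minimal sketch, closely following auto-gptq's quick-start pattern (the model id and calibration sentence are illustrative placeholders, not part of this patch):

    from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
    from transformers import AutoTokenizer

    # small model used in auto-gptq's own examples; any supported causal LM works
    model_id = "facebook/opt-125m"
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    examples = [tokenizer("Calibration sentence for GPTQ quantization.")]

    quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
    model = AutoGPTQForCausalLM.from_pretrained(model_id, quantize_config)

    # Without this explicit move, quantize() can raise "Expected all tensors to
    # be on the same device" (see AutoGPTQ/AutoGPTQ#729); this series applies
    # the same fix inside Olive's GPTQ pass.
    model.to("cuda")

    model.quantize(examples)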