diff --git a/olive/passes/pytorch/gptq.py b/olive/passes/pytorch/gptq.py
index f7b23ac7c..45d91ef3f 100644
--- a/olive/passes/pytorch/gptq.py
+++ b/olive/passes/pytorch/gptq.py
@@ -158,6 +158,10 @@ def _run_for_config(
         model_type = pytorch_model.config.model_type if hasattr(pytorch_model, "config") else ""
         model_class = GPTQ_CAUSAL_LM_MODEL_MAP.get(model_type, BaseGPTQForCausalLM)
         quantized_model: BaseGPTQForCausalLM = model_class(pytorch_model, False, quantize_config)
+        # explicitly move the quantized model to the CUDA device to avoid the "Expected all
+        # tensors to be on the same device" error in auto-gptq.
+        # see https://github.com/AutoGPTQ/AutoGPTQ/issues/729
+        quantized_model.to("cuda")
         fields_to_set = {
             "outside_layer_modules": MODEL_OUTSIDE_LAYER_MODULES,
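
For context, a minimal standalone sketch of the pattern this patch applies, using auto-gptq directly rather than Olive's pass wrapper. The model id and calibration text below are placeholders chosen for illustration, not values from this repository; the point is only that moving the wrapped model to CUDA before calling `quantize()` keeps every tensor on one device.

```python
# Minimal sketch of the device-placement fix above, using auto-gptq directly.
# The model id and calibration text are placeholders, not Olive defaults.
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from transformers import AutoTokenizer

pretrained_model_dir = "facebook/opt-125m"  # placeholder model for illustration

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
examples = [
    tokenizer("auto-gptq is a quantization library based on the GPTQ algorithm.")
]

quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)

# The same step the patch adds: move the wrapped model to CUDA before
# quantizing so auto-gptq does not mix CPU and GPU tensors
# (https://github.com/AutoGPTQ/AutoGPTQ/issues/729).
model.to("cuda")

model.quantize(examples)
```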