support to export gguf q4_0 and q4_1 format (#393)

intel · Jan 8, 2025 · 86767b0 · 86767b0
1 parent ba2426c
commit 86767b0
Show file tree

Hide file tree

Showing 12 changed files with 4,827 additions and 28 deletions.
diff --git a/.azure-pipelines/scripts/codeScan/codespell/autoround_dict.txt b/.azure-pipelines/scripts/codeScan/codespell/autoround_dict.txt
@@ -0,0 +1 @@
+endianess
diff --git a/auto_round/__main__.py b/auto_round/__main__.py
@@ -14,9 +14,15 @@
 import sys
 
 def run_eval():
-    from auto_round.script.llm import setup_eval_parser, eval
-    args = setup_eval_parser()
-    eval(args)
+    if "--native" in sys.argv:
+        sys.argv.remove("--native")
+        from auto_round.script.llm import setup_eval_parser, eval
+        args = setup_eval_parser()
+        eval(args)
+    else:
+        from auto_round.script.llm import setup_eval_parser, eval_sequence
+        args = setup_eval_parser()
+        eval_sequence(args)
 
 def run():
     if "--eval" in sys.argv:

diff --git a/auto_round/autoround.py b/auto_round/autoround.py
@@ -1267,6 +1267,14 @@ def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **k
             if processor is not None:
                 processor.save_pretrained(output_dir)
             return
+        if format in ["gguf:q4_0", "gguf:q4_1"]:
+            if self.group_size != 32:
+                logger.error(f"{format} need group_size=32, but it is {self.group_size}, cannot export.")
+                return
+            if format == "gguf:q4_0" and not self.sym:
+                logger.warning(f"incorrect format choose, will reset to gguf:q4_1")
+            if format == "gguf:q4_1" and self.sym:
+                logger.warning(f"incorrect format choose, will reset to gguf:q4_0")
 
         from auto_round.export import EXPORT_FORMAT
         backend = format

diff --git a/auto_round/export/__init__.py b/auto_round/export/__init__.py
@@ -48,3 +48,8 @@ def _save_quantized_as_autoawq(*args, **kwargs):
     from auto_round.export.export_to_awq.export import save_quantized_as_autoawq
 
     return save_quantized_as_autoawq(*args, **kwargs)
+
+@register_format("gguf")
+def _save_quantized_as_gguf(*args, **kwargs):  # was a second `_save_quantized_as_autoawq`, which rebound the AWQ wrapper's name
+    from auto_round.export.export_to_gguf.export import save_quantized_as_gguf  # imported at call time, not at module import
+    return save_quantized_as_gguf(*args, **kwargs)
diff --git a/auto_round/export/export_to_gguf/__init__.py b/auto_round/export/export_to_gguf/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.