Skip to content

Commit

Permalink
support to export gguf q4_0 and q4_1 format (#393)
Browse files Browse the repository at this point in the history
  • Loading branch information
n1ck-guo authored Jan 8, 2025
1 parent ba2426c commit 86767b0
Show file tree
Hide file tree
Showing 12 changed files with 4,827 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
endianess
12 changes: 9 additions & 3 deletions auto_round/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,15 @@
import sys

def run_eval():
# Entry point for evaluation mode: parses the evaluation arguments with
# setup_eval_parser() and dispatches to one of two evaluation routines
# from auto_round.script.llm.
#
# NOTE(review): this text is a diff rendering without +/- markers and with
# indentation stripped. The next three lines appear to be the pre-change
# body that the commit removed (the file header reports 3 deletions and
# 9 additions) -- confirm against the repository before treating them as
# live code.
from auto_round.script.llm import setup_eval_parser, eval
args = setup_eval_parser()
eval(args)
# "--native" on the command line selects eval(); without it, the else
# branch below calls eval_sequence() instead.
if "--native" in sys.argv:
# The flag is removed from sys.argv before the arguments are parsed;
# presumably setup_eval_parser() does not declare it -- TODO confirm.
sys.argv.remove("--native")
# NOTE: the imported name `eval` shadows the Python builtin within this scope.
from auto_round.script.llm import setup_eval_parser, eval
args = setup_eval_parser()
eval(args)
else:
from auto_round.script.llm import setup_eval_parser, eval_sequence
args = setup_eval_parser()
eval_sequence(args)

def run():
if "--eval" in sys.argv:
Expand Down
8 changes: 8 additions & 0 deletions auto_round/autoround.py
Original file line number Diff line number Diff line change
Expand Up @@ -1267,6 +1267,14 @@ def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **k
if processor is not None:
processor.save_pretrained(output_dir)
return
if format in ["gguf:q4_0", "gguf:q4_1"]:
if self.group_size != 32:
logger.error(f"{format} need group_size=32, but it is {self.group_size}, cannot export.")
return
if format == "gguf:q4_0" and not self.sym:
logger.warning(f"incorrect format choose, will reset to gguf:q4_1")
if format == "gguf:q4_1" and self.sym:
logger.warning(f"incorrect format choose, will reset to gguf:q4_0")

from auto_round.export import EXPORT_FORMAT
backend = format
Expand Down
5 changes: 5 additions & 0 deletions auto_round/export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,8 @@ def _save_quantized_as_autoawq(*args, **kwargs):
from auto_round.export.export_to_awq.export import save_quantized_as_autoawq

return save_quantized_as_autoawq(*args, **kwargs)

@register_format("gguf")
def _save_quantized_as_gguf(*args, **kwargs):
    """Export a quantized model through the GGUF exporter.

    Thin wrapper decorated with ``register_format("gguf")``. The exporter
    module is imported inside the function, so it is only loaded when this
    wrapper is actually called. All positional and keyword arguments are
    forwarded unchanged.

    The function was previously named ``_save_quantized_as_autoawq``, a
    copy-paste of the AutoAWQ wrapper's name, which rebound that
    module-level name to the GGUF exporter.

    Returns:
        Whatever ``save_quantized_as_gguf`` returns.
    """
    from auto_round.export.export_to_gguf.export import save_quantized_as_gguf

    return save_quantized_as_gguf(*args, **kwargs)
13 changes: 13 additions & 0 deletions auto_round/export/export_to_gguf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading

0 comments on commit 86767b0

Please sign in to comment.