
Commit ef56bf1

Jack-Khuu authored and facebook-github-bot committed
Plumb group_size to 4b quant (#2734)
Summary: Previously, group_size wasn't being passed through properly to 4-bit quantization. This change simply passes it through.

Differential Revision: D55458352
1 parent e7a429a commit ef56bf1
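
To see what the fix does end to end, here is a minimal sketch of the plumbing, assuming torchao is installed; the quantize() signature is simplified relative to export_llama_lib.py, and only the quantizer call mirrors the diff below.

import torch
from torchao.quantization.quant_api import Int8DynActInt4WeightQuantizer

def quantize(model, qmode, torch_dtype=torch.float32, group_size=256):
    # Simplified sketch: only the 8da4w branch is shown.
    if qmode == "8da4w":
        # The fix: group_size now reaches the 4-bit weight quantizer
        # instead of the quantizer falling back to its own default.
        model = Int8DynActInt4WeightQuantizer(
            precision=torch_dtype, group_size=group_size
        ).quantize(model)
    return model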

File tree

1 file changed: +8 −2 lines


examples/models/llama2/export_llama_lib.py

Lines changed: 8 additions & 2 deletions
@@ -254,7 +254,9 @@ def quantize(
     elif qmode == "8da4w":
         from torchao.quantization.quant_api import Int8DynActInt4WeightQuantizer
 
-        model = Int8DynActInt4WeightQuantizer(precision=torch_dtype).quantize(model)
+        model = Int8DynActInt4WeightQuantizer(
+            precision=torch_dtype, group_size=group_size
+        ).quantize(model)
         if verbose_export():
             print("quantized model:", model)
         return model
@@ -406,7 +408,11 @@ def build_args_parser() -> argparse.ArgumentParser:
         help="Use cProfile to profile model export. Results saved to profile_path as a html file.",
     )
     parser.add_argument(
-        "-G", "--group_size", default=None, help="group_size for weight quantization"
+        "-G",
+        "--group_size",
+        type=int,
+        default=256,
+        help="group_size for weight quantization",
     )
 
     parser.add_argument(
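
Why type=int matters here: argparse hands CLI values through as strings unless a type is given, so the old flag yielded either None or a str like "128". A minimal stand-in for the argument definition above (this isolated parser is illustrative, not the full build_args_parser()):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "-G",
    "--group_size",
    type=int,
    default=256,
    help="group_size for weight quantization",
)

print(parser.parse_args([]).group_size)             # 256 (the new default)
print(parser.parse_args(["-G", "128"]).group_size)  # 128 as an int, not "128"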
