Skip to content

Commit 566528f

Browse files
Michael Gschwind and facebook-github-bot
authored and committed
Add options for embedding quantization: bitwidth, group_size on CLI
Summary: Add options for embedding quantization: bitwidth, group_size on CLI Reviewed By: mavlyutovr Differential Revision: D54159472 fbshipit-source-id: 25b0b560667c3875c911b878ee0f28fd042ff713
1 parent 4ab839f commit 566528f

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

examples/models/llama2/export_llama_lib.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,13 @@ def build_args_parser() -> argparse.ArgumentParser:
231231
parser.add_argument(
232232
"-q", "--quantized_ckpt", default=None, help="quantized checkpoint file"
233233
)
234-
parser.add_argument("-E", "--embedding-quantize", default=None, action="store_true")
234+
parser.add_argument(
235+
"-E",
236+
"--embedding-quantize",
237+
default=None,
238+
type=str,
239+
help="type of embedding quantization, '<bitwidth>,<groupsize>', e.g., '8,1024'.",
240+
)
235241
parser.add_argument(
236242
"--pt2e_quantize",
237243
default=None,
@@ -362,8 +368,16 @@ def _export_llama(modelname, args) -> str: # noqa: C901
362368

363369
if args.embedding_quantize:
364370
modelname = f"{modelname}_e"
371+
bitwidth, group_size = args.embedding_quantize.split(",")
372+
if group_size == "none" or group_size == "None" or group_size == "0":
373+
group_size = None
374+
else:
375+
group_size = int(group_size)
376+
bitwidth = int(bitwidth)
365377
transforms.append(
366-
lambda model: EmbeddingOnlyInt8QuantHandler(model).convert_for_runtime()
378+
lambda model: EmbeddingOnlyInt8QuantHandler(
379+
model, bitwidth=bitwidth, group_size=group_size
380+
).convert_for_runtime()
367381
)
368382

369383
# export_to_edge

0 commit comments

Comments (0)