Skip to content

Commit 9e922d3

Browse files
cccclai authored and facebook-github-bot committed
add qnn option (#2606)
Summary: Pull Request resolved: #2606. Add Qualcomm option, including both qnn_quantizer and qnn_partitioner. Reviewed By: kimishpatel. Differential Revision: D55218789. fbshipit-source-id: b5db2efde649c7defaa41901374ce6fb5363fa22
1 parent be43255 commit 9e922d3

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

examples/models/llama2/export_llama_lib.py

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -422,6 +422,11 @@ def build_args_parser() -> argparse.ArgumentParser:
422422
parser.add_argument("-V", "--vulkan", action="store_true")
423423
parser.add_argument("--mps", action="store_true")
424424
parser.add_argument("--coreml", action="store_true")
425+
parser.add_argument(
426+
"--qnn",
427+
action="store_true",
428+
help="Delegate llama2 to qnn backend (Qualcomm), please use it --kv_cahce=True",
429+
)
425430

426431
parser.add_argument(
427432
"--expand_rope_table",
@@ -555,6 +560,28 @@ def _export_llama(modelname, args) -> str: # noqa: C901
555560
# export_to_edge
556561
pt2e_quant_params = _get_pt2e_quantization_params(args)
557562
quantizers = get_pt2e_quantizers(pt2e_quant_params, args)
563+
if args.qnn:
564+
assert (
565+
args.quantization_mode is None
566+
), "Currently qnn backend only supports QnnQuantizer via pt2e flow"
567+
try:
568+
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.quantizer.quantizer`
569+
from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
570+
571+
# reset quantizers and pt2e_quant_params from xnnpack backend
572+
pt2e_quant_params = None
573+
quantizers = []
574+
except ImportError:
575+
raise ImportError(
576+
"Please install the Qualcomm backend follwing https://pytorch.org/executorch/main/build-run-qualcomm.html"
577+
)
578+
579+
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`.
580+
qnn_quantizer = QnnQuantizer()
581+
# more custom quantization are supported including 16a4w etc. default to 8bit quantized
582+
custom_annotations = ()
583+
qnn_quantizer.add_custom_quant_annotations(custom_annotations)
584+
quantizers.append(qnn_quantizer)
558585

559586
builder_exported_to_edge = _prepare_for_llama_export(
560587
modelname, args
@@ -638,6 +665,50 @@ def _export_llama(modelname, args) -> str: # noqa: C901
638665
)
639666
modelname = f"coreml_{modelname}"
640667

668+
if args.qnn:
669+
assert (
670+
args.use_kv_cache is True
671+
), "Qualcomm backend currently only supports static shape and use_kv_cache=True is the only way to support it at the moment"
672+
try:
673+
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.partition.qnn_partitioner`
674+
from executorch.backends.qualcomm.partition.qnn_partitioner import (
675+
QnnPartitioner,
676+
)
677+
678+
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.serialization.qnn_compile_spec_schema`
679+
from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import (
680+
QcomChipset,
681+
)
682+
683+
# pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`
684+
from executorch.backends.qualcomm.utils.utils import (
685+
_transform,
686+
generate_htp_compiler_spec,
687+
generate_qnn_executorch_compiler_spec,
688+
)
689+
except ImportError:
690+
raise ImportError(
691+
"Please install the Qualcomm backend follwing https://pytorch.org/executorch/main/build-run-qualcomm.html"
692+
)
693+
694+
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
695+
backend_options = generate_htp_compiler_spec(use_fp16=False)
696+
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
697+
partitioner = QnnPartitioner(
698+
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
699+
generate_qnn_executorch_compiler_spec(
700+
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`.
701+
soc_model=QcomChipset.SM8650, # default to SM8650
702+
backend_options=backend_options,
703+
debug=False,
704+
saver=False,
705+
),
706+
skip_node_id_set={},
707+
skip_node_op_set={},
708+
)
709+
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
710+
_transform(builder_exported_to_edge.export_program())
711+
641712
if args.generate_etrecord:
642713
if not builder_exported_to_edge.edge_manager:
643714
raise ValueError("Unable to generate etrecord due to missing edge manager.")

0 commit comments

Comments
 (0)