@@ -422,6 +422,11 @@ def build_args_parser() -> argparse.ArgumentParser:
422
422
parser .add_argument ("-V" , "--vulkan" , action = "store_true" )
423
423
parser .add_argument ("--mps" , action = "store_true" )
424
424
parser .add_argument ("--coreml" , action = "store_true" )
425
+ parser .add_argument (
426
+ "--qnn" ,
427
+ action = "store_true" ,
428
+ help = "Delegate llama2 to qnn backend (Qualcomm), please use it --kv_cahce=True" ,
429
+ )
425
430
426
431
parser .add_argument (
427
432
"--expand_rope_table" ,
@@ -555,6 +560,28 @@ def _export_llama(modelname, args) -> str: # noqa: C901
555
560
# export_to_edge
556
561
pt2e_quant_params = _get_pt2e_quantization_params (args )
557
562
quantizers = get_pt2e_quantizers (pt2e_quant_params , args )
563
+ if args .qnn :
564
+ assert (
565
+ args .quantization_mode is None
566
+ ), "Currently qnn backend only supports QnnQuantizer via pt2e flow"
567
+ try :
568
+ # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.quantizer.quantizer`
569
+ from executorch .backends .qualcomm .quantizer .quantizer import QnnQuantizer
570
+
571
+ # reset quantizers and pt2e_quant_params from xnnpack backend
572
+ pt2e_quant_params = None
573
+ quantizers = []
574
+ except ImportError :
575
+ raise ImportError (
576
+ "Please install the Qualcomm backend follwing https://pytorch.org/executorch/main/build-run-qualcomm.html"
577
+ )
578
+
579
+ # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`.
580
+ qnn_quantizer = QnnQuantizer ()
581
+ # More custom quantization schemes are supported (including 16a4w etc.); defaults to 8-bit quantization.
582
+ custom_annotations = ()
583
+ qnn_quantizer .add_custom_quant_annotations (custom_annotations )
584
+ quantizers .append (qnn_quantizer )
558
585
559
586
builder_exported_to_edge = _prepare_for_llama_export (
560
587
modelname , args
@@ -638,6 +665,50 @@ def _export_llama(modelname, args) -> str: # noqa: C901
638
665
)
639
666
modelname = f"coreml_{ modelname } "
640
667
668
+ if args .qnn :
669
+ assert (
670
+ args .use_kv_cache is True
671
+ ), "Qualcomm backend currently only supports static shape and use_kv_cache=True is the only way to support it at the moment"
672
+ try :
673
+ # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.partition.qnn_partitioner`
674
+ from executorch .backends .qualcomm .partition .qnn_partitioner import (
675
+ QnnPartitioner ,
676
+ )
677
+
678
+ # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.serialization.qnn_compile_spec_schema`
679
+ from executorch .backends .qualcomm .serialization .qnn_compile_spec_schema import (
680
+ QcomChipset ,
681
+ )
682
+
683
+ # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`
684
+ from executorch .backends .qualcomm .utils .utils import (
685
+ _transform ,
686
+ generate_htp_compiler_spec ,
687
+ generate_qnn_executorch_compiler_spec ,
688
+ )
689
+ except ImportError :
690
+ raise ImportError (
691
+ "Please install the Qualcomm backend follwing https://pytorch.org/executorch/main/build-run-qualcomm.html"
692
+ )
693
+
694
+ # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
695
+ backend_options = generate_htp_compiler_spec (use_fp16 = False )
696
+ # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
697
+ partitioner = QnnPartitioner (
698
+ # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
699
+ generate_qnn_executorch_compiler_spec (
700
+ # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`.
701
+ soc_model = QcomChipset .SM8650 , # default to SM8650
702
+ backend_options = backend_options ,
703
+ debug = False ,
704
+ saver = False ,
705
+ ),
706
+ skip_node_id_set = {},
707
+ skip_node_op_set = {},
708
+ )
709
+ # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `qualcomm`
710
+ _transform (builder_exported_to_edge .export_program ())
711
+
641
712
if args .generate_etrecord :
642
713
if not builder_exported_to_edge .edge_manager :
643
714
raise ValueError ("Unable to generate etrecord due to missing edge manager." )
0 commit comments