@@ -602,18 +602,18 @@ def _export_llama(modelname, args) -> str: # noqa: C901
     ).export_to_edge(quantizers)
 
     # to_backend
-    partitioner = None
+    partitioners = []
     if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None:
-        partitioner = XnnpackDynamicallyQuantizedPartitioner()
+        partitioners.append(XnnpackDynamicallyQuantizedPartitioner())
         modelname = f"xnnpack_dq_{modelname}"
 
     if args.xnnpack:
         # Following changes due to.
         # 1. We need dynamically quantized partitioner for both pt2e_quantize options
         # as well as "qmode 8da4w" which is also dynamic quantizes linear layers.
         # 2. XNNPACK partitioner seems to result in seg fault for non dqlinear ops.
-        partitioner = XnnpackDynamicallyQuantizedPartitioner()
-        # partitioner = XnnpackPartitioner()
+        partitioners.append(XnnpackDynamicallyQuantizedPartitioner())
+        # partitioners.append(XnnpackPartitioner())
         modelname = f"xnnpack_{modelname}"
 
     if args.vulkan:
@@ -624,7 +624,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
             args.quantization_mode is None
         ), "Vulkan backend does not support quantization at the moment"
 
-        partitioner = VulkanPartitioner()
+        partitioners.append(VulkanPartitioner())
         modelname = f"vulkan_{modelname}"
 
     if args.mps:
@@ -643,7 +643,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
 
         compile_specs = [CompileSpec("use_fp16", bytes([True]))]
         # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`.
-        partitioner = MPSPartitioner(compile_specs)
+        partitioners.append(MPSPartitioner(compile_specs))
         modelname = f"mps_{modelname}"
 
     if args.coreml:
@@ -673,9 +673,11 @@ def _export_llama(modelname, args) -> str: # noqa: C901
             # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
             model_type=CoreMLBackend.MODEL_TYPE.MODEL,
         )
-        # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
-        partitioner = CoreMLPartitioner(
-            skip_ops_for_coreml_delegation=None, compile_specs=compile_specs
+        partitioners.append(
+            # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
+            CoreMLPartitioner(
+                skip_ops_for_coreml_delegation=None, compile_specs=compile_specs
+            )
         )
         modelname = f"coreml_{modelname}"
 
@@ -730,7 +732,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
         logging.info("Generating etrecord")
         # Copy the edge manager which will be serialized into etrecord. This is memory-wise expensive.
         edge_manager_copy = copy.deepcopy(builder_exported_to_edge.edge_manager)
-        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()
+        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
 
         # Generate ETRecord
         if edge_manager_copy:
@@ -741,7 +743,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
             )
             logging.info("Generated etrecord.bin")
     else:
-        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()
+        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
 
     if args.profile_memory:
         generate_memory_trace(builder.export_program, "memory_profile.json")
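
The change replaces the single partitioner variable with a partitioners list, so that several backend partitioners (XNNPACK, Vulkan, MPS, CoreML) can be collected and handed to to_backend(partitioners) together. Below is a minimal sketch of how a builder could consume such a list; the EdgeBuilder class and its edge_manager attribute are hypothetical stand-ins for illustration, not the actual ExecuTorch implementation.

# Minimal sketch (assumption, not the actual ExecuTorch builder): apply a list
# of partitioners in order. An empty list is a no-op, which is why the diff
# initializes partitioners = [] instead of partitioner = None.
from typing import List, Optional


class EdgeBuilder:
    """Hypothetical stand-in for the builder object used in the diff."""

    def __init__(self, edge_manager):
        self.edge_manager = edge_manager

    def to_backend(self, partitioners: Optional[List] = None) -> "EdgeBuilder":
        # Delegate each backend partitioner to the underlying edge manager in turn.
        for partitioner in partitioners or []:
            self.edge_manager = self.edge_manager.to_backend(partitioner)
        return self

    def to_executorch(self):
        # Placeholder for the final lowering step chained after to_backend().
        return self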