@@ -539,22 +539,18 @@ def _export_llama(modelname, args) -> str: # noqa: C901
539
539
).export_to_edge (quantizers )
540
540
541
541
# to_backend
542
- partitioners = {}
542
+ partitioner = None
543
543
if pt2e_quant_params is not None and pt2e_quant_params .quantize_linear is not None :
544
- partitioners [XnnpackDynamicallyQuantizedPartitioner .__name__ ] = (
545
- XnnpackDynamicallyQuantizedPartitioner ()
546
- )
544
+ partitioner = XnnpackDynamicallyQuantizedPartitioner ()
547
545
modelname = f"xnnpack_dq_{ modelname } "
548
546
549
547
if args .xnnpack :
550
548
# The following changes are needed because:
551
549
# 1. We need the dynamically quantized partitioner for both pt2e_quantize options
552
550
# as well as "qmode 8da4w", which also dynamically quantizes linear layers.
553
551
# 2. The XNNPACK partitioner seems to cause a segfault for non-dqlinear ops.
554
- partitioners [XnnpackDynamicallyQuantizedPartitioner .__name__ ] = (
555
- XnnpackDynamicallyQuantizedPartitioner ()
556
- )
557
- # partitioners[XnnpackPartitioner.__name__] = XnnpackPartitioner()
552
+ partitioner = XnnpackDynamicallyQuantizedPartitioner ()
553
+ # partitioner = XnnpackPartitioner()
558
554
modelname = f"xnnpack_{ modelname } "
559
555
560
556
if args .vulkan :
@@ -565,7 +561,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
565
561
args .quantization_mode is None
566
562
), "Vulkan backend does not support quantization at the moment"
567
563
568
- partitioners [ VulkanPartitioner . __name__ ] = VulkanPartitioner ()
564
+ partitioner = VulkanPartitioner ()
569
565
modelname = f"vulkan_{ modelname } "
570
566
571
567
if args .mps :
@@ -584,7 +580,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
584
580
585
581
compile_specs = [CompileSpec ("use_fp16" , bytes ([True ]))]
586
582
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`.
587
- partitioners [ MPSPartitioner . __name__ ] = MPSPartitioner (compile_specs )
583
+ partitioner = MPSPartitioner (compile_specs )
588
584
modelname = f"mps_{ modelname } "
589
585
590
586
if args .coreml :
@@ -615,7 +611,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
615
611
model_type = CoreMLBackend .MODEL_TYPE .MODEL ,
616
612
)
617
613
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
618
- partitioners [ CoreMLPartitioner . __name__ ] = CoreMLPartitioner (
614
+ partitioner = CoreMLPartitioner (
619
615
skip_ops_for_coreml_delegation = None , compile_specs = compile_specs
620
616
)
621
617
modelname = f"coreml_{ modelname } "
@@ -627,7 +623,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
627
623
logging .info ("Generating etrecord" )
628
624
# Copy the edge manager which will be serialized into etrecord. This is memory-wise expensive.
629
625
edge_manager_copy = copy .deepcopy (builder_exported_to_edge .edge_manager )
630
- builder = builder_exported_to_edge .to_backend (partitioners ).to_executorch ()
626
+ builder = builder_exported_to_edge .to_backend (partitioner ).to_executorch ()
631
627
632
628
# Generate ETRecord
633
629
if edge_manager_copy :
@@ -638,7 +634,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
638
634
)
639
635
logging .info ("Generated etrecord.bin" )
640
636
else :
641
- builder = builder_exported_to_edge .to_backend (partitioners ).to_executorch ()
637
+ builder = builder_exported_to_edge .to_backend (partitioner ).to_executorch ()
642
638
643
639
if args .profile_memory :
644
640
generate_memory_trace (builder .export_program , "memory_profile.json" )
0 commit comments