@@ -490,22 +490,18 @@ def _export_llama(modelname, args) -> str: # noqa: C901
490
490
).export_to_edge (quantizers )
491
491
492
492
# to_backend
493
- partitioners = {}
493
+ partitioner = None
494
494
if pt2e_quant_params is not None and pt2e_quant_params .quantize_linear is not None :
495
- partitioners [XnnpackDynamicallyQuantizedPartitioner .__name__ ] = (
496
- XnnpackDynamicallyQuantizedPartitioner ()
497
- )
495
+ partitioner = XnnpackDynamicallyQuantizedPartitioner ()
498
496
modelname = f"xnnpack_dq_{ modelname } "
499
497
500
498
if args .xnnpack :
501
499
# The following choices are made for two reasons:
502
500
# 1. We need dynamically quantized partitioner for both pt2e_quantize options
503
501
# as well as "qmode int4", which also dynamically quantizes linear layers.
504
502
# 2. XNNPACK partitioner seems to result in seg fault for non dqlinear ops.
505
- partitioners [XnnpackDynamicallyQuantizedPartitioner .__name__ ] = (
506
- XnnpackDynamicallyQuantizedPartitioner ()
507
- )
508
- # partitioners[XnnpackPartitioner.__name__] = XnnpackPartitioner()
503
+ partitioner = XnnpackDynamicallyQuantizedPartitioner ()
504
+ # partitioner = XnnpackPartitioner()
509
505
modelname = f"xnnpack_{ modelname } "
510
506
511
507
if args .vulkan :
@@ -516,7 +512,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
516
512
args .quantization_mode is None
517
513
), "Vulkan backend does not support quantization at the moment"
518
514
519
- partitioners [ VulkanPartitioner . __name__ ] = VulkanPartitioner ()
515
+ partitioner = VulkanPartitioner ()
520
516
modelname = f"vulkan_{ modelname } "
521
517
522
518
if args .mps :
@@ -535,7 +531,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
535
531
536
532
compile_specs = [CompileSpec ("use_fp16" , bytes ([True ]))]
537
533
# pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`.
538
- partitioners [ MPSPartitioner . __name__ ] = MPSPartitioner (compile_specs )
534
+ partitioner = MPSPartitioner (compile_specs )
539
535
modelname = f"mps_{ modelname } "
540
536
541
537
if args .generate_etrecord :
@@ -545,7 +541,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
545
541
logging .info ("Generating etrecord" )
546
542
# Copy the edge manager which will be serialized into etrecord. This is memory-wise expensive.
547
543
edge_manager_copy = copy .deepcopy (builder_exported_to_edge .edge_manager )
548
- builder = builder_exported_to_edge .to_backend (partitioners ).to_executorch ()
544
+ builder = builder_exported_to_edge .to_backend (partitioner ).to_executorch ()
549
545
550
546
# Generate ETRecord
551
547
if edge_manager_copy :
@@ -556,7 +552,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
556
552
)
557
553
logging .info ("Generated etrecord.bin" )
558
554
else :
559
- builder = builder_exported_to_edge .to_backend (partitioners ).to_executorch ()
555
+ builder = builder_exported_to_edge .to_backend (partitioner ).to_executorch ()
560
556
561
557
if args .profile_memory :
562
558
generate_memory_trace (builder .export_program , "memory_profile.json" )
0 commit comments