
Commit c89a758

cccclai authored and facebook-github-bot committed

use partitioner instance directly in to_backend (#2513)

Summary: Pull Request resolved: #2513. `to_backend` takes either a single partitioner or a dict of partitioners (key: method_name, value: partitioner); we shouldn't pass a dict whose keys are backend names and whose values are partitioners. Reviewed By: mergennachin. Differential Revision: D55078939. fbshipit-source-id: 7c2b8587095d2227d17a198c3b4e975910f4df00

1 parent 8716780 · commit c89a758

File tree

2 files changed: +15 −33 lines
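
Before the diffs, a minimal sketch of the call-site change. This is illustrative, not code from the commit; it assumes an edge manager on which export has already run, and uses the partitioner import path these files rely on:

from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackDynamicallyQuantizedPartitioner,
)


def lower_to_xnnpack(edge_manager):
    # Old style (removed by this commit): a dict keyed by the partitioner's
    # class name, even though the dict form of to_backend expects method
    # names as keys.
    # edge_manager = edge_manager.to_backend(
    #     {
    #         XnnpackDynamicallyQuantizedPartitioner.__name__: (
    #             XnnpackDynamicallyQuantizedPartitioner()
    #         )
    #     }
    # )

    # New style: pass the partitioner instance directly
    # (or None to skip delegation entirely).
    return edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner())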

examples/models/llama2/builder.py

Lines changed: 6 additions & 20 deletions
@@ -12,7 +12,7 @@
 import logging
 from enum import Enum
 from json import JSONDecodeError
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional

 import torch
 from executorch.backends.transforms.duplicate_dynamic_quant_chain import (
@@ -288,30 +288,18 @@ def export_to_edge(
         )
         return self

-    def to_backend(
-        self, partitioner: Union[Partitioner, Dict[str, Partitioner]]
-    ) -> "LlamaEdgeManager":
+    def to_backend(self, partitioner: Optional[Partitioner]) -> "LlamaEdgeManager":
         """
         Partition the model and lower to different backends. The signature is
         aligned with the signature of `to_backend` method of EdgeManager.
         Args:
-            partitioner (Union[Partitioner, Dict[str, Partitioner]]): One or more
+            partitioner (Optional[Partitioner]): One or more
                 partitioner to be sent to EdgeManager.to_backend().
         """
         assert self.edge_manager is not None, "Need to run export_to_edge() first"
-        if isinstance(partitioner, dict):
-            for key, p in partitioner.items():
-                assert self.edge_manager is not None
-                self.edge_manager = self.edge_manager.to_backend(p)
-                if self.verbose:
-                    logging.info(
-                        print_delegated_graph(
-                            self.edge_manager.exported_program().graph_module
-                        )
-                    )
-                    logging.info(f"Applied partitioners: {key}")
-        elif isinstance(partitioner, Partitioner):
-            assert self.edge_manager is not None
+        if partitioner is None:
+            logging.info("No partitioner provided, passing...")
+        else:
             self.edge_manager = self.edge_manager.to_backend(partitioner)
             if self.verbose:
                 logging.info(
@@ -320,8 +308,6 @@ def to_backend(
                     )
                 )
                 logging.info(f"Applied partitioners: {partitioner}")
-        else:
-            logging.warning("Invalid partitioner, skipping...")
         return self

     def to_executorch(self) -> "LlamaEdgeManager":
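
A hedged usage sketch of the new signature (`manager` stands in for a LlamaEdgeManager on which export_to_edge() has already run; the function is illustrative, not part of the commit):

from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
    XnnpackDynamicallyQuantizedPartitioner,
)


def demo_to_backend(manager):
    # None is now an accepted no-op: the method logs
    # "No partitioner provided, passing..." and returns the manager unchanged,
    # where it previously warned about an invalid partitioner.
    manager = manager.to_backend(None)

    # An instance delegates supported subgraphs; in verbose mode the method
    # logs the delegated graph followed by "Applied partitioners: ...".
    return manager.to_backend(XnnpackDynamicallyQuantizedPartitioner())

Since to_backend still returns the manager, applying more than one partitioner remains possible by chaining calls, even though the dict branch is gone.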

examples/models/llama2/export_llama_lib.py

Lines changed: 9 additions & 13 deletions
@@ -539,22 +539,18 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
     ).export_to_edge(quantizers)

     # to_backend
-    partitioners = {}
+    partitioner = None
     if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None:
-        partitioners[XnnpackDynamicallyQuantizedPartitioner.__name__] = (
-            XnnpackDynamicallyQuantizedPartitioner()
-        )
+        partitioner = XnnpackDynamicallyQuantizedPartitioner()
         modelname = f"xnnpack_dq_{modelname}"

     if args.xnnpack:
         # Following changes due to.
         # 1. We need dynamically quantized partitioner for both pt2e_quantize options
         # as well as "qmode 8da4w" which is also dynamic quantizes linear layers.
         # 2. XNNPACK partitioner seems to result in seg fault for non dqlinear ops.
-        partitioners[XnnpackDynamicallyQuantizedPartitioner.__name__] = (
-            XnnpackDynamicallyQuantizedPartitioner()
-        )
-        # partitioners[XnnpackPartitioner.__name__] = XnnpackPartitioner()
+        partitioner = XnnpackDynamicallyQuantizedPartitioner()
+        # partitioner = XnnpackPartitioner()
         modelname = f"xnnpack_{modelname}"

     if args.vulkan:
@@ -565,7 +561,7 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
             args.quantization_mode is None
         ), "Vulkan backend does not support quantization at the moment"

-        partitioners[VulkanPartitioner.__name__] = VulkanPartitioner()
+        partitioner = VulkanPartitioner()
         modelname = f"vulkan_{modelname}"

     if args.mps:
@@ -584,7 +580,7 @@ def _export_llama(modelname, args) -> str:  # noqa: C901

         compile_specs = [CompileSpec("use_fp16", bytes([True]))]
         # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`.
-        partitioners[MPSPartitioner.__name__] = MPSPartitioner(compile_specs)
+        partitioner = MPSPartitioner(compile_specs)
         modelname = f"mps_{modelname}"

     if args.coreml:
@@ -615,7 +611,7 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
             model_type=CoreMLBackend.MODEL_TYPE.MODEL,
         )
         # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
-        partitioners[CoreMLPartitioner.__name__] = CoreMLPartitioner(
+        partitioner = CoreMLPartitioner(
             skip_ops_for_coreml_delegation=None, compile_specs=compile_specs
         )
         modelname = f"coreml_{modelname}"
@@ -627,7 +623,7 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
         logging.info("Generating etrecord")
         # Copy the edge manager which will be serialized into etrecord. This is memory-wise expensive.
         edge_manager_copy = copy.deepcopy(builder_exported_to_edge.edge_manager)
-        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
+        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()

         # Generate ETRecord
         if edge_manager_copy:
@@ -638,7 +634,7 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
         )
         logging.info("Generated etrecord.bin")
     else:
-        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
+        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()

     if args.profile_memory:
         generate_memory_trace(builder.export_program, "memory_profile.json")
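
The selection logic above collapses to one pattern: a single Optional[Partitioner] that each backend flag may overwrite. A condensed, self-contained sketch of that pattern (the function and flag names mirror the diff, but the helper itself is illustrative and the deferred import paths are assumptions based on the executorch tree at the time):

import argparse
from typing import Optional

from executorch.exir.backend.partitioner import Partitioner


def pick_partitioner(args: argparse.Namespace) -> Optional[Partitioner]:
    # Start with no partitioner; each flag below may replace it, so if
    # several backend flags are set, the last branch checked wins.
    partitioner: Optional[Partitioner] = None
    if args.xnnpack:
        from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
            XnnpackDynamicallyQuantizedPartitioner,
        )

        partitioner = XnnpackDynamicallyQuantizedPartitioner()
    if args.vulkan:
        from executorch.backends.vulkan.partitioner.vulkan_partitioner import (
            VulkanPartitioner,
        )

        partitioner = VulkanPartitioner()
    return partitioner

One behavioral difference from the old dict is worth noting: the dict could accumulate several partitioners at once, while the single variable means a later flag silently replaces an earlier one.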
