@@ -602,18 +602,18 @@ def _export_llama(modelname, args) -> str: # noqa: C901
     ).export_to_edge(quantizers)
 
     # to_backend
-    partitioner = None
+    partitioners = []
     if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None:
-        partitioner = XnnpackDynamicallyQuantizedPartitioner()
+        partitioners.append(XnnpackDynamicallyQuantizedPartitioner())
         modelname = f"xnnpack_dq_{modelname}"
 
     if args.xnnpack:
         # Following changes due to.
         # 1. We need dynamically quantized partitioner for both pt2e_quantize options
         # as well as "qmode 8da4w" which is also dynamic quantizes linear layers.
         # 2. XNNPACK partitioner seems to result in seg fault for non dqlinear ops.
-        partitioner = XnnpackDynamicallyQuantizedPartitioner()
-        # partitioner = XnnpackPartitioner()
+        partitioners.append(XnnpackDynamicallyQuantizedPartitioner())
+        # partitioners.append(XnnpackPartitioner())
         modelname = f"xnnpack_{modelname}"
 
     if args.vulkan:
@@ -624,7 +624,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
             args.quantization_mode is None
         ), "Vulkan backend does not support quantization at the moment"
 
-        partitioner = VulkanPartitioner()
+        partitioners.append(VulkanPartitioner())
         modelname = f"vulkan_{modelname}"
 
     if args.mps:
@@ -643,7 +643,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
 
         compile_specs = [CompileSpec("use_fp16", bytes([True]))]
         # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`.
-        partitioner = MPSPartitioner(compile_specs)
+        partitioners.append(MPSPartitioner(compile_specs))
         modelname = f"mps_{modelname}"
 
     if args.coreml:
@@ -673,9 +673,11 @@ def _export_llama(modelname, args) -> str: # noqa: C901
             # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
             model_type=CoreMLBackend.MODEL_TYPE.MODEL,
         )
-        # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
-        partitioner = CoreMLPartitioner(
-            skip_ops_for_coreml_delegation=None, compile_specs=compile_specs
+        partitioners.append(
+            # pyre-ignore: Undefined attribute [16]: Module `executorch.backends` has no attribute `apple`
+            CoreMLPartitioner(
+                skip_ops_for_coreml_delegation=None, compile_specs=compile_specs
+            )
         )
         modelname = f"coreml_{modelname}"
 
@@ -730,7 +732,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
         logging.info("Generating etrecord")
         # Copy the edge manager which will be serialized into etrecord. This is memory-wise expensive.
         edge_manager_copy = copy.deepcopy(builder_exported_to_edge.edge_manager)
-        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()
+        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
 
         # Generate ETRecord
         if edge_manager_copy:
@@ -741,7 +743,7 @@ def _export_llama(modelname, args) -> str: # noqa: C901
             )
             logging.info("Generated etrecord.bin")
     else:
-        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()
+        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
 
     if args.profile_memory:
         generate_memory_trace(builder.export_program, "memory_profile.json")
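
The change replaces the single partitioner variable with a partitioners list, so that several backend partitioners (XNNPACK, Vulkan, MPS, CoreML) can be collected and handed to to_backend(partitioners) together. Below is a minimal sketch of how a builder could consume such a list; the EdgeBuilder class and its edge_manager attribute are hypothetical stand-ins for illustration, not the actual ExecuTorch implementation.

# Minimal sketch (assumption, not the actual ExecuTorch builder): apply a list
# of partitioners in order. An empty list is a no-op, which is why the diff
# initializes partitioners = [] instead of partitioner = None.
from typing import List, Optional


class EdgeBuilder:
    """Hypothetical stand-in for the builder object used in the diff."""

    def __init__(self, edge_manager):
        self.edge_manager = edge_manager

    def to_backend(self, partitioners: Optional[List] = None) -> "EdgeBuilder":
        # Delegate each backend partitioner to the underlying edge manager in turn.
        for partitioner in partitioners or []:
            self.edge_manager = self.edge_manager.to_backend(partitioner)
        return self

    def to_executorch(self):
        # Placeholder for the final lowering step chained after to_backend().
        return self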