use partitioner instance directly in to_backend (#2513)

cccclai · facebook-github-bot · commit 5cd40393c143 · 2024-03-19T13:52:54.000-07:00
Summary: Pull Request resolved: #2513. `to_backend` takes either a single partitioner or a dict of partitioners keyed by method name (`key: method_name, value: partitioner`). We shouldn't pass a dict that uses the backend name as the key and the partitioner as the value. Differential Revision: D55078939
diff --git a/examples/models/llama2/builder.py b/examples/models/llama2/builder.py
@@ -286,9 +286,7 @@ def export_to_edge(
             )
         return self
 
-    def to_backend(
-        self, partitioner: Union[Partitioner, Dict[str, Partitioner]]
-    ) -> "LlamaEdgeManager":
+    def to_backend(self, partitioner: Partitioner) -> "LlamaEdgeManager":
         """
         Partition the model and lower to different backends. The signature is
         aligned with the signature of `to_backend` method of EdgeManager.
@@ -297,18 +295,7 @@ def to_backend(
                 partitioner to be sent to EdgeManager.to_backend().
         """
         assert self.edge_manager is not None, "Need to run export_to_edge() first"
-        if isinstance(partitioner, dict):
-            for key, p in partitioner.items():
-                assert self.edge_manager is not None
-                self.edge_manager = self.edge_manager.to_backend(p)
-                if self.verbose:
-                    logging.info(
-                        print_delegated_graph(
-                            self.edge_manager.exported_program().graph_module
-                        )
-                    )
-                    logging.info(f"Applied partitioners: {key}")
-        elif isinstance(partitioner, Partitioner):
+        if isinstance(partitioner, Partitioner):
             assert self.edge_manager is not None
             self.edge_manager = self.edge_manager.to_backend(partitioner)
             if self.verbose:
diff --git a/examples/models/llama2/export_llama_lib.py b/examples/models/llama2/export_llama_lib.py
@@ -490,21 +490,17 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
     ).export_to_edge(quantizers)
 
     # to_backend
-    partitioners = {}
+    partitioner = None
     if pt2e_quant_params is not None and pt2e_quant_params.quantize_linear is not None:
-        partitioners[XnnpackDynamicallyQuantizedPartitioner.__name__] = (
-            XnnpackDynamicallyQuantizedPartitioner()
-        )
+        partitioner = XnnpackDynamicallyQuantizedPartitioner()
         modelname = f"xnnpack_dq_{modelname}"
 
     if args.xnnpack:
         # Following changes due to.
         # 1. We need dynamically quantized partitioner for both pt2e_quantize options
         #    as well as "qmode int4" which is also dynamic quantizes linear layers.
         # 2. XNNPACK partitioner seems to result in seg fault for non dqlinear ops.
-        partitioners[XnnpackDynamicallyQuantizedPartitioner.__name__] = (
-            XnnpackDynamicallyQuantizedPartitioner()
-        )
+        partitioner = XnnpackDynamicallyQuantizedPartitioner()
         # partitioners[XnnpackPartitioner.__name__] = XnnpackPartitioner()
         modelname = f"xnnpack_{modelname}"
 
@@ -516,7 +512,7 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
             args.quantization_mode is None
         ), "Vulkan backend does not support quantization at the moment"
 
-        partitioners[VulkanPartitioner.__name__] = VulkanPartitioner()
+        partitioner = VulkanPartitioner()
         modelname = f"vulkan_{modelname}"
 
     if args.mps:
@@ -545,7 +541,8 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
         logging.info("Generating etrecord")
         # Copy the edge manager which will be serialized into etrecord. This is memory-wise expensive.
         edge_manager_copy = copy.deepcopy(builder_exported_to_edge.edge_manager)
-        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
+        # #pyre-ignore: pyre can't recognize the type of the instance
+        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()
 
         # Generate ETRecord
         if edge_manager_copy:
@@ -556,7 +553,8 @@ def _export_llama(modelname, args) -> str:  # noqa: C901
             )
             logging.info("Generated etrecord.bin")
     else:
-        builder = builder_exported_to_edge.to_backend(partitioners).to_executorch()
+        # #pyre-ignore: pyre can't recognize the type of the instance
+        builder = builder_exported_to_edge.to_backend(partitioner).to_executorch()
 
     if args.profile_memory:
         generate_memory_trace(builder.export_program, "memory_profile.json")