Qualcomm AI Engine Direct - Adapt to new IR capture flow

quic_chuntl · quic_chuntl · commit bbf58679a1f3 · 2024-03-08T16:12:22.000+08:00
Summary:
- Change the existing IR capture flow (exir.capture) to torch.export.export
- Add a custom decomposition table to reduce maintenance effort
- Fix the breakages encountered and make sure all tests pass
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
@@ -25,7 +25,6 @@
 from executorch.backends.qualcomm.utils.utils import capture_program
 from executorch.examples.qualcomm.scripts.utils import SimpleADB
 
-from executorch.exir.backend.backend_api import to_backend
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 
@@ -107,19 +106,18 @@ def lower_module_and_test_output(
         qnn_partitioner = QnnPartitioner(
             self.compiler_specs, skip_node_id_set, skip_node_op_set
         )
-        delegated_program = capture_program(module, sample_inputs)
-        delegated_program.exported_program = to_backend(
-            delegated_program.exported_program, qnn_partitioner
-        )
-        exec_prog = delegated_program.to_executorch()
+        delegated_program_mgr = capture_program(module, sample_inputs)
+        delegated_program_mgr = delegated_program_mgr.to_backend(qnn_partitioner)
+        exec_prog_mgr = delegated_program_mgr.to_executorch()
 
         # Assert the backend name is qnn
         self.assertEqual(
-            len(exec_prog.program.execution_plan[0].delegates), expected_partitions
+            len(exec_prog_mgr.executorch_program.execution_plan[0].delegates),
+            expected_partitions,
         )
         for i in range(expected_partitions):
             self.assertEqual(
-                exec_prog.program.execution_plan[0].delegates[i].id,
+                exec_prog_mgr.executorch_program.execution_plan[0].delegates[i].id,
                 QnnBackend.__name__,
             )
 
@@ -132,7 +130,7 @@ def lower_module_and_test_output(
                     pte_fname,
                 ) = self._save_model_and_expected_output(
                     module,
-                    exec_prog.buffer,
+                    exec_prog_mgr.buffer,
                     sample_inputs,
                     tmp_dir,
                 )
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import List, Tuple
+from typing import Callable, Dict, List, Tuple
 
 import executorch.exir as exir
 
@@ -20,7 +20,6 @@
 )
 from executorch.backends.qualcomm.passes.convert_bmm_to_matmul import ConvertBmmToMatmul
 from executorch.backends.qualcomm.passes.convert_hardsigmoid import ConvertHardsigmoid
-from executorch.backends.qualcomm.passes.convert_hardswish import ConvertHardswish
 from executorch.backends.qualcomm.passes.convert_interpolate_with_upsample2d import (
     ConvertInterpolateWithUpsample2D,
 )
@@ -29,9 +28,6 @@
 from executorch.backends.qualcomm.passes.i64_to_i32 import I64toI32
 from executorch.backends.qualcomm.passes.insert_requantize import InsertRequantize
 from executorch.backends.qualcomm.passes.layout_transform import LayoutTransform
-from executorch.backends.qualcomm.passes.recompose_pixel_shuffle import (
-    RecomposePixelShuffle,
-)
 from executorch.backends.qualcomm.passes.remove_clone import RemoveClone
 from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import (
     _soc_info_table,
@@ -46,7 +42,10 @@
 from executorch.backends.qualcomm.serialization.qnn_compile_spec_serialize import (
     convert_to_flatbuffer,
 )
+from executorch.exir import ExirExportedProgram
 from executorch.exir.backend.compile_spec_schema import CompileSpec
+from executorch.exir.program._program import to_edge
+from torch._decomp import core_aten_decompositions
 from torch.fx import passes
 
 QNN_COMPILE_SPEC = "qnn_compile_spec"
@@ -60,32 +59,44 @@ def qnn_edge_config() -> exir.EdgeCompileConfig:
     return exir.EdgeCompileConfig(_check_ir_validity=False)
 
 
+def get_decomp_table() -> Dict[torch._ops.OperatorBase, Callable]:
+    source_decompositions = core_aten_decompositions()
+    # The below super ops are supported by QNN
+    remove_decompositions = [
+        torch.ops.aten.pixel_shuffle.default,
+        torch.ops.aten.hardswish.default,
+    ]
+
+    return {
+        key: source_decompositions[key]
+        for key in source_decompositions
+        if key not in remove_decompositions
+    }
+
+
 def capture_program(
     module: torch.nn.Module,
     inputs: Tuple[torch.Tensor],
 ) -> exir.ExirExportedProgram:
-    # TODO: should switch to torch.export.export & custom deomposition
-    #       to reduce maintaining effort.
-    exir_exported_program = exir.capture(
-        module,
-        inputs,
-        qnn_capture_config(),
-    )
+    ep = torch.export.export(module, inputs)
+    decomposed_ep = ep.run_decompositions(get_decomp_table())
+
     # We choose call_operator by target in ConvertBinaryOpsWithScalar
     # because it is the same source_fn_stack for MultiheadAttention
-    exir_exported_program.transform(ConvertBinaryOpsWithScalar())
-    ex_prog = exir_exported_program.to_edge(qnn_edge_config())
+    # TODO: Should modify the scalar op in the op builder instead of
+    #       using transformation
+    core_ep = ExirExportedProgram(decomposed_ep, False)
+    core_ep.transform(ConvertBinaryOpsWithScalar())
+    edge_ep_mgr = to_edge(core_ep.exported_program, compile_config=qnn_edge_config())
 
     # currently ExirExportedProgram.transform does not accept
     # changes of input number which was caused by FoldQDQ
     # apply passes one by one here to avoid IR capture failure
-    edge_program = ex_prog.exported_program
+    edge_program = edge_ep_mgr.exported_program()
     graph_module = edge_program.graph_module
     RemoveClone()(graph_module)
-    RecomposePixelShuffle()(graph_module)
     ConvertToLinear()(graph_module)
     ConvertHardsigmoid()(graph_module)
-    ConvertHardswish()(graph_module)
     ConvertBmmToMatmul()(graph_module)
     ConvertInterpolateWithUpsample2D()(graph_module)
     I64toI32(edge_program)(graph_module)
@@ -95,7 +106,7 @@ def capture_program(
     FoldQDQ()(graph_module)
     InsertRequantize(edge_program)(graph_module)
     LayoutTransform(edge_program)(graph_module)
-    return ex_prog
+    return edge_ep_mgr
 
 
 def draw_graph(title, path, graph_module: torch.fx.GraphModule):
diff --git a/examples/qualcomm/scripts/utils.py b/examples/qualcomm/scripts/utils.py
@@ -26,7 +26,6 @@
     capture_program,
     generate_qnn_executorch_compiler_spec,
 )
-from executorch.exir.backend.backend_api import to_backend
 from executorch.exir.capture._config import ExecutorchBackendConfig
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 
@@ -185,9 +184,9 @@ def build_executorch_binary(
         skip_node_id_set,
         skip_node_op_set,
     )
-    edge_prog.exported_program = to_backend(edge_prog.exported_program, qnn_partitioner)
-    edge_prog.exported_program.graph_module.graph.print_tabular()
-    exec_prog = edge_prog.to_executorch(
+    delegated_program_mgr = edge_prog.to_backend(qnn_partitioner)
+    edge_prog.exported_program().graph_module.graph.print_tabular()
+    exec_prog = delegated_program_mgr.to_executorch(
         config=ExecutorchBackendConfig(extract_constant_segment=False)
     )
     with open(f"{file_name}.pte", "wb") as file:

Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,6 @@`
`26`	`26`	`capture_program,`
`27`	`27`	`generate_qnn_executorch_compiler_spec,`
`28`	`28`	`)`
`29`		`-from executorch.exir.backend.backend_api import to_backend`
`30`	`29`	`from executorch.exir.capture._config import ExecutorchBackendConfig`
`31`	`30`	`from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e`
`32`	`31`
`@@ -185,9 +184,9 @@ def build_executorch_binary(`
`185`	`184`	`skip_node_id_set,`
`186`	`185`	`skip_node_op_set,`
`187`	`186`	`)`
`188`		`- edge_prog.exported_program = to_backend(edge_prog.exported_program, qnn_partitioner)`
`189`		`- edge_prog.exported_program.graph_module.graph.print_tabular()`
`190`		`- exec_prog = edge_prog.to_executorch(`
	`187`	`+ delegated_program_mgr = edge_prog.to_backend(qnn_partitioner)`
	`188`	`+ edge_prog.exported_program().graph_module.graph.print_tabular()`
	`189`	`+ exec_prog = delegated_program_mgr.to_executorch(`
`191`	`190`	`config=ExecutorchBackendConfig(extract_constant_segment=False)`
`192`	`191`	`)`
`193`	`192`	`with open(f"{file_name}.pte", "wb") as file:`