Skip decompose op

winskuo-quic · winskuo-quic · commit f277d442b0d5 · 2025-03-25T20:34:44.000+08:00
diff --git a/backends/qualcomm/_passes/annotate_decomposed.py b/backends/qualcomm/_passes/annotate_decomposed.py
@@ -17,6 +17,8 @@ class AnnotateDecomposed(ExportPass):
     generated after quantization process.
     """
 
+    decomp_ops = [torch.ops.aten.stack.default, torch.ops.aten.unbind.int]
+
     def __init__(self, edge_program: torch.export.ExportedProgram):
         super(AnnotateDecomposed, self).__init__()
         self.edge_program = edge_program
@@ -32,7 +34,7 @@ def _annotate_unbind(self, graph_module: torch.fx.GraphModule):
                         n.meta[QCOM_QUANT_ATTRS] = quant_attrs.copy()
 
     def _annotate_stack(self, graph_module: torch.fx.GraphModule):
-        partitions = get_source_partitions(graph_module.graph, [torch.stack])
+        partitions = get_source_partitions(graph_module.graph, [torch.stack, "stack"])
         for _, src_partitions in partitions.items():
             for src_partition in src_partitions:
                 output = src_partition.output_nodes[0]
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
@@ -323,7 +323,7 @@ def canonicalize_program(obj):
     update_spill_fill_size(obj)
 
 
-def get_decomp_table() -> Dict[torch._ops.OperatorBase, Callable]:
+def get_decomp_table(passes_job) -> Dict[torch._ops.OperatorBase, Callable]:
     source_decompositions = core_aten_decompositions()
     # The below super ops are supported by QNN
     skip_decompositions = [
@@ -337,10 +337,17 @@ def get_decomp_table() -> Dict[torch._ops.OperatorBase, Callable]:
         torch.ops.pt2e_quant.quantize_affine.default,
         torch.ops.pt2e_quant.dequantize_affine.default,
         torch.ops.aten._safe_softmax.default,
-        torch.ops.aten.stack.default,  # TODO: Might need to remove this later due to Mimi. QNN does not support int io for stack op.
+        torch.ops.aten.stack.default,
         torch.ops.aten.unbind.int,
     ]
 
+    # AnnotateDecomposed only works on ops that were actually decomposed, so when passes_job includes it, drop its decomp_ops from the skip list.
+    if passes_job and passes_job.get(AnnotateDecomposed, False):
+        skip_decompositions = [
+            skip_decomp_op
+            for skip_decomp_op in skip_decompositions
+            if skip_decomp_op not in AnnotateDecomposed.decomp_ops
+        ]
     remove_decompositions(source_decompositions, skip_decompositions)
 
     return source_decompositions
@@ -468,7 +475,7 @@ def capture_program(
     module = _preprocess_module(module, inputs)
     ep = torch.export.export(module, inputs, dynamic_shapes=dynamic_shapes, strict=True)
     # TODO: Handle stack op. If we want to run annotate_decomposed pass for stack op, we need to make stack op decompose, which means we need to find a method to remove it from skip_decomp table
-    decomposed_ep = ep.run_decompositions(get_decomp_table())
+    decomposed_ep = ep.run_decompositions(get_decomp_table(passes_job))
     core_ep = ExirExportedProgram(decomposed_ep, False)
     core_ep.transform(TensorI64toI32(edge_program=core_ep))
     edge_ep = core_ep.to_edge(qnn_edge_config())
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
@@ -794,9 +794,19 @@ def _to_edge_and_lower_llama(  # noqa: C901
             )
         )
         # pyre-ignore: Undefined import [21]: Could not find a module corresponding to import `executorch.backends.qualcomm.utils.utils`
-        from executorch.backends.qualcomm.utils.utils import _transform, tag_quant_io
+        from executorch.backends.qualcomm._passes.annotate_decomposed import (
+            AnnotateDecomposed,
+        )
+        from executorch.backends.qualcomm.utils.constants import QCOM_PASS_ACTIVATE_KEY
+        from executorch.backends.qualcomm.utils.utils import (
+            _transform,
+            get_capture_program_passes,
+            tag_quant_io,
+        )
 
-        _transform(builder_exported_to_edge.edge_manager.exported_program())
+        passes_job = get_capture_program_passes()
+        passes_job[AnnotateDecomposed][QCOM_PASS_ACTIVATE_KEY] = True
+        _transform(builder_exported_to_edge.edge_manager.exported_program(), passes_job)
 
         if args.num_sharding > 0:
             model_sharding.split_graph(