
Commit d7c069f

Fix SDPA decomp problem
Differential Revision: D61639074
Pull Request resolved: #4851

Parent: bf64819

3 files changed (+47, -5 lines)

backends/cadence/aot/compiler.py

Lines changed: 13 additions & 5 deletions
@@ -18,12 +18,13 @@
     ReplaceLogicalNotBooleanWhereWithWherePass,
     ReplacePT2DequantWithCadenceDequantPass,
     ReplacePT2QuantWithCadenceQuantPass,
+    ReplaceSafeSoftmaxWithSoftmax,
     ReplaceScalarTensorWithFullPass,
     ReplaceSqueezeAndUnsqueezeWithViewPass,
 )
 from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
 from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer
-from executorch.backends.cadence.aot.utils import model_is_quantized
+from executorch.backends.cadence.aot.utils import model_gm_has_SDPA, model_is_quantized
 from executorch.backends.transforms.decompose_sdpa import (
     DecomposeScaledDotProductAttention,
 )
@@ -57,13 +58,20 @@ def convert_pt2(
     """
 
     # Export with dynamo
-    model_exp = capture_pre_autograd_graph(model, inputs)
+    model_gm = capture_pre_autograd_graph(model, inputs)
 
-    # Decompose SDPA
-    DecomposeScaledDotProductAttention(False)(model_exp)
+    if model_gm_has_SDPA(model_gm):
+        # Decompose SDPA
+        DecomposeScaledDotProductAttention(False)(model_gm)
+
+        # Swap _safe_softmax with _softmax (see https://github.com/pytorch/pytorch/pull/133882
+        # for details).
+        result = ReplaceSafeSoftmaxWithSoftmax()(model_gm)
+        assert result is not None
+        model_gm = result.graph_module
 
     # Prepare
-    prepared_model = prepare_pt2e(model_exp, quantizer)
+    prepared_model = prepare_pt2e(model_gm, quantizer)
 
     # Calibrate
     prepared_model(*inputs)
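
For context, a hedged usage sketch of the reworked convert_pt2 path. TinyAttention and the input shapes are illustrative, not from this commit, and CadenceQuantizer's no-argument constructor is an assumption:

    import torch
    import torch.nn.functional as F

    from executorch.backends.cadence.aot.compiler import convert_pt2
    from executorch.backends.cadence.aot.quantizer.quantizer import CadenceQuantizer

    # Hypothetical module whose captured graph contains an SDPA node.
    class TinyAttention(torch.nn.Module):
        def forward(self, q, k, v):
            return F.scaled_dot_product_attention(q, k, v)

    q = k = v = torch.randn(1, 4, 8, 16)
    # With this fix, convert_pt2 decomposes SDPA (and swaps _safe_softmax
    # for _softmax) only when the captured graph actually contains an
    # aten.scaled_dot_product_attention call.
    converted_gm = convert_pt2(TinyAttention(), (q, k, v), CadenceQuantizer())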

backends/cadence/aot/passes.py

Lines changed: 26 additions & 0 deletions
@@ -266,3 +266,29 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
         result = SpecPropPass()(graph_module)
         assert result is not None
         return result
+
+
+class ReplaceSafeSoftmaxWithSoftmax(ExportPass):
+    """
+    Replace _safe_softmax with _softmax
+    """
+
+    def call_operator(
+        self,
+        op,  # pyre-ignore
+        args: tuple[Argument, ...],
+        kwargs: dict[str, Argument],
+        meta: NodeMetadata,
+    ) -> ProxyValue:
+        if op != torch.ops.aten._safe_softmax.default:
+            return super().call_operator(op, args, kwargs, meta)
+
+        # Add False for the half_to_float argument of softmax
+        softmax_args = list(args) + [False]
+
+        return super().call_operator(
+            torch.ops.aten._softmax.default,
+            tuple(softmax_args),
+            kwargs,
+            meta,
+        )
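
The swap is numerically sound for ordinary inputs: aten._safe_softmax differs from aten._softmax only on fully masked rows (all -inf), where it returns zeros instead of NaNs (see the PyTorch PR linked above). A quick equivalence check, a sketch assuming a PyTorch build recent enough to expose aten._safe_softmax:

    import torch

    x = torch.randn(2, 5)
    safe = torch.ops.aten._safe_softmax.default(x, -1)
    # The pass appends False for _softmax's half_to_float argument.
    plain = torch.ops.aten._softmax.default(x, -1, False)
    assert torch.allclose(safe, plain)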

backends/cadence/aot/utils.py

Lines changed: 8 additions & 0 deletions
@@ -177,3 +177,11 @@ def print_ops_info(
             tablefmt="outline",
         )
     )
+
+
+def model_gm_has_SDPA(model_gm: torch.fx.GraphModule) -> bool:
+    for node in model_gm.graph.nodes:
+        if node.op == "call_function":
+            if node.target == torch.ops.aten.scaled_dot_product_attention.default:
+                return True
+    return False
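
A sketch of exercising the new helper. torch.export.export is used here as a stand-in for the capture step in convert_pt2, on the assumption that SDPA is still a single aten.scaled_dot_product_attention node in the pre-decomposition graph; the module is illustrative:

    import torch
    import torch.nn.functional as F

    from executorch.backends.cadence.aot.utils import model_gm_has_SDPA

    class WithSDPA(torch.nn.Module):
        def forward(self, q, k, v):
            return F.scaled_dot_product_attention(q, k, v)

    q = k = v = torch.randn(1, 2, 4, 8)
    gm = torch.export.export(WithSDPA(), (q, k, v)).module()
    print(model_gm_has_SDPA(gm))  # expected: True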
