Commit d695f15

Add pass for decomposing (log)softmax
Differential Revision: D64472857
Pull Request resolved: #6287
1 parent 53a94af

8 files changed, +251 −121 lines changed
backends/arm/_passes/arm_pass_manager.py

Lines changed: 6 additions & 1 deletion
@@ -23,6 +23,9 @@
     DecomposeLayerNormPass,
 )
 from executorch.backends.arm._passes.decompose_meandim_pass import DecomposeMeanDimPass
+from executorch.backends.arm._passes.decompose_softmaxes_pass import (
+    DecomposeSoftmaxesPass,
+)
 from executorch.backends.arm._passes.decompose_var_pass import DecomposeVarPass
 from executorch.backends.arm._passes.insert_squeeze_after_sum_pass import (
     InsertSqueezeAfterSumPass,

@@ -66,6 +69,7 @@ def transform_to_backend_pipeline(
         self.add_pass(DecomposeDivPass())
         self.add_pass(InsertSqueezeAfterSumPass())
         self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(DecomposeSoftmaxesPass())
         for spec in compile_spec:
             if spec.key == "permute_memory_format":
                 memory_format = spec.value.decode()

@@ -75,9 +79,10 @@ def transform_to_backend_pipeline(
         return self._transform(exported_program.graph_module)

     def transform_for_annotation_pipeline(self, graph_module: torch.fx.GraphModule):
+        self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
         self.add_pass(DecomposeMeanDimPass())
-        self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeDivPass())
+        self.add_pass(DecomposeSoftmaxesPass())
         return self._transform(graph_module)
backends/arm/_passes/decompose_softmaxes_pass.py

Lines changed: 74 additions & 0 deletions

@@ -0,0 +1,74 @@
# Copyright 2024 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass

# For BI case
torch_softmax = (torch.ops.aten.softmax.int, torch.ops.aten.log_softmax.int)

# For MI case
edge_softmax = (
    exir_ops.edge.aten._softmax.default,
    exir_ops.edge.aten._log_softmax.default,
)

log_softmax = (torch.ops.aten.log_softmax.int, exir_ops.edge.aten._log_softmax.default)


def get_logsoftmax_ops(op) -> tuple:
    """
    Returns the (log_op, exp_op, sum_op, reciprocal_op, mul_op) tuple, where
    the ops depend on whether the (log)softmax op is an exir_ops edge op or a
    torch.ops.aten op.
    """
    if op in edge_softmax:
        return (
            exir_ops.edge.aten.log.default,
            exir_ops.edge.aten.exp.default,
            exir_ops.edge.aten.sum.dim_IntList,
            exir_ops.edge.aten.reciprocal.default,
            exir_ops.edge.aten.mul.Tensor,
        )
    if op in torch_softmax:
        return (
            torch.ops.aten.log.default,
            torch.ops.aten.exp.default,
            torch.ops.aten.sum.dim_IntList,
            torch.ops.aten.reciprocal.default,
            torch.ops.aten.mul.Tensor,
        )
    raise RuntimeError(f"Can't get softmax decomposition ops for op {op}")


class DecomposeSoftmaxesPass(ExportPass):
    """
    This pass decomposes log softmax or softmax into more primitive ops.

    Example:
        %op1 = exp(x)
        %op2 = sum(%op1, dim)
        %op3 = reciprocal(%op2)
        %op4 = mul(%op1, %op3)
        (in logsoftmax case: %op5 = log(%op4))
    """

    def call_operator(self, op, args, kwargs, meta):
        if op not in torch_softmax + edge_softmax:
            return super().call_operator(op, args, kwargs, meta)

        log_op, exp_op, sum_op, reciprocal_op, mul_op = get_logsoftmax_ops(op)

        _input = args[0]
        dim = [args[1]]

        op1 = super().call_operator(exp_op, (_input,), {}, meta)
        op2 = super().call_operator(sum_op, (op1, dim, True), {}, meta)
        op3 = super().call_operator(reciprocal_op, (op2,), {}, meta)
        op4 = super().call_operator(mul_op, (op1, op3), {}, meta)
        if op in log_softmax:
            op4 = super().call_operator(log_op, (op4,), {}, meta)
        return op4
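
As a quick sanity check on the decomposition described in the docstring (not part of the commit; plain eager-mode PyTorch standing in for the aten/edge ops the pass emits), the op chain reproduces torch.softmax, and the trailing log reproduces torch.log_softmax:

import torch

x = torch.randn(4, 10)
dim = -1

op1 = torch.exp(x)                           # exp_op
op2 = torch.sum(op1, dim=dim, keepdim=True)  # sum_op with keepdim=True, as in the pass
op3 = torch.reciprocal(op2)                  # reciprocal_op
op4 = op1 * op3                              # mul_op -> softmax
assert torch.allclose(op4, torch.softmax(x, dim=dim), atol=1e-6)

log_out = torch.log(op4)                     # extra log_op in the logsoftmax case
assert torch.allclose(log_out, torch.log_softmax(x, dim=dim), atol=1e-6)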

backends/arm/arm_partitioner.py

Lines changed: 1 addition & 0 deletions
@@ -62,6 +62,7 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
             exir_ops.edge.aten.relu.default,
             exir_ops.edge.aten.rsqrt.default,
             exir_ops.edge.aten._softmax.default,
+            exir_ops.edge.aten._log_softmax.default,
             exir_ops.edge.aten.slice_copy.Tensor,
             exir_ops.edge.aten.sub.Tensor,
             exir_ops.edge.aten.sum.dim_IntList,

backends/arm/operators/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@
     op_rsqrt,
     op_sigmoid,
     op_slice,
-    op_softmax,
     op_squeeze,
     op_sub,
     op_sum,

backends/arm/operators/op_exp.py

Lines changed: 0 additions & 1 deletion
@@ -42,7 +42,6 @@ def define_node(
 ) -> None:

     assert len(node.all_input_nodes) == 1
-    assert len(node.users) == 1

     if is_quant_node:
         # Assume quantized input is 8 bit.

backends/arm/operators/op_softmax.py

Lines changed: 0 additions & 99 deletions
This file was deleted.
backends/arm/test/ops/test_logsoftmax.py

Lines changed: 158 additions & 0 deletions

@@ -0,0 +1,158 @@
# Copyright 2024 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import unittest

from typing import Tuple

import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.arm_tester import ArmTester
from executorch.exir.backend.compile_spec_schema import CompileSpec
from parameterized import parameterized


test_data_suite = [
    # (test_name, test_data, dim)
    ("zeros", torch.zeros(10, 10, 10, 10), 0),
    ("zeros_neg_dim", torch.zeros(10, 10, 10, 10), -4),
    ("ones", torch.ones(10, 10), 1),
    ("rand_neg_dim", torch.rand(10, 10, 10), -1),
    ("rand", torch.rand(10, 10, 10, 10), 2),
    ("rand_neg_dim", torch.rand(10, 10, 2, 3), -2),
    ("randn", torch.randn(10, 10, 5, 10), 3),
    ("randn_neg_dim", torch.randn(1, 10, 10, 10), -3),
]


class TestLogSoftmax(unittest.TestCase):
    """Tests logsoftmax."""

    class LogSoftmax(torch.nn.Module):
        def __init__(self, dim: int = -1):
            super().__init__()
            self.logsoftmax = torch.nn.LogSoftmax(dim=dim)

        def forward(self, x):
            return self.logsoftmax(x)

    def _test_logsoftmax_tosa_MI_pipeline(
        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
    ):
        (
            ArmTester(
                module,
                example_inputs=test_data,
                compile_spec=common.get_tosa_compile_spec(),
            )
            .export()
            .check(["torch.ops.aten.log_softmax.int"])
            .check_not(["torch.ops.quantized_decomposed"])
            .to_edge()
            .partition()
            .check_not(["executorch_exir_dialects_edge__ops_aten__log_softmax_default"])
            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
            .to_executorch()
            .run_method_and_compare_outputs(inputs=test_data)
        )

    def _test_logsoftmax_tosa_BI_pipeline(
        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
    ):
        (
            ArmTester(
                module,
                example_inputs=test_data,
                compile_spec=common.get_tosa_compile_spec(),
            )
            .quantize()
            .export()
            .check_not(["torch.ops.aten.log_softmax.int"])
            .check(["torch.ops.quantized_decomposed", "torch.ops.aten.mul.Tensor"])
            .to_edge()
            .partition()
            .check_not(["executorch_exir_dialects_edge__ops_aten__log_softmax_default"])
            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
            .to_executorch()
            .run_method_and_compare_outputs(inputs=test_data, qtol=1)
        )

    def _test_logsoftmax_tosa_ethos_BI_pipeline(
        self,
        compile_spec: list[CompileSpec],
        module: torch.nn.Module,
        test_data: Tuple[torch.tensor],
    ):
        (
            ArmTester(
                module,
                example_inputs=test_data,
                compile_spec=compile_spec,
            )
            .quantize()
            .export()
            .check_not(["torch.ops.aten.log_softmax.int"])
            .check(["torch.ops.quantized_decomposed", "torch.ops.aten.mul.Tensor"])
            .to_edge()
            .partition()
            .check_not(["executorch_exir_dialects_edge__ops_aten__log_softmax_default"])
            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
            .to_executorch()
        )

    def _test_logsoftmax_tosa_u55_BI_pipeline(
        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
    ):
        self._test_logsoftmax_tosa_ethos_BI_pipeline(
            common.get_u55_compile_spec(), module, test_data
        )

    def _test_logsoftmax_tosa_u85_BI_pipeline(
        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
    ):
        self._test_logsoftmax_tosa_ethos_BI_pipeline(
            common.get_u85_compile_spec(), module, test_data
        )

    @parameterized.expand(test_data_suite)
    def test_logsoftmax_tosa_MI(
        self,
        test_name: str,
        test_data: torch.Tensor,
        dim: int,
    ):
        self._test_logsoftmax_tosa_MI_pipeline(self.LogSoftmax(dim=dim), (test_data,))

    @parameterized.expand(test_data_suite)
    def test_logsoftmax_tosa_BI(
        self,
        test_name: str,
        test_data: torch.Tensor,
        dim: int,
    ):
        self._test_logsoftmax_tosa_BI_pipeline(self.LogSoftmax(dim=dim), (test_data,))

    @parameterized.expand(test_data_suite)
    def test_logsoftmax_tosa_u55_BI(
        self,
        test_name: str,
        test_data: torch.Tensor,
        dim: int,
    ):
        self._test_logsoftmax_tosa_u55_BI_pipeline(
            self.LogSoftmax(dim=dim), (test_data,)
        )

    @parameterized.expand(test_data_suite)
    def test_logsoftmax_tosa_u85_BI(
        self,
        test_name: str,
        test_data: torch.Tensor,
        dim: int,
    ):
        self._test_logsoftmax_tosa_u85_BI_pipeline(
            self.LogSoftmax(dim=dim), (test_data,)
        )
