Arm backend: Convert int pow to multiplications (#11037)

per · web-flow · commit 8d11ea688426 · 2025-05-21T14:49:48.000+02:00
### Summary

Add a pass that converts pow with an integer exponent into a series of
multiplications. This handles square operations on negative values, since
TOSA 1.0 only allows values > 0 for its POW operation.

### Test plan
Test on internal and external CI.

Signed-off-by: Per Åstrand <per.astrand@arm.com>
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
@@ -15,6 +15,7 @@
 from .convert_any_default_dim_dims_pass import ConvertAnyDefaultDimDimsPass  # noqa
 from .convert_expand_copy_to_repeat import ConvertExpandCopyToRepeatPass  # noqa
 from .convert_full_like_to_full_pass import ConvertFullLikeToFullPass  # noqa
+from .convert_int_pow_to_mul import ConvertIntPowToMuls  # noqa
 from .convert_minmax_pass import ConvertMinMaxPass  # noqa
 from .convert_split_to_slice import ConvertSplitToSlicePass  # noqa
 from .convert_squeezes_to_view import ConvertSqueezesToViewPass  # noqa
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -18,6 +18,7 @@
     ConvertAnyDefaultDimDimsPass,
     ConvertExpandCopyToRepeatPass,
     ConvertFullLikeToFullPass,
+    ConvertIntPowToMuls,
     ConvertMinMaxPass,
     ConvertMmToBmmPass,
     ConvertSplitToSlicePass,
@@ -131,14 +132,14 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
 
     def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(DecomposeSqrtPass())
+        self.add_pass(ConvertIntPowToMuls())
         self.add_pass(ReplaceScalarWithTensorArgPassTOSAMI())
         self.add_pass(FuseQuantizedActivationPass())
         self.add_pass(RemoveGetItemPass())
         self.add_pass(ConvertSplitToSlicePass())
         self.add_pass(FuseBatchnorm2DPass(exported_program))
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
-        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeBatchNormPass())
         self.add_pass(DecomposeLayerNormPass())
diff --git a/backends/arm/_passes/convert_int_pow_to_mul.py b/backends/arm/_passes/convert_int_pow_to_mul.py
@@ -0,0 +1,52 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+class ConvertIntPowToMuls(ArmPass):
+    """
+    Replaces pow with integer exponent with a series of multiplications.
+    Only handles pow.Tensor_Scalar and not pow.Tensor_Tensor.
+    Needs to be run before doing scalar to tensor conversion.
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op != exir_ops.edge.aten.pow.Tensor_Scalar:
+            return super().call_operator(op, args, kwargs, meta)
+
+        x = args[0]
+        exp = args[1]
+
+        # Handle zero first and return early
+        if exp == 0:
+            # return a tensor of ones with the same shape as x
+            return super().call_operator(
+                exir_ops.edge.aten.full_like.default, (x, 1), {}, meta, True
+            )
+
+        if not isinstance(exp, int):
+            return super().call_operator(op, args, kwargs, meta)
+
+        # Handle negative exponent
+        if exp < 0:
+            x = super().call_operator(
+                exir_ops.edge.aten.reciprocal.default, (x,), {}, meta, True
+            )
+            exp = -exp
+
+        res = x
+
+        # Consider exponentiation by squaring, if exp turns out to be large.
+        # Now we just roll out the multiplications.
+        for _ in range(exp - 1):
+            res = super().call_operator(
+                exir_ops.edge.aten.mul.Tensor, (res, x), {}, meta, True
+            )
+
+        return res
diff --git a/backends/arm/test/ops/test_pow.py b/backends/arm/test/ops/test_pow.py
@@ -71,6 +71,10 @@ class Pow_TensorScalar(torch.nn.Module):
             torch.abs(torch.randn((1, 2, 3, 6))),
             6.789,
         ),
+        "neg_base_exp_pos_integer": lambda: (
+            -torch.abs(torch.randn((1, 2, 3, 6))) - 10,
+            3,
+        ),
     }
 
     def __init__(self, exp):
diff --git a/backends/arm/test/passes/test_convert_int_pow_to_muls.py b/backends/arm/test/passes/test_convert_int_pow_to_muls.py
@@ -0,0 +1,73 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm._passes import ConvertIntPowToMuls
+
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
+
+input_t = Tuple[torch.nn.Module, int]  # Input x
+
+
+class Square(torch.nn.Module):
+    """
+    Squares the input tensor using ``Tensor.square``.
+
+    The test in this file expects this module to show up as a single
+    pow.Tensor_Scalar node before the pass and one mul.Tensor node after it.
+    """
+
+    def forward(self, x):
+        return x.square()
+
+    def get_inputs(self) -> input_t:
+        # One random 4x4 tensor, wrapped in a one-element tuple.
+        return (torch.rand(4, 4),)
+
+
+class Pow(torch.nn.Module):
+    """
+    Raises the input tensor to a fixed scalar exponent using ``Tensor.pow``.
+    """
+
+    def __init__(self, exponent):
+        super().__init__()
+        # Scalar exponent applied in forward(); the cases in this file pass ints.
+        self.exponent = exponent
+
+    def forward(self, x):
+        return x.pow(self.exponent)
+
+    def get_inputs(self) -> input_t:
+        # One random 4x4 tensor, wrapped in a one-element tuple.
+        return (torch.rand(4, 4),)
+
+
+# Each entry maps a case name to (module under test, number of mul.Tensor
+# nodes expected in the graph after ConvertIntPowToMuls has run).
+test_data = {
+    "square": (Square(), 1),
+    "pow_2": (Pow(2), 1),
+    "pow_3": (Pow(3), 2),
+    "pow_0": (Pow(0), 0),  # exponent 0 is replaced by full_like, so no muls
+    "pow_neg_2": (Pow(-2), 1),  # reciprocal first, then a single mul
+}
+
+
+@common.parametrize("data", test_data)
+def test_convert_pow_to_muls(data):
+    module = data[0]
+    nbr_muls = data[1]
+    pipeline = PassPipeline[input_t](
+        module,
+        module.get_inputs(),
+        quantize=False,
+        ops_before_pass={
+            "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 1,
+        },
+        ops_not_before_pass=[],
+        ops_after_pass={
+            "executorch_exir_dialects_edge__ops_aten_mul_Tensor": nbr_muls,
+        },
+        ops_not_after_pass=["executorch_exir_dialects_edge__ops_pow_Tensor_Scalar"],
+        pass_list=[ConvertIntPowToMuls],
+    )
+    pipeline.run()