Arm backend: Add DecomposeLinalgVectorNorm pass + tests (#10848)

wwwind · web-flow · commit 9dece67e09cf · 2025-05-15T16:12:56.000+02:00
Added decomposition of linalg vector norm.

Signed-off-by: Elena Zhelezina &lt;elena.zhelezina@arm.com&gt;
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
@@ -24,6 +24,7 @@
 from .decompose_gelu_pass import DecomposeGeluPass  # noqa
 from .decompose_layernorm_pass import DecomposeLayerNormPass  # noqa
 from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass  # noqa
+from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass  # noqa
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_ne_pass import DecomposeNotEqualPass  # noqa
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -29,6 +29,7 @@
     DecomposeLayerNormPass,
     DecomposeLeakyReLUPass,
     DecomposeLinearPass,
+    DecomposeLinearVectorNormPass,
     DecomposeMeanDimPass,
     DecomposeNotEqualPass,
     DecomposeSelectPass,
@@ -86,6 +87,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertSplitToSlicePass())
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(ConvertFullLikeToFullPass())
         self.add_pass(ConvertToClampPass())
@@ -133,6 +135,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseBatchnorm2DPass(exported_program))
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeBatchNormPass())
         self.add_pass(DecomposeLayerNormPass())
@@ -207,6 +210,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeCosineSimilarityPass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeLeakyReLUPass())
+        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(DecomposeSqrtPass())
         self.add_pass(DecomposeSiluPass())
 
diff --git a/backends/arm/_passes/decompose_linalg_vector_norm_pass.py b/backends/arm/_passes/decompose_linalg_vector_norm_pass.py
@@ -0,0 +1,78 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.pass_base import ExportPass
+
+
+class DecomposeLinearVectorNormPass(ExportPass):
+    """
+    This pass decomposes aten.linalg_vector_norm.default into more primitive ops.
+    We need to add this pass before quantization for graph annotation.
+    By default, aten.linalg_vector_norm op is decomposed during legalization to Edge IR.
+
+    The decomposition is as follows:
+
+      For p == 1:
+          out = REDUCE_SUM(ABS(x), dims, keepdim)
+
+      For p == 2:
+          out = SQRT(REDUCE_SUM(MUL(x, x), dims, keepdim))
+
+      For arbitrary p:
+          We don't support arbitrary p (a ValueError is raised), because the decomposition would be
+          out = POW(REDUCE_SUM(POW(ABS(x), p), dims, keepdim), 1/p)
+          In this case we would need to wrap p into a Tensor, which requires knowing
+          the dtype in advance, and we don't know this from the FX graph.
+    """
+
+    torch_linalg_vector_norm = (torch.ops.aten.linalg_vector_norm.default,)  # ops this pass rewrites
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in self.torch_linalg_vector_norm:
+            return super().call_operator(op, args, kwargs, meta)
+
+        # Extract inputs and optional positional arguments (kwargs are not read here).
+        # Expected args:
+        #   args[0]: input tensor
+        #   args[1]: norm order 'p' (optional, default: 2.0)
+        #   args[2]: dimensions to reduce (required; ValueError is raised if missing/None)
+        #   args[3]: keepdim flag (optional, default: False)
+        input_tensor = args[0]
+        norm_order = args[1] if len(args) > 1 else 2.0
+        norm_dim = args[2] if len(args) > 2 else None
+        keepdim = args[3] if len(args) > 3 else False
+
+        if norm_order not in (1, 2):  # only the p == 1 and p == 2 decompositions exist below
+            raise ValueError(
+                f"The order of {norm_order}\n"
+                f"is not supported for linalg_vector_norm operator"
+            )
+
+        if norm_dim is None:
+            raise ValueError("The norm_dim for linalg_vector_norm is None.")
+
+        dims = [norm_dim] if isinstance(norm_dim, int) else list(norm_dim)  # always pass a list of dims
+
+        # Decomposition based on norm order; each new op reuses the original node's meta.
+        if norm_order == 1:
+            op1 = super().call_operator(
+                torch.ops.aten.abs.default, (input_tensor,), {}, meta
+            )
+            op2 = super().call_operator(
+                torch.ops.aten.sum.dim_IntList, (op1, dims, keepdim), {}, meta
+            )
+            return op2
+
+        elif norm_order == 2:
+            # For p == 2, the decomposition is sqrt(sum(x * x, dims, keepdim)).
+            op1 = super().call_operator(
+                torch.ops.aten.mul.Tensor, (input_tensor, input_tensor), {}, meta
+            )
+            op2 = super().call_operator(
+                torch.ops.aten.sum.dim_IntList, (op1, dims, keepdim), {}, meta
+            )
+            op3 = super().call_operator(torch.ops.aten.sqrt.default, (op2,), {}, meta)
+            return op3
diff --git a/backends/arm/scripts/parse_test_names.py b/backends/arm/scripts/parse_test_names.py
@@ -8,6 +8,7 @@
 CUSTOM_EDGE_OPS = [
     "linspace.default",
     "eye.default",
+    "vector_norm.default",
     "hardsigmoid.default",
     "hardswish.default",
     "linear.default",
diff --git a/backends/arm/test/ops/test_linalg_vector_norm.py b/backends/arm/test/ops/test_linalg_vector_norm.py
@@ -0,0 +1,131 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+input_t = Tuple[torch.Tensor]
+
+aten_op_q_decomposed_q = "torch.ops.quantized_decomposed.quantize_per_tensor.default"
+exir_op_q_decomposed = "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default"
+
+
+class VectorNormModel(torch.nn.Module):
+    def __init__(
+        self,
+        ord=None,
+        dim=1,
+        keepdim=False,
+    ):
+        """
+        A simple module that applies torch.linalg.vector_norm to its input.
+        When ord is None it is not passed on, so the torch default order (2) applies.
+        """
+        super().__init__()
+        self.ord = ord
+        self.dim = dim
+        self.keepdim = keepdim
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.ord is None and self.dim is None:  # only forward the arguments that were set
+            return torch.linalg.vector_norm(x, keepdim=self.keepdim)
+        elif self.ord is None:
+            return torch.linalg.vector_norm(x, dim=self.dim, keepdim=self.keepdim)
+        elif self.dim is None:
+            return torch.linalg.vector_norm(x, ord=self.ord, keepdim=self.keepdim)
+        else:
+            return torch.linalg.vector_norm(
+                x, ord=self.ord, dim=self.dim, keepdim=self.keepdim
+            )
+
+
+test_modules = {  # test name -> (module under test, tuple of input tensors)
+    "default": (VectorNormModel(dim=1), (torch.rand(10, 4),)),
+    "ord1": (VectorNormModel(ord=1, dim=1), (torch.rand(10, 4),)),
+    "ord2": (VectorNormModel(ord=2, dim=1), (torch.rand(10, 20),)),
+    # Norm computed along the last dimension (dim=2) of a 3D tensor
+    "dim_3d": (VectorNormModel(dim=2), (torch.rand(4, 5, 6),)),
+}
+
+
+@common.parametrize("test_module", test_modules)
+def test_vector_norm_tosa_MI(test_module):
+    model, input_tensor = test_module
+
+    # We decompose LinalgVectorNorm before the quantize stage to have annotations
+    # with q/dq nodes. In the MI case, this operator will be decomposed
+    # by global decompositions.
+    aten_op = "torch.ops.aten.linalg_vector_norm.default"
+    # This op should not be found
+    exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
+
+    pipeline = TosaPipelineMI[input_t](model, input_tensor, aten_op, exir_op)
+
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1e-4, rtol=1e-4)
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+def test_vector_norm_tosa_BI(test_module):
+    model, input_tensor = test_module
+
+    # This op should not be found
+    exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
+
+    pipeline = TosaPipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_op_q_decomposed_q,
+        exir_op,
+        symmetric_io_quantization=True,
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.XfailIfNoCorstone300
+def test_vector_norm_u55_BI_fvp(test_module):
+    model, input_tensor = test_module
+
+    pipeline = EthosU55PipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_op_q_decomposed_q,
+        exir_op_q_decomposed,
+        run_on_fvp=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
+    pipeline.pop_stage("check_not.exir")  # NOTE(review): presumably the quantize op legitimately remains - confirm
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.XfailIfNoCorstone300
+def test_vector_norm_u85_BI_fvp(test_module):
+    model, input_tensor = test_module
+
+    # The op should be decomposed and annotated by DecomposeLinearVectorNormPass.
+    pipeline = EthosU85PipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_op_q_decomposed_q,
+        exir_op_q_decomposed,
+        run_on_fvp=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
+    pipeline.pop_stage("check_not.exir")  # NOTE(review): presumably the quantize op legitimately remains - confirm
+    pipeline.run()
diff --git a/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py b/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py
@@ -0,0 +1,91 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm._passes.decompose_linalg_vector_norm_pass import (
+    DecomposeLinearVectorNormPass,
+)
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
+
+input_t = Tuple[torch.Tensor]
+
+
+class VectorNormModel(torch.nn.Module):
+    """
+    A test module that wraps torch.linalg.vector_norm.
+    https://pytorch.org/docs/stable/generated/torch.linalg.vector_norm.html
+
+    Only order 1 or 2 is supported by the pass under test.
+    """
+
+    def __init__(self, ord: float = None, dim=None, keepdim: bool = False):
+        super().__init__()
+        self.ord = ord
+        self.dim = dim
+        self.keepdim = keepdim
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.ord is None and self.dim is None:  # only forward the arguments that were set
+            return torch.linalg.vector_norm(x, keepdim=self.keepdim)
+        elif self.ord is None:
+            return torch.linalg.vector_norm(x, dim=self.dim, keepdim=self.keepdim)
+        elif self.dim is None:
+            return torch.linalg.vector_norm(x, ord=self.ord, keepdim=self.keepdim)
+        else:
+            return torch.linalg.vector_norm(
+                x, ord=self.ord, dim=self.dim, keepdim=self.keepdim
+            )
+
+    def get_inputs(self) -> input_t:
+        return (torch.rand(4, 4),)  # one random 4x4 input tensor
+
+
+modules = {
+    # ord is left unset, so the default p=2 (L2 vector norm) is used, along dim 1
+    "default_p2": VectorNormModel(dim=1),
+    # p = 1: L1 norm along dim 1
+    "p1": VectorNormModel(ord=1, dim=1),
+}
+
+
+@common.parametrize("module", modules)
+def test_decompose_vector_norm_tosa_BI(module):
+    """
+    This test creates a PassPipeline that applies the DecomposeLinearVectorNormPass.
+    The expected edge ops after the pass depend on the norm order:
+      - p == 1: one ABS and one SUM.
+      - p == 2 (default): two POW (Tensor_Scalar) and one SUM.
+      - Other p: not covered here; `modules` only contains p == 1 and the default.
+    """
+    ord_val = module.ord if module.ord is not None else 2.0
+
+    if ord_val == 1:
+        ops_after_pass = {
+            "executorch_exir_dialects_edge__ops_aten_abs_default": 1,
+            "executorch_exir_dialects_edge__ops_aten_sum_dim_IntList": 1,
+        }
+    elif ord_val == 2:
+        ops_after_pass = {
+            "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
+            "executorch_exir_dialects_edge__ops_aten_sum_dim_IntList": 1,
+        }
+
+    pipeline = PassPipeline[input_t](
+        module,
+        module.get_inputs(),
+        # The op is decomposed in legalization aten -> edge, so we are not able to check ops before
+        ops_before_pass=None,
+        ops_not_before_pass=None,
+        ops_after_pass=ops_after_pass,
+        ops_not_after_pass=[
+            "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default",
+        ],
+        pass_list=[DecomposeLinearVectorNormPass],
+    )
+    pipeline.run()