
Commit af8f22b

Add decomposition of div op for ArmBackend
Implements a pass that decomposes aten.div into aten.reciprocal and aten.mul. This is done so that the Quantizer can get quantization annotations on the decomposed operators. Also adds infrastructure for passes in ArmQuantizer.

Signed-off-by: Erik Lundell <[email protected]>
Change-Id: Idd1698dc5fc82ab42b68094b405fb3a08804a45e
1 parent 03c78e1 commit af8f22b

File tree

backends/arm/operators/__init__.py
backends/arm/passes/arm_pass_manager.py
backends/arm/passes/decompose_div_pass.py
backends/arm/passes/scalars_to_attribute_pass.py
backends/arm/quantizer/arm_quantizer.py
backends/arm/quantizer/arm_quantizer_utils.py
backends/arm/quantizer/quantization_annotation/mul_annotator.py
backends/arm/test/ops/test_div.py

8 files changed, +152 -87 lines changed

backends/arm/operators/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -15,7 +15,6 @@
     op_cat,
     op_conv2d,
     op_dequant,
-    op_div,
     op_exp,
     op_full,
     op_get_item,

backends/arm/passes/arm_pass_manager.py

Lines changed: 10 additions & 0 deletions
@@ -17,10 +17,14 @@
 from executorch.backends.arm.passes.convert_split_to_slice import (
     ConvertSplitToSlicePass,
 )
+from executorch.backends.arm.passes.decompose_div_pass import DecomposeDivPass
 from executorch.backends.arm.passes.meandim_to_averagepool_pass import (
     ConvertMeanDimToAveragePool,
 )
 from executorch.backends.arm.passes.remove_clone_pass import RemoveClonePass
+from executorch.backends.arm.passes.scalars_to_attribute_pass import (
+    ScalarsToAttributePass,
+)
 from executorch.backends.arm.passes.size_adjust_conv2d_pass import SizeAdjustConv2DPass
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.pass_manager import PassManager
@@ -39,6 +43,7 @@ def transform_to_backend_pipeline(
         self.add_pass(RemoveClonePass())
         self.add_pass(ConvertExpandCopyToRepeatPass())
         self.add_pass(ConvertMeanDimToAveragePool())
+        self.add_pass(DecomposeDivPass())
         self.add_pass(ConvertSplitToSlicePass())
         for spec in compile_spec:
             if spec.key == "permute_memory_format":
@@ -47,3 +52,8 @@ def transform_to_backend_pipeline(
                     self.add_pass(AnnotateChannelsLastDimOrder())
 
         return self._transform(graph_module)
+
+    def transform_for_annotation_pipeline(self, graph_module: torch.fx.GraphModule):
+        self.add_pass(DecomposeDivPass())
+        self.add_pass(ScalarsToAttributePass())
+        return self._transform(graph_module)
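
The new transform_for_annotation_pipeline is what ArmQuantizer.transform_for_annotation now calls (see the arm_quantizer.py diff below). A minimal usage sketch, illustrative only, assuming graph_module is a torch.fx.GraphModule obtained from exporting the model:

from executorch.backends.arm.passes.arm_pass_manager import ArmPassManager

# DecomposeDivPass rewrites aten.div as reciprocal + mul, then
# ScalarsToAttributePass lifts remaining scalar operands into buffers,
# so the quantizer only sees tensor-tensor ops it can annotate.
transformed = ArmPassManager().transform_for_annotation_pipeline(
    graph_module=graph_module
)
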
backends/arm/passes/decompose_div_pass.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+# Copyright 2024 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+
+def get_div_decomposition(op) -> tuple:
+    """
+    Returns the (reciprocal_op, mul_op) pair, where the ops depend on whether
+    the div op is an exir_ops or a torch.ops.aten op.
+    """
+    if op == exir_ops.edge.aten.div.Tensor:
+        return (exir_ops.edge.aten.reciprocal.default, exir_ops.edge.aten.mul.Tensor)
+    if op == torch.ops.aten.div.Tensor:
+        return (torch.ops.aten.reciprocal.default, torch.ops.aten.mul.Tensor)
+    raise RuntimeError(f"Can't get div decomposition for op {op}")
+
+
+class DecomposeDivPass(ExportPass):
+    """
+    This pass decomposes div into a mul and a reciprocal node.
+
+    Example:
+        y = div(a, b)
+    Becomes:
+        x = reciprocal(b)
+        y = mul(a, x)
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in (exir_ops.edge.aten.div.Tensor, torch.ops.aten.div.Tensor):
+            return super().call_operator(op, args, kwargs, meta)
+
+        reciprocal_op, mul_op = get_div_decomposition(op)
+
+        numerator = args[0]
+        denominator = args[1]
+        reciprocal = super().call_operator(reciprocal_op, (denominator,), {}, meta)
+
+        return super().call_operator(mul_op, (numerator, reciprocal), {}, meta)
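
The pass relies on the identity a / b == a * reciprocal(b). A quick plain-PyTorch sanity check of that identity, illustrative only and not part of the commit:

import torch

a, b = torch.randn(4), torch.rand(4) + 0.5  # keep the denominator away from zero
assert torch.allclose(a / b, a * torch.reciprocal(b))
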
backends/arm/passes/scalars_to_attribute_pass.py

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+# Copyright 2024 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import cast, Union
+
+import torch
+from executorch.backends.arm.tosa_mapping import extract_tensor_meta
+
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix
+from torch.fx import GraphModule, Node
+
+
+class ScalarsToAttributePass(ExportPass):
+    """
+    For ops in 'targeted_ops', convert inputs that are scalar values
+    to attribute Nodes that output the same value.
+    """
+
+    targeted_ops = [
+        torch.ops.aten.add.Tensor,
+        torch.ops.aten.sub.Tensor,
+        torch.ops.aten.sub_.Tensor,
+        torch.ops.aten.mul.Tensor,
+        torch.ops.aten.div.Tensor,
+    ]
+
+    def call(self, graph_module: GraphModule) -> GraphModule:
+        for n in graph_module.graph.nodes:
+            n = cast(Node, n)
+            if n.op != "call_function" or n.target not in self.targeted_ops:
+                continue
+
+            biggest_rank = 1
+            for arg in n.args:
+                if isinstance(arg, Node):
+                    _, shape, _ = extract_tensor_meta(arg.meta)
+                    biggest_rank = max(biggest_rank, len(shape))
+
+            new_args = []
+            for arg in n.args:
+                if isinstance(arg, Node):
+                    new_args.append(arg)
+                    continue
+
+                prefix = "_tensor_constant_"
+                get_new_attr_name = get_new_attr_name_with_prefix(prefix)
+                tensor_constant_name = get_new_attr_name(graph_module)
+                float_tensor = torch.tensor(
+                    float(cast(Union[int, float], arg))
+                ).reshape((1,) * biggest_rank)
+                graph_module.register_buffer(tensor_constant_name, float_tensor)
+                fake_mode = n.meta["val"].fake_mode
+
+                with graph_module.graph.inserting_before(n):
+                    get_attr_node = graph_module.graph.create_node(
+                        "get_attr", tensor_constant_name, (), {}
+                    )
+                    get_attr_node.meta["val"] = fake_mode.from_tensor(
+                        float_tensor, static_shapes=True
+                    )
+                    new_args.append(get_attr_node)
+            n.args = tuple(new_args)
+
+        graph_module.recompile()
+        return PassResult(graph_module, True)
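
Illustrative before/after of the pass on a rank-2 operand (hypothetical node names, not part of the commit):

# Before: the scalar rides along as a plain Python argument.
#     mul = torch.ops.aten.mul.Tensor(x, 2.0)
# After: the scalar lives in a registered buffer reshaped to (1, 1),
# matching the biggest operand rank so that it broadcasts, and is read
# through a get_attr node.
#     _tensor_constant_0 = self._tensor_constant_0
#     mul = torch.ops.aten.mul.Tensor(x, _tensor_constant_0)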

backends/arm/quantizer/arm_quantizer.py

Lines changed: 3 additions & 2 deletions
@@ -19,10 +19,10 @@
 
 import torch
 import torch.nn.functional as F
+from executorch.backends.arm.passes.arm_pass_manager import ArmPassManager
 
 from executorch.backends.arm.quantizer import arm_quantizer_utils
 from executorch.backends.arm.quantizer.arm_quantizer_utils import (
-    convert_scalars_to_attrs,
     mark_nodes_as_annotated,
     propagate_annotation,
 )
@@ -317,7 +317,8 @@ def transform_for_annotation(self, model: GraphModule) -> GraphModule:
         """An initial pass for transforming the graph to prepare it for annotation.
         Currently transforms scalar values to tensor attributes.
         """
-        return convert_scalars_to_attrs(model)
+
+        return ArmPassManager().transform_for_annotation_pipeline(graph_module=model)
 
     def annotate(self, model: GraphModule) -> GraphModule:
         """Performs the quantization annotation on the graph.

backends/arm/quantizer/arm_quantizer_utils.py

Lines changed: 1 addition & 41 deletions
@@ -12,12 +12,11 @@
 #
 
 import operator
-from typing import Callable, cast, List, Union
+from typing import Callable, cast, List
 
 import torch
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
 from torch._subclasses import FakeTensor
-from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix
 
 from torch.ao.quantization.quantizer import (
     QuantizationAnnotation,
@@ -196,42 +195,3 @@ def propagate_annotation(model: GraphModule) -> None:
         output_qspec=shared_qspec,
         _annotated=True,
     )
-
-
-def convert_scalars_to_attrs(model: GraphModule) -> GraphModule:
-    """For ops in 'targeted_ops', convert inputs that are scalar values
-    to attribute Nodes that output the same value.
-    #TODO Seems like this should be a pass.
-    """
-    targeted_ops = [
-        torch.ops.aten.add.Tensor,
-        torch.ops.aten.sub.Tensor,
-        torch.ops.aten.mul.Tensor,
-    ]
-    for n in model.graph.nodes:
-        n = cast(Node, n)
-        if n.op != "call_function" or n.target not in targeted_ops:
-            continue
-        args = list(n.args)
-        new_args = []
-        for i in range(len(args)):
-            if isinstance(args[i], Node):
-                new_args.append(args[i])
-                continue
-            prefix = "_tensor_constant_"
-            get_new_attr_name = get_new_attr_name_with_prefix(prefix)
-            tensor_constant_name = get_new_attr_name(model)
-            float_tensor = torch.tensor(float(cast(Union[int, float], args[i])))
-            model.register_buffer(tensor_constant_name, float_tensor)
-            fake_mode = n.meta["val"].fake_mode
-            with model.graph.inserting_before(n):
-                get_attr_node = model.graph.create_node(
-                    "get_attr", tensor_constant_name, (), {}
-                )
-                get_attr_node.meta["val"] = fake_mode.from_tensor(
-                    float_tensor, static_shapes=True
-                )
-            new_args.append(get_attr_node)
-        n.args = tuple(new_args)
-    model.recompile()
-    return model

backends/arm/quantizer/quantization_annotation/mul_annotator.py

Lines changed: 7 additions & 12 deletions
@@ -4,19 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# pyre-unsafe
-
-import itertools
-import operator
 from typing import Callable, List, Optional
 
 import torch
+import torch.fx
 from executorch.backends.arm.quantizer import arm_quantizer_utils
 from executorch.backends.arm.quantizer.quantization_annotation import register_annotator
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
 from torch.ao.quantization.quantizer import QuantizationAnnotation
 from torch.fx import Node
-from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
 
 
 @register_annotator("mul")
@@ -25,14 +21,13 @@ def _annotate_mul(
     quantization_config: QuantizationConfig,
     filter_fn: Optional[Callable[[Node], bool]] = None,
 ) -> Optional[List[List[Node]]]:
-    mul_partitions = get_source_partitions(
-        gm.graph, ["mul", "mul_", operator.mul, torch.mul, operator.imul], filter_fn
-    )
-    mul_partitions = list(itertools.chain.from_iterable(mul_partitions.values()))
+
     annotated_partitions = []
-    for mul_partition in mul_partitions:
-        annotated_partitions.append(mul_partition.nodes)
-        mul_node = mul_partition.output_nodes[0]
+    for node in gm.graph.nodes:
+        if node.target not in (torch.ops.aten.mul.Tensor,):
+            continue
+        mul_node = node
+        annotated_partitions.append([mul_node])
         if arm_quantizer_utils.is_annotated(mul_node):
             continue
 
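Because transform_for_annotation now decomposes div and lifts scalars before annotation runs, mul ops reliably appear as plain aten.mul.Tensor call_function nodes, so source partitions are no longer needed. A minimal sketch of the matching idiom (hypothetical helper, not in the commit):

import torch
from torch.fx import GraphModule

def find_mul_nodes(gm: GraphModule):
    # Direct node matching replaces get_source_partitions: every mul of
    # interest is an aten.mul.Tensor call_function node at this point.
    return [
        n
        for n in gm.graph.nodes
        if n.op == "call_function" and n.target == torch.ops.aten.mul.Tensor
    ]
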
backends/arm/test/ops/test_div.py

Lines changed: 17 additions & 31 deletions
@@ -28,8 +28,8 @@
         ),
         (
             "op_div_rank1_rand",
-            torch.rand(5),
-            torch.rand(5),
+            torch.rand(5) * 5,
+            torch.rand(5) * 5,
             None,
         ),
         (
@@ -70,8 +70,8 @@
         ),
         (
             "op_div_rank4_large_randn",
-            200 * torch.randn(5, 10, 25, 20),
-            torch.rand(5, 10, 25, 20),
+            200 * torch.randn(5, 10, 25, 20) + 1,
+            torch.rand(5, 10, 25, 20) + 1,
             None,
         ),
     ]
@@ -81,26 +81,18 @@ class TestDiv(unittest.TestCase):
     """Tests division"""
 
     class Div(torch.nn.Module):
-        def __init__(
-            self,
-            input_: Union[torch.Tensor, torch.types.Number],
-            other_: Union[torch.Tensor, torch.types.Number],
-            rounding_mode: Optional[str] = None,
-        ):
-            super().__init__()
-            self.rounding_mode = rounding_mode
 
         def forward(
             self,
             input_: Union[torch.Tensor, torch.types.Number],
             other_: Union[torch.Tensor, torch.types.Number],
             rounding_mode: Optional[str] = None,
         ):
-            if self.rounding_mode is None:
+            if rounding_mode is None:
                 return torch.div(input=input_, other=other_)
             else:
                 return torch.div(
-                    input=input_, other=other_, rounding_mode=self.rounding_mode
+                    input=input_, other=other_, rounding_mode=rounding_mode
                 )
 
     def _test_div_tosa_MI_pipeline(
@@ -133,13 +125,15 @@ def _test_div_tosa_BI_pipeline(
             )
             .quantize()
             .export()
-            .check_count({"torch.ops.aten.div.Tensor": 1})
+            .check_count(
+                {"torch.ops.aten.reciprocal.default": 1, "torch.ops.aten.mul.Tensor": 1}
+            )
             .check(["torch.ops.quantized_decomposed"])
             .to_edge()
             .partition()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
+            .run_method_and_compare_outputs(inputs=test_data, atol=1, rtol=0.1)
         )
 
     def _test_div_u55_BI_pipeline(
153147
)
154148
.quantize()
155149
.export()
156-
.check_count({"torch.ops.aten.div.Tensor": 1})
150+
.check_count(
151+
{"torch.ops.aten.reciprocal.default": 1, "torch.ops.aten.mul.Tensor": 1}
152+
)
157153
.check(["torch.ops.quantized_decomposed"])
158154
.to_edge()
159155
.partition()
@@ -170,14 +166,9 @@ def test_div_tosa_MI(
         rounding_mode: Optional[str] = None,
     ):
         test_data = (input_, other_)
-        self._test_div_tosa_MI_pipeline(
-            self.Div(input_, other_, rounding_mode=rounding_mode), test_data
-        )
+        self._test_div_tosa_MI_pipeline(self.Div(), test_data)
 
-    # Expected to fail since ArmQuantizer cannot quantize a Div layer
-    # TODO(MLETORCH-129)
     @parameterized.expand(test_data_suite)
-    @unittest.expectedFailure
     def test_div_tosa_BI(
         self,
         test_name: str,
@@ -187,12 +178,9 @@ def test_div_tosa_BI(
     ):
 
         test_data = (input_, other_)
-        self._test_div_tosa_BI_pipeline(
-            self.Div(input=input_, other=other_, rounding_mode=rounding_mode), test_data
-        )
+        self._test_div_tosa_BI_pipeline(self.Div(), test_data)
 
-    # Expected to fail since ArmQuantizer cannot quantize a Div layer
-    # TODO(MLETORCH-129)
+    # Fails due to Vela error.
     @parameterized.expand(test_data_suite)
     @unittest.expectedFailure
     def test_div_u55_BI(
def test_div_u55_BI(
@@ -203,6 +191,4 @@ def test_div_u55_BI(
         rounding_mode: Optional[str] = None,
     ):
         test_data = (input_, other_)
-        self._test_div_u55_BI_pipeline(
-            self.Div(input=input_, other=other_, rounding_mode=rounding_mode), test_data
-        )
+        self._test_div_u55_BI_pipeline(self.Div(), test_data)
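
The shifted test inputs (keeping the denominator away from zero) and the looser atol/rtol track a property of the decomposition: the reciprocal's dynamic range sets the quantization scale, so denominators near zero degrade accuracy everywhere. A rough illustration, assuming a naive symmetric int8 scheme rather than the exact quantizer used:

import torch

def quant_err(b):
    recip = torch.reciprocal(b)
    scale = recip.abs().max() / 127      # one scale for the whole tensor
    q = torch.round(recip / scale) * scale
    return (q - recip).abs().max()

print(quant_err(torch.tensor([0.01, 0.5, 5.0])))  # large: scale set by 1/0.01
print(quant_err(torch.tensor([1.0, 1.5, 2.0])))   # small: 1/b stays in [0.5, 1]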
