Skip to content

Commit bcbc4c6

Browse files
committed
Add ADD to qdq pass handling
Signed-off-by: Per Åstrand <[email protected]> Change-Id: I9230209ed3d6cc0b5ec7a35512248648bb8380ee
1 parent 70f95d0 commit bcbc4c6

File tree

2 files changed

+29
-25
lines changed

2 files changed

+29
-25
lines changed

backends/arm/_passes/arm_pass_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def transform_to_backend_pipeline(
8989
[
9090
exir_ops.edge.aten.minimum.default,
9191
exir_ops.edge.aten.maximum.default,
92+
exir_ops.edge.aten.add.Tensor,
9293
]
9394
)
9495
)

backends/arm/operators/op_add.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import executorch.backends.arm.tosa_utils as tutils
1212

1313
import serializer.tosa_serializer as ts
14-
import torch
1514
from executorch.backends.arm.operators.node_visitor import (
1615
NodeVisitor,
1716
register_node_visitor,
@@ -41,33 +40,27 @@ def define_node(
4140
output: TosaArg,
4241
is_quant_node: bool,
4342
) -> None:
44-
input_nodes = tutils.get_two_inputs(node)
45-
46-
if not is_quant_node and not all(
47-
tensor.meta["val"].dtype in (torch.int8, torch.int32)
48-
for tensor in input_nodes
49-
):
50-
raise RuntimeError(
51-
f"Unexpected non quantized {AddVisitor_080_BI.target} node."
52-
)
53-
54-
needs_rescale = not (
55-
all(tensor.meta["val"].dtype == torch.int32 for tensor in input_nodes)
56-
and node.meta["val"].dtype == torch.int32
57-
)
58-
59-
if needs_rescale:
60-
# Rescale inputs to 32 bit
61-
rescaled_inputs, scale = tqutils.rescale_nodes_to_int32(
62-
input_nodes, tosa_graph
43+
# Specification (0.80.0) states that input and output types
44+
# should all be the same
45+
assert inputs[0].dtype == inputs[1].dtype == output.dtype
46+
# Handle int8 (quantized) and int32
47+
assert inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]
48+
49+
if inputs[0].dtype == ts.DType.INT8:
50+
rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
51+
tosa_graph, inputs, node
6352
)
53+
else:
54+
# inputs[0].dtype == ts.DType.INT32
55+
# Non quantized input, natively supported by TOSA.ADD
56+
rescaled_inputs = inputs
6457

65-
# Prepare add output tensor
58+
if output.dtype == ts.DType.INT8:
6659
broadcasted_shape = tutils.tosa_shape(output.shape, output.dim_order)
6760
add_output = tosa_graph.addIntermediate(broadcasted_shape, ts.DType.INT32)
6861
else:
62+
# output.dtype == ts.DType.INT32
6963
add_output = output
70-
rescaled_inputs = inputs
7164

7265
# Do the INT32 Add
7366
tosa_graph.addOperator(
@@ -80,10 +73,12 @@ def define_node(
8073
None,
8174
)
8275

83-
if needs_rescale:
76+
if output.dtype == ts.DType.INT8:
8477
# Scale output back to 8 bit
8578
# pyre-ignore
86-
tqutils.rescale_node_back_to_int8(node, add_output, scale, tosa_graph)
79+
tqutils.insert_rescale_node_back_to_int8(
80+
tosa_graph, add_output, scale_back, node
81+
)
8782

8883

8984
@register_node_visitor
@@ -105,11 +100,19 @@ def define_node(
105100
output: TosaArg,
106101
is_quant_node: bool,
107102
) -> None:
108-
if is_quant_node:
103+
# Specification (0.80.0) states that input and output types
104+
# should all be the same
105+
assert inputs[0].dtype == inputs[1].dtype == output.dtype
106+
107+
if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
109108
# Call the inherited define_node for handling integers
110109
super().define_node(node, tosa_graph, inputs, output, is_quant_node)
111110
else:
112111
# FP32 Add lowering
112+
assert inputs[0].dtype == ts.DType.FP32
113+
assert output.dtype == ts.DType.FP32
114+
115+
# MI lowering
113116
tosa_graph.addOperator(
114117
TosaOp.Op().ADD,
115118
[inputs[0].name, inputs[1].name],

0 commit comments

Comments
 (0)