Commit fd9eb28

Add helper functions for Q/DQ folding pass
Adds helper functions to retrieve QuantArgs from node.meta and cleans up the handling a bit by introducing the __eq__ operator for QuantArgs.

Signed-off-by: Per Åstrand <[email protected]>
Change-Id: I519a9a286a36a278f40ffb6c679192a54d9f940d
1 parent 2d39f78 commit fd9eb28

5 files changed, +77 -45 lines changed
backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 26 additions & 0 deletions
@@ -16,6 +16,32 @@
 from torch.fx import GraphModule, Node
 
 
+def get_input_qparams(node: Node) -> dict[int, QuantArgs]:
+    """
+    Get the input quantization parameters from a node, set by the 'FoldAndAnnotateQParamsPass'.
+    Raises a ValueError if the node doesn't have any parameters set.
+    """
+    if "input_qparams" not in node.meta.keys():
+        raise ValueError(f"No input quantization parameter found in node {node}")
+    input_qparams = cast(dict[int, QuantArgs], node.meta["input_qparams"])
+    if len(input_qparams) == 0:
+        raise ValueError(f"No input quantization parameter found in node {node}")
+    return input_qparams
+
+
+def get_output_qparams(node: Node) -> dict[int, QuantArgs]:
+    """
+    Get the output quantization parameters from a node, set by the 'FoldAndAnnotateQParamsPass'.
+    Raises a ValueError if the node doesn't have any parameters set.
+    """
+    if "output_qparams" not in node.meta.keys():
+        raise ValueError(f"No output quantization parameter found in node {node}")
+    output_qparams = cast(dict[int, QuantArgs], node.meta["output_qparams"])
+    if len(output_qparams) == 0:
+        raise ValueError(f"No output quantization parameter found in node {node}")
+    return output_qparams
+
+
 class FoldAndAnnotateQParamsPass(ExportPass):
     """
     A pass that walks the graph and removes any DQ and Q nodes before and after the target
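
As a usage illustration (not part of this commit), a downstream node visitor could consume the new helpers instead of reaching into node.meta directly. summarize_qparams is a hypothetical function; only the two imported helpers come from the file above.

from torch.fx import Node

from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
    get_input_qparams,
    get_output_qparams,
)


def summarize_qparams(node: Node) -> str:
    # Both helpers return dict[int, QuantArgs] keyed by input/output index and
    # raise ValueError when FoldAndAnnotateQParamsPass has not annotated the node.
    try:
        in_q = get_input_qparams(node)
        out_q = get_output_qparams(node)
    except ValueError:
        return f"{node}: no folded Q/DQ annotations"
    return f"{node}: {len(in_q)} quantized input(s), {len(out_q)} quantized output(s)"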

backends/arm/operators/op_add.py

Lines changed: 1 addition & 3 deletions
@@ -76,9 +76,7 @@ def define_node(
         if output.dtype == ts.DType.INT8:
             # Scale output back to 8 bit
             # pyre-ignore
-            tqutils.insert_rescale_node_back_to_int8(
-                tosa_graph, add_output, scale_back, node
-            )
+            tqutils.insert_rescale_op_to_int8(tosa_graph, add_output, scale_back, node)
 
 
 @register_node_visitor

backends/arm/operators/op_max.py

Lines changed: 14 additions & 21 deletions
@@ -5,11 +5,13 @@
 
 # pyre-unsafe
 
-from typing import cast, List
+from typing import List
 
 import executorch.backends.arm.tosa_quant_utils as tqutils
-
 import serializer.tosa_serializer as ts
+from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
+    get_input_qparams,
+)
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
@@ -38,30 +40,23 @@ def define_node(
     ) -> None:
         assert inputs[0].dtype == inputs[1].dtype
 
-        input_qparams = cast(dict[int, tqutils.QuantArgs], node.meta["input_qparams"])
-        min_output = output
-
+        max_output = output
         if inputs[0].dtype == ts.DType.INT8:
-            # insert RESCALEs to int32
-            x_scale = input_qparams[0].scale
-            x_zp = input_qparams[0].zp
-
-            y_scale = input_qparams[1].scale
-            y_zp = input_qparams[1].zp
-
+            input_qparams = get_input_qparams(node)
             assert (
-                x_zp == y_zp
-            ), "Different zp for inputs, MAX should be quantized with shared quantization!"
+                len(input_qparams) == 2
+            ), f"Both inputs need to have quantization information for {node}"
+            # insert RESCALEs to int32
             assert (
-                x_scale == y_scale
-            ), "Different scale for input, MAX should be quantized with shared quantization!"
+                input_qparams[0] == input_qparams[1]
+            ), "Both inputs must have the same quantization for MAX"
 
             operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
                 tosa_graph, inputs, node
             )
 
             output.shape = tosa_shape(output.shape, output.dim_order)
-            min_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
+            max_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
         else:
             operand_inputs = inputs
@@ -71,11 +66,9 @@ def define_node(
                 operand_inputs[0].name,
                 operand_inputs[1].name,
             ],
-            [min_output.name],
+            [max_output.name],
         )
 
         if output.dtype == ts.DType.INT8:
             # insert RESCALE from int32 back to int8
-            tqutils.insert_rescale_node_back_to_int8(
-                tosa_graph, min_output, scale_back, node
-            )
+            tqutils.insert_rescale_op_to_int8(tosa_graph, max_output, scale_back, node)
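
For intuition, here is a minimal numeric sketch (plain Python, independent of the TOSA serializer API) of the int8 MAX lowering above: subtract the shared zero point to move into a common int32 domain, take the elementwise maximum, then rescale to the output quantization. The values are invented, and the real pass folds these scale factors into RESCALE operators rather than computing in floating point.

scale, zp = 0.05, -3           # shared input quantization; the asserts above require this
out_scale, out_zp = 0.05, -3   # output quantization, kept identical for simplicity

x_q, y_q = [17, -40], [5, 60]                    # two int8-quantized operands
x32 = [q - zp for q in x_q]                      # rescale to int32: drop the zero point
y32 = [q - zp for q in y_q]
m32 = [max(a, b) for a, b in zip(x32, y32)]      # MAXIMUM in the int32 domain
out_q = [round(v * scale / out_scale) + out_zp for v in m32]  # rescale back to int8
assert out_q == [max(a, b) for a, b in zip(x_q, y_q)]  # elementwise max preserved: [17, 60]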

backends/arm/operators/op_min.py

Lines changed: 11 additions & 17 deletions
@@ -5,11 +5,14 @@
 
 # pyre-unsafe
 
-from typing import cast, List
+from typing import List
 
 import executorch.backends.arm.tosa_quant_utils as tqutils
 
 import serializer.tosa_serializer as ts
+from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
+    get_input_qparams,
+)
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
@@ -38,23 +41,16 @@ def define_node(
     ) -> None:
         assert inputs[0].dtype == inputs[1].dtype
 
-        input_qparams = cast(dict[int, tqutils.QuantArgs], node.meta["input_qparams"])
         min_output = output
-
        if inputs[0].dtype == ts.DType.INT8:
-            # insert RESCALEs to int32
-            x_scale = input_qparams[0].scale
-            x_zp = input_qparams[0].zp
-
-            y_scale = input_qparams[1].scale
-            y_zp = input_qparams[1].zp
-
+            input_qparams = get_input_qparams(node)
             assert (
-                x_zp == y_zp
-            ), "Different zp for inputs, MIN should be quantized with shared quantization!"
+                len(input_qparams) == 2
+            ), f"Both inputs need to have quantization information for {node}"
+            # insert RESCALEs to int32
             assert (
-                x_scale == y_scale
-            ), "Different scale for input, MIN should be quantized with shared quantization!"
+                input_qparams[0] == input_qparams[1]
+            ), "Both inputs must have the same quantization for MIN"
 
             operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
                 tosa_graph, inputs, node
@@ -76,6 +72,4 @@ def define_node(
 
         if output.dtype == ts.DType.INT8:
             # insert RESCALE from int32 back to int8
-            tqutils.insert_rescale_node_back_to_int8(
-                tosa_graph, min_output, scale_back, node
-            )
+            tqutils.insert_rescale_op_to_int8(tosa_graph, min_output, scale_back, node)

backends/arm/tosa_quant_utils.py

Lines changed: 25 additions & 4 deletions
@@ -57,14 +57,19 @@ def insert_rescale_ops_to_int32(
     the graph upstream for DQ nodes.
     """
 
+    from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
+        get_input_qparams,
+    )
+
     tensors = inputs.copy()
 
     # Reshape tensor according to TOSA dim order
     for tensor in tensors:
         dim_order = tensor.dim_order
         tensor.shape = [tensor.shape[i] for i in dim_order]
 
-    qargs = list(cast(dict[int, QuantArgs], node.meta["input_qparams"]).values())
+    input_qparams = get_input_qparams(node)
+    qargs = input_qparams.values()
 
     # Scale the int8 quantized input to a common scale in the integer
     # domain
@@ -84,7 +89,7 @@ def insert_rescale_ops_to_int32(
     return rescaled_nodes, min_scale
 
 
-def insert_rescale_node_back_to_int8(
+def insert_rescale_op_to_int8(
     tosa_graph: ts.TosaSerializer,
     last_tensor: TosaArg,
     scale: float,
@@ -102,9 +107,14 @@
     in the node meta dict as opposed to 'rescale_node_back_to_int8' which searches
     the graph downstream for Q nodes.
     """
-    assert len(node.meta["output_qparams"]) == 1
+    from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
+        get_output_qparams,
+    )
+
+    output_qparams = get_output_qparams(node)
+    assert len(output_qparams) == 1, "More than one output not supported"
 
-    qargs_out = cast(dict[int, QuantArgs], node.meta["output_qparams"])[0]
+    qargs_out = output_qparams[0]
     output_rescale_scale = scale / qargs_out.scale
 
     # Rescale Back to INT8
@@ -136,6 +146,17 @@ def quantize_value(self, x):
     def dequantize_value(self, qx: int) -> float:
         return (qx - self.zp) * self.scale
 
+    def __eq__(self, other):
+        if isinstance(other, QuantArgs):
+            return (
+                self.scale == other.scale
+                and self.zp == other.zp
+                and self.qmin == other.qmin
+                and self.qmax == other.qmax
+                and self.dtype == other.dtype
+            )
+        return False
+
     @classmethod
     def from_operator(cls, op, args):
         if op in dq_q_ops:
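
To make the new __eq__ semantics concrete, here is a self-contained stand-in for QuantArgs (the real class lives in tosa_quant_utils.py; the quantize_value body is an assumption, since only its signature appears in the diff, while dequantize_value mirrors the diff exactly).

class QuantArgsSketch:
    """Stand-in with the same fields and equality semantics as QuantArgs."""

    def __init__(self, scale: float, zp: int, qmin: int, qmax: int, dtype: str):
        self.scale, self.zp = scale, zp
        self.qmin, self.qmax = qmin, qmax
        self.dtype = dtype  # a torch dtype upstream; a string suffices here

    def __eq__(self, other):
        if isinstance(other, QuantArgsSketch):
            return (self.scale, self.zp, self.qmin, self.qmax, self.dtype) == (
                other.scale, other.zp, other.qmin, other.qmax, other.dtype
            )
        return False

    def quantize_value(self, x: float) -> int:
        # Assumed implementation: round, shift by the zero point, clamp to [qmin, qmax].
        return min(self.qmax, max(self.qmin, round(x / self.scale) + self.zp))

    def dequantize_value(self, qx: int) -> float:
        return (qx - self.zp) * self.scale


a = QuantArgsSketch(0.05, -3, -128, 127, "int8")
b = QuantArgsSketch(0.05, -3, -128, 127, "int8")
c = QuantArgsSketch(0.10, 0, -128, 127, "int8")
assert a == b   # identical params: the MIN/MAX shared-quantization assert passes
assert a != c   # different scale/zp: the assert would fire
assert a.dequantize_value(a.quantize_value(1.0)) == 1.0  # exact round trip at 1.0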
