Remove hard-coded argument types

per · per · commit 9b723140b8d7 · 2024-07-04T09:37:47.000+02:00
Move away from implicitly assuming arguments are torch.int8 and
figure out the type from the quantization nodes instead.
This is done to prepare for breaking up the TOSA conversion and
serialization into separate parts.

Signed-off-by: Per Åstrand <per.astrand@arm.com>
Change-Id: Id88ef8f264e6af8e90a92a00fca13cdbcc857bab
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
@@ -17,8 +17,8 @@
 from executorch.backends.arm.arm_vela import vela_compile
 from executorch.backends.arm.operators.node_visitor import get_node_visitors
 from executorch.backends.arm.operators.op_placeholder import process_placeholder
-from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.backends.arm.tosa_quant_utils import is_quant_node
+from executorch.backends.arm.tosa_mapping import map_dtype, TosaArg
+from executorch.backends.arm.tosa_quant_utils import get_quant_node_dtype, is_quant_node
 from executorch.backends.arm.tosa_utils import (
     dbg_fail,
     dbg_tosa_dump,
@@ -280,7 +280,11 @@ def preprocess(  # noqa: C901
                         if is_permute_node_before_addmm(node)
                         else output.shape
                     ),
-                    ts.DType.INT8 if is_quant_node(node) else output.dtype,
+                    (
+                        map_dtype(get_quant_node_dtype(node))
+                        if is_quant_node(node)
+                        else output.dtype
+                    ),
                 )
 
                 # Visiting each Node
diff --git a/backends/arm/operators/op_placeholder.py b/backends/arm/operators/op_placeholder.py
@@ -8,6 +8,7 @@
 import torch
 from executorch.backends.arm.tosa_mapping import TosaArg
 from executorch.backends.arm.tosa_quant_utils import (
+    get_quant_arg_dtype,
     get_quant_node_args,
     is_quant_arg,
     q_op,
@@ -166,7 +167,7 @@ def process_placeholder(
         tensor = ts.TosaSerializerTensor(
             inputs[0].name,
             input_shape,
-            ts.DType.INT8 if is_quant_arg(node) else inputs[0].dtype,
+            get_quant_arg_dtype(node) if is_quant_arg(node) else inputs[0].dtype,
             data=None,
             placeholderFilename=inputs[0].name + ".npy",
         )
diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py
@@ -10,7 +10,7 @@
 
 import serializer.tosa_serializer as ts
 import torch.fx
-from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_mapping import map_dtype, TosaArg
 from executorch.exir.dialects._ops import ops as exir_ops
 from serializer.tosa_serializer import TosaOp, TosaSerializerTensor
 
@@ -45,11 +45,41 @@ def is_quant_node(node: torch.fx.Node):
     )
 
 
+def get_quant_node_dtype(node: torch.fx.Node):
+    """Return the dtype of the quantization that applies to ``node``.
+
+    Raises RuntimeError if no q/dq op is reachable via first users.
+    """
+    if "tosa" in node.target.__name__:
+        return node.meta["val"].dtype
+
+    # Start at node itself and follow the first user of each node until a
+    # q/dq op is found; its args[5] is returned as the dtype.
+    current = node
+    while current.target not in dq_q_ops:
+        users = list(current.users)
+        if not users:
+            raise RuntimeError("No quantized node found in graph")
+        current = users[0]
+    return current.args[5]
+
+
 def is_quant_arg(arg):
     consumer_node = list(arg.users)[0]
     return consumer_node.target == q_op
 
 
+def get_quant_arg_dtype(node: torch.fx.Node):
+    """Return map_dtype() of args[5] of the quantize op consuming ``node``."""
+    consumer_node = list(node.users)[0]
+
+    # Fail loudly instead of silently returning None when the first user
+    # is not a quantize op.
+    if consumer_node.target != q_op:
+        raise RuntimeError("Quantization argument not found")
+    return map_dtype(consumer_node.args[5])
+
+
 def get_quant_node_args(node: torch.fx.Node):
     """
     Get the quantization parameters from a quant node.