Revert "Add full operator to fold dq/q handling" #7351

Merged · 10 commits · Dec 18, 2024
18 changes: 0 additions & 18 deletions backends/arm/_passes/arm_pass_manager.py

@@ -29,10 +29,6 @@
     DecomposeSoftmaxesPass,
 )
 from executorch.backends.arm._passes.decompose_var_pass import DecomposeVarPass
-from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
-    FoldAndAnnotateQParamsPass,
-    QuantizeFullArgument,
-)
 from executorch.backends.arm._passes.keep_dims_false_to_squeeze_pass import (
     KeepDimsFalseToSqueezePass,
 )

@@ -54,7 +50,6 @@
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
 from executorch.exir.backend.compile_spec_schema import CompileSpec
-from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_manager import PassManager

@@ -85,19 +80,6 @@ def transform_to_backend_pipeline(
         self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSoftmaxesPass())
         self.add_pass(DecomposeLinearPass())
-        self.add_pass(QuantizeFullArgument())
-        self.add_pass(
-            FoldAndAnnotateQParamsPass(
-                [
-                    exir_ops.edge.aten.minimum.default,
-                    exir_ops.edge.aten.maximum.default,
-                    exir_ops.edge.aten.add.Tensor,
-                    exir_ops.edge.aten.avg_pool2d.default,
-                    exir_ops.edge.aten.convolution.default,
-                    exir_ops.edge.aten.full.default,
-                ]
-            )
-        )
         for spec in compile_spec:
             if spec.key == "permute_memory_format":
                 memory_format = spec.value.decode()
181 changes: 0 additions & 181 deletions backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

This file was deleted.
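For context, the deleted pass folded explicit dequantize/quantize (dq/q) ops into quantization-parameter annotations on the surrounding node's meta. Below is a toy sketch of that general idea on a hypothetical mini-IR; it is not the deleted pass's actual FX implementation, just the folding concept:

```python
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class Node:
    op: str                                      # e.g. "placeholder", "dq", "add", "q"
    inputs: list["Node"] = field(default_factory=list)
    qparams: Optional[tuple] = None              # (scale, zero_point) on q/dq nodes
    meta: dict = field(default_factory=dict)

def fold_qdq(node: Node) -> Node:
    """Fold dq producers and a q consumer into meta annotations on `node`."""
    if node.op == "q":
        # Drop the quantize node; stamp its qparams onto the producer.
        inner = fold_qdq(node.inputs[0])
        inner.meta["output_qparams"] = node.qparams
        return inner
    # Record per-input qparams, then bypass the dequantize nodes.
    node.meta["input_qparams"] = [
        n.qparams if n.op == "dq" else None for n in node.inputs
    ]
    node.inputs = [
        fold_qdq(n.inputs[0] if n.op == "dq" else n) for n in node.inputs
    ]
    return node

# dq(x) + dq(y) -> q collapses to a single add annotated with its qparams.
x, y = Node("placeholder"), Node("placeholder")
out = Node("q", [Node("add", [Node("dq", [x], (0.1, 0)), Node("dq", [y], (0.2, 3))])], (0.05, -1))
add = fold_qdq(out)
assert add.op == "add" and add.meta["output_qparams"] == (0.05, -1)
assert add.meta["input_qparams"] == [(0.1, 0), (0.2, 3)]
```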

2 changes: 0 additions & 2 deletions backends/arm/operator_support/tosa_supported_operators.py

@@ -94,8 +94,6 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool:
             exir_ops.edge.aten.sigmoid.default,
             exir_ops.edge.aten.mean.dim,
             exir_ops.edge.aten.mm.default,
-            exir_ops.edge.aten.minimum.default,
-            exir_ops.edge.aten.maximum.default,
             exir_ops.edge.aten.repeat.default,
             exir_ops.edge.aten.reciprocal.default,
             exir_ops.edge.aten.relu.default,
2 changes: 0 additions & 2 deletions backends/arm/operators/__init__.py

@@ -19,9 +19,7 @@
     op_get_item,
     op_hardtanh,
     op_log,
-    op_max,
     op_max_pool2d,
-    op_min,
     op_mm,
     op_mul,
     op_permute,
51 changes: 25 additions & 26 deletions backends/arm/operators/op_add.py

@@ -11,6 +11,7 @@
 import executorch.backends.arm.tosa_utils as tutils

 import serializer.tosa_serializer as ts
+import torch
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,

@@ -40,27 +41,33 @@ def define_node(
         output: TosaArg,
         is_quant_node: bool,
     ) -> None:
-        # Specification (0.80) states that input and output types
-        # should all be the same
-        assert inputs[0].dtype == inputs[1].dtype == output.dtype
-        # Handle int8 (quantized) and int32
-        assert inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]
-
-        if inputs[0].dtype == ts.DType.INT8:
-            rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
-                tosa_graph, inputs, node
+        input_nodes = tutils.get_two_inputs(node)
+
+        if not is_quant_node and not all(
+            tensor.meta["val"].dtype in (torch.int8, torch.int32)
+            for tensor in input_nodes
+        ):
+            raise RuntimeError(
+                f"Unexpected non quantized {AddVisitor_080_BI.target} node."
             )
-        else:
-            # input[0].dtype == ts.DType.INT32
-            # Non quantized input, natively support by TOSA.ADD
-            rescaled_inputs = inputs
-
-        if output.dtype == ts.DType.INT8:
+
+        needs_rescale = not (
+            all(tensor.meta["val"].dtype == torch.int32 for tensor in input_nodes)
+            and node.meta["val"].dtype == torch.int32
+        )
+
+        if needs_rescale:
+            # Rescale inputs to 32 bit
+            rescaled_inputs, scale = tqutils.rescale_nodes_to_int32(
+                input_nodes, tosa_graph
+            )
+
             # Prepare add output tensor
             broadcasted_shape = tutils.tosa_shape(output.shape, output.dim_order)
             add_output = tosa_graph.addIntermediate(broadcasted_shape, ts.DType.INT32)
         else:
-            # output.dtype == ts.DType.INT32
             add_output = output
+            rescaled_inputs = inputs

         # Do the INT32 Add
         tosa_graph.addOperator(

@@ -73,10 +80,10 @@
             None,
         )

-        if output.dtype == ts.DType.INT8:
+        if needs_rescale:
             # Scale output back to 8 bit
             # pyre-ignore
-            tqutils.insert_rescale_op_to_int8(tosa_graph, add_output, scale_back, node)
+            tqutils.rescale_node_back_to_int8(node, add_output, scale, tosa_graph)

@@ -98,19 +105,11 @@ def define_node(
         output: TosaArg,
         is_quant_node: bool,
     ) -> None:
-        # Specification (0.80) states that input and output types
-        # should all be the same
-        assert inputs[0].dtype == inputs[1].dtype == output.dtype
-
-        if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
+        if is_quant_node:
             # Call the inherited define_node for handling integers
             super().define_node(node, tosa_graph, inputs, output, is_quant_node)
         else:
-            # FP32 Add lowering
-            assert inputs[0].dtype == ts.DType.FP32
-            assert output.dtype == ts.DType.FP32
-
+            # MI lowering
             tosa_graph.addOperator(
                 TosaOp.Op().ADD,
                 [inputs[0].name, inputs[1].name],
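For readers outside the Arm backend: the restored BI lowering follows the usual integer-add pattern, rescale both quantized operands into a shared int32 domain, emit the INT32 ADD, then rescale the result back to int8. Below is a toy numpy sketch of that arithmetic (zero-points omitted for brevity; the real `tqutils` helpers do this symbolically in the TOSA graph rather than on concrete arrays):

```python
import numpy as np

def quantized_add(xq, x_scale, yq, y_scale, out_scale):
    """Toy int8 add via int32, mirroring rescale-to-int32 + ADD + rescale-back."""
    # Rescale inputs to 32 bit, expressing both in a common (larger) scale.
    common = max(x_scale, y_scale)
    x32 = np.round(xq.astype(np.int32) * (x_scale / common)).astype(np.int32)
    y32 = np.round(yq.astype(np.int32) * (y_scale / common)).astype(np.int32)
    acc = x32 + y32  # the INT32 ADD emitted into the graph
    # Scale output back to 8 bit.
    out = np.round(acc * (common / out_scale))
    return np.clip(out, -128, 127).astype(np.int8)

xq = np.array([10, -20], dtype=np.int8)   # represents [1.0, -2.0] at scale 0.1
yq = np.array([30, 40], dtype=np.int8)    # represents [3.0, 4.0] at scale 0.1
print(quantized_add(xq, 0.1, yq, 0.1, 0.2))  # [20, 10] == [4.0, 2.0] at scale 0.2
```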