Replace the Matmul-based lowering of Linear with Conv2d (#1336)

Jerry-Ge · facebook-github-bot · commit 99f912ac8475 · 2023-12-07T17:11:17.000-08:00
Summary: - The existing Vela compiler doesn't support Matmul, and TOSA.Fully_Connected will be deprecated, so Linear is now lowered to Conv2d. - Also add support for linear layers with rank > 2, plus tests. Pull Request resolved: #1336 Reviewed By: cccclai Differential Revision: D51922063 Pulled By: digantdesai fbshipit-source-id: a8ad11f170911c1543fe88fd2ef1a1356ac859c3
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
@@ -19,7 +19,11 @@
 from executorch.backends.arm.operators.op_placeholder import process_placeholder
 from executorch.backends.arm.tosa_mapping import TosaArg
 from executorch.backends.arm.tosa_quant_utils import isQuantNode
-from executorch.backends.arm.tosa_utils import dbg_fail, dbg_tosa_dump
+from executorch.backends.arm.tosa_utils import (
+    dbg_fail,
+    dbg_tosa_dump,
+    is_permute_node_before_addmm,
+)
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch._export.exported_program import ExportedProgram
@@ -74,7 +78,9 @@ def preprocess(  # noqa: C901
                 # Add output to TOSA graph
                 tosa_graph.currRegion.currBasicBlock.addTensor(
                     output.name,
-                    output.shape,
+                    inputs[0].shape
+                    if is_permute_node_before_addmm(node)
+                    else output.shape,
                     ts.DType.INT8 if is_quant_node else output.dtype,
                 )
 
diff --git a/backends/arm/operators/op_addmm.py b/backends/arm/operators/op_addmm.py
@@ -16,7 +16,9 @@
     computeMultiplierAndShift,
     getQuantNodeArgs,
 )
-from executorch.backends.arm.tosa_utils import promote_shape
+
+from executorch.backends.arm.tosa_utils import buildReshape
+from executorch.exir.dialects._ops import ops as exir_ops
 from serializer.tosa_serializer import TosaOp
 
 
@@ -37,69 +39,89 @@ def define_node(
     ) -> None:
         bias, input, weight = inputs
 
-        output_dtype = ts.DType.INT8 if is_quant_node else output.dtype
+        N = input.shape[0]
+        input_channels = input.shape[1]
+        output_channels = weight.shape[1]
 
-        # Reshape input, weight, bias tensors
-        input_reshape_res = promote_shape(
-            tosa_graph, input, (1,) + input.shape, output_dtype
-        )
-        weight_reshape_res = promote_shape(
-            tosa_graph, weight, (1,) + weight.shape, output_dtype
+        input_new_shape = (N, 1, 1, input_channels)
+        input_reshaped = tosa_graph.addIntermediate(
+            input_new_shape,
+            ts.DType.INT8 if is_quant_node else input.dtype,
         )
 
-        bias_dtype = ts.DType.INT32 if is_quant_node else output.dtype
-        bias_reshape_res = promote_shape(
-            tosa_graph,
-            bias,
-            (
-                1,
-                1,
-            )
-            + bias.shape,
-            bias_dtype,
-        )
+        buildReshape(tosa_graph, input.name, input_new_shape, input_reshaped.name)
 
-        # Add dummy batch 1 to mm_shape
-        mm_shape = (1, input.shape[0], weight.shape[1])
-        # Define Intermediate tensor for MatMul res
-        mm_res = tosa_graph.addIntermediate(
-            mm_shape, ts.DType.INT32 if is_quant_node else output_dtype
+        weight_new_shape = (output_channels, 1, 1, input_channels)
+        weight_reshaped = tosa_graph.addIntermediate(
+            weight_new_shape,
+            ts.DType.INT8 if is_quant_node else weight.dtype,
         )
 
-        # Add MatMulOp
-        attr_matmul = ts.TosaSerializerAttribute()
-        a_zp, b_zp = (-128, 0) if is_quant_node else (0, 0)
-        attr_matmul.MatMulAttribute(a_zp, b_zp)
-        tosa_graph.addOperator(
-            TosaOp.Op().MATMUL,
-            [input_reshape_res.name, weight_reshape_res.name],
-            [mm_res.name],
-            attr_matmul,
+        buildReshape(tosa_graph, weight.name, weight_new_shape, weight_reshaped.name)
+
+        # Get the attributes of convolution.
+        attr = ts.TosaSerializerAttribute()
+        pad_attr = [0, 0, 0, 0]
+        stride_attr = [1, 1]
+        dilation_attr = [1, 1]
+
+        input_zp = -128 if is_quant_node else 0
+        attr.ConvAttribute(
+            pad=pad_attr,
+            stride=stride_attr,
+            dilation=dilation_attr,
+            input_zp=input_zp,
+            weight_zp=0,
+            local_bound=False,
         )
 
-        # Add AddOp
-        add_res = tosa_graph.addIntermediate(
-            mm_shape, ts.DType.INT32 if is_quant_node else output_dtype
+        conv2d_output_shape = (N, 1, 1, output_channels)
+        conv2d_res = tosa_graph.addIntermediate(
+            conv2d_output_shape,
+            ts.DType.INT32 if is_quant_node else output.dtype,
         )
 
+        # U55 doesn't support tosa.matmul and tosa.fully_connected will be deprecated
+        # TOSA Conv2d input is NHWC and weights are in OHWI
         tosa_graph.addOperator(
-            TosaOp.Op().ADD,
-            [bias_reshape_res.name, mm_res.name],
-            [add_res.name],
-            None,
+            TosaOp.Op().CONV2D,
+            [
+                input_reshaped.name,
+                weight_reshaped.name,
+                bias.name,
+            ],
+            [conv2d_res.name],
+            attr,
         )
 
+        result_shape = (N, output_channels)
+
         if is_quant_node:
             # Read inputs' parent nodes
-            #
             _, input_node, weight_node = node.all_input_nodes
-            input_scale, _ = getQuantNodeArgs(input_node)
+
+            # rank > 2 linear layer
+            if input_node.target == exir_ops.edge.aten.view_copy.default:
+                quant_node = input_node.all_input_nodes[0]
+                input_scale, _ = getQuantNodeArgs(quant_node)
+                consumer_node = list(node.users)[0]
+                consumer_consumer_node = list(consumer_node.users)[0]
+                (
+                    consumer_node_scale,
+                    consumer_node_node_zp,
+                ) = getQuantNodeArgs(consumer_consumer_node)
+
+            else:
+                input_scale, _ = getQuantNodeArgs(input_node)
+                consumer_node = list(node.users)[0]
+                (
+                    consumer_node_scale,
+                    consumer_node_node_zp,
+                ) = getQuantNodeArgs(consumer_node)
+
             weight_node_q_node = weight_node.all_input_nodes[0]
             weight_scale, _ = getQuantNodeArgs(weight_node_q_node)
 
-            consumer_node = list(node.users)[0]
-            consumer_node_scale, consumer_node_node_zp = getQuantNodeArgs(consumer_node)
-
             output_rescale_scale = (input_scale * weight_scale) / consumer_node_scale
             (
                 multiplier_output,
@@ -115,20 +137,20 @@ def define_node(
                 scale32=True,
                 double_round=True,
                 per_channel=False,
+                input_unsigned=False,
+                output_unsigned=False,
             )
-            add_res_int8 = tosa_graph.addIntermediate(mm_shape, ts.DType.INT8)
+
+            reshaped_res = tosa_graph.addIntermediate(result_shape, ts.DType.INT32)
+            buildReshape(tosa_graph, conv2d_res.name, result_shape, reshaped_res.name)
+
             tosa_graph.addOperator(
                 TosaOp.Op().RESCALE,
-                [add_res.name],
-                [add_res_int8.name],
+                [reshaped_res.name],
+                [output.name],
                 attr_rescale_output,
             )
-        # Reshape final result to original shape
-        attr_out = ts.TosaSerializerAttribute()
-        attr_out.ReshapeAttribute(output.shape)
-        tosa_graph.addOperator(
-            TosaOp.Op().RESHAPE,
-            [add_res_int8.name if is_quant_node else add_res.name],
-            [output.name],
-            attr_out,
-        )
+
+        else:
+            # non-quantized case
+            buildReshape(tosa_graph, conv2d_res.name, result_shape, output.name)
diff --git a/backends/arm/operators/op_conv2d.py b/backends/arm/operators/op_conv2d.py
@@ -52,7 +52,14 @@ def define_node(
         pad_attr = [val for val in pad.special for _ in (0, 1)]
         stride_attr = stride.special
         dilation_attr = dilation.special
-        attr.ConvAttribute(pad_attr, stride_attr, dilation_attr, 0, 0)
+        attr.ConvAttribute(
+            pad=pad_attr,
+            stride=stride_attr,
+            dilation=dilation_attr,
+            input_zp=0,
+            weight_zp=0,
+            local_bound=False,
+        )
 
         # Non-bias case.
         if len(node.all_input_nodes) == 2:
diff --git a/backends/arm/operators/op_permute.py b/backends/arm/operators/op_permute.py
@@ -12,6 +12,7 @@
     register_node_visitor,
 )
 from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_utils import is_permute_node_before_addmm
 from serializer.tosa_serializer import TosaOp
 
 
@@ -30,6 +31,13 @@ def define_node(
         output: TosaArg,
         is_quant_node: bool,
     ) -> None:
+        if is_permute_node_before_addmm(node):
+            ## Simply add an identityOp
+            tosa_graph.addOperator(
+                TosaOp.Op().IDENTITY, [inputs[0].name], [output.name]
+            )
+            return
+
         attr = ts.TosaSerializerAttribute()
         attr.TransposeAttribute(inputs[1].special)
         tosa_graph.addOperator(
diff --git a/backends/arm/operators/op_placeholder.py b/backends/arm/operators/op_placeholder.py
@@ -8,7 +8,7 @@
     isQuantArg,
     q_op,
 )
-from executorch.backends.arm.tosa_utils import getNodeArgs
+from executorch.backends.arm.tosa_utils import getNodeArgs, is_bias_node_for_addmm
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch._export.exported_program import ExportedProgram
 
@@ -42,28 +42,30 @@ def process_placeholder(
                 parameter_values_quantized,
                 name=out,
             )
-        elif (
-            consumer_node.target == exir_ops.edge.aten.addmm.default
-            and list(consumer_node.users)[0].target == q_op
-        ):
+        elif is_bias_node_for_addmm(node):
             (
                 _,
                 input_node,
                 weight_node_permuted,
             ) = consumer_node.all_input_nodes
             weight_node = weight_node_permuted.all_input_nodes[0]
 
-            input_node_scale, _ = getQuantNodeArgs(input_node)
+            # input_node_scale, _ = getQuantNodeArgs(input_node)
+            if input_node.target == exir_ops.edge.aten.view_copy.default:
+                input_node_scale, _ = getQuantNodeArgs(input_node.all_input_nodes[0])
+            else:
+                input_node_scale, _ = getQuantNodeArgs(input_node)
+
             weight_node_scale, weight_node_zp = getQuantNodeArgs(weight_node)
 
-            parameter_values_quantized = (
+            bias_values_quantized = (
                 parameter_values / (input_node_scale * weight_node_scale)
             ).astype(np.int32)
 
             tosa_graph.addConst(
                 inputs[0].shape,
                 ts.DType.INT32,
-                parameter_values_quantized,
+                bias_values_quantized,
                 name=out,
             )
         elif (
diff --git a/backends/arm/test/test_models.py b/backends/arm/test/test_models.py
@@ -128,13 +128,29 @@ def forward(self, x, y):
     @register_test
     class simple_linear(torch.nn.Module):
         inputs = {
-            TosaProfile.BI: (torch.ones(100, 20),),
-            TosaProfile.MI: (torch.ones(100, 20),),
+            TosaProfile.BI: (torch.rand(1, 2),),
+            TosaProfile.MI: (torch.rand(1, 2),),
         }
 
         def __init__(self):
             super().__init__()
             torch.manual_seed(seed)
+            self.fc = torch.nn.Linear(2, 3)
+
+        def forward(self, x):
+            x = self.fc(x)
+            return x
+
+    @register_test
+    class simple_linear_rank4(torch.nn.Module):
+        inputs = {
+            TosaProfile.BI: (torch.rand(5, 10, 25, 20),),
+            TosaProfile.MI: (torch.rand(5, 10, 25, 20),),
+        }
+
+        def __init__(self):
+            super().__init__()
+            torch.manual_seed(42)
             self.fc = torch.nn.Linear(20, 30)
 
         def forward(self, x):
diff --git a/backends/arm/third-party/serialization_lib b/backends/arm/third-party/serialization_lib
@@ -1 +1 @@
-Subproject commit 9601cbda5ff42dc4762e364d90093670931e1261
+Subproject commit 92358fc122cba98e373ca2ff42b1cfe7618161e8
diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py
@@ -20,6 +20,15 @@
 def isQuantNode(node):
     consumer_node = list(node.users)[0]
     input = node.all_input_nodes[0]
+
+    # For Rank > 2 Linear layers, the quant node is after the view_copy
+    if (
+        node.target == exir_ops.edge.aten.addmm.default
+        and list(node.users)[0].target == exir_ops.edge.aten.view_copy.default
+    ):
+        consumer_consumer_node = list(consumer_node.users)[0]
+        return True if consumer_consumer_node.target == q_op else False
+
     return (
         consumer_node.target == q_op
         or node.target in dq_q_ops
@@ -106,6 +115,8 @@ def buildRescale(
         scale32=is_scale32,
         double_round=is_double_round,
         per_channel=False,
+        input_unsigned=False,
+        output_unsigned=False,
     )
 
     rescale_out = tosa_fb.addIntermediate(output_shape, output_type)
@@ -129,6 +140,8 @@ def buildRescaleToInt32(
         scale32=is_scale32,
         double_round=is_double_round,
         per_channel=False,
+        input_unsigned=False,
+        output_unsigned=False,
     )
     input_A_rescaled_to_int32 = tosa_fb.addIntermediate(input.shape, ts.DType.INT32)
     tosa_fb.addOperator(
@@ -160,6 +173,8 @@ def buildRescaleFromInt32(
         scale32=is_scale32,
         double_round=is_double_round,
         per_channel=False,
+        input_unsigned=False,
+        output_unsigned=False,
     )
 
     tosa_fb.addOperator(
diff --git a/backends/arm/tosa_utils.py b/backends/arm/tosa_utils.py
diff --git a/examples/arm/arm_tosa_e2e.py b/examples/arm/arm_tosa_e2e.py