pytorch
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
Lines changed: 87 additions & 13 deletions
diff --git a/backends/arm/test/test_models.py b/backends/arm/test/test_models.py
Lines changed: 145 additions & 13 deletions
@@ -227,6 +227,14 @@ def getQuantNodeArgs(node):
 
 @final
 class ArmBackend(BackendDetails):
+    # Class-level counter used to generate unique tensor name suffixes; starts at -1 so the first getSSAnum() call returns 0
+    ssa_num = -1
+
+    @staticmethod
+    def getSSAnum():
+        ArmBackend.ssa_num += 1
+        return ArmBackend.ssa_num
+
     @staticmethod
     def preprocess(  # noqa: C901
         edge_program: ExportedProgram,
@@ -476,10 +484,13 @@ def preprocess(  # noqa: C901
                 elif exir_ops.edge.aten.convolution.default == node.target:
                     input, weight, bias, stride, pad, dilation, _, _, group = inputs
 
+                    # Currently only int8 is supported in quantized types.
+                    actual_out_type = ts.DType.INT8 if is_quant_node else outp.dtype
+
                     ## Transpose input tensor to NHWC_Order for TOSA
                     NHWC_Order = [0, 2, 3, 1]
                     input_transposed = transpose_helper(
-                        tosa_fb, input, NHWC_Order, outp.dtype
+                        tosa_fb, input, NHWC_Order, actual_out_type
                     )
 
                     ## CONV2DOp
@@ -492,6 +503,21 @@ def preprocess(  # noqa: C901
                     dilation_attr = dilation.special
                     attr.ConvAttribute(pad_attr, stride_attr, dilation_attr, 0, 0)
 
+                    if len(node.all_input_nodes) == 3:
+                        input_node, weight_node, _ = node.all_input_nodes
+                    else:
+                        input_node, weight_node = node.all_input_nodes
+
+                        # Create a zero bias tensor when the node has no bias input
+                        out_channels = weight.shape[0]
+                        bias_name = "const_bias_" + str(ArmBackend.getSSAnum())
+                        bias = tosa_fb.addConst(
+                            [out_channels],
+                            ts.DType.INT32 if is_quant_node else outp.dtype,
+                            [0] * out_channels,
+                            name=bias_name,
+                        )
+
                     if group.number > 1:
                         # Transpose weight to [KH, KW, C, M]
                         weight_HWCM_Order = [2, 3, 0, 1]
@@ -523,14 +549,17 @@ def preprocess(  # noqa: C901
                         # Transpose weight to [OC, H, W, IC]
                         weight_CHWC_Order = [0, 2, 3, 1]
                         weight_transposed = transpose_helper(
-                            tosa_fb, weight, weight_CHWC_Order, outp.dtype
+                            tosa_fb, weight, weight_CHWC_Order, actual_out_type
                         )
 
                         ## TOSA output shape is [NHWO]
                         NHWO_Order = [0, 2, 3, 1]
                         out_shape_TOSA_CONV2D = [outp.shape[i] for i in NHWO_Order]
+
+                        # The output type is int32 when input type is int8.
                         conv2d_res = tosa_fb.addIntermediate(
-                            out_shape_TOSA_CONV2D, outp.dtype
+                            out_shape_TOSA_CONV2D,
+                            ts.DType.INT32 if is_quant_node else outp.dtype,
                         )
                         tosa_fb.addOperator(
                             TosaOp.Op().CONV2D,
@@ -547,12 +576,32 @@ def preprocess(  # noqa: C901
                     NOHW_Order = [0, 3, 1, 2]
                     attr_output_transpose = ts.TosaSerializerAttribute()
                     attr_output_transpose.TransposeAttribute(NOHW_Order)
+
+                    # For quantized convolution, rescale the output value back to the same
+                    # integer value domain of the next op. Otherwise return float32 output.
+                    if is_quant_node:
+                        # Get scale_factor from input, weight, and output.
+                        output_node = list(node.users)[0]
+                        _, input_scale, _, _, _, _ = getNodeArgs(input_node)
+                        _, weight_scale, _, _, _, _ = getNodeArgs(weight_node)
+                        _, output_scale, _, _, _, _ = getNodeArgs(output_node)
+
+                        conv2d_res = tosa_quant_utils.buildRescaleOpConvOutput(
+                            tosa_fb,
+                            conv2d_res,
+                            actual_out_type,
+                            input_scale,
+                            weight_scale,
+                            output_scale,
+                        )
+
                     tosa_fb.addOperator(
                         TosaOp.Op().TRANSPOSE,
                         [conv2d_res.name],
                         [outp.name],
                         attr_output_transpose,
                     )
+
                 elif exir_ops.edge.aten.div.Tensor == node.target:
                     # Div is implemented as x/y = x*1/y
                     recip = tosa_fb.addIntermediate(inputs[1].shape, inputs[1].dtype)
@@ -802,7 +851,7 @@ def preprocess(  # noqa: C901
                     p_data = edge_program.state_dict[parameter_name]
 
                     assert isinstance(p_data, torch.Tensor), "Expect Attr to be tensor"
-                    weight_values = p_data.detach().numpy()
+                    parameter_values = p_data.detach().numpy()
 
                     # Check if they're for quantized nodes
                     consumer_node = list(node.users)[0]
@@ -811,14 +860,14 @@ def preprocess(  # noqa: C901
                             consumer_node
                         )
 
-                        weight_values_quantized = (
-                            (weight_values / weight_node_scale.number)
+                        parameter_values_quantized = (
+                            (parameter_values / weight_node_scale.number)
                             + weight_node_zp.number
                         ).astype(np.int8)
                         tosa_fb.addConst(
                             inputs[0].shape,
                             ts.DType.INT8,
-                            weight_values_quantized,
+                            parameter_values_quantized,
                             name=out,
                         )
                     elif (
@@ -837,30 +886,55 @@ def preprocess(  # noqa: C901
                             weight_node
                         )
 
-                        weight_values_quantized = (
-                            weight_values / (input_node_scale * weight_node_scale)
+                        parameter_values_quantized = (
+                            parameter_values / (input_node_scale * weight_node_scale)
                         ).astype(np.int32)
 
                         tosa_fb.addConst(
                             inputs[0].shape,
                             ts.DType.INT32,
-                            weight_values_quantized,
+                            parameter_values_quantized,
+                            name=out,
+                        )
+                    elif (
+                        consumer_node.target == exir_ops.edge.aten.convolution.default
+                        and list(consumer_node.users)[0].target == tosa_quant_utils.q_op
+                    ):
+                        (
+                            input_node,
+                            weight_node,
+                            bias_node,
+                        ) = consumer_node.all_input_nodes
+
+                        input_node_scale, _ = getQuantNodeArgs(input_node)
+                        weight_node_scale, _ = getQuantNodeArgs(weight_node)
+
+                        bias_scales = input_node_scale * weight_node_scale
+                        parameter_values_quantized = (
+                            parameter_values / bias_scales
+                        ).astype(np.int32)
+
+                        tosa_fb.addConst(
+                            inputs[0].shape,
+                            ts.DType.INT32,
+                            parameter_values_quantized,
                             name=out,
                         )
                     else:
                         tosa_fb.addConst(
-                            inputs[0].shape, inputs[0].dtype, weight_values, name=out
+                            inputs[0].shape, inputs[0].dtype, parameter_values, name=out
                         )
+
                 elif out in edge_program.graph_signature.inputs_to_buffers:
                     parameter_name = edge_program.graph_signature.inputs_to_buffers[
                         node.name
                     ]
                     p_data = edge_program.state_dict[parameter_name]
 
                     assert isinstance(p_data, torch.Tensor), "Expect Attr to be tensor"
-                    weight_values = p_data.detach().numpy()
+                    parameter_values = p_data.detach().numpy()
                     tosa_fb.addConst(
-                        inputs[0].shape, inputs[0].dtype, weight_values, name=out
+                        inputs[0].shape, inputs[0].dtype, parameter_values, name=out
                     )
                 else:
                     tensor = ts.TosaSerializerTensor(
 
@@ -9,10 +9,16 @@
 
 from enum import Enum
 
+import numpy as np
+
 import torch
 
 TestList = {}
 
+# Seed the RNG with a fixed number so that every test gets the same random data on each run
+seed = 42
+rng = np.random.default_rng(seed)
+
 
 def register_test(cls):
     TestList[cls.__name__] = cls()
@@ -103,42 +109,163 @@ class simple_linear(torch.nn.Module):
 
         def __init__(self):
             super().__init__()
-            torch.manual_seed(42)
+            torch.manual_seed(seed)
             self.fc = torch.nn.Linear(20, 30)
 
         def forward(self, x):
             x = self.fc(x)
             return x
 
+    """Currently we compare the quantized result directly with the floating point result. To avoid a noticeable
+       precision difference due to a wide random numerical distribution, generate a small random value range
+       for convolution testing for now."""
+
     @register_test
-    class simple_conv2d(torch.nn.Module):
+    class simple_conv2d_2x2_3x1x40x40_non_bias(torch.nn.Module):
+        data = torch.from_numpy(
+            np.float32(rng.integers(low=10, high=20, size=(3, 1, 40, 40)))
+        )
         inputs = {
-            TosaProfile.BI: (
-                torch.ones(
-                    1,
-                    3,
-                    256,
-                    256,
-                ),
-            ),
-            TosaProfile.MI: (torch.ones(1, 3, 256, 256),),
+            TosaProfile.BI: (data,),
+            TosaProfile.MI: (data,),
+        }
+
+        def __init__(self):
+            super().__init__()
+            self.conv2d = torch.nn.Conv2d(
+                in_channels=1, out_channels=3, kernel_size=2, stride=1, bias=False
+            )
+            with torch.no_grad():
+                self.conv2d.weight.copy_(
+                    torch.from_numpy(
+                        np.float32(rng.integers(low=1, high=10, size=(1, 1, 2, 2)))
+                    )
+                )
+
+        def forward(self, x):
+            x = self.conv2d(x)
+            return x
+
+    @register_test
+    class simple_conv2d_3x3_1x3x256x256_st1(torch.nn.Module):
+        data = torch.ones(1, 3, 256, 256)
+        inputs = {
+            TosaProfile.BI: (data,),
+            TosaProfile.MI: (data,),
         }
 
         def __init__(self):
             super().__init__()
             self.conv2d = torch.nn.Conv2d(
                 in_channels=3, out_channels=10, kernel_size=3, stride=1
             )
+            with torch.no_grad():
+                self.conv2d.weight.copy_(
+                    torch.from_numpy(
+                        np.float32(rng.integers(low=1, high=4, size=(10, 3, 3, 3)))
+                    )
+                )
+                self.conv2d.bias.copy_(
+                    torch.from_numpy(np.float32(rng.integers(low=1, high=4, size=(10))))
+                )
+
+        def forward(self, x):
+            x = self.conv2d(x)
+            return x
+
+    @register_test
+    class simple_conv2d_1x1_1x2x128x128_st1(torch.nn.Module):
+        data = torch.from_numpy(
+            np.float32(rng.integers(low=10, high=20, size=(1, 2, 128, 128)))
+        )
+        inputs = {
+            TosaProfile.BI: (data,),
+            TosaProfile.MI: (data,),
+        }
+
+        def __init__(self):
+            super().__init__()
+            self.conv2d = torch.nn.Conv2d(
+                in_channels=2, out_channels=1, kernel_size=1, stride=1
+            )
+            with torch.no_grad():
+                self.conv2d.weight.copy_(
+                    torch.from_numpy(
+                        np.float32(rng.integers(low=1, high=4, size=(1, 2, 1, 1)))
+                    )
+                )
+                self.conv2d.bias.copy_(
+                    torch.from_numpy(np.float32(rng.integers(low=1, high=4, size=(1))))
+                )
+
+        def forward(self, x):
+            x = self.conv2d(x)
+            return x
+
+    @register_test
+    class simple_conv2d_2x2_1x1x14x14_st2(torch.nn.Module):
+        data = torch.from_numpy(
+            np.float32(rng.integers(low=10, high=20, size=(1, 1, 14, 14)))
+        )
+        inputs = {
+            TosaProfile.BI: (data,),
+            TosaProfile.MI: (data,),
+        }
+
+        def __init__(self):
+            super().__init__()
+            self.conv2d = torch.nn.Conv2d(
+                in_channels=1, out_channels=1, kernel_size=2, stride=2
+            )
+            with torch.no_grad():
+                self.conv2d.weight.copy_(
+                    torch.from_numpy(
+                        np.float32(rng.integers(low=1, high=4, size=(1, 1, 2, 2)))
+                    )
+                )
+                self.conv2d.bias.copy_(
+                    torch.from_numpy(np.float32(rng.integers(low=1, high=4, size=(1))))
+                )
+
+        def forward(self, x):
+            x = self.conv2d(x)
+            return x
+
+    @register_test
+    class simple_conv2d_5x5_3x2x128x128_st1(torch.nn.Module):
+        data = torch.from_numpy(
+            np.float32(rng.integers(low=10, high=20, size=(3, 2, 128, 128)))
+        )
+        inputs = {
+            TosaProfile.BI: (data,),
+            TosaProfile.MI: (data,),
+        }
+
+        def __init__(self):
+            super().__init__()
+            self.conv2d = torch.nn.Conv2d(
+                in_channels=2, out_channels=3, kernel_size=5, stride=1
+            )
+            with torch.no_grad():
+                self.conv2d.weight.copy_(
+                    torch.from_numpy(
+                        np.float32(rng.integers(low=1, high=10, size=(1, 1, 5, 5)))
+                    )
+                )
+                self.conv2d.bias.copy_(torch.ones(3, dtype=torch.float))
 
         def forward(self, x):
             x = self.conv2d(x)
             return x
 
     @register_test
     class block_two_conv2d(torch.nn.Module):
+        data = torch.from_numpy(
+            np.float32(rng.integers(low=10, high=20, size=(1, 3, 256, 256)))
+        )
         inputs = {
-            TosaProfile.BI: (torch.ones(1, 3, 256, 256),),
-            TosaProfile.MI: (torch.ones(1, 3, 256, 256),),
+            TosaProfile.BI: (data,),
+            TosaProfile.MI: (data,),
         }
 
         def __init__(self):
@@ -149,6 +276,11 @@ def __init__(self):
             self.conv2d_2 = torch.nn.Conv2d(
                 in_channels=10, out_channels=15, kernel_size=5, stride=1
             )
+            with torch.no_grad():
+                self.conv2d.weight.copy_(torch.ones(10, 3, 5, 5, dtype=torch.float))
+                self.conv2d.bias.copy_(torch.ones(10))
+                self.conv2d_2.weight.copy_(torch.ones(15, 10, 5, 5, dtype=torch.float))
+                self.conv2d_2.bias.copy_(torch.ones(15))
 
         def forward(self, x):
             x = self.conv2d(x)