20 | 20 |
21 | 21 |
22 | 22 | class QuantizationParams:
23 |    | -     __slots__ = ["node_name", "zp", "scale"]
   | 23 | +     __slots__ = ["node_name", "zp", "scale", "qmin", "qmax", "dtype"]
24 | 24 |
25 | 25 |     # todo: zps and scales can be per tensors or per channel => a list??
26 |    | -     def __init__(self, node_name: str, zp: int, scale: float):
   | 26 | +     def __init__(
   | 27 | +         self,
   | 28 | +         node_name: str,
   | 29 | +         zp: int,
   | 30 | +         scale: float,
   | 31 | +         qmin: int,
   | 32 | +         qmax: int,
   | 33 | +         dtype: torch.dtype,
   | 34 | +     ):
27 | 35 |         self.node_name = node_name  # not need I think, but good for error check
28 | 36 |         self.zp = zp
29 | 37 |         self.scale = scale
   | 38 | +         self.qmin = qmin
   | 39 | +         self.qmax = qmax
   | 40 | +         self.dtype = dtype
30 | 41 |
31 | 42 |     def __repr__(self):
32 |    | -         return f"QuantizationParams(node_name={self.node_name}, zp={self.zp}, scale={self.scale})"
   | 43 | +         return f"QuantizationParams(node_name={self.node_name}, zp={self.zp}, scale={self.scale}, [{self.qmin},{self.qmax}], dtype={self.dtype})"
33 | 44 |
34 | 45 |
35 | 46 | """
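
With the patch applied, `QuantizationParams` carries the clamping range and target dtype alongside the zero point and scale, so the int8 limits no longer need to be hardcoded at the call site. Below is a minimal, self-contained sketch of the extended class in use; the class body is assembled from the `+` lines above, while the node name, zero point, and scale values are made up purely for illustration:

```python
import numpy as np
import torch


# QuantizationParams as it reads with this patch applied (assembled from the "+" lines above).
class QuantizationParams:
    __slots__ = ["node_name", "zp", "scale", "qmin", "qmax", "dtype"]

    def __init__(
        self,
        node_name: str,
        zp: int,
        scale: float,
        qmin: int,
        qmax: int,
        dtype: torch.dtype,
    ):
        self.node_name = node_name
        self.zp = zp
        self.scale = scale
        self.qmin = qmin
        self.qmax = qmax
        self.dtype = dtype

    def __repr__(self):
        return f"QuantizationParams(node_name={self.node_name}, zp={self.zp}, scale={self.scale}, [{self.qmin},{self.qmax}], dtype={self.dtype})"


# Illustrative values only; real zero points and scales come from the quantizer.
qp_int8 = QuantizationParams(
    "input_0", zp=0, scale=0.02,
    qmin=np.iinfo(np.int8).min, qmax=np.iinfo(np.int8).max, dtype=torch.int8,
)
qp_int16 = QuantizationParams(
    "input_0", zp=0, scale=0.0005,
    qmin=np.iinfo(np.int16).min, qmax=np.iinfo(np.int16).max, dtype=torch.int16,
)
print(qp_int8)   # ... [-128,127], dtype=torch.int8
print(qp_int16)  # ... [-32768,32767], dtype=torch.int16
```

Keeping `qmin`/`qmax` explicit, rather than deriving them from `dtype`, also leaves room for restricted ranges such as a symmetric [-127, 127].
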
@@ -160,13 +171,13 @@ def run_tosa_ref_model(
160 | 171 |             assert (
161 | 172 |                 quant_param.node_name == input_name
162 | 173 |             ), "These quantization params do not match the input tensor name"
163 |     | -             int8_max = np.iinfo(np.int8).max
164 |     | -             int8_min = np.iinfo(np.int8).min
165 | 174 |             data_np = (
166 | 175 |                 ((data_np / np.float32(quant_param.scale)) + quant_param.zp)
167 | 176 |                 .round()
168 |     | -                 .clip(int8_min, int8_max)
169 |     | -                 .astype(np.int8)
    | 177 | +                 .clip(quant_param.qmin, quant_param.qmax)
    | 178 | +                 .astype(
    | 179 | +                     f"{quant_param.dtype}".replace("torch.", "")
    | 180 | +                 )  # Use string format of dtype to convert to numpy dtype
170 | 181 |             )
171 | 182 |             file_path = os.path.join(self.intermediate_path, input_name + ".npy")
172 | 183 |             np.save(file_path, data_np, allow_pickle=False)
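
The new `.astype()` call relies on the string form of a `torch.dtype` (e.g. `"torch.int8"`) matching a NumPy dtype name once the `torch.` prefix is stripped. A small standalone check of that assumption, using illustrative values:

```python
import numpy as np
import torch

for torch_dtype in (torch.int8, torch.int16, torch.int32):
    # str(torch.int8) == "torch.int8"; stripping the prefix yields "int8",
    # which NumPy accepts as a dtype string.
    numpy_name = f"{torch_dtype}".replace("torch.", "")
    quantized = np.array([3.7, -1.2]).round().astype(numpy_name)
    print(torch_dtype, "->", quantized.dtype)  # e.g. torch.int8 -> int8
```

This round-trip only works for dtypes NumPy knows by the same name; a dtype without a NumPy counterpart (e.g. `torch.bfloat16`) would raise, so an explicit mapping would be needed if such types ever reached this path.
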