 from executorch.exir.dialects._ops import ops as exir_ops

-from .qnn_constants import QNN_uint16
-
 from .utils import get_parameter, is_graph_input, is_graph_output, is_parameter
     # Note that there is no int64 tensor data type in Qnn.
     torch.int64: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UNDEFINED,
     torch.uint8: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UFIXED_POINT_8,
-    QNN_uint16: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UFIXED_POINT_16,
+    torch.uint16: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UFIXED_POINT_16,
 }
 QNN_TENSOR_TYPE_MAP = {
     torch.float32: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_FLOAT_32,
     torch.int32: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_INT_32,
     torch.int64: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_INT_64,
     torch.uint8: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_8,
-    QNN_uint16: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_16,
+    torch.uint16: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_16,
     float: PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_FLOAT_32,
 }
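Note on the two tables above: with torch.uint16 available as a native dtype in recent PyTorch releases, the custom QNN_uint16 placeholder from qnn_constants is no longer needed and both lookup maps can be keyed on the torch dtype directly. A minimal sketch of how such a lookup might be used follows; pick_qnn_dtype is a hypothetical helper name, and the two maps above (plus PyQnnWrapper) are assumed to be in scope.

import torch

def pick_qnn_dtype(dtype: torch.dtype, quantized: bool):
    # Quantized tensors resolve through the fixed-point map; plain tensors use the tensor map.
    table = QNN_QUANT_TYPE_MAP if quantized else QNN_TENSOR_TYPE_MAP
    if dtype not in table:
        raise KeyError(f"Unsupported dtype for QNN lowering: {dtype}")
    return table[dtype]

# Example: a quantized uint16 activation resolves to QNN_DATATYPE_UFIXED_POINT_16.
# pick_qnn_dtype(torch.uint16, quantized=True)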
@@ -169,7 +167,7 @@ def get_quant_encoding_conf(
         return self.make_qnn_per_tensor_config(quant_attrs)

     def get_quant_tensor_value(
-        self, tensor: torch.Tensor, quant_attrs: Dict, dtype, bitwidth
+        self, tensor: torch.Tensor, quant_attrs: Dict, quant_configs: Dict
     ) -> torch.Tensor:
         if quant_attrs["encoding"] in PER_TENSOR_ENCODING:
             scale = quant_attrs["scale"]
@@ -178,16 +176,11 @@ def get_quant_tensor_value(
             scale = quant_attrs["scales"]
             zero_point = quant_attrs["zero_points"]

-        # To bypass torch.uint16 quantization is not supported
-        dtype = (
-            torch.int32
-            if dtype == PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_16
-            else quant_attrs["dtype"]
-        )
+        dtype = quant_configs["dtype"]

         tensor = tensor.div(scale).add(zero_point).round().to(dtype)
         # Make the backends access data correctly
-        if bitwidth == 4:
+        if quant_configs.get("bitwidth") == 4:
             mask = torch.full(tensor.size(), 0x0F, dtype=torch.int8)
             tensor = torch.bitwise_and(mask, tensor)
         return tensor
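The refactor threads the whole quant_configs dict into get_quant_tensor_value, so the storage dtype (which can now be torch.uint16 directly) and the bitwidth both come from the quantization config instead of being passed separately and translated back from a QNN enum. The core operation is plain affine quantization; below is a standalone sketch assuming only PyTorch, where quantize_per_tensor_sketch is a made-up name and not the PR's API.

from typing import Optional

import torch

def quantize_per_tensor_sketch(
    x: torch.Tensor,
    scale: float,
    zero_point: int,
    dtype: torch.dtype,
    bitwidth: Optional[int] = None,
) -> torch.Tensor:
    # q = round(x / scale + zero_point), cast to the storage dtype from the quant config.
    q = x.div(scale).add(zero_point).round().to(dtype)
    if bitwidth == 4:
        # Keep only the low nibble so the backend reads 4-bit values correctly.
        mask = torch.full(q.size(), 0x0F, dtype=torch.int8)
        q = torch.bitwise_and(mask, q)
    return q

# Example: 4-bit weights stored in int8 containers.
# quantize_per_tensor_sketch(torch.randn(8), scale=0.1, zero_point=8, dtype=torch.int8, bitwidth=4)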
@@ -236,7 +229,7 @@ def get_data_type(
                 <= torch.iinfo(torch.int16).max - torch.iinfo(torch.int16).min
             ):
                 if unsigned:
-                    quant_config["dtype"] = QNN_uint16
+                    quant_config["dtype"] = torch.uint16
                 else:
                     quant_config["dtype"] = torch.int16
         return QNN_QUANT_TYPE_MAP[quant_config["dtype"]]
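The dtype selection above encodes a simple rule: when the quantization range fits in 16 bits, pick torch.uint16 for an unsigned range and torch.int16 otherwise, with torch.uint16 replacing the old QNN_uint16 sentinel. A minimal sketch of that decision, assuming a PyTorch version that provides torch.uint16; pick_16bit_dtype is a hypothetical name.

import torch

def pick_16bit_dtype(quant_min: int, quant_max: int) -> torch.dtype:
    # A range no wider than 65535 fits in a 16-bit container.
    assert quant_max - quant_min <= torch.iinfo(torch.int16).max - torch.iinfo(torch.int16).min
    # A non-negative quant_min means the quantized values are unsigned.
    return torch.uint16 if quant_min >= 0 else torch.int16

# Example: pick_16bit_dtype(0, 65535) returns torch.uint16.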
@@ -327,8 +320,7 @@ def define_tensor(
             tensor = self.get_quant_tensor_value(
                 tensor,
                 node.meta["quant_attrs"],
-                dtype,
-                quant_configs.get("bitwidth"),
+                quant_configs,
             )
         tensor_wrapper = PyQnnWrapper.TensorWrapper(
             tensor_name,