Commit 468b5f8 (parent: ab323a5)

Qualcomm AI Engine Direct - Enable SSD300_VGG16

File tree: 11 files changed (+586, -53 lines)

backends/qualcomm/builders/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -41,8 +41,10 @@
     op_skip_ops,
     op_slice_copy,
     op_softmax,
+    op_sqrt,
     op_squeeze,
     op_sub,
+    op_sum_int_list,
     op_tanh,
     op_transpose,
     op_unsqueeze,
@@ -86,7 +88,9 @@
     op_slice_copy,
     op_softmax,
     op_squeeze,
+    op_sqrt,
     op_sub,
+    op_sum_int_list,
     op_tanh,
     op_transpose,
     op_unsqueeze,

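These additions only take effect because the builder package imports the new modules: importing op_sqrt and op_sum_int_list runs their @register_node_visitor decorators. A minimal sketch of that decorator-registry pattern; REGISTRY and MyOp are illustrative names, not the actual ExecuTorch internals.

# Illustrative sketch of a decorator-based registry; REGISTRY and MyOp are
# made-up names, not the real ExecuTorch implementation.
from typing import Dict, List, Type

REGISTRY: Dict[str, Type] = {}

def register_node_visitor(cls: Type) -> Type:
    # Decorators run at import time, which is why the builder package's
    # __init__.py must import op_sqrt / op_sum_int_list for them to be found.
    for target in cls.target:
        REGISTRY[target] = cls
    return cls

@register_node_visitor
class MyOp:
    target: List[str] = ["aten.my_op.default"]

assert "aten.my_op.default" in REGISTRY
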
backends/qualcomm/builders/op_linear.py

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ def define_node(
             bias_node = node.args[2]
 
             # TODO remove this when qnn sdk support
-            if "scales" in bias_node.meta.get("quant_attrs"):
+            if "scales" in bias_node.meta.get("quant_attrs", {}):
                 print(
                     f"[WARNING] Fallback linear bias, {bias_node}. per channel bias quantization is not support yet."
                 )

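The one-line change above guards against bias nodes whose meta carries no quant_attrs entry: without the default, dict.get returns None and the membership test raises TypeError. A standalone sketch of the failure mode, where meta is a plain dict standing in for node.meta:

# meta stands in for a bias node's .meta with no quantization attributes.
meta = {}

try:
    "scales" in meta.get("quant_attrs")        # old behaviour
except TypeError as err:
    print(f"old: {err}")                       # argument of type 'NoneType' is not iterable

print("new:", "scales" in meta.get("quant_attrs", {}))  # new behaviour: simply False
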
backends/qualcomm/builders/op_log_softmax.py

Lines changed: 0 additions & 1 deletion
@@ -72,5 +72,4 @@ def define_node(
             PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32,
             {"data": np.uint32(dim)},
         )
-        # pdb.set_trace()
         return log_softmax_op

backends/qualcomm/builders/op_sqrt.py

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpSqrt, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class SQRT(NodeVisitor):
+    target = ["aten.sqrt.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        # tensor input
+        input_node = node.args[0]
+        input_tensor = self.get_tensor(input_node, node)
+
+        input_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+        sqrt_input_tensors = [input_tensor_wrapper]
+
+        out_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            out_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+        sqrt_output_tensors = [output_tensor_wrapper]
+
+        sqrt_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpSqrt.op_name,
+        )
+        sqrt_op.AddInputTensors(sqrt_input_tensors)
+        sqrt_op.AddOutputTensors(sqrt_output_tensors)
+
+        return sqrt_op
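
With this visitor registered, any graph node whose target is aten.sqrt.default can be mapped to QNN's ElementWiseSquareRoot. A quick way to confirm that torch.sqrt exports to that target, using only standard torch.export; the QNN delegation step is omitted and the exact node list may vary with export/decomposition settings.

import torch

class Sqrt(torch.nn.Module):
    def forward(self, x):
        return torch.sqrt(x)

ep = torch.export.export(Sqrt(), (torch.rand(4, 4),))
targets = [str(n.target) for n in ep.graph_module.graph.nodes if n.op == "call_function"]
print(targets)  # expected to contain 'aten.sqrt.default'
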
backends/qualcomm/builders/op_sum_int_list.py

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import cast, Dict, List
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import numpy as np
+import torch
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpReduceSum, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Sum(NodeVisitor):
+    target = ["aten.sum.dim_IntList"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+
+        input_node = node.args[0]
+        input_tensor = self.get_tensor(input_node, node)
+        input_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+        sum_input_tensors = [input_tensor_wrapper]
+
+        # sum dims
+        sum_dims = cast(List[int], node.args[1])
+        sum_dims = [sum_dim % len(input_node.meta["val"].shape) for sum_dim in sum_dims]
+        if "axis_order" in node.meta:
+            sum_dims = [node.meta["axis_order"].index(sum_dim) for sum_dim in sum_dims]
+        sum_dims_shape = [len(sum_dims)]
+
+        output_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            output_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+        sum_output_tensors = [output_tensor_wrapper]
+        sum_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpReduceSum.op_name,
+        )
+        sum_op.AddInputTensors(sum_input_tensors)
+        sum_op.AddOutputTensors(sum_output_tensors)
+        sum_op.AddTensorParam(
+            OpReduceSum.param_axes,
+            PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32,
+            len(sum_dims_shape),
+            sum_dims_shape,
+            np.array(sum_dims, dtype=np.uint32),
+            True,
+        )
+
+        if len(node.args) > 2:
+            keep_dims = cast(bool, node.args[2])
+            sum_op.AddScalarParam(
+                OpReduceSum.param_keep_dims,
+                PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_BOOL_8,
+                {"data": keep_dims},
+            )
+        return sum_op

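The axis handling in define_node is the subtle part: negative dims are normalized against the input rank, and when the layout pass has recorded an axis_order permutation, the reduce axes are remapped into that order. A standalone sketch of the same arithmetic, assuming an NCHW-to-NHWC permutation recorded as axis_order = (0, 2, 3, 1):

# Reproduces the two list comprehensions from define_node on plain Python data.
rank = 4                      # rank of the input tensor
axis_order = (0, 2, 3, 1)     # assumed NCHW -> NHWC permutation from the layout pass

sum_dims = [-3]                                      # channel dim of an NCHW tensor
sum_dims = [d % rank for d in sum_dims]              # normalize negatives -> [1]
sum_dims = [axis_order.index(d) for d in sum_dims]   # channels live at axis 3 in NHWC
print(sum_dims)  # [3]
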
backends/qualcomm/builders/qnn_constants.py

Lines changed: 12 additions & 0 deletions
@@ -106,6 +106,13 @@ class OpExpandDims:
     param_axis: str = "axis"
 
 
+@dataclass(init=False, frozen=True)
+class OpReduceSum:
+    op_name: str = "ReduceSum"
+    param_axes: str = "axes"
+    param_keep_dims: str = "keep_dims"
+
+
 @dataclass(init=False, frozen=True)
 class OpFullyConnected:
     op_name: str = "FullyConnected"
@@ -123,6 +130,11 @@ class OpGelu:
     op_name: str = "Gelu"
 
 
+@dataclass(init=False, frozen=True)
+class OpSqrt:
+    op_name: str = "ElementWiseSquareRoot"
+
+
 @dataclass(init=False, frozen=True)
 class OpHardSwish:
     op_name: str = "HardSwish"

backends/qualcomm/passes/layout_transform.py

Lines changed: 7 additions & 1 deletion
@@ -52,6 +52,9 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.bmm.default,
         exir_ops.edge.aten.full.default,
         exir_ops.edge.aten.gelu.default,
+        exir_ops.edge.aten.sqrt.default,
+        exir_ops.edge.aten.sum.dim_IntList,
+        exir_ops.edge.aten.pow.Tensor_Scalar,
         *q_ops,
         *dq_ops,
         _operator.getitem,
@@ -109,7 +112,10 @@ def is_layout_sensitive(self, node: torch.fx.Node) -> bool:
         return node.target in self.layout_sensitive_ops
 
     def is_layout_agnostic(self, node: torch.fx.Node) -> bool:
-        if node.target == exir_ops.edge.aten.mean.dim:
+        if node.target in [
+            exir_ops.edge.aten.mean.dim,
+            exir_ops.edge.aten.sum.dim_IntList,
+        ]:
             # if dimemsion is not kept, we'll have no clue how to do layout transform
             if len(node.args) < 3 or not node.args[2]:
                 return False

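sum.dim_IntList joins mean.dim in the keep-dims check above: the pass only treats the reduction as layout-agnostic when node.args[2] (keepdim) is truthy, because a dropped dimension leaves no way to reapply the recorded axis permutation. A small shape check illustrating why:

import torch

x = torch.rand(1, 512, 38, 38)              # NCHW input
print(x.sum(dim=1, keepdim=True).shape)     # torch.Size([1, 1, 38, 38]) - rank kept, the NHWC permutation still applies
print(x.sum(dim=1, keepdim=False).shape)    # torch.Size([1, 38, 38])    - rank dropped, the permutation no longer describes the output
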
backends/qualcomm/quantizer/utils.py

Lines changed: 11 additions & 0 deletions
@@ -42,6 +42,7 @@ def decorator(annotator: Callable):
 
     return decorator
 
+
 def _is_input_float_tensor(node: Node):
     """Check if the input is not a float tensor, so that we can skip quantization for the node
     since observers only works with float Tensors
@@ -175,6 +176,11 @@ def annotate_rsub(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_binary(node, quantization_config)
 
 
+@register_annotator([torch.ops.aten.sum.dim_IntList])
+def annotate_sum(node: Node, quantization_config: QuantizationConfig) -> None:
+    annotate_binary(node, quantization_config)
+
+
 @register_annotator([torch.ops.aten.ceil.default])
 def annotate_ceil(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_single_in_single_out(node, quantization_config)
@@ -302,6 +308,11 @@ def annotate_slice(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_single_in_single_out(node, quantization_config)
 
 
+@register_annotator([torch.ops.aten.sqrt.default])
+def annotate_sqrt(node: Node, quantization_config: QuantizationConfig) -> None:
+    annotate_single_in_single_out(node, quantization_config)
+
+
 @register_annotator([torch.ops.aten.gelu.default])
 def annotate_gelu(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_single_in_single_out(node, quantization_config)

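The new annotators reuse existing helpers: sqrt is treated as single-in/single-out while sum is routed through annotate_binary. Taken together with the new builders, they cover the sqrt / sum(dim, keepdim=True) / pow pattern of an SSD-style L2Norm layer, which is presumably what this commit needs for SSD300_VGG16. A hedged end-to-end sketch using only standard torch.export; QNN quantization and partitioning are omitted, and the exact op set may vary with export settings.

import torch

class L2Norm(torch.nn.Module):
    # Channel-wise L2 normalization in the style of SSD300_VGG16's conv4_3 branch.
    def forward(self, x):
        norm = torch.sqrt(x.pow(2).sum(dim=1, keepdim=True) + 1e-10)
        return x / norm

ep = torch.export.export(L2Norm(), (torch.rand(1, 512, 38, 38),))
ops = sorted({str(n.target) for n in ep.graph_module.graph.nodes if n.op == "call_function"})
print(ops)  # expected to include aten.pow.Tensor_Scalar, aten.sum.dim_IntList, aten.sqrt.default
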