Qualcomm AI Engine Direct - Enable SSD300_VGG16 #3010

Closed
4 changes: 4 additions & 0 deletions backends/qualcomm/builders/__init__.py
@@ -41,8 +41,10 @@
op_skip_ops,
op_slice_copy,
op_softmax,
op_sqrt,
op_squeeze,
op_sub,
op_sum_int_list,
op_tanh,
op_transpose,
op_unsqueeze,
@@ -86,7 +88,9 @@
op_slice_copy,
op_softmax,
op_squeeze,
op_sqrt,
op_sub,
op_sum_int_list,
op_tanh,
op_transpose,
op_unsqueeze,
2 changes: 1 addition & 1 deletion backends/qualcomm/builders/op_linear.py
@@ -62,7 +62,7 @@ def define_node(
bias_node = node.args[2]

# TODO remove this when qnn sdk support
if "scales" in bias_node.meta.get("quant_attrs"):
if "scales" in bias_node.meta.get("quant_attrs", {}):
print(
f"[WARNING] Fallback linear bias, {bias_node}. per channel bias quantization is not support yet."
)
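A note on the op_linear.py change: dict.get("quant_attrs") returns None when the key is absent, and a membership test against None raises. A minimal sketch of the failure mode the added default avoids, using a stand-in meta dict rather than a real torch.fx node (hypothetical example):

meta = {}  # bias node with no quantization attributes recorded
# Old check: "scales" in meta.get("quant_attrs") raises
#   TypeError: argument of type 'NoneType' is not iterable
# New check: the empty-dict default makes the test simply evaluate to False
has_per_channel_bias = "scales" in meta.get("quant_attrs", {})
print(has_per_channel_bias)  # False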
1 change: 0 additions & 1 deletion backends/qualcomm/builders/op_log_softmax.py
@@ -72,5 +72,4 @@ def define_node(
PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32,
{"data": np.uint32(dim)},
)
- # pdb.set_trace()
return log_softmax_op
59 changes: 59 additions & 0 deletions backends/qualcomm/builders/op_sqrt.py
@@ -0,0 +1,59 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpSqrt, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class SQRT(NodeVisitor):
target = ["aten.sqrt.default"]

def __init__(self, *args) -> None:
super().__init__(*args)

def define_node(
self,
node: torch.fx.Node,
nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
) -> PyQnnWrapper.PyQnnOpWrapper:
# tensor input
input_node = node.args[0]
input_tensor = self.get_tensor(input_node, node)

input_tensor_wrapper = self.define_tensor(
input_node,
input_tensor,
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
nodes_to_wrappers,
is_input_tensor=True,
)
sqrt_input_tensors = [input_tensor_wrapper]

out_tensor = self.get_tensor(node, node)
output_tensor_wrapper = self.define_tensor(
node,
out_tensor,
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
nodes_to_wrappers,
is_input_tensor=False,
)
sqrt_output_tensors = [output_tensor_wrapper]

sqrt_op = PyQnnWrapper.PyQnnOpWrapper(
node.name,
QNN_OP_PACKAGE_NAME_QTI_AISW,
OpSqrt.op_name,
)
sqrt_op.AddInputTensors(sqrt_input_tensors)
sqrt_op.AddOutputTensors(sqrt_output_tensors)

return sqrt_op
80 changes: 80 additions & 0 deletions backends/qualcomm/builders/op_sum_int_list.py
@@ -0,0 +1,80 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import cast, Dict, List

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import numpy as np
import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpReduceSum, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Sum(NodeVisitor):
target = ["aten.sum.dim_IntList"]

def __init__(self, *args) -> None:
super().__init__(*args)

def define_node(
self,
node: torch.fx.Node,
nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
) -> PyQnnWrapper.PyQnnOpWrapper:

input_node = node.args[0]
input_tensor = self.get_tensor(input_node, node)
input_tensor_wrapper = self.define_tensor(
input_node,
input_tensor,
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
nodes_to_wrappers,
is_input_tensor=True,
)
sum_input_tensors = [input_tensor_wrapper]

# sum dims
sum_dims = cast(List[int], node.args[1])
sum_dims = [sum_dim % len(input_node.meta["val"].shape) for sum_dim in sum_dims]
if "axis_order" in node.meta:
sum_dims = [node.meta["axis_order"].index(sum_dim) for sum_dim in sum_dims]
sum_dims_shape = [len(sum_dims)]

output_tensor = self.get_tensor(node, node)
output_tensor_wrapper = self.define_tensor(
node,
output_tensor,
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
nodes_to_wrappers,
is_input_tensor=False,
)
sum_output_tensors = [output_tensor_wrapper]
sum_op = PyQnnWrapper.PyQnnOpWrapper(
node.name,
QNN_OP_PACKAGE_NAME_QTI_AISW,
OpReduceSum.op_name,
)
sum_op.AddInputTensors(sum_input_tensors)
sum_op.AddOutputTensors(sum_output_tensors)
sum_op.AddTensorParam(
OpReduceSum.param_axes,
PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32,
len(sum_dims_shape),
sum_dims_shape,
np.array(sum_dims, dtype=np.uint32),
True,
)

if len(node.args) > 2:
keep_dims = cast(bool, node.args[2])
sum_op.AddScalarParam(
OpReduceSum.param_keep_dims,
PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_BOOL_8,
{"data": keep_dims},
)
return sum_op
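Taken together, op_sqrt.py and op_sum_int_list.py cover graphs that take an element-wise square root and then reduce over a list of dimensions. A toy module of the kind these builders are meant to lower, shown only as an illustration and not taken from the PR's test suite:

import torch

class SqrtSumToy(torch.nn.Module):
    # forward() traces to aten.sqrt.default followed by aten.sum.dim_IntList,
    # the two ops added by the new builders.
    def forward(self, x):
        return torch.sqrt(x).sum(dim=[2, 3], keepdim=True)

x = torch.rand(1, 3, 8, 8)
print(SqrtSumToy()(x).shape)  # torch.Size([1, 3, 1, 1])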
12 changes: 12 additions & 0 deletions backends/qualcomm/builders/qnn_constants.py
@@ -106,6 +106,13 @@ class OpExpandDims:
param_axis: str = "axis"


@dataclass(init=False, frozen=True)
class OpReduceSum:
op_name: str = "ReduceSum"
param_axes: str = "axes"
param_keep_dims: str = "keep_dims"


@dataclass(init=False, frozen=True)
class OpFullyConnected:
op_name: str = "FullyConnected"
@@ -123,6 +130,11 @@ class OpGelu:
op_name: str = "Gelu"


@dataclass(init=False, frozen=True)
class OpSqrt:
op_name: str = "ElementWiseSquareRoot"


@dataclass(init=False, frozen=True)
class OpHardSwish:
op_name: str = "HardSwish"
8 changes: 7 additions & 1 deletion backends/qualcomm/passes/layout_transform.py
@@ -52,6 +52,9 @@ class LayoutTransform(ExportPass):
exir_ops.edge.aten.bmm.default,
exir_ops.edge.aten.full.default,
exir_ops.edge.aten.gelu.default,
exir_ops.edge.aten.sqrt.default,
exir_ops.edge.aten.sum.dim_IntList,
exir_ops.edge.aten.pow.Tensor_Scalar,
*q_ops,
*dq_ops,
_operator.getitem,
@@ -109,7 +112,10 @@ def is_layout_sensitive(self, node: torch.fx.Node) -> bool:
return node.target in self.layout_sensitive_ops

def is_layout_agnostic(self, node: torch.fx.Node) -> bool:
- if node.target == exir_ops.edge.aten.mean.dim:
+ if node.target in [
+     exir_ops.edge.aten.mean.dim,
+     exir_ops.edge.aten.sum.dim_IntList,
+ ]:
# if dimension is not kept, we'll have no clue how to do layout transform
if len(node.args) < 3 or not node.args[2]:
return False
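The keep_dims check above exists because a reduction that drops its dimensions changes the tensor's rank, so the recorded axis order no longer maps onto the output and the pass cannot treat the op as layout agnostic. A shape-only illustration (plain PyTorch, independent of the backend):

import torch

x = torch.rand(1, 3, 4, 5)  # e.g. NCHW activations
print(x.sum(dim=[1]).shape)                # torch.Size([1, 4, 5]); rank drops, axis mapping is lost
print(x.sum(dim=[1], keepdim=True).shape)  # torch.Size([1, 1, 4, 5]); rank preserved, transform stays well-defined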
11 changes: 11 additions & 0 deletions backends/qualcomm/quantizer/utils.py
@@ -42,6 +42,7 @@ def decorator(annotator: Callable):

return decorator


def _is_input_float_tensor(node: Node):
"""Check if the input is not a float tensor, so that we can skip quantization for the node
since observers only works with float Tensors
@@ -175,6 +176,11 @@ def annotate_rsub(node: Node, quantization_config: QuantizationConfig) -> None:
annotate_binary(node, quantization_config)


@register_annotator([torch.ops.aten.sum.dim_IntList])
def annotate_sum(node: Node, quantization_config: QuantizationConfig) -> None:
annotate_binary(node, quantization_config)


@register_annotator([torch.ops.aten.ceil.default])
def annotate_ceil(node: Node, quantization_config: QuantizationConfig) -> None:
annotate_single_in_single_out(node, quantization_config)
@@ -302,6 +308,11 @@ def annotate_slice(node: Node, quantization_config: QuantizationConfig) -> None:
annotate_single_in_single_out(node, quantization_config)


@register_annotator([torch.ops.aten.sqrt.default])
def annotate_sqrt(node: Node, quantization_config: QuantizationConfig) -> None:
annotate_single_in_single_out(node, quantization_config)


@register_annotator([torch.ops.aten.gelu.default])
def annotate_gelu(node: Node, quantization_config: QuantizationConfig) -> None:
annotate_single_in_single_out(node, quantization_config)