Commit 52f4156

Update on "[executorch][flat_tensor] flat tensor header"
Parse a FlatTensor header. The FlatTensor header contains:

- flatbuffer offset
- flatbuffer size
- segment base offset
- segment data size (total size of all segments)

Mostly taken from extended_header.cpp/.h.

Differential Revision: [D67064570](https://our.internmc.facebook.com/intern/diff/D67064570/)

[ghstack-poisoned]
2 parents b05d02c + 06748cc commit 52f4156
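
To make the header fields listed in the commit message concrete, here is a minimal Python sketch of parsing such a header. The magic bytes, field widths, and offsets below are assumptions for illustration only; the real layout is defined in the flat_tensor header sources, not reproduced here.

```python
import struct

# Hypothetical layout: 4 magic bytes followed by four little-endian uint64
# fields, in the order the commit message lists them. The actual header
# defines its own magic value, field widths, and offsets.
FLAT_TENSOR_MAGIC = b"FT01"  # placeholder magic, not the real value
HEADER_FORMAT = "<4Q"

def parse_flat_tensor_header(data: bytes) -> dict:
    """Parse the four header fields from raw bytes."""
    if data[:4] != FLAT_TENSOR_MAGIC:
        raise ValueError("not a FlatTensor header")
    fields = struct.unpack_from(HEADER_FORMAT, data, 4)
    return dict(
        zip(
            (
                "flatbuffer_offset",
                "flatbuffer_size",
                "segment_base_offset",
                "segment_data_size",  # total size of all segments
            ),
            fields,
        )
    )
```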

File tree

102 files changed: +3844 additions, −1895 deletions


backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 19 additions & 7 deletions
```diff
@@ -32,10 +32,16 @@ def get_input_qparams(node: Node) -> dict[int, QuantArgs]:
     Raises a ValueError if the node doesn't have any parameters set.
     """
     if "input_qparams" not in node.meta.keys():
-        raise ValueError(f"No input quantization parameter found in node {node}")
+        raise ValueError(
+            f"No input quantization parameter found in node {node}\n"
+            f"original_aten={node.meta.get('original_aten', 'None')}"
+        )
     input_qparams = cast(dict[int, QuantArgs], node.meta["input_qparams"])
     if len(input_qparams) == 0:
-        raise ValueError(f"No input quantization parameter found in node {node}")
+        raise ValueError(
+            f"No input quantization parameter found in node {node}\n"
+            f"original_aten={node.meta.get('original_aten', 'None')}"
+        )
     return input_qparams
 
 
@@ -45,11 +51,17 @@ def get_output_qparams(node: Node) -> dict[int, QuantArgs]:
     Raises a ValueError if the node doesn't have any parameters set.
     """
     if "output_qparams" not in node.meta.keys():
-        raise ValueError(f"No output quantization parameter found in node {node}")
-    input_qparams = cast(dict[int, QuantArgs], node.meta["output_qparams"])
-    if len(input_qparams) == 0:
-        raise ValueError(f"No output quantization parameter found in node {node}")
-    return input_qparams
+        raise ValueError(
+            f"No output quantization parameter found in node {node}\n"
+            f"original_aten={node.meta.get('original_aten', 'None')}"
+        )
+    output_qparams = cast(dict[int, QuantArgs], node.meta["output_qparams"])
+    if len(output_qparams) == 0:
+        raise ValueError(
+            f"No output quantization parameter found in node {node}\n"
+            f"original_aten={node.meta.get('original_aten', 'None')}"
+        )
+    return output_qparams
 
 
 class FoldAndAnnotateQParamsPass(ExportPass):
```
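
A small self-contained sketch of the new error path, using a hypothetical stand-in for `torch.fx.Node` to show what the enriched message looks like:

```python
class FakeNode:
    """Hypothetical stand-in for torch.fx.Node; real nodes carry a .meta dict."""

    def __init__(self, name: str, meta: dict):
        self.name = name
        self.meta = meta

    def __repr__(self) -> str:
        return self.name

node = FakeNode("aten_add_tensor", {"original_aten": "aten.add.Tensor"})
try:
    if "input_qparams" not in node.meta:
        raise ValueError(
            f"No input quantization parameter found in node {node}\n"
            f"original_aten={node.meta.get('original_aten', 'None')}"
        )
except ValueError as e:
    # Prints both the node and the originating aten op, so it is clear
    # which decomposed node was never annotated:
    # No input quantization parameter found in node aten_add_tensor
    # original_aten=aten.add.Tensor
    print(e)
```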

backends/qualcomm/README.md

Lines changed: 1 addition & 2 deletions
```diff
@@ -20,6 +20,7 @@ Please check `generate_qnn_executorch_compiler_spec()` in
 - Snapdragon 8 Gen 1+
 - Snapdragon 8 Gen 2
 - Snapdragon 8 Gen 3
+- Snapdragon 8 Elite
 
 ### Adding more supported Chipset
 Currently, users cannot add additional chipset models because the chipset ID is not accessible to community users. If you have specific chipset models you wish to add, please contact one of the authors in the `Code Reviews` section at the bottom of this page.
@@ -120,11 +121,9 @@ PRs are always welcome to help improve the codebase in a comprehensive manner. B
 
 - **Code Reviews**:<br/>
   Please ping authors in Qualcomm AI Engine Direct related PRs for reviewing, possible candidates are listed below:
-  - [chiwwang](https://github.com/chiwwang)
   - [shewu-quic](https://github.com/shewu-quic)
   - [chunit-quic](https://github.com/chunit-quic)
   - [winskuo-quic](https://github.com/winskuo-quic)
-  - [chuntl](https://github.com/chuntl)
   - [haowhsu-quic](https://github.com/haowhsu-quic)
 
 Thanks again for your contribution!
```

backends/qualcomm/_passes/convert_to_linear.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -110,11 +110,11 @@ def _convert_to_linear(
         # Since QNN has no keep dims for linear op, we will need to add squeeze and unsqueeze around linear node
         # TODO: Find a more general conditional statement.
         linear_output = linear_node.meta["val"]
-        if linear_output.dim() == 3 and linear_output.shape[0] == 1:
+        if linear_output.dim() >= 3:
             with gm.graph.inserting_after(input_node):
                 input_users = list(input_node.users.keys())
                 input_tensor = input_node.meta["val"]
-                squeeze_dim = input_tensor.shape[-2:]
+                squeeze_dim = (-1, input_tensor.shape[-1])
                 squeeze_node = gm.graph.create_node(
                     "call_function",
                     self.view_copy,
@@ -149,7 +149,7 @@ def _convert_to_linear(
                 unsqueeze_node.meta[k] = v
             # update linear node's shape
             linear_node.meta["val"] = linear_output.reshape(
-                linear_output.shape[-2:]
+                (squeeze_node.meta["val"].shape[0], linear_output.shape[-1])
             )
             for user in output_users:
                 user.replace_input_with(linear_node, unsqueeze_node)
```
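
At tensor level, the generalized squeeze/unsqueeze is equivalent to the following sketch in plain PyTorch, assuming (as the comment in the pass suggests) that QNN's linear op only accepts 2-D activations:

```python
import torch

x = torch.randn(2, 8, 16)   # rank >= 3 input; batch no longer has to be 1
w = torch.randn(32, 16)

# squeeze: collapse all leading dims, mirroring squeeze_dim = (-1, last_dim)
squeezed = x.reshape(-1, x.shape[-1])            # shape (16, 16)
y_2d = torch.nn.functional.linear(squeezed, w)   # shape (16, 32)

# unsqueeze: restore the leading dims around the new feature dim
y = y_2d.reshape(*x.shape[:-1], w.shape[0])      # shape (2, 8, 32)
assert y.shape == (2, 8, 32)
```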

backends/qualcomm/_passes/layout_transform.py

Lines changed: 16 additions & 1 deletion
```diff
@@ -42,34 +42,49 @@ class LayoutTransform(ExportPass):
     }
 
     layout_agnostic_ops = {
+        exir_ops.edge.aten.abs.default,
         exir_ops.edge.aten.add.Tensor,
         exir_ops.edge.aten.bmm.default,
         exir_ops.edge.aten.cat.default,
         exir_ops.edge.aten.ceil.default,
         exir_ops.edge.aten.clamp.default,
         exir_ops.edge.aten.constant_pad_nd.default,
         exir_ops.edge.aten.div.Tensor,
+        exir_ops.edge.aten.eq.Scalar,
+        exir_ops.edge.aten.eq.Tensor,
         exir_ops.edge.aten.full.default,
+        exir_ops.edge.aten.ge.Scalar,
+        exir_ops.edge.aten.ge.Tensor,
         exir_ops.edge.aten.gelu.default,
+        exir_ops.edge.aten.gt.Scalar,
+        exir_ops.edge.aten.gt.Tensor,
         exir_ops.edge.aten.hardswish.default,
         exir_ops.edge.aten.hardsigmoid.default,
         exir_ops.edge.aten.hardtanh.default,
         exir_ops.edge.aten.leaky_relu.default,
+        exir_ops.edge.aten.le.Scalar,
+        exir_ops.edge.aten.le.Tensor,
         exir_ops.edge.aten.linear.default,
+        exir_ops.edge.aten.log.default,
+        exir_ops.edge.aten.lt.Scalar,
+        exir_ops.edge.aten.lt.Tensor,
         exir_ops.edge.aten._log_softmax.default,
+        exir_ops.edge.aten.maximum.default,
         exir_ops.edge.aten.mean.dim,
+        exir_ops.edge.aten.minimum.default,
         exir_ops.edge.aten.mul.Tensor,
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.prelu.default,
+        exir_ops.edge.aten.repeat.default,
         exir_ops.edge.aten.relu.default,
         exir_ops.edge.aten._softmax.default,  # TODO: Need to find a new solution to do "axis_order" to transform axis.
         exir_ops.edge.aten.sigmoid.default,
+        exir_ops.edge.aten.split_with_sizes.default,
         exir_ops.edge.aten.sqrt.default,
         exir_ops.edge.aten.sub.Tensor,
         exir_ops.edge.aten.sum.dim_IntList,
         exir_ops.edge.aten.topk.default,
         exir_ops.edge.aten._to_copy.default,
-        exir_ops.edge.aten.split_with_sizes.default,
         *q_ops,
         *dq_ops,
         _operator.getitem,
```
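
The ops added here (abs, log, elementwise comparisons, minimum/maximum, etc.) are layout agnostic in the sense that they commute with a permutation of axes, so the pass never needs to wrap them in layout conversions. A quick sketch of the property in plain PyTorch:

```python
import torch

x = torch.randn(1, 3, 8, 8)
perm = (0, 2, 3, 1)  # e.g. NCHW -> NHWC

# Elementwise ops give the same values whether applied before or after the
# axis permutation, so no transpose is needed around them.
assert torch.equal(torch.abs(x).permute(perm), torch.abs(x.permute(perm)))
assert torch.equal((x > 0).permute(perm), x.permute(perm) > 0)
```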

backends/qualcomm/_passes/recompose_pixel_unshuffle.py

Lines changed: 2 additions & 8 deletions
```diff
@@ -21,9 +21,8 @@ def __init__(self, quantization_capture=False):
         self.view_target = exir_ops.edge.aten.view_copy.default
         self.op = exir_ops.edge.aten.pixel_unshuffle.default
 
-        self.quantization_capture = quantization_capture
         if quantization_capture:
-            self.reshape_target = torch.ops.aten._unsafe_view.default
+            self.reshape_target = torch.ops.aten.reshape.default
             self.permute_target = torch.ops.aten.permute.default
             self.view_target = torch.ops.aten.view.default
             self.op = torch.ops.aten.pixel_unshuffle.default
@@ -35,12 +34,7 @@ def call(self, graph_module: torch.fx.GraphModule):
             if node.op == "call_function" and node.target == self.reshape_target:
                 with graph.inserting_after(node):
 
-                    # Clone op still exists between permute and reshape_target during quantization,
-                    # so we need to check for args[0].args[0] to get permute node
-                    if self.quantization_capture:
-                        premute_node = node.args[0].args[0]
-                    else:
-                        premute_node = node.args[0]
+                    premute_node = node.args[0]
                     if any(
                         [
                             len(node.args[1]) != 4,
```
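
For reference, the view/permute/reshape pattern this pass matches corresponds to the standard pixel_unshuffle decomposition; a sketch for downscale factor 2:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 4, 6, 6)
r = 2
n, c, h, w = x.shape

# view -> permute -> reshape: the sequence the pass recomposes into one op
recomposed_equivalent = (
    x.view(n, c, h // r, r, w // r, r)
    .permute(0, 1, 3, 5, 2, 4)
    .reshape(n, c * r * r, h // r, w // r)
)
assert torch.equal(recomposed_equivalent, F.pixel_unshuffle(x, r))
```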

backends/qualcomm/_passes/remove_redundancy.py

Lines changed: 26 additions & 20 deletions
```diff
@@ -14,31 +14,37 @@ class RemoveRedundancy(ExportPass):
     Trim certain operators to reduce unnecessary overhead.
     """
 
-    redundant_ops = {
-        torch.clone,
-        torch.ops.aten.clone.default,
-        exir_ops.edge.aten.clone.default,
-        torch.ops.aten.alias.default,
-        exir_ops.edge.aten.alias.default,
-        exir_ops.edge.aten.lift_fresh_copy.default,
-        # remove this target if '_skip_dim_order' is set to False
-        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
-        # remove channel_last / contiguous _to_copy if '_skip_dim_order' is set to True
-        exir_ops.edge.aten._to_copy.default,
-    }
-
     def __init__(self):
         super(RemoveRedundancy, self).__init__()
+        self.redundant_ops = {
+            torch.clone: self._default_condition,
+            torch.ops.aten.clone.default: self._default_condition,
+            exir_ops.edge.aten.clone.default: self._default_condition,
+            torch.ops.aten.alias.default: self._default_condition,
+            exir_ops.edge.aten.alias.default: self._default_condition,
+            exir_ops.edge.aten.lift_fresh_copy.default: self._default_condition,
+            # remove this target if '_skip_dim_order' is set to False
+            exir_ops.edge.dim_order_ops._to_dim_order_copy.default: self._dim_order_op_condition,
+            # remove channel_last / contiguous _to_copy if '_skip_dim_order' is set to True
+            exir_ops.edge.aten._to_copy.default: self._to_copy_op_condition,
+        }
+
+    def _dim_order_op_condition(self, node):
+        dim_order = node.kwargs.get("dim_order")
+        # skip if there contains layout hint
+        # e.g. (0, 2, 3, 1) != (0, 1, 2, 3)
+        return dim_order != list(range(len(dim_order)))
+
+    def _to_copy_op_condition(self, node):
+        return "memory_format" in node.kwargs
+
+    def _default_condition(self, ndoe):
+        return True
 
     def _remove(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
         for n in graph_module.graph.nodes:
-            if n.target not in self.redundant_ops:
-                continue
-
-            # do not remove cast operator
-            if (
-                n.target == exir_ops.edge.aten._to_copy.default
-                and "memory_format" not in n.kwargs
+            if n.target not in self.redundant_ops or not self.redundant_ops[n.target](
+                n
             ):
                 continue
```

backends/qualcomm/builders/__init__.py

Lines changed: 24 additions & 0 deletions
```diff
@@ -6,7 +6,9 @@
 
 from . import (
     node_visitor,
+    op_abs,
     op_add,
+    op_arange,
     op_avg_pool2d,
     op_batch_norm,
     op_bmm,
@@ -19,26 +21,36 @@
     op_dequantize,
     op_div,
     op_embedding,
+    op_eq,
     op_expand,
+    op_full_like,
+    op_ge,
     op_gelu,
     op_group_norm,
+    op_gt,
     op_hardsigmoid,
     op_hardswish,
     op_hardtanh,
     op_index,
     op_index_put,
     op_layer_norm,
+    op_le,
     op_linear,
+    op_log,
     op_log_softmax,
+    op_lt,
     op_matmul,
+    op_max,
     op_max_pool2d,
     op_mean_dim,
+    op_min,
     op_mul,
     op_pad,
     op_pow,
     op_prelu,
     op_quantize,
     op_relu,
+    op_repeat,
     op_reshape,
     op_rms_norm,
     op_rsqrt,
@@ -65,7 +77,9 @@
 
 __all__ = [
     node_visitor,
+    op_abs,
     op_add,
+    op_arange,
     op_avg_pool2d,
     op_batch_norm,
     op_bmm,
@@ -78,26 +92,36 @@
     op_dequantize,
     op_div,
     op_embedding,
+    op_eq,
     op_expand,
+    op_full_like,
+    op_ge,
     op_gelu,
     op_group_norm,
+    op_gt,
     op_hardswish,
     op_hardtanh,
     op_hardsigmoid,
     op_index,
     op_index_put,
     op_layer_norm,
+    op_le,
     op_linear,
+    op_log,
     op_log_softmax,
+    op_lt,
     op_matmul,
+    op_max,
     op_max_pool2d,
     op_mean_dim,
+    op_min,
     op_mul,
     op_pad,
     op_pow,
     op_prelu,
     op_quantize,
     op_relu,
+    op_repeat,
     op_reshape,
     op_rms_norm,
     op_rsqrt,
```

backends/qualcomm/builders/op_abs.py

Lines changed: 56 additions & 0 deletions
```diff
@@ -0,0 +1,56 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpElementWiseAbs, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Abs(NodeVisitor):
+    target = ["aten.abs.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        out_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            node,
+            out_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+        )
+        abs_output_tensors = [output_tensor_wrapper]
+
+        input_node = node.args[0]
+        input_tensor_wrapper = self.define_tensor(
+            input_node,
+            node,
+            self.get_tensor(input_node, node),
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+        )
+        abs_input_tensors = [input_tensor_wrapper]
+
+        abs_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpElementWiseAbs.op_name,
+        )
+        abs_op.AddInputTensors(abs_input_tensors)
+        abs_op.AddOutputTensors(abs_output_tensors)
+
+        return abs_op
```
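
The `@register_node_visitor` decorator adds the class to a target-keyed registry that the lowering code can look up per node. A hypothetical minimal version of that mechanism (the real one lives in node_visitor.py and may differ):

```python
# Minimal sketch of the registry pattern; names and structure are assumptions.
_registry: dict = {}

def register_node_visitor(cls):
    # index the visitor class under every aten target it declares
    for target in cls.target:
        _registry[target] = cls
    return cls

@register_node_visitor
class AbsVisitor:
    target = ["aten.abs.default"]

assert _registry["aten.abs.default"] is AbsVisitor
```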
