Skip to content

Commit 61ad48d

Browse files
winskuo-quic authored and facebook-github-bot committed
Qualcomm AI Engine Direct - Requantization Mechanism Implementation (#2823)
Summary: - Implement requantization so mix quantization ( e.g., 8bit quant + 16 bit quant) can be properly delegated to QNN. - Reusing test_qnn_backend_view_permute_matmul unit test to check mix quantization is working as expected. - Added etdump logic back to qnn_executor_runner that was deleted unintentionally during this PR: a531ca5#diff-f3647de74042ac9a417e2d4000a6f2db00c22c89fd028e9433d3c79ffb7d56f6 - Refactor common arguments in VIT. Pull Request resolved: #2823 Reviewed By: kirklandsign Differential Revision: D55705289 Pulled By: cccclai fbshipit-source-id: c01744ce05f0ab27cf25fc28856ce150462470d5
1 parent 86b326a commit 61ad48d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+263
-176
lines changed

backends/qualcomm/builders/node_visitor.py

Lines changed: 22 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -150,19 +150,19 @@ def make_qnn_per_tensor_config(self, quant_attrs: Dict):
150150
quant_config,
151151
)
152152

153-
def get_quant_encoding_conf(self, node: torch.fx.Node) -> Tuple[Any, Dict]:
153+
def get_quant_encoding_conf(
154+
self, node: torch.fx.Node, is_input_tensor: bool = False
155+
) -> Tuple[Any, Dict]:
154156
if not node.meta.get("quant_attrs", None):
155157
return (
156158
PyQnnWrapper.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_UNDEFINED,
157159
{},
158160
)
159-
160161
quant_attrs = (
161-
node.meta["requantize"]["dq_attrs"]
162-
if "requantize" in node.meta
162+
node.meta["requantize"]
163+
if "requantize" in node.meta and is_input_tensor
163164
else node.meta["quant_attrs"]
164165
)
165-
166166
if quant_attrs["encoding"] in PER_CHANNEL_ENCODING:
167167
return self.make_qnn_per_channel_config(node, quant_attrs)
168168

@@ -275,15 +275,27 @@ def define_custom_tensor_wrapper(
275275
nodes_to_wrappers[node_name] = tensor_wrapper
276276
return tensor_wrapper
277277

278-
def define_value(
278+
def define_tensor(
279279
self,
280280
node: torch.fx.Node,
281281
tensor: torch.Tensor,
282282
tensor_type: PyQnnWrapper.Qnn_TensorType_t,
283283
nodes_to_wrappers: Dict[str, PyQnnWrapper.TensorWrapper],
284-
is_tensor: bool,
284+
is_input_tensor: bool,
285285
node_name: str = None,
286+
is_tensor: bool = True,
286287
) -> PyQnnWrapper.TensorWrapper:
288+
"""
289+
Covert torch.Tensor to TensorWrapper
290+
291+
Args:
292+
node: EdgeIR Node
293+
tensor: EdgeIR Tensor
294+
tensor_type: QNN tensor type
295+
nodes_to_wrappers: Set contains edge_graph values(node targets)
296+
is_input_tensor: Whether tensor is a fake input tensor relatively to
297+
the op builder that is calling this function
298+
"""
287299
if node_name is None:
288300
node_name = node.name
289301

@@ -294,7 +306,9 @@ def define_value(
294306
tensor_name = "output_" + tensor_name
295307
dims = [1] if len(tensor.size()) == 0 else tensor.size()
296308
tensor_type = self.get_tensor_type(node, tensor_type)
297-
quant_encoding, quant_configs = self.get_quant_encoding_conf(node)
309+
quant_encoding, quant_configs = self.get_quant_encoding_conf(
310+
node, is_input_tensor
311+
)
298312
dtype = self.get_data_type(tensor, quant_configs, is_tensor)
299313
if isinstance(tensor, torch._subclasses.fake_tensor.FakeTensor):
300314
tensor_wrapper = PyQnnWrapper.TensorWrapper(
@@ -330,52 +344,6 @@ def define_value(
330344
nodes_to_wrappers[node_name] = tensor_wrapper
331345
return tensor_wrapper
332346

333-
def define_scalar(
334-
self,
335-
node: torch.fx.Node,
336-
tensor: torch.Tensor,
337-
tensor_type: PyQnnWrapper.Qnn_TensorType_t,
338-
nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
339-
) -> PyQnnWrapper.TensorWrapper:
340-
"""
341-
Covert constant scalar to TensorWrapper
342-
343-
Args:
344-
tensor: EdgeIR Tensor
345-
nodes_to_wrappers: Set contains edge_graph values(node targets)
346-
"""
347-
return self.define_value(
348-
node,
349-
tensor,
350-
tensor_type,
351-
nodes_to_wrappers,
352-
is_tensor=False,
353-
)
354-
355-
def define_tensor(
356-
self,
357-
node: torch.fx.Node,
358-
tensor: torch.Tensor,
359-
tensor_type: PyQnnWrapper.Qnn_TensorType_t,
360-
nodes_to_wrappers: Dict[str, PyQnnWrapper.TensorWrapper],
361-
node_name: str = None,
362-
) -> PyQnnWrapper.TensorWrapper:
363-
"""
364-
Covert torch.Tensor to TensorWrapper
365-
366-
Args:
367-
tensor: EdgeIR Tensor
368-
nodes_to_wrappers: Set contains edge_graph values(node targets)
369-
"""
370-
return self.define_value(
371-
node,
372-
tensor,
373-
tensor_type,
374-
nodes_to_wrappers,
375-
is_tensor=True,
376-
node_name=node_name,
377-
)
378-
379347
def define_node(
380348
self,
381349
node: torch.fx.Node,

backends/qualcomm/builders/op_add.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def define_node(
3131
out_tensor,
3232
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3333
nodes_to_wrappers,
34+
is_input_tensor=False,
3435
)
3536
add_output_tensors = [output_tensor_wrapper]
3637

@@ -45,6 +46,7 @@ def define_node(
4546
input_tensor,
4647
tensor_type,
4748
nodes_to_wrappers,
49+
is_input_tensor=True,
4850
)
4951
add_input_tensors.append(input_tensor_wrapper)
5052

backends/qualcomm/builders/op_avg_pool2d.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def define_node(
3333
input_tensor,
3434
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3535
nodes_to_wrappers,
36+
is_input_tensor=True,
3637
)
3738

3839
output_tensor = self.get_tensor(node, node)
@@ -41,6 +42,7 @@ def define_node(
4142
output_tensor,
4243
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
4344
nodes_to_wrappers,
45+
is_input_tensor=False,
4446
)
4547
# kernel info
4648
filter_size = cast(List[int], node.args[1])

backends/qualcomm/builders/op_batch_norm.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def define_node(
3838
input_tensor,
3939
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
4040
nodes_to_wrappers,
41+
is_input_tensor=True,
4142
)
4243

4344
bias_node = node.args[2]
@@ -52,6 +53,7 @@ def define_node(
5253
bias_tensor,
5354
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
5455
nodes_to_wrappers,
56+
is_input_tensor=False,
5557
)
5658

5759
filter_tensor = filter_tensor / torch.sqrt(var_tensor + eps)
@@ -60,6 +62,7 @@ def define_node(
6062
filter_tensor,
6163
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
6264
nodes_to_wrappers,
65+
is_input_tensor=False,
6366
)
6467

6568
batch_norm_input_tensors = [
@@ -74,6 +77,7 @@ def define_node(
7477
output_tensor,
7578
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
7679
nodes_to_wrappers,
80+
is_input_tensor=False,
7781
)
7882
batch_norm_output_tensors = [output_tensor_wrapper]
7983

backends/qualcomm/builders/op_bmm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def define_node(
3535
input_tensor,
3636
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3737
nodes_to_wrappers,
38+
is_input_tensor=True,
3839
)
3940
bmm_input_tensors.append(input_tensor_wrapper)
4041

@@ -44,6 +45,7 @@ def define_node(
4445
output_tensor,
4546
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
4647
nodes_to_wrappers,
48+
is_input_tensor=False,
4749
)
4850
bmm_output_tensors = [output_tensor_wrapper]
4951

backends/qualcomm/builders/op_cast.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def define_node(
3333
input_tensor,
3434
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3535
nodes_to_wrappers,
36+
is_input_tensor=True,
3637
)
3738

3839
output_tensor = self.get_tensor(node, node)
@@ -42,6 +43,7 @@ def define_node(
4243
output_tensor,
4344
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
4445
nodes_to_wrappers,
46+
is_input_tensor=False,
4547
)
4648

4749
cast_op = PyQnnWrapper.PyQnnOpWrapper(

backends/qualcomm/builders/op_cat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def define_node(
3737
input_tensor,
3838
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3939
nodes_to_wrappers,
40+
is_input_tensor=True,
4041
)
4142
)
4243

@@ -52,6 +53,7 @@ def define_node(
5253
output_tensor,
5354
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
5455
nodes_to_wrappers,
56+
is_input_tensor=False,
5557
)
5658

5759
# node args[1] might not exist

backends/qualcomm/builders/op_ceil.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def define_node(
3232
input_tensor,
3333
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3434
nodes_to_wrappers,
35+
is_input_tensor=True,
3536
)
3637

3738
output_tensor = self.get_tensor(node, node)
@@ -40,6 +41,7 @@ def define_node(
4041
output_tensor,
4142
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
4243
nodes_to_wrappers,
44+
is_input_tensor=False,
4345
)
4446

4547
ceil_op = PyQnnWrapper.PyQnnOpWrapper(

backends/qualcomm/builders/op_clamp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def define_node(
3333
input_tensor,
3434
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3535
nodes_to_wrappers,
36+
is_input_tensor=True,
3637
)
3738

3839
# default value of output_min and output_max
@@ -53,6 +54,7 @@ def define_node(
5354
output_tensor,
5455
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
5556
nodes_to_wrappers,
57+
is_input_tensor=False,
5658
)
5759

5860
clamp_op = PyQnnWrapper.PyQnnOpWrapper(

backends/qualcomm/builders/op_conv2d.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def _define_conv1d(
9696
op_wrapper_list = [] # op_wrapper to return
9797
unsqueeze_input_node = node.args[0]
9898
input_quant_encoding, input_quant_configs = self.get_quant_encoding_conf(
99-
unsqueeze_input_node
99+
unsqueeze_input_node,
100100
)
101101

102102
unsqueeze_input_tensor = self.get_tensor(unsqueeze_input_node, node)
@@ -105,6 +105,7 @@ def _define_conv1d(
105105
unsqueeze_input_tensor,
106106
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
107107
nodes_to_wrappers,
108+
is_input_tensor=True,
108109
)
109110
unsqueeze_output_tensor = unsqueeze_input_tensor.unsqueeze(1).contiguous()
110111
dtype = self.get_data_type(unsqueeze_output_tensor, input_quant_configs, True)
@@ -144,6 +145,7 @@ def _define_conv1d(
144145
filter_tensor,
145146
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
146147
nodes_to_wrappers,
148+
is_input_tensor=False,
147149
)
148150
conv_input_tensors = [unsqueeze_output_tensor_wrapper, filter_tensor_wrapper]
149151
if node.args[2] is not None:
@@ -154,6 +156,7 @@ def _define_conv1d(
154156
bias_tensor,
155157
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
156158
nodes_to_wrappers,
159+
is_input_tensor=False,
157160
)
158161
conv_input_tensors.append(bias_tensor_wrapper)
159162

@@ -221,7 +224,8 @@ def _define_conv1d(
221224
squeeze_output_tensor,
222225
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
223226
nodes_to_wrappers,
224-
node.name,
227+
is_input_tensor=False,
228+
node_name=node.name,
225229
)
226230
squeeze_op.AddInputTensors([conv_output_tensor_wrapper])
227231
squeeze_op.AddOutputTensors([squeeze_output_tensor_wrapper])
@@ -244,6 +248,7 @@ def define_node(
244248
input_tensor,
245249
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
246250
nodes_to_wrappers,
251+
is_input_tensor=True,
247252
)
248253

249254
filter_node = node.args[1]
@@ -256,6 +261,7 @@ def define_node(
256261
filter_tensor,
257262
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
258263
nodes_to_wrappers,
264+
is_input_tensor=False,
259265
)
260266
conv_input_tensors = [input_tensor_wrapper, filter_tensor_wrapper]
261267

@@ -267,6 +273,7 @@ def define_node(
267273
bias_tensor,
268274
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
269275
nodes_to_wrappers,
276+
is_input_tensor=False,
270277
)
271278
conv_input_tensors.append(bias_tensor_wrapper)
272279

@@ -276,6 +283,7 @@ def define_node(
276283
output_tensor,
277284
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
278285
nodes_to_wrappers,
286+
is_input_tensor=False,
279287
)
280288
conv_output_tensors = [output_tensor_wrapper]
281289

backends/qualcomm/builders/op_depth_to_space.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def define_node(
3434
input_tensor,
3535
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3636
nodes_to_wrappers,
37+
is_input_tensor=True,
3738
)
3839

3940
output_tensor = self.get_tensor(node, node)
@@ -42,6 +43,7 @@ def define_node(
4243
output_tensor,
4344
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
4445
nodes_to_wrappers,
46+
is_input_tensor=False,
4547
)
4648

4749
block_size = []

backends/qualcomm/builders/op_dequantize.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def define_node(
3030
input_tensor,
3131
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3232
nodes_to_wrappers,
33+
is_input_tensor=True,
3334
)
3435
dequant_input_tensors.append(inp_tensor_wrapper)
3536

@@ -39,6 +40,7 @@ def define_node(
3940
output_tensor,
4041
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
4142
nodes_to_wrappers,
43+
is_input_tensor=False,
4244
)
4345
dequant_output_tensors = [output_tensor_wrapper]
4446

backends/qualcomm/builders/op_div.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def define_node(
3131
out_tensor,
3232
PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
3333
nodes_to_wrappers,
34+
is_input_tensor=False,
3435
)
3536
div_output_tensors = [output_tensor_wrapper]
3637

@@ -45,6 +46,7 @@ def define_node(
4546
input_tensor,
4647
tensor_type,
4748
nodes_to_wrappers,
49+
is_input_tensor=True,
4850
)
4951
div_input_tensors.append(input_tensor_wrapper)
5052

0 commit comments

Comments
 (0)