Skip to content

Commit 6d0f947

Browse files
committed
Pull request pytorch#5: Initial implementation of QDQ Cluster Recogniser
Merge in AITEC/executorch from pf-qdq to main-nxp * commit '66ffa0e48b8d5d45ed98e8a53c671be1b3210958': Conv2d test for the QDQ clustering mechanism [EIEX-64] Integrate QDQ clusters with the partitioner [EIEX-43] Implementation of QDQ cluster recogniser
2 parents 098a47b + 66ffa0e commit 6d0f947

File tree

2 files changed

+138
-6
lines changed

2 files changed

+138
-6
lines changed

backends/nxp/neutron_partitioner.py

Lines changed: 88 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# Partitioner for the NXP Neutron NPU
77

88
import logging
9+
import operator
910
from typing import final, List
1011

1112
import torch
@@ -43,28 +44,109 @@
4344
# exir_ops.edge.aten.sub.Scalar,
4445
# exir_ops.edge.aten.tanh.default,
4546
# operator.getitem,
46-
47-
# QDQ ops
48-
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
49-
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
5047
]
5148

5249
class NeutronSupportedOperators(OperatorSupportBase):
    """Operator-support predicate handed to the capability-based partitioner."""

    def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
        """
        Decide whether `node` may be delegated to Neutron.

        A node qualifies when its `meta` carries a "cluster" entry (it was tagged
        as a member of a QDQ cluster), or when it is a `call_function` node whose
        target is listed in `NeutronSupportedOperatorsList`.
        """
        # Cluster membership alone is sufficient, regardless of the node's op.
        if "cluster" in node.meta:
            return True
        return node.op == "call_function" and node.target in NeutronSupportedOperatorsList
5658

5759
@final
5860
class NeutronPartitioner(Partitioner):
5961
def __init__(self, compile_spec: List[CompileSpec]) -> None:
    """Build the delegation spec that pairs the Neutron backend name with `compile_spec`."""
    backend_id = NeutronBackend.__name__
    self.delegation_spec = DelegationSpec(backend_id, compile_spec)
6163

64+
def is_quant_node(self, node: torch.fx.node.Node):
    """Return True when `node.target` is one of the decomposed quantize operators."""
    qd_ops = exir_ops.edge.quantized_decomposed
    quantize_targets = {
        qd_ops.quantize_per_channel.default,
        qd_ops.quantize_per_tensor.default,
        qd_ops.quantize_per_tensor.tensor,
    }
    return node.target in quantize_targets
70+
71+
def is_dequant_node(self, node: torch.fx.node.Node):
    """Return True when `node.target` is one of the decomposed dequantize operators."""
    qd_ops = exir_ops.edge.quantized_decomposed
    dequantize_targets = {
        qd_ops.dequantize_per_channel.default,
        qd_ops.dequantize_per_tensor.default,
        qd_ops.dequantize_per_tensor.tensor,
    }
    return node.target in dequantize_targets
77+
78+
def tag_clusters(self, nodes):
    """
    Tag QDQ clusters in `nodes`.

    A cluster is formed around every `call_function` node that has at least one
    dequantize producer among its positional arguments AND at least one quantize
    consumer among its users (directly, or one hop behind an `operator.getitem`).
    The node, those dequantize producers and those quantize consumers all receive
    `meta["cluster"] = "<node.name>_cluster"`, which later stages can use to keep
    the group together when partitioning.
    """

    def dequant_producers(consumer):
        """
        List the dequantize nodes found in `consumer.args`.

        Only positional arguments are inspected. One dequantize producer is
        enough to make the result non-empty (and therefore truthy), even if the
        node's remaining inputs do not come from a dequantize op. This leniency
        is deliberate: it accommodates how NeutronQuantizer treats the bias
        tensor (EIEX-66).
        """
        found = []
        for arg in consumer.args:
            if isinstance(arg, torch.fx.node.Node) and self.is_dequant_node(arg):
                found.append(arg)
        return found

    def quant_consumers(producer):
        """
        List the quantize nodes that consume `producer`.

        A user that is an `operator.getitem` call is looked through: its own
        users are scanned for quantize nodes, while the getitem node itself is
        never part of the result. Any other user is included when it is a
        quantize node.
        """
        found = []
        for consumer in producer.users:
            is_getitem = (
                consumer.op == "call_function" and consumer.target == operator.getitem
            )
            if is_getitem:
                found.extend(
                    indirect for indirect in consumer.users if self.is_quant_node(indirect)
                )
                continue
            if self.is_quant_node(consumer):
                found.append(consumer)
        return found

    def mark_cluster(center, label, producers, consumers):
        """Write `label` into the "cluster" meta entry of the node and its QDQ neighbours."""
        logging.info(f"Tagging node {center} as {cluster_name}")
        # Same order as always: the compute node, then its dequants, then its quants.
        for member in (center, *producers, *consumers):
            member.meta["cluster"] = label

    for node in nodes:
        if node.op != "call_function":
            continue
        dequant_inputs = dequant_producers(node)
        quant_outputs = quant_consumers(node)
        if not (dequant_inputs and quant_outputs):
            continue
        cluster_name = f"{node.name}_cluster"
        mark_cluster(node, cluster_name, dequant_inputs, quant_outputs)
136+
62137
def partition(self, exported_program: ExportedProgram) -> PartitionResult:
63138
# Run the CapabilityBasedPartitioner to return the largest possible
64139
# subgraphs containing the nodes with the tags
65140
logging.info("NeutronPartitioner::partition")
66141
partition_tags = {}
67142

143+
graph_module = exported_program.graph_module
144+
nodes = list(graph_module.graph.nodes)
145+
146+
self.tag_clusters(nodes)
147+
148+
graph_module.recompile()
149+
68150
capability_partitioner = CapabilityBasedPartitioner(
69151
exported_program.graph_module,
70152
NeutronSupportedOperators(),
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import torch
2+
3+
from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
4+
from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec
5+
from executorch.examples.nxp.aot_neutron_compile import post_training_quantize
6+
from executorch.examples.portable import export_to_edge
7+
8+
9+
class Conv2dNoBiasModule(torch.nn.Module):
    """Minimal model: a single bias-free 3x3 convolution (4 -> 8 channels, stride 2)."""

    def __init__(self):
        super().__init__()

        conv_config = dict(
            in_channels=4,
            out_channels=8,
            kernel_size=3,
            stride=2,
            dilation=1,
            bias=False,
        )
        self.conv = torch.nn.Conv2d(**conv_config)

    def forward(self, x):
        """Apply the convolution to `x` and return the result."""
        result = self.conv(x)
        return result
19+
20+
21+
def test_conv2d_partitioner():
    """
    Quantize and partition a bias-free Conv2d model, then check the QDQ cluster tags.

    The delegated subgraph is expected to hold 7 nodes. The convolution, both of
    its dequantize inputs and its quantize output must share one cluster tag,
    while the quantize node feeding the input dequantize must carry none.
    """
    input_shape = (1, 4, 32, 32)
    expected_cluster = "aten_convolution_default_cluster"

    # Calibration data is drawn before the model is built, keeping RNG use in order.
    calibration_inputs = [(torch.randn(input_shape),) for _ in range(2)]
    model = Conv2dNoBiasModule()
    example_input = (torch.ones(*input_shape),)

    exir_program_aten = torch._export.capture_pre_autograd_graph(model, example_input)
    exir_program_aten_quant = post_training_quantize(exir_program_aten, calibration_inputs)
    edge_program_manager = export_to_edge(exir_program_aten_quant, example_input)

    partitioner = NeutronPartitioner(generate_neutron_compile_spec("rt700"))
    edge_program = edge_program_manager.to_backend(partitioner)

    # Get subgraph (module) that is delegated to neutron
    delegated_graph_module = edge_program.exported_program().graph_module
    lowered_module = delegated_graph_module.lowered_module_0
    nodes = list(lowered_module.original_module.graph.nodes)

    assert len(nodes) == 7

    # The first and last nodes are not inspected by this test.
    _, q_x_node, dq_w_node, dq_x_node, conv_node, q_y_node, _ = nodes

    assert "cluster" not in q_x_node.meta
    for clustered_node in (dq_w_node, dq_x_node, conv_node, q_y_node):
        assert clustered_node.meta["cluster"] == expected_cluster
50+

0 commit comments

Comments
 (0)