Pull request pytorch#5: Initial implementation of QDQ Cluster Recogniser

Slattz · Slattz · commit 6d0f947cd7ed · 2024-11-08T17:48:26.000+01:00
Merge in AITEC/executorch from pf-qdq to main-nxp

* commit '66ffa0e48b8d5d45ed98e8a53c671be1b3210958':
  Conv2d test for the QDQ clustering mechanism
  [EIEX-64] Integrate QDQ clusters with the partitioner
  [EIEX-43] Implementation of QDQ cluster recogniser
diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py
@@ -6,6 +6,7 @@
 # Partitioner for the NXP Neutron NPU
 
 import logging
+import operator
 from typing import final, List
 
 import torch
@@ -43,28 +44,109 @@
     # exir_ops.edge.aten.sub.Scalar,
     # exir_ops.edge.aten.tanh.default,
     # operator.getitem,
-
-    # QDQ ops
-    exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
-    exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
 ]
 
 class NeutronSupportedOperators(OperatorSupportBase):
     def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
-        # check if the PyTorch op get called is supported for Neutron
-        return node.op == "call_function" and node.target in NeutronSupportedOperatorsList
+        """
+        Check if the PyTorch op that gets called is supported for Neutron
+        or if it is part of a QDQ cluster.
+        """
+        return (
+            node.op == "call_function" and node.target in NeutronSupportedOperatorsList
+        ) or "cluster" in node.meta
 
 @final
 class NeutronPartitioner(Partitioner):
     def __init__(self, compile_spec: List[CompileSpec]) -> None:
         self.delegation_spec = DelegationSpec(NeutronBackend.__name__, compile_spec)
 
+    def is_quant_node(self, node: torch.fx.node.Node):
+        return node.target in {
+            exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
+            exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
+        }
+    
+    def is_dequant_node(self, node: torch.fx.node.Node):
+        return node.target in {
+            exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
+            exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
+        }
+
+    def tag_clusters(self, nodes):
+        """
+        Identifies QDQ clusters: a `call_function` node together with the dequantise nodes
+        feeding its arguments and the quantise nodes consuming its outputs. Every node of a
+        cluster is tagged with the cluster name in `node.meta["cluster"]`.
+
+        A cluster is only created when the node has at least one dequantise input and at
+        least one quantise output. The tag is later used by the partitioner to treat the
+        nodes of a cluster as supported, which is useful for execution on specialised hardware.
+        """
+        def get_dequant_inputs(node):
+            """
+            Return all the dequant nodes which produce positional inputs (`node.args`) of the node.
+            Note that if the operator has 3 inputs and only one comes from a dequant node, the
+            returned list is still non-empty, so the `dequant_inputs` check in the caller passes.
+
+            This is done to handle the unexpected behavior of the NeutronQuantizer with the bias tensor (EIEX-66).
+            """
+            return [
+                input_node for input_node in node.args
+                if isinstance(input_node, torch.fx.node.Node) and self.is_dequant_node(input_node)
+            ]
+
+        def get_quant_outputs(node):
+            """
+            Retrieve the quantise nodes that consume the outputs of a given node.
+
+            This function examines the users of the provided node to identify
+            quantise nodes. If a user is a call to the `operator.getitem` function,
+            the users of that `getitem` node are inspected instead, so quantise nodes
+            placed behind a `getitem` are found as well.
+            """
+            quant_outputs = []
+            for user in node.users:
+                if user.op == "call_function" and user.target == operator.getitem:
+                    for grandchild in user.users:
+                        if self.is_quant_node(grandchild):
+                            quant_outputs.append(grandchild)
+                elif self.is_quant_node(user):
+                    quant_outputs.append(user)
+            return quant_outputs
+
+        def tag_node_and_related(node, cluster_name, dequant_inputs, quant_outputs):
+            # Tags a node and its related dequant and quant nodes with a specified cluster name
+            logging.info(f"Tagging node {node} as {cluster_name}")
+            node.meta["cluster"] = cluster_name
+            for dequant_node in dequant_inputs:
+                dequant_node.meta["cluster"] = cluster_name
+            for quant_node in quant_outputs:
+                quant_node.meta["cluster"] = cluster_name
+
+        for node in nodes:
+            if node.op == "call_function":
+                dequant_inputs = get_dequant_inputs(node)
+                quant_outputs = get_quant_outputs(node)
+                if dequant_inputs and quant_outputs:
+                    cluster_name = f"{node.name}_cluster"
+                    tag_node_and_related(node, cluster_name, dequant_inputs, quant_outputs)
+
     def partition(self, exported_program: ExportedProgram) -> PartitionResult:
         # Run the CapabilityBasedPartitioner to return the largest possible
         # subgraphs containing the nodes with the tags
         logging.info("NeutronPartitioner::partition")
         partition_tags = {}
 
+        graph_module = exported_program.graph_module
+        nodes = list(graph_module.graph.nodes)
+
+        self.tag_clusters(nodes)
+        
+        graph_module.recompile()
+
         capability_partitioner = CapabilityBasedPartitioner(
             exported_program.graph_module,
             NeutronSupportedOperators(),
diff --git a/backends/nxp/tests/test_qdq_clustering_conv.py b/backends/nxp/tests/test_qdq_clustering_conv.py
@@ -0,0 +1,50 @@
+import torch
+
+from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
+from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec
+from executorch.examples.nxp.aot_neutron_compile import post_training_quantize
+from executorch.examples.portable import export_to_edge
+
+
+class Conv2dNoBiasModule(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        self.conv = torch.nn.Conv2d(
+            in_channels=4, out_channels=8, kernel_size=3, bias=False, stride=2, dilation=1
+        )
+
+    def forward(self, x):
+        return self.conv(x)
+
+
+def test_conv2d_partitioner():
+    calibration_inputs = [(torch.randn((1, 4, 32, 32)),), (torch.randn((1, 4, 32, 32)),)]
+    model = Conv2dNoBiasModule()
+    example_input = (torch.ones(1, 4, 32, 32),)
+
+    exir_program_aten = torch._export.capture_pre_autograd_graph(model, example_input)
+    exir_program_aten_quant = post_training_quantize(exir_program_aten, calibration_inputs)
+    edge_program_manager = export_to_edge(exir_program_aten_quant, example_input)
+
+    partitioner = NeutronPartitioner(generate_neutron_compile_spec("rt700"))
+    edge_program = edge_program_manager.to_backend(partitioner)
+
+    # Get subgraph (module) that is delegated to neutron
+    lowered_module = edge_program.exported_program().graph_module.lowered_module_0
+    nodes = list(lowered_module.original_module.graph.nodes)
+
+    assert len(nodes) == 7
+
+    q_x_node = nodes[1]
+    dq_w_node = nodes[2]
+    dq_x_node = nodes[3]
+    conv_node = nodes[4]
+    q_y_node = nodes[5]
+
+    assert "cluster" not in q_x_node.meta
+    assert dq_w_node.meta["cluster"] == "aten_convolution_default_cluster"
+    assert dq_x_node.meta["cluster"] == "aten_convolution_default_cluster"
+    assert conv_node.meta["cluster"] == "aten_convolution_default_cluster"
+    assert q_y_node.meta["cluster"] == "aten_convolution_default_cluster"
+