
Commit 00b0ce5

Add support for quantized transposed conv
Differential Revision: D68939306
Pull Request resolved: #8090
1 parent ae61caa commit 00b0ce5

File tree

3 files changed, +53 -33 lines


backends/xnnpack/quantizer/xnnpack_quantizer.py

Lines changed: 2 additions & 1 deletion
@@ -249,8 +249,9 @@ class XNNPACKQuantizer(Quantizer):
     STATIC_OPS = [
         "linear_relu",
         "linear",
-        "conv_relu",
         "conv",
+        "conv_transpose",
+        "conv_relu",
         "conv_transpose_relu",
         "adaptive_avg_pool2d",
         # TODO: move this to BoltNNQuantizer?
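
For context, a minimal sketch of how the newly listed "conv_transpose" static op might be exercised end to end. XNNPACKQuantizer and get_symmetric_quantization_config come from this repo; the capture and PT2E quantize calls below are assumptions about the surrounding flow (API names vary across PyTorch/ExecuTorch versions), not part of this commit:

    import torch
    from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
        XNNPACKQuantizer,
        get_symmetric_quantization_config,
    )
    from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e


    class Deconv(torch.nn.Module):
        def __init__(self):
            super().__init__()
            # transposed conv, now matched by the "conv_transpose" annotator
            self.deconv = torch.nn.ConvTranspose2d(4, 8, kernel_size=3)

        def forward(self, x):
            return self.deconv(x)


    example_inputs = (torch.randn(1, 4, 16, 16),)
    # export_for_training is one way to get the graph PT2E expects
    gm = torch.export.export_for_training(Deconv(), example_inputs).module()

    quantizer = XNNPACKQuantizer()
    quantizer.set_global(get_symmetric_quantization_config())
    gm = prepare_pt2e(gm, quantizer)
    gm(*example_inputs)  # calibration pass
    gm = convert_pt2e(gm)  # transposed conv now carries q/dq nodes

After convert_pt2e, the transposed conv is quantized rather than left in fp32, which is what the test changes below start asserting.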

backends/xnnpack/quantizer/xnnpack_quantizer_utils.py

Lines changed: 47 additions & 22 deletions
@@ -91,6 +91,16 @@ class OperatorConfig(NamedTuple):
     operators: list[OperatorPatternType]


+def is_relu_node(node: Node) -> bool:
+    """
+    Check if a given node is a relu node
+    """
+    return node.op == "call_function" and node.target in [
+        torch.ops.aten.relu.default,
+        torch.ops.aten.relu_.default,
+    ]
+
+
 def _is_annotated(nodes: list[Node]):
     """
     Given a list of nodes (that represents an operator pattern),
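
A small illustration of the new helper's behavior on a hand-built FX graph (the graph is hypothetical, constructed only to show that both the functional and in-place relu targets are matched):

    import torch
    from torch.fx import Graph

    # Build a trivial graph containing both relu variants by hand.
    g = Graph()
    x = g.placeholder("x")
    a = g.call_function(torch.ops.aten.relu.default, (x,))
    b = g.call_function(torch.ops.aten.relu_.default, (a,))
    g.output(b)

    relu_targets = [torch.ops.aten.relu.default, torch.ops.aten.relu_.default]
    for n in g.nodes:
        if n.op == "call_function":
            print(n.target, n.target in relu_targets)  # both print True

Factoring this predicate out lets the four call sites below drop the repeated two-element target list.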
@@ -231,10 +241,7 @@ def _annotate_linear_relu(
     weight_qspec = get_weight_qspec(quantization_config)
     bias_qspec = get_bias_qspec(quantization_config)
     for node in gm.graph.nodes:
-        if node.op != "call_function" or node.target not in [
-            torch.ops.aten.relu.default,
-            torch.ops.aten.relu_.default,
-        ]:
+        if not is_relu_node(node):
             continue
         relu_node = node
         maybe_linear_node = node.args[0]
@@ -285,21 +292,28 @@ def _annotate_linear_relu(
     return annotated_partitions


-@register_annotator("conv")
-def _annotate_conv(
+def _do_annotate_conv(
     gm: torch.fx.GraphModule,
     quantization_config: Optional[QuantizationConfig],
     filter_fn: Optional[Callable[[Node], bool]] = None,
+    is_conv_transpose: bool = False,
 ) -> Optional[list[list[Node]]]:
     annotated_partitions = []
+    is_conv_node = _is_conv_transpose_node if is_conv_transpose else _is_conv_node
+
     for n in gm.graph.nodes:
-        if n.op != "call_function" or n.target not in [
-            torch.ops.aten.conv1d.default,
-            torch.ops.aten.conv2d.default,
-        ]:
+        if not is_conv_node(n):
             continue
         conv_node = n

+        # This is hacky!
+        # We do not want to annotate conv node independently if there is a conv + relu pattern
+        # So we skip if the conv node is consumed by a single relu node
+        if len(conv_node.users) == 1:
+            user = list(conv_node.users.keys())[0]
+            if is_relu_node(user):
+                continue
+
         input_qspec_map = {}
         input_act = conv_node.args[0]
         assert isinstance(input_act, Node)
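
The skip above relies on FX's node.users bookkeeping. A hand-built sketch (hypothetical graph, not from this commit) of the pattern being skipped:

    import torch
    from torch.fx import Graph

    # conv2d feeding a single relu: the conv has exactly one user.
    g = Graph()
    x = g.placeholder("x")
    w = g.placeholder("w")
    conv = g.call_function(torch.ops.aten.conv2d.default, (x, w))
    act = g.call_function(torch.ops.aten.relu.default, (conv,))
    g.output(act)

    # Mirrors the condition in _do_annotate_conv.
    if len(conv.users) == 1:
        (user,) = conv.users  # node.users is a dict keyed by user nodes
        print(user.target)  # aten.relu.default -> standalone annotation skipped

This appears to be why STATIC_OPS was reordered in xnnpack_quantizer.py: "conv" and "conv_transpose" now run ahead of the fused "conv_relu" pass, so without this skip the standalone annotator would claim convs that belong to conv + relu partitions.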
@@ -341,10 +355,7 @@ def _do_annotate_conv_relu(
 ):
     annotated_partitions = []
     for n in gm.graph.nodes:
-        if n.op != "call_function" or n.target not in [
-            torch.ops.aten.relu.default,
-            torch.ops.aten.relu_.default,
-        ]:
+        if not is_relu_node(n):
             continue
         relu_node = n
         maybe_conv_node = n.args[0]
@@ -393,6 +404,26 @@ def _do_annotate_conv_relu(
     return annotated_partitions


+@register_annotator("conv")
+def _annotate_conv(
+    gm: torch.fx.GraphModule,
+    quantization_config: Optional[QuantizationConfig],
+    filter_fn: Optional[Callable[[Node], bool]] = None,
+) -> Optional[list[list[Node]]]:
+    return _do_annotate_conv(
+        gm, quantization_config, filter_fn, is_conv_transpose=False
+    )
+
+
+@register_annotator("conv_transpose")
+def _annotate_transpose_conv(
+    gm: torch.fx.GraphModule,
+    quantization_config: Optional[QuantizationConfig],
+    filter_fn: Optional[Callable[[Node], bool]] = None,
+) -> Optional[list[list[Node]]]:
+    return _do_annotate_conv(gm, quantization_config, filter_fn, is_conv_transpose=True)
+
+
 @register_annotator("conv_relu")
 def _annotate_conv_relu(
     gm: torch.fx.GraphModule,
@@ -744,10 +775,7 @@ def _annotate_add_relu(  # noqa: C901
 ) -> Optional[list[list[Node]]]:
     annotated_partitions = []
     for node in gm.graph.nodes:
-        if node.op != "call_function" or node.target not in [
-            torch.ops.aten.relu.default,
-            torch.ops.aten.relu_.default,
-        ]:
+        if not is_relu_node(node):
             continue
         relu_node = node
         maybe_add = node.args[0]
@@ -872,10 +900,7 @@ def _annotate_mul_relu(  # noqa: C901
 ) -> Optional[list[list[Node]]]:
     annotated_partitions = []
     for node in gm.graph.nodes:
-        if node.op != "call_function" or node.target not in [
-            torch.ops.aten.relu.default,
-            torch.ops.aten.relu_.default,
-        ]:
+        if not is_relu_node(node):
             continue
         relu_node = node
         maybe_mul = node.args[0]

backends/xnnpack/test/ops/test_conv2d.py

Lines changed: 4 additions & 10 deletions
@@ -243,15 +243,14 @@ def test_qs8_conv2d_test(self) -> None:
             self._test(
                 Conv2d(bias=has_bias, transpose=transpose),
                 quant_config=get_symmetric_quantization_config(),
-                check_quantized=not transpose,  # XNNPackQuantizer does not quantize this pattern yet
             )

     def test_qs8_conv2d_per_channel(self) -> None:
         for transpose in (True, False):
             self._test(
                 Conv2d(transpose=transpose),
                 quant_config=get_symmetric_quantization_config(is_per_channel=True),
-                check_quantized=not transpose,  # XNNPackQuantizer does not quantize this pattern yet
+                delegated=not transpose,  # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1
             )

     def test_fp32_conv2d_seq(self) -> None:
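
The "per input channel ... groups > 1" comment recurring in these tests traces to the weight layout of transposed convolutions. A quick shape check (standard PyTorch behavior, shown only for illustration):

    import torch

    # Conv2d weights:          [out_channels, in_channels // groups, kH, kW]
    # ConvTranspose2d weights: [in_channels, out_channels // groups, kH, kW]
    conv = torch.nn.Conv2d(4, 8, kernel_size=3, groups=2)
    deconv = torch.nn.ConvTranspose2d(4, 8, kernel_size=3, groups=2)
    print(conv.weight.shape)    # torch.Size([8, 2, 3, 3])
    print(deconv.weight.shape)  # torch.Size([4, 4, 3, 3])

Per-channel scales for a transposed conv therefore sit on dim 1 rather than dim 0, and with groups > 1 that dimension no longer spans all output channels, which is plausibly the case XNNPACK declines to delegate.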
@@ -264,7 +263,6 @@ def test_qs8_conv2d_seq(self) -> None:
                 Conv2dSeq(transpose=transpose),
                 conv_count=2,
                 quant_config=get_symmetric_quantization_config(),
-                check_quantized=not transpose,  # XNNPackQuantizer does not quantize this pattern yet
             )

     def test_fp32_conv2d_single_int_params(self):
@@ -282,7 +280,6 @@ def test_fp32_conv2d_depthwise(self):
         # - Groups must equal In Channels
         # - Out Channels must be a positive multiple of In Channels
         for transpose in (True, False):
-
             self._test(
                 Conv2d(groups=2, in_channels=2, out_channels=6, transpose=transpose)
             )
@@ -292,7 +289,6 @@ def test_qs8_conv2d_depthwise(self):
             self._test(
                 Conv2d(groups=2, in_channels=2, out_channels=6, transpose=transpose),
                 quant_config=get_symmetric_quantization_config(),
-                check_quantized=not transpose,  # XNNPackQuantizer does not quantize this pattern yet
             )

     def test_fp32_conv2d_bn(self):
@@ -384,7 +380,6 @@ def test_qs8_conv2d_bn(self):
                 Conv2dBatchNorm(transpose=transpose),
                 quant_config=get_symmetric_quantization_config(),
                 conv_count=2,
-                check_quantized=not transpose,  # XNNPackQuantizer does not quantize this pattern yet
             )

     def test_qs8_conv2d_relu(self):
@@ -415,7 +410,7 @@ def get_inputs(self):
             self._test(
                 ConvReLU(transpose=transpose),
                 quant_config=get_symmetric_quantization_config(is_per_channel=True),
-                delegated=not transpose,
+                delegated=not transpose,  # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1
             )

     def test_qs8_conv2d_dw_relu(self):
@@ -467,9 +462,8 @@ def get_inputs(self):
                 quant_config=get_symmetric_quantization_config(
                     is_per_channel=per_channel_quant
                 ),
-                # xnnpack only supports per output channel quantization for transposed convolutions
-                # XNNPackQuantizer quantizes per input channel currently
-                delegated=not transpose or not per_channel_quant,
+                # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1
+                delegated=not (transpose and per_channel_quant),
             )

     def test_qs8_conv2d_relu_seq(self):
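
One note on the flag rewrite in this last hunk: the new expression is logically identical to the old one (De Morgan), so only the comment's substance changed. A two-loop exhaustive check:

    # Verify the old and new delegated flags agree for all boolean inputs.
    for transpose in (False, True):
        for per_channel_quant in (False, True):
            old = not transpose or not per_channel_quant
            new = not (transpose and per_channel_quant)
            assert old == new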
