Support unlift=false for per_channel qparams (#868)

digantdesai · facebook-github-bot · commit c33932c54aa0 · 2023-10-13T13:36:45.000-07:00
Summary: Pull Request resolved: #868. Fetching per-channel quantization params via `get_param_tensor()` was a TODO; it is now needed to support the Saliency model with `unlift=False`. Reviewed By: kimishpatel Differential Revision: D50197184 fbshipit-source-id: c172fd77653fd458b9eb6bb56248416cddf8483d
diff --git a/backends/xnnpack/operators/node_visitor.py b/backends/xnnpack/operators/node_visitor.py
@@ -458,7 +458,9 @@ def define_nodes_tensor_inputs_outputs(
             )
             # Define Weight Node
             weight_node = get_input_node(node, input_type_map.node_weight)
-            weight_quant_params = QuantParams.from_weights(weight_node)
+            weight_quant_params = QuantParams.from_weights(
+                weight_node, self._exported_program
+            )
             self.define_tensor(
                 weight_node,
                 xnn_graph,
diff --git a/backends/xnnpack/operators/op_conv2d.py b/backends/xnnpack/operators/op_conv2d.py
@@ -73,7 +73,9 @@ def define_node(
         # shape for xnnpack convolution is (oc, height, width, inc/groups), to convert
         # to the proper shape, this is essentially a NCHW to NHWC conversion
         weight_node = get_input_node(node, 1)
-        weight_quant_params = QuantParams.from_weights(weight_node)
+        weight_quant_params = QuantParams.from_weights(
+            weight_node, self._exported_program
+        )
         self.define_tensor(
             weight_node,
             xnn_graph,
diff --git a/backends/xnnpack/operators/quant_params.py b/backends/xnnpack/operators/quant_params.py
@@ -11,7 +11,11 @@
 import torch
 from executorch.backends.xnnpack.passes.tag_implicit_q_dq_pass import TagImplicitQDqPass
 from executorch.backends.xnnpack.utils.quant_utils import is_dequant, is_quant
-from executorch.backends.xnnpack.utils.utils import check_or_raise, is_param_node
+from executorch.backends.xnnpack.utils.utils import (
+    check_or_raise,
+    get_param_tensor,
+    is_param_node,
+)
 from executorch.exir.dialects._ops import ops as exir_ops
 from torch.export import ExportedProgram
 
@@ -97,7 +101,9 @@ def _from_dynamic_input_node(cls, quant_node: torch.fx.Node) -> QuantParams:
         )
 
     @classmethod
-    def from_q_dq_node(cls, quant_node: torch.fx.Node) -> QuantParams:
+    def from_q_dq_node(
+        cls, quant_node: torch.fx.Node, ep: Optional[ExportedProgram] = None
+    ) -> QuantParams:
         check_or_raise(
             is_quant(quant_node) or is_dequant(quant_node),
             f"building quantizer from q/dq node but was given node:{quant_node}",
@@ -119,11 +125,18 @@ def from_q_dq_node(cls, quant_node: torch.fx.Node) -> QuantParams:
         if per_channel:
             assert isinstance(scale, torch.fx.Node) and isinstance(scale.target, str)
             assert isinstance(zp, torch.fx.Node) and isinstance(zp.target, str)
-            # TODO: use get_param_tensor()
-            scale = getattr(quant_node.graph.owning_module, scale.target)
-            zp = getattr(quant_node.graph.owning_module, zp.target)
-            axis = cast(int, quant_node.args[3])
+            assert (
+                ep is not None
+            ), "ExportedProgram must be provided to extract per channel params"
+
+            def _get_tensor(node):
+                param = get_param_tensor(ep, node)
+                assert param is not None, f"Expected to find param tensor for {node}"
+                return cast(torch.Tensor, param)
 
+            scale = _get_tensor(scale)
+            zp = _get_tensor(zp)
+            axis = cast(int, quant_node.args[3])
         check_or_raise(
             bool(
                 quant_node.args[-1] != torch.uint8
@@ -152,7 +165,9 @@ def from_q_dq_node(cls, quant_node: torch.fx.Node) -> QuantParams:
         )
 
     @classmethod
-    def from_weights(cls, tensor_node: torch.fx.Node) -> Optional[QuantParams]:
+    def from_weights(
+        cls, tensor_node: torch.fx.Node, ep: Optional[ExportedProgram] = None
+    ) -> Optional[QuantParams]:
         # Ignore transpose for weights
         # TODO:T148540997 remove the t_copy/permute_copy check when convert addmm to linear
         dq = (
@@ -180,7 +195,7 @@ def from_weights(cls, tensor_node: torch.fx.Node) -> Optional[QuantParams]:
             f"q->dq->permute_copy not derived from static weight, input to the q node: {q.all_input_nodes[0]}",
         )
 
-        return cls.from_q_dq_node(q)
+        return cls.from_q_dq_node(q, ep)
 
     @classmethod
     def from_inputs(
diff --git a/backends/xnnpack/test/TARGETS b/backends/xnnpack/test/TARGETS
@@ -112,6 +112,7 @@ python_unittest(
     srcs = [
         "ops/add.py",
         "ops/conv1d.py",
+        "ops/conv2d.py",
     ],
     deps = [
         "//caffe2:torch",
diff --git a/backends/xnnpack/test/ops/conv2d.py b/backends/xnnpack/test/ops/conv2d.py
@@ -0,0 +1,96 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from typing import Optional
+
+import torch
+
+from executorch.backends.xnnpack.test.tester import Quantize, Tester
+from torch.ao.quantization.quantizer.xnnpack_quantizer import (
+    get_symmetric_quantization_config,
+)
+from torch.ao.quantization.quantizer.xnnpack_quantizer_utils import QuantizationConfig
+
+
+class Conv2d(torch.nn.Module):
+    def __init__(
+        self,
+        in_channels=2,
+        out_channels=1,
+        kernel_size=(3, 3),
+        stride=(2, 2),
+        padding=(1, 1),
+        dilation=(1, 1),
+        groups=1,
+        bias=True,
+        padding_mode="zeros",
+        batches=1,
+        width=8,
+        height=8,
+    ):
+        super().__init__()
+        self.batches = batches
+        self.width = width
+        self.height = height
+        self.in_channels = in_channels
+
+        self.conv = torch.nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias=bias,
+            padding_mode=padding_mode,
+        )
+
+    def forward(self, x):
+        return self.conv(x)
+
+    def get_inputs(self):
+        return (torch.randn(self.batches, self.in_channels, self.height, self.width),)
+
+
+class TestConv2d(unittest.TestCase):
+    def _test(
+        self, m: torch.nn.Module, quant_config: Optional[QuantizationConfig] = None
+    ):
+        tester = Tester(m.eval(), m.get_inputs())
+
+        if quant_config is not None:
+            tester = tester.quantize(Quantize(quantization_config=quant_config))
+            tester.check(["torch.ops.quantized_decomposed"])
+
+        (
+            tester.export()
+            .check_count({"torch.ops.aten.convolution.default": 1})
+            .to_edge()
+            .check_count(
+                {"executorch_exir_dialects_edge__ops_aten_convolution_default": 1}
+            )
+            .partition()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_convolution_default"])
+            .check_count({"torch.ops.executorch_call_delegate": 1})
+            .to_executorch()
+            .serialize()
+            .run_method()
+            .compare_outputs()
+        )
+
+    def test_conv2d(self) -> None:
+        self._test(Conv2d())
+
+    def test_qconv2d(self) -> None:
+        self._test(Conv2d(), quant_config=get_symmetric_quantization_config())
+
+    def test_qconv2d_per_channel(self) -> None:
+        self._test(
+            Conv2d(),
+            quant_config=get_symmetric_quantization_config(is_per_channel=True),
+        )