
Commit 30732fe

GregoryComer authored and facebook-github-bot committed
Add constraint to not partition standalone batch norm (#1501)
Summary:
Pull Request resolved: #1501

The XNNPACK backend does not currently support lowering standalone (non-fused) batch norms. Support is planned for the near future, but in the meantime, models with standalone batch norms fail to lower: the op is partitioned but cannot be lowered. This change adds an op-level constraint for batch norm to the XNNPACK partitioner so that only batch norms that can be fused are partitioned. The constraint will be relaxed once standalone batch norm is fully supported.

Reviewed By: mcr229

Differential Revision: D52491544

fbshipit-source-id: 861744d836b0cbfc07700bc411e9677ba80367df
1 parent 5318baa commit 30732fe
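
To make the mechanism concrete, here is a minimal sketch of an op-level constraint registry in the spirit of the partitioner's _constraint decorator (seen in the first diff below): a predicate is registered per op target, and a node is only partitioned if every predicate for its target passes. All names here (op_constraint, _OP_CONSTRAINTS, node_is_partitionable) are hypothetical, not the actual partitioner API.

# Hypothetical sketch of an op-level constraint registry; the real
# partitioner's _constraint decorator works in this spirit.
from typing import Callable, Dict, List

import torch

ConstraintFn = Callable[[torch.fx.Node], bool]
_OP_CONSTRAINTS: Dict[object, List[ConstraintFn]] = {}

def op_constraint(target: object):
    """Register a predicate that must pass before `target` nodes are partitioned."""
    def register(fn: ConstraintFn) -> ConstraintFn:
        _OP_CONSTRAINTS.setdefault(target, []).append(fn)
        return fn
    return register

def node_is_partitionable(node: torch.fx.Node) -> bool:
    # Nodes with no registered constraints are partitionable by default;
    # otherwise every constraint for the node's target must hold.
    return all(fn(node) for fn in _OP_CONSTRAINTS.get(node.target, []))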

3 files changed: +79 −16 lines

backends/xnnpack/partition/xnnpack_partitioner.py

Lines changed: 20 additions & 0 deletions
@@ -22,6 +22,9 @@
     UNSUPPORTED_QUANT_MODULES,
 )
 from executorch.backends.xnnpack.partition.graphs.bilinear_2d import bilinear2d_graphs
+from executorch.backends.xnnpack.passes.fuse_batch_norm_with_conv import (
+    FuseBatchNormWithConvPass,
+)
 from executorch.backends.xnnpack.utils.utils import get_input_node, is_param_node
 from executorch.backends.xnnpack.xnnpack_preprocess import XnnpackBackend

@@ -374,6 +377,23 @@ def amax(node: torch.fx.Node, ep: ExportedProgram) -> bool:  # noqa
         dim_arg_val = cast(int, node.args[1])
         return is_keep_dim and (dim_arg_val == 2 or dim_arg_val == 3)

+    @_constraint(exir_ops.edge.aten._native_batch_norm_legit_no_training.default)
+    def batch_norm(node: torch.fx.Node, ep: ExportedProgram) -> bool:  # noqa
+        """
+        Only support batch norms that can be fused with convolutions.
+        This will be removed once standalone batch norm is supported.
+        """
+
+        # TODO(gjcomer) Remove after standalone batch norm (T171796544).
+
+        conv_node = node.args[0]
+        assert isinstance(conv_node, torch.fx.Node)
+
+        if conv_node.target != exir_ops.edge.aten.convolution.default:
+            return False
+
+        return FuseBatchNormWithConvPass.can_fuse(conv_node, node, ep)
+

 class XnnpackFloatingPointPartitioner(Partitioner):
     """

backends/xnnpack/passes/fuse_batch_norm_with_conv.py

Lines changed: 35 additions & 16 deletions
@@ -11,6 +11,7 @@
 from executorch.backends.xnnpack.passes.xnnpack_pass import XNNPACKPass

 from executorch.backends.xnnpack.utils.utils import get_param_tensor, is_param_node
+from executorch.exir import ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import PassResult

@@ -21,9 +22,8 @@ class FuseBatchNormWithConvPass(XNNPACKPass):
     """
     Batch Norm can be implemented using 1x1 Depthwise Convolution. However doing so will increase
     memory usage since we serialize new weights to represent the convolution. In most cases,
-    Batch norm is used after convoluution. The 1x1 depthwise convolution can then be fused
+    Batch norm is used after convolution. The 1x1 depthwise convolution can then be fused
     with the previous convolution
-
     """

     def call(self, graph_module: torch.fx.GraphModule):

@@ -48,20 +48,7 @@ def call(self, graph_module: torch.fx.GraphModule):
             ):
                 continue

-            # All the users of batchnorm node must be getitem ops. batchnorm
-            # returns a 3-element tuple. Each user must only access the first
-            # element of the tuple.
-            if [
-                (user.target == operator.getitem and user.args[1] == 0)
-                for user in bn.users
-            ].count(False):
-                continue
-
-            # Check that the weights for conv and batchnorm are both params
-            if [
-                is_param_node(self.exported_program, node)
-                for node in {conv.args[1], bn.args[1]}
-            ].count(False):
+            if not self.can_fuse(conv, bn, self.exported_program):
                 continue

             # Get the parameters from conv op

@@ -138,3 +125,35 @@ def call(self, graph_module: torch.fx.GraphModule):
         graph_module = super().call(graph_module).graph_module

         return PassResult(graph_module, True)
+
+    @staticmethod
+    def can_fuse(
+        conv: torch.fx.Node, bn: torch.fx.Node, program: ExportedProgram
+    ) -> bool:
+        """
+        Determine whether a batch norm node can be fused with a preceding conv node.
+        """
+
+        # All the users of batchnorm node must be getitem ops. batchnorm
+        # returns a 3-element tuple. Each user must only access the first
+        # element of the tuple.
+        if [
+            (user.target == operator.getitem and user.args[1] == 0) for user in bn.users
+        ].count(False):
+            return False
+
+        conv_weights = conv.args[1]
+        bn_weights = bn.args[1]
+
+        # Check that the weights for conv and batchnorm are both params
+        if not isinstance(conv_weights, torch.fx.Node) or not isinstance(
+            bn_weights, torch.fx.Node
+        ):
+            return False
+
+        if [is_param_node(program, node) for node in {conv_weights, bn_weights}].count(
+            False
+        ):
+            return False
+
+        return True
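
The fusion itself relies on the standard conv + batch-norm folding identity: inference-mode batch norm is an affine, per-channel transform, so it can be absorbed into the preceding convolution's weight and bias. A minimal sketch of that arithmetic (not this pass's actual implementation):

import torch

def fold_bn_into_conv(W, b, gamma, beta, running_mean, running_var, eps=1e-5):
    # Inference-mode BN computes y = (x - mean) / sqrt(var + eps) * gamma + beta,
    # a per-channel scale and shift, so it folds into the conv parameters:
    #   W' = W * scale (per output channel),  b' = (b - mean) * scale + beta
    scale = gamma / torch.sqrt(running_var + eps)
    W_fused = W * scale.reshape(-1, 1, 1, 1)  # scale each output filter
    b_fused = (b - running_mean) * scale + beta
    return W_fused, b_fused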

backends/xnnpack/test/passes/test_batch_norm_fusion.py

Lines changed: 24 additions & 0 deletions
@@ -55,3 +55,27 @@ def test_q8_batch_norm_fusion(self):
             .run_method()
             .compare_outputs()
         )
+
+    def test_fp32_batch_norm_no_fusion_doesnt_partition(self):
+        """
+        We do not currently support standalone batch norms (i.e. batch norms that are
+        not fused with a conv). This is planned, but until implemented, this test ensures
+        that we do not partition the standalone batch norm and then fail to lower.
+        """
+
+        class BN(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.bn = torch.nn.BatchNorm2d(2)
+
+            def forward(self, x):
+                return self.bn(x)
+
+        (
+            Tester(BN(), (torch.randn(2, 2, 4, 4),))
+            .export()
+            .to_edge()
+            .check_count({self.bn_name: 1})
+            .partition()
+            .check_count({self.bn_name: 1})
+        )
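
For contrast, a hedged sketch of the complementary fused case, written against the same Tester fluent API used above: with a conv feeding the batch norm, the pair should be fused and delegated, leaving no batch norm node after partitioning. This companion test is not part of the commit, and the check_not stage is an assumption about the Tester API:

    def test_fp32_conv_batch_norm_partitions(self):  # hypothetical companion test
        class ConvBN(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.conv = torch.nn.Conv2d(2, 2, kernel_size=3)
                self.bn = torch.nn.BatchNorm2d(2)

            def forward(self, x):
                return self.bn(self.conv(x))

        (
            Tester(ConvBN(), (torch.randn(2, 2, 4, 4),))
            .export()
            .to_edge()
            .check_count({self.bn_name: 1})
            .partition()
            .check_not([self.bn_name])  # assumed stage; BN was fused and delegated
        )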
