
Commit 1ca262f

Add dynamic shape support for layer_norm/native_group_norm/group_norm (#2908)
1 parent: d9b2840 · commit: 1ca262f

File tree

4 files changed (+160, -139 lines)

py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py

Lines changed: 14 additions & 4 deletions
@@ -165,7 +165,9 @@ def aten_ops_layer_norm(


 @dynamo_tensorrt_converter(
-    torch.ops.aten.native_group_norm.default, capability_validator=one_user_validator
+    torch.ops.aten.native_group_norm.default,
+    capability_validator=one_user_validator,
+    supports_dynamic_shapes=True,
 )
 @enforce_tensor_types(
     {
@@ -195,8 +197,16 @@ def aten_ops_native_group_norm(
 )


-@dynamo_tensorrt_converter(torch.ops.aten.group_norm.default)
-@dynamo_tensorrt_converter(torch.ops.aten.group_norm)
+@dynamo_tensorrt_converter(
+    torch.ops.aten.group_norm.default,
+    capability_validator=one_user_validator,
+    supports_dynamic_shapes=True,
+)
+@dynamo_tensorrt_converter(
+    torch.ops.aten.group_norm,
+    capability_validator=one_user_validator,
+    supports_dynamic_shapes=True,
+)
 @enforce_tensor_types(
     {
         0: (TRTTensor,),
@@ -581,7 +591,7 @@ def aten_ops_neg(


 try:
-    import modelopt.torch.quantization as mtq
+    import modelopt.torch.quantization as mtq  # noqa: F401

     assert torch.ops.trt.quantize_fp8.default
 except Exception as e:
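
Note: the converter bodies are untouched in this file; the new supports_dynamic_shapes=True flag tells the Dynamo converter registry that these converters may be selected when an input dimension is dynamic, so the ops no longer fall back outside TensorRT under a dynamic profile. The capability_validator=one_user_validator (already used for native_group_norm) restricts conversion to nodes whose result has a single consumer, consistent with the converter returning placeholder mean/rstd. A minimal sketch of the same registration pattern, assuming the import path this file uses; the converter body is a hypothetical stub, not the real implementation:

import torch
from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
    dynamo_tensorrt_converter,
)


@dynamo_tensorrt_converter(
    torch.ops.aten.group_norm.default,  # ATen overload this converter handles
    supports_dynamic_shapes=True,  # advertise dynamic-shape capability
)
def my_group_norm_converter(ctx, target, args, kwargs, name):
    # A real converter builds TensorRT layers through ctx.net and returns
    # the resulting ITensor(s); this stub only illustrates registration.
    ...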

py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py

Lines changed: 55 additions & 133 deletions
@@ -111,7 +111,6 @@ def layer_norm(
 ) -> Union[TRTTensor, Tuple[TRTTensor, torch.Tensor, torch.Tensor]]:
     dims = list(range(len(input.shape) - len(normalized_shape), len(input.shape)))
     axes = get_axes_for_reduce_op(dims)
-
     weight = get_trt_tensor(ctx, weight, f"{name}_weight")
     bias = get_trt_tensor(ctx, bias, f"{name}_bias")
     if tuple(input.shape) != tuple(weight.shape):
@@ -153,157 +152,80 @@ def native_group_norm(
     assert (
         len(input.shape) >= 3
     ), f"The input dimension should not be less than 3, got {len(input.shape)}!"
-    B, C = input.shape[0], input.shape[1]

+    B = input.shape[0]
+    # if C is provided, it must be as same as the channel from the input shape,
+    # else if C is zero, we should get the channel from the input shape
+    if C == 0:
+        C = input.shape[1]
+    assert (
+        C == input.shape[1]
+    ), f"The number of Channel={C} must be equal to the number of channels in the input shape={input.shape[1]}"
     # Groups are a subdivision of the channel dimension.
     assert (
         C % group == 0
     ), f"The num of channels ({C}) should be divisible by num_groups ({group})!"
+    input = get_trt_tensor(ctx, input, f"{name}_input")

-    if weight is None:
-        weight = to_numpy(1.0)
+    shape = list(input.shape)

-    if bias is None:
-        bias = to_numpy(0.0)
+    for i, s in enumerate(shape):
+        if i == 0 and s > 0:
+            shape[i] = B * group
+        elif i == 1:
+            shape[i] = C // group
+        elif i > 1 and s == -1:
+            shape[i] = 0

     # Normalize every group.
     reshaped_input = impl.shuffle.reshape(
         ctx,
         target,
         source_ir,
-        name,
+        f"{name}_reshape_input",
         input,
-        (B * group, -1),
-    )
-
-    dim = 1
-
-    # E[X]
-    mean_trt = impl.reduce.mean(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_mean",
-        reshaped_input,
-        dim,
-        True,
-    )
-
-    # X - E[X]
-    sub_trt = impl.elementwise.sub(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_sub",
-        reshaped_input,
-        mean_trt,
-    )
-
-    # variance = mean(pow(sub_trt, 2))
-    pow_trt = get_trt_tensor(ctx, 2, f"{name}_power", np.float32)
-    pow_var = impl.elementwise.pow(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_pow",
-        sub_trt,
-        pow_trt,
-    )
-
-    var_trt = impl.reduce.mean(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_mean_var",
-        pow_var,
-        dim,
-        True,
-    )
-
-    # sqrt((var + eps))
-    eps_trt = get_trt_tensor(ctx, eps, f"{name}_eps", np.float32)
-    add_trt = impl.elementwise.add(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_add",
-        var_trt,
-        eps_trt,
-    )
-    sqrt_trt = impl.unary.sqrt(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_sqrt",
-        add_trt,
-    )
-
-    # y = (X - E[X]) / sqrt((var + eps))
-    div_trt = impl.elementwise.div(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_div",
-        sub_trt,
-        sqrt_trt,
-    )
-
-    # y * gamma + beta
-    gamma_trt = get_trt_tensor(ctx, weight, f"{name}_gamma")
-    beta_trt = get_trt_tensor(ctx, bias, f"{name}_beta")
-
-    output = impl.shuffle.reshape(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_reshape_div",
-        div_trt,
-        input.shape,
-    )
-
-    weight_bias_shape = (1, C) + (1,) * (len(input.shape) - 2)
-
-    reshaped_gamma = impl.shuffle.reshape(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_reshape_gamma",
-        gamma_trt,
-        weight_bias_shape,
-    )
-
-    output = impl.elementwise.mul(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_mul_gamma",
-        output,
-        reshaped_gamma,
-    )
-
-    reshaped_bias = impl.shuffle.reshape(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_reshape_beta",
-        beta_trt,
-        weight_bias_shape,
+        shape,
     )

-    output = impl.elementwise.add(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_add_beta",
-        output,
-        reshaped_bias,
+    weight = get_trt_tensor(ctx, weight, f"{name}_weight")
+    bias = get_trt_tensor(ctx, bias, f"{name}_bias")
+    if tuple(reshaped_input.shape) != tuple(weight.shape):
+        weight = impl.slice.expand(
+            ctx,
+            target,
+            source_ir,
+            f"{name}_expand_weight",
+            weight,
+            reshaped_input.shape,
+        )
+    if tuple(reshaped_input.shape) != tuple(bias.shape):
+        bias = impl.slice.expand(
+            ctx, target, source_ir, f"{name}_expand_bias", bias, reshaped_input.shape
+        )
+    dims = list(range(1, len(input.shape)))
+    axes = get_axes_for_reduce_op(dims)
+    group_norm = ctx.net.add_normalization(reshaped_input, weight, bias, axes)
+    group_norm.epsilon = eps
+    group_norm.compute_precision = input.dtype
+    set_layer_name(group_norm, target, f"{name}_group_norm", source_ir)
+    output = group_norm.get_output(0)
+
+    shape = list(output.shape)
+    for i, s in enumerate(shape):
+        if i == 0 and s > 0:
+            shape[i] = B
+        elif i == 1:
+            shape[i] = C
+        elif i > 1 and s == -1:
+            shape[i] = 0
+
+    reshaped_output = impl.shuffle.reshape(
+        ctx, target, source_ir, f"{name}_reshape_output", output, shape
     )
-
     if return_mean_rstd:
         # return fake mean and rstd for now
-        return output, None, None
-
-    return output
+        return reshaped_output, None, None
+    return reshaped_output


 def group_norm(
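
The rewritten converter drops the long chain of explicit mean/sub/pow/mean/add/sqrt/div layers and instead leans on TensorRT's INormalizationLayer via ctx.net.add_normalization: the input is reshaped from (B, C, ...) to (B * group, C // group, ...), normalized over all non-batch axes, and reshaped back, with dynamic dims handled by the s == -1 to 0 placeholder convention of the reshape layer. The identity this relies on can be checked in eager PyTorch; a minimal sketch with illustrative sizes and unit scale/zero shift (the affine weight and bias are folded in separately by the converter):

import torch
import torch.nn.functional as F

N, G, C, H, W = 2, 3, 6, 4, 4
x = torch.randn(N, C, H, W)

# Reference group norm (no affine parameters).
ref = F.group_norm(x, G, eps=1e-5)

# The converter's trick: fold each group into the batch axis, then
# normalize over every non-batch axis, i.e. the axes produced by
# get_axes_for_reduce_op(range(1, rank)).
xr = x.reshape(N * G, C // G, H, W)
mean = xr.mean(dim=(1, 2, 3), keepdim=True)
var = xr.var(dim=(1, 2, 3), unbiased=False, keepdim=True)
out = ((xr - mean) / torch.sqrt(var + 1e-5)).reshape(N, C, H, W)

assert torch.allclose(ref, out, atol=1e-5)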

tests/py/dynamo/conversion/test_group_norm_aten.py

Lines changed: 63 additions & 0 deletions
@@ -1,4 +1,5 @@
 import torch
+from parameterized import parameterized
 from torch.testing._internal.common_utils import run_tests
 from torch_tensorrt import Input

@@ -43,6 +44,31 @@ def forward(self, x):
             inputs,
         )

+    def test_groupnorm_with_dynamic_shape(self):
+        class GroupNorm(torch.nn.Module):
+            def forward(self, x):
+                return torch.ops.aten.group_norm.default(
+                    x,
+                    2,
+                    torch.ones((6,)),
+                    torch.zeros((6,)),
+                    1e-05,
+                    True,
+                )
+
+        input_specs = [
+            Input(
+                dtype=torch.float32,
+                min_shape=(3, 6, 24, 24),
+                opt_shape=(5, 6, 24, 24),
+                max_shape=(8, 6, 48, 24),
+            ),
+        ]
+        self.run_test_with_dynamic_shape(
+            GroupNorm(),
+            input_specs,
+        )
+

 class TestNativeGroupNormConverter(DispatchTestCase):
     def test_groupnorm1d(self):
@@ -86,6 +112,43 @@ def forward(self, x):
             inputs,
         )

+    @parameterized.expand(
+        [
+            (5, 4, 4, 2, (2, 4, 2), (3, 4, 2), (5, 4, 4)),
+            (5, 4, 2 * 2, 2, (2, 4, 2, 2), (3, 4, 2, 2), (5, 4, 2, 2)),
+            (5, 9, 6 * 3, 3, (3, 9, 3, 3), (4, 9, 3, 3), (5, 9, 6, 3)),
+            (8, 9, 6 * 6, 3, (3, 9, 2, 3, 2), (5, 9, 3, 3, 2), (8, 9, 6, 3, 2)),
+        ]
+    )
+    def test_groupnorm_with_dynamic_shape(
+        self, N, C, HxW, groups, min_shape, opt_shape, max_shape
+    ):
+        class GroupNorm(torch.nn.Module):
+            def forward(self, x):
+                return torch.ops.aten.native_group_norm.default(
+                    x,
+                    torch.ones((C,)),
+                    torch.zeros((C,)),
+                    N,
+                    C,
+                    HxW,
+                    groups,
+                    1e-5,
+                )[0]
+
+        input_specs = [
+            Input(
+                dtype=torch.float32,
+                min_shape=min_shape,
+                opt_shape=opt_shape,
+                max_shape=max_shape,
+            ),
+        ]
+        self.run_test_with_dynamic_shape(
+            GroupNorm(),
+            input_specs,
+        )
+

 if __name__ == "__main__":
     run_tests()
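
For reference, the argument layout the parameterized cases exercise can be checked in eager mode; a hedged sketch with illustrative sizes (native_group_norm carries explicit N/C/HxW metadata alongside the tensor, and the converter above re-derives C from the input when the traced value is 0):

import torch
import torch.nn.functional as F

N, C, H, W, groups = 5, 4, 2, 2, 2
x = torch.randn(N, C, H, W)
w, b = torch.ones(C), torch.zeros(C)

# native_group_norm returns (output, mean, rstd); the converter only
# materializes the first and returns None placeholders for the rest.
out, mean, rstd = torch.ops.aten.native_group_norm.default(
    x, w, b, N, C, H * W, groups, 1e-5
)
assert torch.allclose(out, F.group_norm(x, groups, w, b, eps=1e-5), atol=1e-5)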

tests/py/dynamo/conversion/test_layer_norm_aten.py

Lines changed: 28 additions & 2 deletions
@@ -82,9 +82,35 @@ def forward(self, x):

         input_specs = [
             Input(
-                shape=(-1, 3, 224, 224),
                 dtype=torch.float32,
-                shape_ranges=[((1, 3, 224, 224), (5, 3, 224, 224), (10, 3, 224, 224))],
+                min_shape=(1, 3, 224, 224),
+                opt_shape=(5, 3, 224, 224),
+                max_shape=(10, 3, 224, 224),
+            ),
+        ]
+
+        self.run_test_with_dynamic_shape(
+            LayerNorm(),
+            input_specs,
+        )
+
+    def test_layernorm_with_dynamic_shape_1(self):
+        class LayerNorm(torch.nn.Module):
+            def forward(self, x):
+                return torch.ops.aten.native_layer_norm.default(
+                    x,
+                    torch.tensor([3]),
+                    torch.ones((3)),
+                    torch.zeros((3)),
+                    1e-05,
+                )[0]
+
+        input_specs = [
+            Input(
+                dtype=torch.float32,
+                min_shape=(1, 2, 3),
+                opt_shape=(3, 3, 3),
+                max_shape=(4, 5, 3),
             ),
         ]

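
The layer-norm tests also migrate from the legacy shape / shape_ranges arguments to the min_shape / opt_shape / max_shape form of torch_tensorrt.Input, the same profile users pass at compile time. A hedged end-to-end sketch of what this commit enables (model, shapes, and device placement are illustrative; assumes a CUDA-enabled torch_tensorrt install):

import torch
import torch_tensorrt

model = torch.nn.Sequential(torch.nn.LayerNorm((3, 224, 224))).cuda().eval()

# One optimization profile: the batch dimension may vary from 1 to 10.
trt_model = torch_tensorrt.compile(
    model,
    ir="dynamo",
    inputs=[
        torch_tensorrt.Input(
            min_shape=(1, 3, 224, 224),
            opt_shape=(5, 3, 224, 224),
            max_shape=(10, 3, 224, 224),
            dtype=torch.float32,
        )
    ],
)

# Any batch size within the profile now runs through the TensorRT engine.
out = trt_model(torch.randn(8, 3, 224, 224, device="cuda"))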
