 from typing import Optional, Sequence, Union

+import numpy as np
+import tensorrt as trt
 from torch.fx.node import Target
 from torch_tensorrt.dynamo._SourceIR import SourceIR
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
-from torch_tensorrt.dynamo.conversion.converter_utils import get_positive_dim
-from torch_tensorrt.fx.converters.converter_utils import set_layer_name
+from torch_tensorrt.dynamo.conversion.converter_utils import (
+    cast_trt_tensor,
+    get_positive_dim,
+    set_layer_name,
+)
+from torch_tensorrt.dynamo.conversion.impl.elementwise import ne
 from torch_tensorrt.fx.types import TRTTensor
-from torch_tensorrt.fx.utils import get_dynamic_dims


 def squeeze(
@@ -29,24 +34,90 @@ def squeeze(
         dims.append(dim)

     new_dims = []
+    dim_has_dynamic_shape = False
     for dim in dims:
         dim = get_positive_dim(
             dim,
             len(input.shape),
         )

-        assert input.shape[dim] != -1, "We don't support squeeze dynamic dim."
-        assert (
-            len(get_dynamic_dims(input.shape)) <= 1
-        ), "Currently more than one dynamic dim for input to squeeze is not supported."
+        if input.shape[dim] == -1:
+            dim_has_dynamic_shape = True
         new_dims.append(dim)

-    output_shape = []
-    for i, s in enumerate(input.shape):
-        if (i in new_dims) and s == 1:
-            continue
-        output_shape.append(s)
     layer = ctx.net.add_shuffle(input)
-    layer.reshape_dims = tuple(output_shape)
     set_layer_name(layer, target, name, source_ir)
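+    # When a squeezed axis has dynamic shape, the output shape is unknown at
+    # build time, so it is computed in-graph: scatter ones onto the squeeze
+    # positions, keep each axis whose runtime extent differs from that mask,
+    # and wire the gathered extents into the shuffle layer as a shape tensor.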
+    if dim_has_dynamic_shape:
+        num_shape = len(input.shape)
+
+        tensor_shape_layer = ctx.net.add_shape(input)
+        tensor_shape = tensor_shape_layer.get_output(0)
+        tensor_shape = cast_trt_tensor(
+            ctx, tensor_shape, trt.int32, name + "shape_casted", "shape"
+        )
+
+        # TODO: change it to get_trt_tensor
+        one_layer = ctx.net.add_constant(
+            (num_shape,),
+            np.ascontiguousarray([1] * num_shape, np.int32),
+        )
+        set_layer_name(one_layer, target, name + "_one", source_ir)
+
+        zero_layer = ctx.net.add_constant(
+            (num_shape,),
+            np.zeros((num_shape,), dtype=np.int32),
+        )
+        set_layer_name(zero_layer, target, name + "_zero", source_ir)
+
+        # pad the squeeze indices to rank length by repeating the last index,
+        # so the scatter indices match the shape of the updates tensor
+        num_append = num_shape - len(new_dims)
+        if num_append > 0:
+            new_dims += [new_dims[-1]] * num_append
+
+        index_value = np.array(new_dims, dtype=np.int32)
+        index_layer = ctx.net.add_constant(index_value.shape, index_value)
+        set_layer_name(index_layer, target, name + "_index", source_ir)
+
+        scatter_layer = ctx.net.add_scatter(
+            zero_layer.get_output(0),
+            index_layer.get_output(0),
+            one_layer.get_output(0),
+            trt.ScatterMode.ELEMENT,
+        )
+        set_layer_name(scatter_layer, target, name + "_scatter", source_ir)
+
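+        # ELEMENT-mode scatter writes a 1 at each squeeze index; the padded
+        # duplicate indices simply rewrite the same slot, so the result is a
+        # 0/1 mask over the input axes.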
+        # e.g. squeezing dims (2, 4) of a (1, 2, 1, 3, 1) input:
+        # runtime shape:  [1, 2, 1, 3, 1]
+        # scatter mask:   [0, 0, 1, 0, 1]
+        # shape != mask:  [t, t, f, t, f]
+        ne_tensor = ne(
+            ctx,
+            target,
+            source_ir,
+            name + "_ne",
+            tensor_shape,
+            scatter_layer.get_output(0),
+        )
+
+        # [t, t, f, t, f] -> [0, 1, 3]
+        non_zero_layer = ctx.net.add_non_zero(ne_tensor)
+        set_layer_name(non_zero_layer, target, name + "_non_zero", source_ir)
+
+        # add_non_zero returns the indices as (1, k); transpose to (k, 1) so
+        # each row is an index tuple for the ND gather below
+        non_zero_shuffle_layer = ctx.net.add_shuffle(non_zero_layer.get_output(0))
+        set_layer_name(non_zero_shuffle_layer, target, name + "_shuffle", source_ir)
+        non_zero_shuffle_layer.second_transpose = (1, 0)
+
+        # gather the kept extents: (1, 2, 1, 3, 1) + [0, 1, 3] -> [1, 2, 3]
+        gather_layer = ctx.net.add_gather_v2(
+            tensor_shape, non_zero_shuffle_layer.get_output(0), mode=trt.GatherMode.ND
+        )
+        set_layer_name(gather_layer, target, name + "_gather", source_ir)
+
+        # feed the runtime shape as the shuffle layer's reshape-dims input
+        layer.set_input(1, gather_layer.get_output(0))
+    else:
+        # static case: drop size-1 axes at the squeeze positions at build time
+        output_shape = []
+        for i, s in enumerate(input.shape):
+            if (i in new_dims) and s == 1:
+                continue
+            output_shape.append(s)
+        layer.reshape_dims = tuple(output_shape)
     return layer.get_output(0)
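
The layer graph in the dynamic branch is just a shape computation. A minimal NumPy sketch of the same mask trick, assuming a valid squeeze (every targeted axis has runtime extent 1); the function name and example values are illustrative only, not part of the commit:

    import numpy as np

    def squeeze_shape_via_mask(shape, squeeze_dims):
        # Mimics the layer graph: scatter -> ne -> non_zero -> gather.
        n = len(shape)
        dims = list(squeeze_dims)
        dims += [dims[-1]] * (n - len(dims))  # pad like the converter does
        mask = np.zeros(n, dtype=np.int32)    # zero_layer
        mask[np.array(dims)] = 1              # scatter ones at squeeze positions
        keep = np.asarray(shape) != mask      # ne: squeezed axes have extent 1 == mask 1
        idx = np.nonzero(keep)[0]             # non_zero
        return np.asarray(shape)[idx]         # gather

    print(squeeze_shape_via_mask((1, 2, 1, 3, 1), [2, 4]))  # -> [1 2 3]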