feat: Implement Input class support for FX backend. #1763

Closed · wants to merge 4 commits · Changes from 1 commit
core/runtime/TRTEngine.cpp — 2 changes: 1 addition & 1 deletion

```diff
@@ -148,10 +148,10 @@ TRTEngine::TRTEngine(
 }
 
 TRTEngine::~TRTEngine() {
-  rt.reset();
   trt_engine_profiler.reset();
   exec_ctx.reset();
   cuda_engine.reset();
+  rt.reset();
 }
 
 void TRTEngine::disable_profiling() {
```
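(Presumably the motivation for the reordering: TensorRT requires the `IRuntime` to outlive the engines and execution contexts it deserialized, so `rt` is now released last rather than first during teardown.)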
examples/fx/lower_example.py — 31 changes: 20 additions & 11 deletions

```diff
@@ -4,6 +4,7 @@
 
 import torch
 import torchvision
+import torch_tensorrt
 from torch_tensorrt.fx import compile
 from torch_tensorrt.fx.utils import LowerPrecision
@@ -98,13 +99,17 @@ def benchmark(
 
     model = model.cuda().eval()
     inputs = [x.cuda() for x in inputs]
-
+    # inputs = [torch_tensorrt.Input(shape=(128, 3, 224, 224), dtype=torch.float32)]
```

> **Collaborator:** What are these comments for?

```diff
+    # inputs = [torch_tensorrt.Input(min_shape=[1, 3, 224, 224],
+    #                                opt_shape=[8, 3, 224, 224],
+    #                                max_shape=[32, 3, 224, 224],
+    #                                dtype=torch.float32)]
     # benchmark base configuration
     conf = Configuration(batch_iter=batch_iter, batch_size=batch_size)
 
     configurations = [
         # Baseline
-        replace(conf, name="CUDA Eager", trt=False),
+        # replace(conf, name="CUDA Eager", trt=False),
         # FP32
         replace(
             conf,
@@ -115,14 +120,14 @@ def benchmark(
             accuracy_rtol=1e-3,
         ),
         # FP16
-        replace(
-            conf,
-            name="TRT FP16 Eager",
-            trt=True,
-            jit=False,
-            fp16=True,
-            accuracy_rtol=1e-2,
-        ),
+        # replace(
+        #     conf,
+        #     name="TRT FP16 Eager",
+        #     trt=True,
+        #     jit=False,
+        #     fp16=True,
+        #     accuracy_rtol=1e-2,
+        # ),
     ]
 
     results = [
@@ -189,8 +194,12 @@ def run_configuration_benchmark(
             max_batch_size=conf.batch_size,
             lower_precision=LowerPrecision.FP16 if conf.fp16 else LowerPrecision.FP32,
             explicit_batch_dimension=True,
+            dynamic_batch=False,
         )
-        time = benchmark_torch_function(conf.batch_iter, lambda: lowered_module(*input))
+        random_inputs = [torch.randn((128, 3, 224, 224), dtype=torch.float32).cuda()]
+        time = benchmark_torch_function(
+            conf.batch_iter, lambda: lowered_module(*random_inputs)
+        )
    else:
        print("Lowering with JIT is not available!", "red")
```
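For context on the commented-out lines flagged above: they exercise the two `torch_tensorrt.Input` construction modes this PR wires into the FX backend. A minimal sketch (not part of the diff; it assumes the `Input` signature and the `example_tensor` behavior shown in this PR):

```python
import torch
import torch_tensorrt

# Static shape: one fixed input size.
static_spec = torch_tensorrt.Input(shape=(128, 3, 224, 224), dtype=torch.float32)

# Dynamic shape: a min/opt/max optimization profile over the batch dimension.
dynamic_spec = torch_tensorrt.Input(
    min_shape=[1, 3, 224, 224],
    opt_shape=[8, 3, 224, 224],
    max_shape=[32, 3, 224, 224],
    dtype=torch.float32,
)

# example_tensor() materializes a random tensor matching the spec; a dynamic
# spec must be told which profile field to realize.
x = static_spec.example_tensor()
y = dynamic_spec.example_tensor(optimization_profile_field="opt_shape")
```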
examples/fx/lower_example_aten.py — 20 changes: 14 additions & 6 deletions

```diff
@@ -4,6 +4,7 @@
 
 import torch
 import torchvision
+import torch_tensorrt
 from torch_tensorrt.fx import compile
 from torch_tensorrt.fx.utils import LowerPrecision
@@ -97,21 +98,25 @@ def benchmark(
     """
 
     model = model.cuda().eval()
-    inputs = [x.cuda() for x in inputs]
-
+    # inputs = [x.cuda() for x in inputs]
+    inputs = [torch_tensorrt.Input(shape=(128, 3, 224, 224), dtype=torch.float32)]
+    # inputs = [torch_tensorrt.Input(min_shape=[1, 3, 224, 224],
+    #                                opt_shape=[8, 3, 224, 224],
+    #                                max_shape=[32, 3, 224, 224],
+    #                                dtype=torch.float32)]
     # benchmark base configuration
     conf = Configuration(batch_iter=batch_iter, batch_size=batch_size)
 
     configurations = [
         # Baseline
-        replace(conf, name="CUDA Eager", trt=False),
+        # replace(conf, name="CUDA Eager", trt=False),
         # FP16
         replace(
             conf,
-            name="TRT FP16 Eager",
+            name="TRT FP32 Eager",
             trt=True,
             jit=False,
-            fp16=True,
+            fp16=False,
             accuracy_rtol=1e-2,
         ),
     ]
@@ -182,7 +187,10 @@ def run_configuration_benchmark(
             explicit_batch_dimension=True,
             is_aten=True,
         )
-        time = benchmark_torch_function(conf.batch_iter, lambda: lowered_module(*input))
+        random_inputs = [torch.randn((128, 3, 224, 224), dtype=torch.float32).cuda()]
+        time = benchmark_torch_function(
+            conf.batch_iter, lambda: lowered_module(*random_inputs)
+        )
    else:
        print("Lowering with JIT is not available!", "red")
```
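Both examples time the lowered module through `benchmark_torch_function`. A simplified sketch of what such a helper does — the assumed signature `(iters, fn) -> seconds` matches its use above, but this is not the repo's implementation:

```python
import time

import torch


def benchmark_torch_function(iters: int, f) -> float:
    """Average wall-clock seconds per call of `f` over `iters` iterations."""
    f()  # warm-up: trigger lazy CUDA init and autotuning before timing
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(iters):
        f()
    torch.cuda.synchronize()  # drain queued CUDA work before stopping the clock
    return (time.perf_counter() - start) / iters
```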
py/setup.py — 6 changes: 5 additions & 1 deletion

```diff
@@ -350,6 +350,7 @@ def run(self):
 if FX_ONLY:
     ext_modules = None
     packages = [
+        "torch_tensorrt",
         "torch_tensorrt.fx",
         "torch_tensorrt.fx.converters",
         "torch_tensorrt.fx.passes",
@@ -358,6 +359,7 @@ def run(self):
         "torch_tensorrt.fx.tracer.dispatch_tracer",
     ]
     package_dir = {
+        "torch_tensorrt": "torch_tensorrt/",
         "torch_tensorrt.fx": "torch_tensorrt/fx",
         "torch_tensorrt.fx.converters": "torch_tensorrt/fx/converters",
         "torch_tensorrt.fx.passes": "torch_tensorrt/fx/passes",
@@ -437,7 +439,9 @@ def run(self):
             "bin/*",
             "BUILD",
             "WORKSPACE",
-        ],
+        ]
+        if not FX_ONLY
+        else ["_Input.py"]
     },
     exclude_package_data={
         "": ["*.cpp"],
```
py/torch_tensorrt/_Input.py — 65 changes: 9 additions & 56 deletions

```diff
@@ -4,7 +4,6 @@
 import torch
 
 from torch_tensorrt import _enums
-from torch_tensorrt import _C
 
 
 class Input(object):
@@ -41,6 +40,7 @@ class _ShapeMode(Enum):
     DOMAIN_OFFSET = 2.0
     low_tensor_domain_incl = 0.0
     high_tensor_domain_excl = low_tensor_domain_incl + DOMAIN_OFFSET
+    torch_dtype = None
```

> **Collaborator:** Should we derive torch_dtype from self.dtype?
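One way to do what this comment suggests: map the parsed `_enums.dtype` back to a `torch.dtype` instead of caching the raw kwarg. A sketch — the mapping table is an assumption, not code from this PR:

```python
import torch
from torch_tensorrt import _enums

# Hypothetical reverse of Input._parse_dtype, covering common enum members.
_ENUM_TO_TORCH_DTYPE = {
    _enums.dtype.float: torch.float32,
    _enums.dtype.half: torch.float16,
    _enums.dtype.int8: torch.int8,
    _enums.dtype.int32: torch.int32,
    _enums.dtype.bool: torch.bool,
}


def _torch_dtype_from_enum(dtype) -> torch.dtype:
    # Fall back to float32 when the enum value has no known torch equivalent.
    return _ENUM_TO_TORCH_DTYPE.get(dtype, torch.float32)
```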


```diff
 
     def __init__(self, *args, **kwargs):
         """__init__ Method for torch_tensorrt.Input
@@ -138,6 +138,9 @@ def __init__(self, *args, **kwargs):
             )
 
         if "dtype" in kwargs:
+            if isinstance(kwargs["dtype"], torch.dtype):
+                self.torch_dtype = kwargs["dtype"]
+
             self.dtype = Input._parse_dtype(kwargs["dtype"])
             self._explicit_set_dtype = True
@@ -173,59 +176,6 @@ def __str__(self) -> str:
         else:
             raise RuntimeError("Unknown input shape mode")
 
-    def _to_internal(self) -> _C.Input:
```

> **Collaborator:** Why was this taken out?

```diff
-        internal_in = _C.Input()
-        if self.shape_mode == Input._ShapeMode.DYNAMIC:
-            if not Input._supported_input_size_type(self.shape["min_shape"]):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape["min_shape"]))
-                    + " for min_shape"
-                )
-            else:
-                internal_in.min = self.shape["min_shape"]
-
-            if not Input._supported_input_size_type(self.shape["opt_shape"]):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape["opt_shape"]))
-                    + " for opt_shape"
-                )
-            else:
-                internal_in.opt = self.shape["opt_shape"]
-
-            if not Input._supported_input_size_type(self.shape["max_shape"]):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape["max_shape"]))
-                    + " for max_shape"
-                )
-            else:
-                internal_in.max = self.shape["max_shape"]
-            internal_in.input_is_dynamic = True
-        else:
-            if not Input._supported_input_size_type(self.shape):
-                raise TypeError(
-                    "Input shape specifications for inputs are required to be a List, tuple or torch.Size, found type: "
-                    + str(type(self.shape))
-                    + " for shape"
-                )
-            else:
-                internal_in.opt = self.shape
-                internal_in.input_is_dynamic = False
-
-        if self.dtype != _enums.dtype.unknown:
-            self._explicit_set_dtype = True
-        else:
-            self._explicit_set_dtype = False
-
-        internal_in.dtype = Input._parse_dtype(self.dtype)
-        internal_in._explicit_set_dtype = self._explicit_set_dtype
-        internal_in.format = Input._parse_format(self.format)
-
-        internal_in.tensor_domain = Input._parse_tensor_domain(self.tensor_domain)
-        return internal_in
-
     @staticmethod
     def _supported_input_size_type(input_size: Any) -> bool:
         if isinstance(input_size, torch.Size):
@@ -304,6 +254,7 @@ def _parse_tensor_domain(domain: Optional[Tuple[float, float]]) -> Tuple:
                 Input.low_tensor_domain_incl,
                 Input.high_tensor_domain_excl,
             )
+
         elif len(domain) == 2:
             domain_lo, domain_hi = domain
@@ -416,8 +367,10 @@ def example_tensor(self, optimization_profile_field: str = None) -> torch.Tensor
             )
 
         if self.shape_mode == Input._ShapeMode.STATIC:
-            return torch.randn(self.shape).to(dtype=self.dtype)
+            return torch.randn(self.shape).to(
+                dtype=self.dtype if not self.torch_dtype else self.torch_dtype
+            )
         else:
             return torch.randn(self.shape[optimization_profile_field]).to(
-                dtype=self.dtype
+                dtype=self.dtype if not self.torch_dtype else self.torch_dtype
             )
```
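With the change above, passing a `torch.dtype` caches it in `torch_dtype`, and `example_tensor()` prefers it over the parsed enum. A quick illustration under that assumption:

```python
import torch
import torch_tensorrt

spec = torch_tensorrt.Input(shape=(2, 3, 4), dtype=torch.float16)
t = spec.example_tensor()
# The cached torch.dtype wins over the parsed enum value.
assert t.shape == (2, 3, 4) and t.dtype == torch.float16
```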
py/torch_tensorrt/_compile.py — 3 changes: 1 addition & 2 deletions

```diff
@@ -142,9 +142,8 @@ def compile(
         return torch_tensorrt.fx.compile(
             module,
             inputs,
-            lower_precision=lower_precision,
-            max_batch_size=inputs[0].size(0),
             explicit_batch_dimension=True,
+            lower_precision=lower_precision,
+            dynamic_batch=False,
             **kwargs,
         )
```
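For reference, this branch is reached through the top-level entry point when `ir="fx"` is selected; a usage sketch (the model and input values are illustrative):

```python
import torch
import torchvision
import torch_tensorrt

model = torchvision.models.resnet18().cuda().eval()
inputs = [torch.randn(8, 3, 224, 224).cuda()]

# Dispatches to torch_tensorrt.fx.compile with the kwargs shown in the diff.
trt_model = torch_tensorrt.compile(model, ir="fx", inputs=inputs)
```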
py/torch_tensorrt/fx/__init__.py — 2 changes: 1 addition & 1 deletion

```diff
@@ -8,7 +8,7 @@
     tensorrt_converter,
 )
 from .fx2trt import TRTInterpreter, TRTInterpreterResult  # noqa
-from .input_tensor_spec import generate_input_specs, InputTensorSpec  # noqa
+from .input_tensor_spec import InputTensorSpec  # noqa
 from .lower_setting import LowerSetting  # noqa
 from .trt_module import TRTModule  # noqa
 from .lower import compile  # usort: skip #noqa
```
py/torch_tensorrt/fx/fx2trt.py — 3 changes: 0 additions & 3 deletions

```diff
@@ -153,7 +153,6 @@ def validate_conversion(self):
 
     def run(
         self,
-        max_batch_size=64,
```

> **Contributor:** I'm afraid we can't make this change. We have to maintain backward compatibility on the API; otherwise, it will break our internal product.

```diff
         max_workspace_size=1 << 25,
         lower_precision=LowerPrecision.FP16,
         sparse_weights=False,
@@ -167,7 +166,6 @@ def run(
         """
         Build TensorRT engine with some configs.
         Args:
-            max_batch_size: set accordingly for maximum batch size you will use.
             max_workspace_size: set to the maximum size we can afford for temporary buffer
             lower_precision: the precision model layers are running on (TensorRT will choose the best perforamnce precision).
             sparse_weights: allow the builder to examine weights and use optimized functions when weights have suitable sparsity
@@ -207,7 +205,6 @@ def run(
         )
         build_engine_start_time = datetime.now()
 
-        self.builder.max_batch_size = max_batch_size
         builder_config = self.builder.create_builder_config()
         builder_config.max_workspace_size = max_workspace_size
 
```
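A middle ground that would address this concern (a sketch, not part of the PR): keep accepting `max_batch_size` as a deprecated no-op, since explicit-batch engines no longer consult `builder.max_batch_size`:

```python
import warnings


def run(self, max_batch_size=None, max_workspace_size=1 << 25, **kwargs):
    if max_batch_size is not None:
        warnings.warn(
            "max_batch_size is deprecated and ignored; engines are built "
            "with an explicit batch dimension.",
            DeprecationWarning,
        )
    # ... proceed without touching self.builder.max_batch_size ...
```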