pytorch
diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
Lines changed: 8 additions & 15 deletions
diff --git a/core/runtime/execute_engine.cpp b/core/runtime/execute_engine.cpp
Lines changed: 23 additions & 3 deletions
diff --git a/py/torch_tensorrt/_Input.py b/py/torch_tensorrt/_Input.py
Lines changed: 9 additions & 1 deletion
diff --git a/py/torch_tensorrt/dynamo/_tracer.py b/py/torch_tensorrt/dynamo/_tracer.py
Lines changed: 3 additions & 7 deletions
diff --git a/py/torch_tensorrt/dynamo/backend/backends.py b/py/torch_tensorrt/dynamo/backend/backends.py
Lines changed: 2 additions & 0 deletions
diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
Lines changed: 25 additions & 12 deletions
diff --git a/py/torch_tensorrt/dynamo/conversion/_conversion.py b/py/torch_tensorrt/dynamo/conversion/_conversion.py
Lines changed: 8 additions & 8 deletions
@@ -72,7 +72,7 @@ jobs:
         export USE_HOST_DEPS=1
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/
         popd
 
@@ -98,7 +98,7 @@ jobs:
         export USE_HOST_DEPS=1
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py
         popd
@@ -125,7 +125,7 @@ jobs:
         export USE_HOST_DEPS=1
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py
         popd
@@ -152,7 +152,7 @@ jobs:
         export USE_HOST_DEPS=1
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
 
@@ -78,16 +78,15 @@ jobs:
       script: |
         export USE_HOST_DEPS=1
         export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
-        export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
         pushd .
         cd tests/modules
         # Don't use requirements.txt here as it contains tensorrt and torch which should have been installed by now.
-        ${CONDA_RUN} python -m pip install numpy packaging pyyaml transformers timm pybind11==2.6.2
+        ${CONDA_RUN} python -m pip install numpy packaging pyyaml transformers==4.39.3 timm==0.9.16 pybind11==2.6.2
         ${CONDA_RUN} python hub.py
         popd
         pushd .
         cd tests/py/ts
-        ${CONDA_RUN} python -m pip install --pre pytest timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/
@@ -115,10 +114,9 @@ jobs:
       pre-script: ${{ matrix.pre-script }}
       script: |
         export USE_HOST_DEPS=1
-        export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 10 conversion/
         popd
 
@@ -144,10 +142,9 @@ jobs:
       pre-script: ${{ matrix.pre-script }}
       script: |
         export USE_HOST_DEPS=1
-        export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_fe_test_results.xml --ir dynamo models/test_models_export.py
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/test_dyn_models.py
         popd
@@ -174,10 +171,9 @@ jobs:
       pre-script: ${{ matrix.pre-script }}
       script: |
         export USE_HOST_DEPS=1
-        export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py
         popd
 
@@ -203,10 +199,9 @@ jobs:
       pre-script: ${{ matrix.pre-script }}
       script: |
         export USE_HOST_DEPS=1
-        export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_comple_be_e2e_test_results.xml --ir torch_compile models/test_models.py
         ${CONDA_RUN} python -m pytest --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py
@@ -234,10 +229,9 @@ jobs:
       pre-script: ${{ matrix.pre-script }}
       script: |
         export USE_HOST_DEPS=1
-        export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
         pushd .
         cd tests/py/dynamo
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml runtime/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/
@@ -264,9 +258,8 @@ jobs:
       pre-script: ${{ matrix.pre-script }}
       script: |
         export USE_HOST_DEPS=1
-        export LD_LIBRARY_PATH=/opt/torch-tensorrt-builds/TensorRT-10.0.1.6/lib:$LD_LIBRARY_PATH
         pushd .
         cd tests/py/core
-        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm transformers parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
+        ${CONDA_RUN} python -m pip install --pre pytest-xdist timm==0.9.16 transformers==4.39.3 parameterized expecttest==0.1.6 --use-deprecated=legacy-resolver
         ${CONDA_RUN} python -m pytest -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml .
         popd
@@ -124,6 +124,8 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
     }
   }
 
+  // Buffer that keeps shape tensor input values alive throughout the runtime scope; their addresses are passed to the execution context
+  std::list<std::vector<int32_t>> inputShapeTensorValues;
   {
     std::unique_ptr<torch::autograd::profiler::RecordProfile> input_profiler_guard;
     if (compiled_engine->profile_execution) {
@@ -142,12 +144,30 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
       auto dims = core::util::toDims(inputs[i].sizes());
       auto shape = core::util::toVec(dims);
       LOG_DEBUG("Input Name: " << name << " Shape: " << dims);
-      compiled_engine->exec_ctx->setInputShape(name.c_str(), dims);
-      compiled_engine->exec_ctx->setTensorAddress(name.c_str(), inputs[i].view(shape).contiguous().data_ptr());
+      if (compiled_engine->cuda_engine->isShapeInferenceIO(name.c_str())) {
+        // Shape tensor inputs are cast to int32 explicitly.
+        // Refer to
+        // https://github.com/NVIDIA/TensorRT/blob/d2f4ef789a9a6ffdf37b55c3f81b486225f6b380/samples/common/sampleInference.cpp#L435
+        auto input_cpu = inputs[i].clone().contiguous().cpu().to(torch::kInt32);
+        std::vector<int32_t> inputs_cpu_vec(
+            input_cpu.data_ptr<int32_t>(), input_cpu.data_ptr<int32_t>() + input_cpu.numel());
+        inputShapeTensorValues.emplace_back(inputs_cpu_vec);
+        compiled_engine->exec_ctx->setTensorAddress(name.c_str(), inputShapeTensorValues.back().data());
+      } else {
+        compiled_engine->exec_ctx->setInputShape(name.c_str(), dims);
+        compiled_engine->exec_ctx->setTensorAddress(name.c_str(), inputs[i].view(shape).contiguous().data_ptr());
+      }
     }
 
+    // Check if input shapes can be inferred.
+    int32_t const io_size{compiled_engine->cuda_engine->getNbIOTensors()};
+    std::vector<char const*> names(io_size);
+    int32_t const nbNames = compiled_engine->exec_ctx->inferShapes(names.size(), names.data());
     TORCHTRT_CHECK(
-        compiled_engine->exec_ctx->allInputShapesSpecified(), "Not enough inputs provided (runtime.RunCudaEngine)");
+        nbNames == 0,
+        "The shapes of the inputs: "
+            << names
+            << " cannot be inferred. This could happen if the input tensor addresses/shapes haven't been configured correctly");
   }
 
   std::vector<at::Tensor> outputs(compiled_engine->num_io.second);
 
@@ -47,6 +47,7 @@ class _ShapeMode(Enum):
     high_tensor_domain_excl: float = low_tensor_domain_incl + DOMAIN_OFFSET
     torch_tensor: torch.Tensor = None
     name: str = ""
+    is_shape_tensor: bool = False
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         """__init__ Method for torch_tensorrt.Input
@@ -161,6 +162,9 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         else:
             self._explicit_set_dtype = False
 
+        if "is_shape_tensor" in kwargs:
+            self.is_shape_tensor = kwargs["is_shape_tensor"]
+
         if "format" in kwargs:
             self.format = memory_format._from(kwargs["format"])
 
@@ -174,7 +178,11 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         if "torch_tensor" in kwargs:
             self.torch_tensor = kwargs["torch_tensor"]
         else:
-            if self.shape_mode == Input._ShapeMode.DYNAMIC:
+            if self.is_shape_tensor:
+                self.torch_tensor = torch.tensor(
+                    kwargs["opt_shape"], dtype=kwargs["dtype"]
+                )
+            elif self.shape_mode == Input._ShapeMode.DYNAMIC:
                 self.torch_tensor = self.example_tensor("opt_shape")
             else:
                 self.torch_tensor = self.example_tensor()
 
@@ -58,13 +58,9 @@ def trace(
 
     device = to_torch_device(kwargs.get("device", default_device()))
     torch_inputs = get_torch_inputs(inputs, device)
-    dynamic_shapes = {}
+    dynamic_shapes = []
     for input in inputs:
         if isinstance(input, Input) and input.shape_mode == Input._ShapeMode.DYNAMIC:
-            if not input.name:
-                raise AssertionError(
-                    f"Expected a name for a dynamic input with shape {input.shape} but found none"
-                )
             min_shape = input.shape["min_shape"]
             opt_shape = input.shape["opt_shape"]
             max_shape = input.shape["max_shape"]
@@ -80,8 +76,8 @@ def trace(
                         max=max_shape[dim],
                     )
 
-            dynamic_shapes[input.name] = dynamic_dims
+            dynamic_shapes.append(dynamic_dims)
 
-    exp_program = export(mod, tuple(torch_inputs), dynamic_shapes=dynamic_shapes)
+    exp_program = export(mod, tuple(torch_inputs), dynamic_shapes=tuple(dynamic_shapes))
 
     return exp_program
@@ -96,6 +96,8 @@ def _pretraced_backend(
 
             gm = apply_lowering_passes(gm, torch_inputs)
 
+            logger.debug("Lowered Input graph:\n " + str(gm.graph))
+
             torchtrt_inputs = prepare_inputs(
                 torch_inputs, disable_memory_format_check=True
             )
 
@@ -4,6 +4,7 @@
 from typing import Any, Callable, Dict, List, NamedTuple, Optional, Sequence, Set
 
 import numpy as np
+import tensorrt as trt
 import torch
 import torch.fx
 from torch.fx.node import _get_qualified_name
@@ -22,10 +23,10 @@
     get_node_name,
     get_trt_tensor,
 )
+from torch_tensorrt.dynamo.utils import DYNAMIC_DIM
 from torch_tensorrt.fx.observer import Observer
 from torch_tensorrt.logging import TRT_LOGGER
 
-import tensorrt as trt
 from packaging import version
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -365,18 +366,29 @@ def placeholder(self, target: str, args: Any, kwargs: Any) -> trt.ITensor:
             max_shape = current_input.shape["max_shape"]
             # TODO: Does not support disjoint optimization profiles?
             assert self.optimization_profiles is not None
-            self.optimization_profiles[0].set_shape(
-                target, min_shape, opt_shape, max_shape
-            )
-
             assert len(min_shape) == len(opt_shape) == len(max_shape)
-            for i in range(len(min_shape)):
-                if min_shape[i] == opt_shape[i] == max_shape[i]:
-                    shape.append(min_shape[i])
-                else:
-                    # -1 to represent the dynamic dimension
-                    shape.append(-1)
-        elif current_input.shape_mode == Input._ShapeMode.STATIC:
+            if current_input.is_shape_tensor:
+                # For shape_tensors, min/opt/max_shapes correspond to actual values
+                # of the shapes provided during runtime
+                self.optimization_profiles[0].set_shape_input(
+                    target, min_shape, opt_shape, max_shape
+                )
+                shape.append(len(opt_shape))
+            else:
+                self.optimization_profiles[0].set_shape(
+                    target, min_shape, opt_shape, max_shape
+                )
+
+                for i in range(len(min_shape)):
+                    if min_shape[i] == opt_shape[i] == max_shape[i]:
+                        shape.append(min_shape[i])
+                    else:
+                        # -1 to represent the dynamic dimension
+                        shape.append(DYNAMIC_DIM)
+        elif (
+            not current_input.is_shape_tensor
+            and current_input.shape_mode == Input._ShapeMode.STATIC
+        ):
             assert isinstance(current_input.shape, tuple)
             shape = list(current_input.shape)
         else:
@@ -388,6 +400,7 @@ def placeholder(self, target: str, args: Any, kwargs: Any) -> trt.ITensor:
         _LOGGER.debug(
             f"Adding input to in-progress INetwork: {target} [shape={shape}, dtype={trt_input_dtype}]"
         )
+
         return self.ctx.net.add_input(
             name=target,
             shape=tuple(shape),
 
@@ -4,7 +4,9 @@
 import logging
 from typing import List, Sequence
 
+import tensorrt as trt
 import torch
+from torch.fx.experimental.proxy_tensor import maybe_disable_fake_tensor_mode
 from torch_tensorrt._Device import Device
 from torch_tensorrt._enums import dtype
 from torch_tensorrt._features import ENABLED_FEATURES
@@ -17,8 +19,6 @@
 from torch_tensorrt.dynamo.runtime import PythonTorchTensorRTModule, TorchTensorRTModule
 from torch_tensorrt.dynamo.utils import get_torch_inputs
 
-import tensorrt as trt
-
 logger = logging.getLogger(__name__)
 
 
@@ -28,12 +28,12 @@ def infer_module_output_dtypes(
     device: Device,
     truncate_double: bool = False,
 ) -> List[dtype]:
-    torch_inputs = get_torch_inputs(inputs, device)
-    module = module.to(device.to(torch.device))
-    module_outputs = module(*torch_inputs)
-
-    if not isinstance(module_outputs, (list, tuple)):
-        module_outputs = [module_outputs]
+    with maybe_disable_fake_tensor_mode():
+        torch_inputs = get_torch_inputs(inputs, device)
+        module = module.to(device.to(torch.device))
+        module_outputs = module(*torch_inputs)
+        if not isinstance(module_outputs, (list, tuple)):
+            module_outputs = [module_outputs]
 
     # Int64 outputs can sometimes be generated from within other operators
     # such as aten.sum - such outputs can be truncated
Original file line number	Diff line number	Diff line change
`@@ -96,6 +96,8 @@ def _pretraced_backend(`
`96`	`96`
`97`	`97`	`gm = apply_lowering_passes(gm, torch_inputs)`
`98`	`98`
	`99`	`+ logger.debug("Lowered Input graph:\n " + str(gm.graph))`
	`100`	`+`
`99`	`101`	`torchtrt_inputs = prepare_inputs(`
`100`	`102`	`torch_inputs, disable_memory_format_check=True`
`101`	`103`	`)`