pytorch
diff --git a/.circleci/config.yml b/.circleci/config.yml
Lines changed: 5 additions & 18 deletions
diff --git a/py/torch_tensorrt/_Input.py b/py/torch_tensorrt/_Input.py
Lines changed: 20 additions & 3 deletions
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/Dynamic_Shape_Support.md b/py/torch_tensorrt/dynamo/fx_ts_compat/Dynamic_Shape_Support.md
Lines changed: 0 additions & 137 deletions
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py b/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py
Lines changed: 1 addition & 1 deletion
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/input_tensor_spec.py b/py/torch_tensorrt/dynamo/fx_ts_compat/input_tensor_spec.py
Lines changed: 2 additions & 2 deletions
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/lower.py b/py/torch_tensorrt/dynamo/fx_ts_compat/lower.py
Lines changed: 24 additions & 7 deletions
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py b/py/torch_tensorrt/dynamo/fx_ts_compat/lower_setting.py
Lines changed: 3 additions & 3 deletions
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/passes/lower_pass_manager_builder.py b/py/torch_tensorrt/dynamo/fx_ts_compat/passes/lower_pass_manager_builder.py
Lines changed: 1 addition & 1 deletion
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/test/core/test_import_fx2trt.py b/py/torch_tensorrt/dynamo/fx_ts_compat/test/core/test_import_fx2trt.py
Lines changed: 0 additions & 18 deletions
@@ -711,15 +711,15 @@ commands:
 # =================== FX tests end ======================== #
 
 # =================== Dynamo tests start ======================== #
-  test-dynamo-fx_ts_core:
-    description: "Test the Dynamo core"
+  test-dynamo-fx_ts:
+    description: "Test the Dynamo fx_ts_compat path"
     steps:
       - run:
-          name: Run Dynamo core tests
+          name: Run Dynamo fx_ts_compat core tests
           command: |
             cd py/torch_tensorrt/dynamo/fx_ts_compat/test
             pushd core/
-            pytest --junitxml=/tmp/artifacts/test_results/dynamo/core/test_results.xml
+            pytest --junitxml=/tmp/artifacts/test_results/dynamo/fx_ts_compat/test_results.xml
             popd
 
       - store_test_results:
@@ -737,27 +737,14 @@ commands:
             pushd test/
             pip3 install timm
             pip3 install transformers
-            pytest --junitxml=/tmp/artifacts/test_results/dynamo/test_results.xml --ir torch_compile
+            pytest --junitxml=/tmp/artifacts/test_results/dynamo/torch_compile/test_results.xml --ir torch_compile
             popd
 
       - store_test_results:
           path: /tmp/artifacts
       - store_artifacts:
           path: /tmp/testlogs
 
-  test-dynamo-fx_ts:
-    description: "Test the dynamo backend"
-    steps:
-      - run:
-          name: Run dynamo tests
-          command: |
-            mkdir -p /tmp/artifacts/test_results
-      - test-dynamo-fx_ts_core
-      - store_test_results:
-          path: /tmp/artifacts
-      - store_artifacts:
-          path: /tmp/testlogs
-
 # =================== Dynamo tests end ======================== #
 
 # Define a job to be invoked later in a workflow.
 
@@ -40,7 +40,7 @@ class _ShapeMode(Enum):
     DOMAIN_OFFSET = 2.0
     low_tensor_domain_incl = 0.0
     high_tensor_domain_excl = low_tensor_domain_incl + DOMAIN_OFFSET
-    torch_dtype = None
+    torch_dtype = torch.float32
 
     def __init__(self, *args, **kwargs):
         """__init__ Method for torch_tensorrt.Input
@@ -142,6 +142,7 @@ def __init__(self, *args, **kwargs):
                 self.torch_dtype = kwargs["dtype"]
 
             self.dtype = Input._parse_dtype(kwargs["dtype"])
+            self.torch_dtype = Input._to_torch_dtype(self.dtype)
             self._explicit_set_dtype = True
 
         if "format" in kwargs:
@@ -215,6 +216,22 @@ def _parse_dtype(dtype: Any) -> _enums.dtype:
                 + str(type(dtype))
             )
 
+    @staticmethod
+    def _to_torch_dtype(dtype: _enums.dtype) -> torch.dtype:
+        """Map a Torch-TensorRT dtype enum onto the matching ``torch.dtype``."""
+        known_dtypes = (
+            (_enums.dtype.long, torch.long),
+            (_enums.dtype.int32, torch.int32),
+            (_enums.dtype.half, torch.half),
+            (_enums.dtype.float, torch.float),
+            (_enums.dtype.bool, torch.bool),
+        )
+        for trt_dtype, torch_equivalent in known_dtypes:
+            if dtype == trt_dtype:
+                return torch_equivalent
+        # Default torch_dtype used in FX path
+        return torch.float32
+
     def is_trt_dtype(self) -> bool:
         return self.dtype != _enums.dtype.long
 
@@ -368,9 +385,9 @@ def example_tensor(self, optimization_profile_field: str = None) -> torch.Tensor
 
         if self.shape_mode == Input._ShapeMode.STATIC:
             return torch.rand(self.shape).to(
-                dtype=self.dtype if not self.torch_dtype else self.torch_dtype
+                dtype=self.torch_dtype
             )
         else:
             return torch.rand(self.shape[optimization_profile_field]).to(
-                dtype=self.dtype if not self.torch_dtype else self.torch_dtype
+                dtype=self.torch_dtype
             )
@@ -16,7 +16,7 @@
 from torch_tensorrt.dynamo.fx_ts_compat import CONVERTERS
 from .input_tensor_spec import InputTensorSpec
 from torch_tensorrt.fx.observer import Observer
-from .utils import get_dynamic_dims, LowerPrecision, torch_dtype_to_trt
+from torch_tensorrt.fx.utils import get_dynamic_dims, LowerPrecision, torch_dtype_to_trt
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
@@ -2,8 +2,8 @@
 
 import torch
 
-from .types import Shape, ShapeRange
-from .utils import get_dynamic_dims
+from torch_tensorrt.fx.types import Shape, ShapeRange
+from torch_tensorrt.fx.utils import get_dynamic_dims
 from torch_tensorrt._Input import Input
 
 
 
@@ -14,12 +14,12 @@
 from .lower_setting import LowerSetting
 from .passes.lower_pass_manager_builder import LowerPassManagerBuilder
 from .passes.pass_utils import PassFunc, validate_inference
-from .tools.timing_cache_utils import TimingCacheManager
-from .tools.trt_splitter import TRTSplitter, TRTSplitterSetting
+from torch_tensorrt.fx.tools.timing_cache_utils import TimingCacheManager
+from torch_tensorrt.fx.tools.trt_splitter import TRTSplitter, TRTSplitterSetting
 
 from torch_tensorrt.fx.tracer.acc_tracer import acc_tracer
 from torch_tensorrt.fx.trt_module import TRTModule
-from .utils import LowerPrecision
+from torch_tensorrt.fx.utils import LowerPrecision
 from torch_tensorrt._Device import Device
 
 logger = logging.getLogger(__name__)
@@ -36,12 +36,23 @@ def compile(
     enabled_precisions=set(),
     min_block_size: int = 3,
     workspace_size=0,
-    verbose_log=False,
+    dla_sram_size=1048576,
+    dla_local_dram_size=1073741824,
+    dla_global_dram_size=536870912,
+    calibrator=None,
+    truncate_long_and_double=False,
+    require_full_compilation=False,
+    debug=False,
+    refit=False,
     timing_cache_prefix="",
     save_timing_cache=False,
     cuda_graph_batch_size=-1,
     is_aten=False,
     use_experimental_fx_rt=False,
+    num_avg_timing_iters=1,
+    torch_executed_ops=[],
+    torch_executed_modules=[],
+    **kwargs,
 ) -> nn.Module:
     """
     Takes in original module, input and lowering setting, run lowering workflow to turn module
@@ -52,7 +63,7 @@ def compile(
         input: Input for module.
         min_block_size: Minimal number of nodes for an accelerated submodule
         workspace_size: Maximum size of workspace given to TensorRT.
-        verbose_log: Enable verbose log for TensorRT if set True.
+        debug: Enable verbose log for TensorRT if set True.
         timing_cache_prefix: Timing cache file name for timing cache used by fx2trt.
         save_timing_cache: Update timing cache with current timing cache data if set to True.
         cuda_graph_batch_size: Cuda graph batch size, default to be -1.
@@ -65,6 +76,12 @@ def compile(
             "The experimental unifed runtime only supports explicit batch. Please make sure to set explicit_batch_dimension=True when use_experimental_fx_rt=True"
         )
 
+    logger.warn(
+        "For ir=fx_ts_compat backend only the "
+        + "following arguments are supported: "
+        + "{enabled_precisions, debug, workspace_size, device, disable_tf32, sparse_weights, min_block_size}"
+    )
+
     # Parse precision into LowerPrecision
     lower_precision = LowerPrecision.FP32
     if torch.float16 in enabled_precisions:
@@ -100,7 +117,7 @@ def compile(
         sparse_weights=sparse_weights,
         workspace_size=workspace_size,
         lower_precision=lower_precision,
-        verbose_log=verbose_log,
+        debug=debug,
         timing_cache_prefix=timing_cache_prefix,
         save_timing_cache=save_timing_cache,
         cuda_graph_batch_size=cuda_graph_batch_size,
@@ -148,7 +165,7 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
             explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
             explicit_precision=self.lower_setting.explicit_precision,
             logger_level=trt.Logger.VERBOSE
-            if self.lower_setting.verbose_log
+            if self.lower_setting.debug
             else trt.Logger.WARNING,
         )
 
 
@@ -9,7 +9,7 @@
     fuse_permute_linear,
     fuse_permute_matmul,
 )
-from .utils import LowerPrecision
+from torch_tensorrt.fx.utils import LowerPrecision
 
 
 @dc.dataclass
@@ -54,7 +54,7 @@ class LowerSetting(LowerSettingBasic):
     as (a->b->c->d)=>(e). Current basic fuse patterns are:
     permute->linear
     permute->matmul
-    verbose_log: Enable TensorRT engine verbose log mode.
+    debug: Enable TensorRT engine verbose log mode.
     algo_selector: Enable TensorRT algorithm selector at execution time.
     timing_cache_prefix: TensorRT timing cache file path. TensorRT engine will use timing
     cache file at execution time if valid timing cache file is provided.
@@ -85,7 +85,7 @@ class LowerSetting(LowerSettingBasic):
             [fuse_permute_matmul, fuse_permute_linear]
         )
     )
-    verbose_log: bool = False
+    debug: bool = False
     algo_selector = None
     timing_cache_prefix: str = ""
     save_timing_cache: bool = False
 
@@ -8,7 +8,7 @@
 from torch.fx.passes.pass_manager import inplace_wrapper, PassManager
 from torch.fx.passes.shape_prop import ShapeProp
 from torch.fx.passes.splitter_base import generate_inputs_for_submodules, SplitResult
-from torch_tensorrt.dynamo.fx_ts_compat.utils import LowerPrecision
+from torch_tensorrt.fx.utils import LowerPrecision
 from torch_tensorrt import _Input
 from ..input_tensor_spec import InputTensorSpec