
Commit e6da7e4

chore: some reorg and internal cleanup, addressing review comments

Signed-off-by: Naren Dasan <[email protected]>
Signed-off-by: Naren Dasan <[email protected]>

1 parent: ae453fc

File tree: 11 files changed, +187 −195 lines

py/torch_tensorrt/__init__.py (3 additions, 3 deletions)

@@ -92,11 +92,11 @@ def _find_lib(name: str, paths: List[str]) -> str:
 def _register_with_torch() -> None:
     trtorch_dir = os.path.dirname(__file__)
     if os.path.isfile(trtorch_dir + "/lib/libtorchtrt.so"):
-        assert ENABLED_FEATURES.torchscript_frontend == True
-        assert ENABLED_FEATURES.torch_tensorrt_runtime == True
+        assert ENABLED_FEATURES.torchscript_frontend
+        assert ENABLED_FEATURES.torch_tensorrt_runtime
         torch.ops.load_library(trtorch_dir + "/lib/libtorchtrt.so")
     elif os.path.isfile(trtorch_dir + "/lib/libtorchtrt_runtime.so"):
-        assert ENABLED_FEATURES.torch_tensorrt_runtime == True
+        assert ENABLED_FEATURES.torch_tensorrt_runtime
         torch.ops.load_library(trtorch_dir + "/lib/libtorchtrt_runtime.so")

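The change above is lint-level cleanup: comparing a boolean to True with == is the pattern flake8 flags as E712, and a bare assert performs the same truthiness check directly. A minimal illustration (the variable name is invented for the example):

    # Bare asserts test truthiness directly; `== True` adds no safety and is
    # flagged by flake8 as E712.
    feature_enabled = True
    assert feature_enabled                   # idiomatic
    assert feature_enabled == True  # noqa: E712, equivalent but non-idiomatic
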
py/torch_tensorrt/_compile.py (36 additions, 17 deletions)

@@ -9,6 +9,7 @@
 from torch_tensorrt._enums import dtype
 from torch_tensorrt._features import ENABLED_FEATURES
 from torch_tensorrt._Input import Input
+from torch_tensorrt.dynamo import _defaults
 from torch_tensorrt.fx import InputTensorSpec
 from torch_tensorrt.fx.lower import compile as fx_compile
 from torch_tensorrt.fx.utils import LowerPrecision
@@ -17,6 +18,9 @@
 if ENABLED_FEATURES.torchscript_frontend:
     import torch_tensorrt.ts
     from torch_tensorrt.ts._compiler import compile as torchscript_compile
+    from torch_tensorrt.ts._compiler import (
+        convert_method_to_trt_engine as ts_convert_method_to_trt_engine,
+    )

 if ENABLED_FEATURES.dynamo_frontend:
     from torch._export import ExportedProgram
@@ -88,20 +92,34 @@ def _get_target_fe(module_type: _ModuleType, ir: str) -> _IRType:
     ir_targets_dynamo = ir == "dynamo"
     ir_targets_torch_compile = ir == "torch_compile"

-    if (
-        module_is_tsable and ir_targets_torchscript
-    ) and ENABLED_FEATURES.torchscript_frontend:
-        return _IRType.ts
-    elif (module_is_fxable and ir_targets_fx) and ENABLED_FEATURES.fx_frontend:
-        return _IRType.fx
-    elif (
-        (module_is_fxable or module_is_exportable) and ir_targets_dynamo
-    ) and ENABLED_FEATURES.dynamo_frontend:
-        return _IRType.dynamo
-    elif (
-        module_is_fxable and ir_targets_torch_compile
-    ) and ENABLED_FEATURES.dynamo_frontend:
-        return _IRType.torch_compile
+    if module_is_tsable and ir_targets_torchscript:
+        if ENABLED_FEATURES.torchscript_frontend:
+            return _IRType.ts
+        else:
+            raise ValueError(
+                "Requested using the TS frontend but the TS frontend is not available in this build of Torch-TensorRT"
+            )
+    elif module_is_fxable and ir_targets_fx:
+        if ENABLED_FEATURES.fx_frontend:
+            return _IRType.fx
+        else:
+            raise ValueError(
+                "Requested using the FX frontend but the FX frontend is not available in this build of Torch-TensorRT"
+            )
+    elif (module_is_fxable or module_is_exportable) and ir_targets_dynamo:
+        if ENABLED_FEATURES.dynamo_frontend:
+            return _IRType.dynamo
+        else:
+            raise ValueError(
+                "Requested using the Dynamo frontend but the Dynamo frontend is not available in this build of Torch-TensorRT"
+            )
+    elif module_is_fxable and ir_targets_torch_compile:
+        if ENABLED_FEATURES.dynamo_frontend:
+            return _IRType.torch_compile
+        else:
+            raise ValueError(
+                "Requested using the Torch-TensorRT torch.compile backend but the Torch-TensorRT torch.compile backend is not available in this build of Torch-TensorRT"
+            )
     else:
         if ir == "default":
             # Options are listed in order of preference
@@ -169,9 +187,9 @@ def compile(
     Returns:
         torch.nn.Module: Compiled Module, when run it will execute via TensorRT
     """
-    input_list = inputs if inputs is not None else []
+    input_list = inputs if inputs else []
     enabled_precisions_set: Set[dtype | torch.dtype] = (
-        enabled_precisions if enabled_precisions is not None else {dtype.float}
+        enabled_precisions if enabled_precisions else _defaults.ENABLED_PRECISIONS
     )

     module_type = _parse_module_type(module)
@@ -309,13 +327,14 @@ def convert_method_to_trt_engine(
             "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript"
         )
         ts_mod = torch.jit.script(module)
-        return torch_tensorrt.ts.convert_method_to_trt_engine(
+        serialized_engine: bytes = ts_convert_method_to_trt_engine(
             ts_mod,
             inputs=inputs,
             method_name=method_name,
             enabled_precisions=enabled_precisions_set,
             **kwargs,
         )
+        return serialized_engine
     elif target_ir == _IRType.fx:
         raise RuntimeError(
             "convert_method_to_trt_engine call is not supported for ir=fx"

py/torch_tensorrt/dynamo/_compiler.py (53 additions, 102 deletions)

@@ -10,34 +10,7 @@
 from torch_tensorrt._Device import Device
 from torch_tensorrt._enums import EngineCapability, dtype
 from torch_tensorrt._Input import Input
-from torch_tensorrt.dynamo import partitioning
-from torch_tensorrt.dynamo._defaults import (
-    DEBUG,
-    DEVICE,
-    DISABLE_TF32,
-    DLA_GLOBAL_DRAM_SIZE,
-    DLA_LOCAL_DRAM_SIZE,
-    DLA_SRAM_SIZE,
-    DRYRUN,
-    ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
-    ENGINE_CAPABILITY,
-    HARDWARE_COMPATIBLE,
-    MAX_AUX_STREAMS,
-    MIN_BLOCK_SIZE,
-    NUM_AVG_TIMING_ITERS,
-    OPTIMIZATION_LEVEL,
-    OUTPUT_FORMAT,
-    PASS_THROUGH_BUILD_FAILURES,
-    PRECISION,
-    REFIT,
-    REQUIRE_FULL_COMPILATION,
-    SPARSE_WEIGHTS,
-    TRUNCATE_LONG_AND_DOUBLE,
-    USE_FAST_PARTITIONER,
-    USE_PYTHON_RUNTIME,
-    VERSION_COMPATIBLE,
-    WORKSPACE_SIZE,
-)
+from torch_tensorrt.dynamo import _defaults, partitioning
 from torch_tensorrt.dynamo._DryRunTracker import (
     DryRunTracker,
     PerSubgraphData,
@@ -72,35 +45,35 @@ def compile(
     exported_program: ExportedProgram,
     inputs: Tuple[Any, ...],
     *,
-    device: Optional[Union[Device, torch.device, str]] = DEVICE,
-    disable_tf32: bool = DISABLE_TF32,
-    sparse_weights: bool = SPARSE_WEIGHTS,
-    enabled_precisions: Set[torch.dtype | dtype] | Tuple[torch.dtype | dtype] = (
-        dtype.float32,
-    ),
-    engine_capability: EngineCapability = ENGINE_CAPABILITY,
-    refit: bool = REFIT,
-    debug: bool = DEBUG,
-    num_avg_timing_iters: int = NUM_AVG_TIMING_ITERS,
-    workspace_size: int = WORKSPACE_SIZE,
-    dla_sram_size: int = DLA_SRAM_SIZE,
-    dla_local_dram_size: int = DLA_LOCAL_DRAM_SIZE,
-    dla_global_dram_size: int = DLA_GLOBAL_DRAM_SIZE,
-    truncate_long_and_double: bool = TRUNCATE_LONG_AND_DOUBLE,
-    require_full_compilation: bool = REQUIRE_FULL_COMPILATION,
-    min_block_size: int = MIN_BLOCK_SIZE,
+    device: Optional[Union[Device, torch.device, str]] = _defaults.DEVICE,
+    disable_tf32: bool = _defaults.DISABLE_TF32,
+    sparse_weights: bool = _defaults.SPARSE_WEIGHTS,
+    enabled_precisions: (
+        Set[torch.dtype | dtype] | Tuple[torch.dtype | dtype]
+    ) = _defaults.ENABLED_PRECISIONS,
+    engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
+    refit: bool = _defaults.REFIT,
+    debug: bool = _defaults.DEBUG,
+    num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
+    workspace_size: int = _defaults.WORKSPACE_SIZE,
+    dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
+    dla_local_dram_size: int = _defaults.DLA_LOCAL_DRAM_SIZE,
+    dla_global_dram_size: int = _defaults.DLA_GLOBAL_DRAM_SIZE,
+    truncate_long_and_double: bool = _defaults.TRUNCATE_LONG_AND_DOUBLE,
+    require_full_compilation: bool = _defaults.REQUIRE_FULL_COMPILATION,
+    min_block_size: int = _defaults.MIN_BLOCK_SIZE,
     torch_executed_ops: Optional[Collection[Target]] = None,
     torch_executed_modules: Optional[List[str]] = None,
-    pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES,
-    max_aux_streams: Optional[int] = MAX_AUX_STREAMS,
-    version_compatible: bool = VERSION_COMPATIBLE,
-    optimization_level: Optional[int] = OPTIMIZATION_LEVEL,
-    use_python_runtime: bool = USE_PYTHON_RUNTIME,
-    use_fast_partitioner: bool = USE_FAST_PARTITIONER,
-    enable_experimental_decompositions: bool = ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
-    dryrun: bool = DRYRUN,
-    hardware_compatible: bool = HARDWARE_COMPATIBLE,
-    output_format: str = OUTPUT_FORMAT,
+    pass_through_build_failures: bool = _defaults.PASS_THROUGH_BUILD_FAILURES,
+    max_aux_streams: Optional[int] = _defaults.MAX_AUX_STREAMS,
+    version_compatible: bool = _defaults.VERSION_COMPATIBLE,
+    optimization_level: Optional[int] = _defaults.OPTIMIZATION_LEVEL,
+    use_python_runtime: bool = _defaults.USE_PYTHON_RUNTIME,
+    use_fast_partitioner: bool = _defaults.USE_FAST_PARTITIONER,
+    enable_experimental_decompositions: bool = _defaults.ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
+    dryrun: bool = _defaults.DRYRUN,
+    hardware_compatible: bool = _defaults.HARDWARE_COMPATIBLE,
+    output_format: str = _defaults.OUTPUT_FORMAT,
     **kwargs: Any,
 ) -> Union[ExportedProgram, torch.jit.ScriptModule, torch.fx.GraphModule]:
     """Compile a TorchScript module for NVIDIA GPUs using TensorRT
@@ -182,6 +155,7 @@ def compile(
     # Prepare torch_trt inputs
     inputs = prepare_inputs(inputs)
     device = to_torch_tensorrt_device(device)
+    enabled_precisions = {dtype._from(p) for p in enabled_precisions}

     if not isinstance(exported_program, ExportedProgram):
         raise AssertionError(
@@ -198,21 +172,10 @@ def compile(
     gm = apply_lowering_passes(gm, torch_inputs)
     logger.debug("Lowered Input graph: " + str(gm.graph))

-    if dtype.float16 in enabled_precisions or dtype.half in enabled_precisions:
-        precision = dtype.float16
-    elif dtype.float32 in enabled_precisions or dtype.float in enabled_precisions:
-        precision = dtype.float32
-    elif len(enabled_precisions) == 0:
-        logger.info(f"No precision specified, defaulting to {PRECISION}")
-        precision = PRECISION
-    else:
-        raise ValueError(
-            f"Precision {enabled_precisions} not supported in the Dynamo Path"
-        )
-    enabled_precisions = {dtype._from(e) for e in enabled_precisions}
-
     compilation_options = {
-        "precision": precision,
+        "enabled_precisions": (
+            enabled_precisions if enabled_precisions else _defaults.ENABLED_PRECISIONS
+        ),
         "debug": debug,
         "device": device,
         "workspace_size": workspace_size,
@@ -459,28 +422,28 @@ def convert_module_to_trt_engine(
     enabled_precisions: Set[torch.dtype | dtype] | Tuple[torch.dtype | dtype] = (
         dtype.float32,
     ),
-    debug: bool = DEBUG,
-    workspace_size: int = WORKSPACE_SIZE,
-    min_block_size: int = MIN_BLOCK_SIZE,
+    debug: bool = _defaults.DEBUG,
+    workspace_size: int = _defaults.WORKSPACE_SIZE,
+    min_block_size: int = _defaults.MIN_BLOCK_SIZE,
     torch_executed_ops: Optional[Set[str]] = None,
-    pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES,
-    max_aux_streams: Optional[int] = MAX_AUX_STREAMS,
-    version_compatible: bool = VERSION_COMPATIBLE,
-    optimization_level: Optional[int] = OPTIMIZATION_LEVEL,
-    use_python_runtime: Optional[bool] = USE_PYTHON_RUNTIME,
-    truncate_long_and_double: bool = TRUNCATE_LONG_AND_DOUBLE,
-    use_fast_partitioner: bool = USE_FAST_PARTITIONER,
-    enable_experimental_decompositions: bool = ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
+    pass_through_build_failures: bool = _defaults.PASS_THROUGH_BUILD_FAILURES,
+    max_aux_streams: Optional[int] = _defaults.MAX_AUX_STREAMS,
+    version_compatible: bool = _defaults.VERSION_COMPATIBLE,
+    optimization_level: Optional[int] = _defaults.OPTIMIZATION_LEVEL,
+    use_python_runtime: Optional[bool] = _defaults.USE_PYTHON_RUNTIME,
+    truncate_long_and_double: bool = _defaults.TRUNCATE_LONG_AND_DOUBLE,
+    use_fast_partitioner: bool = _defaults.USE_FAST_PARTITIONER,
+    enable_experimental_decompositions: bool = _defaults.ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
     device: Device = Device._current_device(),
-    require_full_compilation: bool = REQUIRE_FULL_COMPILATION,
-    disable_tf32: bool = DISABLE_TF32,
-    sparse_weights: bool = SPARSE_WEIGHTS,
-    refit: bool = REFIT,
-    engine_capability: EngineCapability = ENGINE_CAPABILITY,
-    num_avg_timing_iters: int = NUM_AVG_TIMING_ITERS,
-    dla_sram_size: int = DLA_SRAM_SIZE,
-    dla_local_dram_size: int = DLA_LOCAL_DRAM_SIZE,
-    dla_global_dram_size: int = DLA_GLOBAL_DRAM_SIZE,
+    require_full_compilation: bool = _defaults.REQUIRE_FULL_COMPILATION,
+    disable_tf32: bool = _defaults.DISABLE_TF32,
+    sparse_weights: bool = _defaults.SPARSE_WEIGHTS,
+    refit: bool = _defaults.REFIT,
+    engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
+    num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
+    dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
+    dla_local_dram_size: int = _defaults.DLA_LOCAL_DRAM_SIZE,
+    dla_global_dram_size: int = _defaults.DLA_GLOBAL_DRAM_SIZE,
     calibrator: object = None,
     allow_shape_tensors: bool = False,
 ) -> bytes:
@@ -569,22 +532,10 @@ def convert_module_to_trt_engine(
     input_list = prepare_inputs(input_list)
     device = to_torch_tensorrt_device(device)

-    if dtype.float16 in enabled_precisions or dtype.half in enabled_precisions:
-        precision = dtype.float16
-    elif dtype.float32 in enabled_precisions or dtype.float in enabled_precisions:
-        precision = dtype.float32
-    elif len(enabled_precisions) == 0:
-        logger.info(f"No precision specified, defaulting to {PRECISION}")
-        precision = PRECISION
-    else:
-        raise ValueError(
-            f"Precision {enabled_precisions} not supported in the Dynamo Path"
-        )
-
     enabled_precisions = {dtype._from(e) for e in enabled_precisions}

     compilation_options = {
-        "precision": precision,
+        "enabled_precisions": enabled_precisions,
        "debug": debug,
        "workspace_size": workspace_size,
        "min_block_size": min_block_size,

py/torch_tensorrt/dynamo/_defaults.py (2 additions, 1 deletion)

@@ -2,7 +2,7 @@
 from torch_tensorrt._Device import Device
 from torch_tensorrt._enums import EngineCapability, dtype

-PRECISION = dtype.float32
+ENABLED_PRECISIONS = {dtype.f32}
 DEBUG = False
 DEVICE = None
 DISABLE_TF32 = False
@@ -27,6 +27,7 @@
 DRYRUN = False
 HARDWARE_COMPATIBLE = False
 OUTPUT_FORMAT = "exported_program"
+SUPPORTED_KERNEL_PRECISIONS = {dtype.f32, dtype.f16, dtype.i8}


 def default_device() -> Device:
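
`ENABLED_PRECISIONS` replaces the scalar `PRECISION` default with a one-element set, and `SUPPORTED_KERNEL_PRECISIONS` enumerates the dtypes the Dynamo path can build kernels for. This commit does not show where the latter is consumed, so the following validation helper is purely a hypothetical sketch of how the two constants could interact:

    from typing import Set, Union

    import torch
    from torch_tensorrt._enums import dtype
    from torch_tensorrt.dynamo import _defaults


    def validate_precisions(requested: Set[Union[torch.dtype, dtype]]) -> Set[dtype]:
        # Hypothetical helper, not part of this commit: normalize to
        # torch_tensorrt dtypes, fall back to the default set when empty,
        # and reject anything outside the supported kernel precisions.
        normalized = {dtype._from(p) for p in requested} or _defaults.ENABLED_PRECISIONS
        unsupported = normalized - _defaults.SUPPORTED_KERNEL_PRECISIONS
        if unsupported:
            raise ValueError(f"Unsupported kernel precisions: {unsupported}")
        return normalized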

py/torch_tensorrt/dynamo/_settings.py (3 additions, 3 deletions)

@@ -12,6 +12,7 @@
     DLA_SRAM_SIZE,
     DRYRUN,
     ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
+    ENABLED_PRECISIONS,
     ENGINE_CAPABILITY,
     HARDWARE_COMPATIBLE,
     MAX_AUX_STREAMS,
@@ -20,7 +21,6 @@
     OPTIMIZATION_LEVEL,
     OUTPUT_FORMAT,
     PASS_THROUGH_BUILD_FAILURES,
-    PRECISION,
     REFIT,
     REQUIRE_FULL_COMPILATION,
     SPARSE_WEIGHTS,
@@ -38,7 +38,7 @@ class CompilationSettings:
     """Compilation settings for Torch-TensorRT Dynamo Paths

     Args:
-        precision (torch.dtype): Model Layer precision
+        enabled_precisions (Set[dtype]): Available kernel dtype precisions
         debug (bool): Whether to print out verbose debugging information
         workspace_size (int): Workspace TRT is allowed to use for the module (0 is default)
         min_block_size (int): Minimum number of operators per TRT-Engine Block
@@ -73,7 +73,7 @@ class CompilationSettings:
         output_format (str): Output format of the result of TRT compilation. Options include "exported_program" (or) "ep" | "torchscript" (or) "ts" | "graph_module" (or) "fx". Default is "exported_program"
     """

-    precision: dtype = field(default_factory=lambda: PRECISION)
+    enabled_precisions: dtype = field(default_factory=lambda: ENABLED_PRECISIONS)
     debug: bool = DEBUG
     workspace_size: int = WORKSPACE_SIZE
     min_block_size: int = MIN_BLOCK_SIZE
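
With the dataclass field renamed, callers construct settings with a precision set rather than a single dtype. One caveat in the hunk above: the field annotation still reads `dtype` even though the default factory now returns a set, so `Set[dtype]` is presumably the intended annotation. A minimal usage sketch:

    from torch_tensorrt._enums import dtype
    from torch_tensorrt.dynamo._settings import CompilationSettings

    # Defaults to ENABLED_PRECISIONS ({dtype.f32}); any subset of the
    # supported kernel precisions can be requested.
    settings = CompilationSettings(
        enabled_precisions={dtype.f32, dtype.f16},
        debug=True,
    )
    print(settings.enabled_precisions)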
