Commit f3c7fc7

refactor: Modify prepare_inputs, remove lower_precision

Signed-off-by: Dheeraj Peri <[email protected]>

chore: refactor

Signed-off-by: Dheeraj Peri <[email protected]>

chore: Address review comments

Signed-off-by: Dheeraj Peri <[email protected]>

chore: address review comments

Signed-off-by: Dheeraj Peri <[email protected]>

1 parent 4985372 commit f3c7fc7

15 files changed: +344 additions, -388 deletions

.circleci/config.yml

Lines changed: 0 additions & 3 deletions

@@ -742,7 +742,6 @@ commands:
       command: |
         cd tests/py/dynamo/backend/
         pytest --junitxml=/tmp/artifacts/test_results/dynamo/torch_compile/test_results.xml
-        popd

     - store_test_results:
         path: /tmp/artifacts
@@ -759,7 +758,6 @@ commands:
         pip3 install timm
         pip3 install transformers
         pytest test_models.py --junitxml=/tmp/artifacts/test_results/dynamo/backend/test_results.xml --ir torch_compile
-        popd

     - store_test_results:
         path: /tmp/artifacts
@@ -776,7 +774,6 @@ commands:
         pip3 install timm
         pip3 install transformers
         pytest test_models_export.py --junitxml=/tmp/artifacts/test_results/dynamo/backend/test_results.xml --ir dynamo
-        popd

     - store_test_results:
         path: /tmp/artifacts

py/torch_tensorrt/_compile.py

Lines changed: 28 additions & 4 deletions

@@ -67,9 +67,11 @@ def _get_target_ir(module_type: _ModuleType, ir: str) -> _IRType:
             )
             return _IRType.dynamo
         elif module_is_tsable:
-            raise ValueError(
-                "Input graph is a Torchscript module but the ir provided is default (dynamo). Please set ir=torchscript to compile."
+            logging.log(
+                logging.Level.Warning,
+                "Input graph is a Torchscript module but the ir provided is default (dynamo). Please set ir=torchscript to suppress the warning. Compiling the module with ir=ts",
             )
+            return _IRType.ts
         else:
             raise ValueError("Module was provided with in an unsupported format")
     else:
@@ -154,18 +156,40 @@ def compile(
             dynamic_batch=False,
             **kwargs,
         )
-    elif target_ir == _IRType.dynamo or target_ir == _IRType.torch_compile:
+    elif target_ir == _IRType.dynamo:
         return torch_tensorrt.dynamo.compile(
             module,
             inputs=inputs,
             enabled_precisions=enabled_precisions,
-            ir=target_ir.name,
             **kwargs,
         )
+    elif target_ir == _IRType.torch_compile:
+        return torch_compile(
+            module, inputs, enabled_precisions=enabled_precisions, **kwargs
+        )
     else:
         raise RuntimeError("Module is an unknown format or the ir requested is unknown")


+def torch_compile(module, inputs, **kwargs):
+
+    from torch_tensorrt.dynamo.utils import prepare_inputs, prepare_device
+    from torch_tensorrt.dynamo.backend import torch_tensorrt_backend
+    from torch_tensorrt import Device
+    import collections.abc
+
+    if not isinstance(inputs, collections.abc.Sequence):
+        inputs = [inputs]
+
+    device = kwargs.get("device", Device._current_device())
+    torchtrt_inputs, torch_inputs = prepare_inputs(inputs, prepare_device(device))
+    model = torch.compile(module, backend=torch_tensorrt_backend, options={**kwargs})
+    # Ensure compilation occurs by calling the function with provided inputs
+    model(*torch_inputs)
+
+    return model
+
+
 def convert_method_to_trt_engine(
     module: Any,
     method_name: str,
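For context, a minimal sketch of how the new torch_compile path might be exercised through the top-level API. The toy module and input shapes here are hypothetical, and a CUDA-enabled Torch-TensorRT install is assumed:

import torch
import torch_tensorrt

# Toy stand-in model and input (hypothetical).
model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU()).eval().cuda()
inputs = [torch.randn(1, 16).cuda()]

# With ir="torch_compile", compile() now dispatches to the torch_compile()
# helper above, which wraps the module with torch.compile using the
# torch_tensorrt_backend and runs it once on the inputs to trigger compilation.
trt_model = torch_tensorrt.compile(model, ir="torch_compile", inputs=inputs)
out = trt_model(*inputs)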

py/torch_tensorrt/dynamo/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -1,2 +1,3 @@
 from ._settings import *
 from .compile import compile
+from .aten_tracer import trace
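With this re-export in place, the tracer should be importable directly from the package, e.g.:

# trace() can now be imported alongside compile() from the dynamo package
from torch_tensorrt.dynamo import compile, trace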

py/torch_tensorrt/dynamo/_defaults.py

Lines changed: 2 additions & 3 deletions

@@ -1,7 +1,6 @@
-from torch_tensorrt.fx.utils import LowerPrecision
+import torch

-
-PRECISION = LowerPrecision.FP32
+PRECISION = torch.float32
 DEBUG = False
 WORKSPACE_SIZE = 0
 MIN_BLOCK_SIZE = 5

py/torch_tensorrt/dynamo/_settings.py

Lines changed: 2 additions & 3 deletions

@@ -1,7 +1,6 @@
 from dataclasses import dataclass, field
 from typing import Optional, Sequence
-
-from torch_tensorrt.fx.utils import LowerPrecision
+import torch
 from torch_tensorrt.dynamo._defaults import (
     PRECISION,
     DEBUG,
@@ -17,7 +16,7 @@

 @dataclass
 class CompilationSettings:
-    precision: LowerPrecision = PRECISION
+    precision: torch.dtype = PRECISION
     debug: bool = DEBUG
     workspace_size: int = WORKSPACE_SIZE
     min_block_size: int = MIN_BLOCK_SIZE
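With the LowerPrecision enum gone, CompilationSettings takes a plain torch.dtype. A minimal sketch of constructing the dataclass under this change:

import torch
from torch_tensorrt.dynamo._settings import CompilationSettings

# precision is now an ordinary torch.dtype instead of a LowerPrecision enum value
settings = CompilationSettings(precision=torch.float16, debug=True)
assert settings.precision == torch.float16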
py/torch_tensorrt/dynamo/backend/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -1,2 +1 @@
 from .backends import torch_tensorrt_backend
-from .compile import compile

py/torch_tensorrt/dynamo/compile.py

Lines changed: 28 additions & 96 deletions

@@ -6,7 +6,6 @@

 from typing import Any, Optional, Sequence
 from torch_tensorrt import EngineCapability, Device
-from torch_tensorrt.fx.utils import LowerPrecision
 from torch.fx.passes.pass_manager import PassManager
 from torch.fx.passes.shape_prop import ShapeProp
 from torch_tensorrt.dynamo.aten_tracer import trace
@@ -78,117 +77,50 @@ def compile(
     if not isinstance(inputs, collections.abc.Sequence):
         inputs = [inputs]

-    inputs = prepare_inputs(inputs, prepare_device(device))
+    torchtrt_inputs, torch_inputs = prepare_inputs(inputs, prepare_device(device))

     if (
         torch.float16 in enabled_precisions
         or torch_tensorrt.dtype.half in enabled_precisions
     ):
-        lower_precision = LowerPrecision.FP16
+        precision = torch.float16
     elif (
         torch.float32 in enabled_precisions
         or torch_tensorrt.dtype.float in enabled_precisions
     ):
-        lower_precision = LowerPrecision.FP32
+        precision = torch.float32
     elif len(enabled_precisions) == 0:
         logger.info(f"No precision specified, defaulting to {PRECISION}")
-        lower_precision = PRECISION
+        precision = PRECISION
     else:
         raise ValueError(
             f"Precision {enabled_precisions} not supported in the Dynamo Path"
         )

-    if kwargs.get("ir", "dynamo") == "torch_compile":
-        custom_backend = create_backend(
-            precision=lower_precision,
-            debug=debug,
-            workspace_size=workspace_size,
-            min_block_size=min_block_size,
-            torch_executed_ops=torch_executed_ops,
-            pass_through_build_failures=pass_through_build_failures,
-            max_aux_streams=max_aux_streams,
-            version_compatible=version_compatible,
-            optimization_level=optimization_level,
-            use_python_runtime=use_python_runtime,
-            **kwargs,
-        )
-        model = torch.compile(gm, backend=custom_backend)
-        # Ensure compilation occurs by calling the function with provided inputs
-        model(*inputs)
-        return model
-
+    compilation_options = {
+        "precision": precision,
+        "debug": debug,
+        "workspace_size": workspace_size,
+        "min_block_size": min_block_size,
+        "torch_executed_ops": torch_executed_ops,
+        "pass_through_build_failures": pass_through_build_failures,
+        "max_aux_streams": max_aux_streams,
+        "version_compatible": version_compatible,
+        "optimization_level": optimization_level,
+        "use_python_runtime": use_python_runtime,
+    }
+
+    settings = CompilationSettings(**compilation_options)
+    model = trace(gm, torch_inputs, **kwargs)
+
+    if kwargs.get("use_capability_partitioner", None):
+        model = lower_model(model, torch_inputs)
+        return _compile_module(model, torch_inputs, settings)
     else:
-        settings = CompilationSettings(
-            debug=debug,
-            precision=lower_precision,
-            workspace_size=workspace_size,
-            min_block_size=min_block_size,
-            torch_executed_ops=torch_executed_ops,
-            pass_through_build_failures=pass_through_build_failures,
-            max_aux_streams=max_aux_streams,
-            version_compatible=version_compatible,
-            optimization_level=optimization_level,
-            use_python_runtime=use_python_runtime,
-        )
+        split_result = lower_model_using_trt_splitter(model, torch_inputs)
+        trt_module = _compile_graph(split_result, torch_inputs, settings)

-        model = trace(gm, inputs, **kwargs)
-
-        if kwargs.get("use_capability_partitioner", None):
-            model = lower_model(model, inputs)
-            return _compile_module(model, inputs, settings)
-        else:
-            split_result = lower_model_using_trt_splitter(model, inputs)
-            trt_module = _compile_graph(split_result, inputs, settings)
-
-            return trt_module
-
-
-def create_backend(
-    precision: LowerPrecision = PRECISION,
-    debug: bool = DEBUG,
-    workspace_size: int = WORKSPACE_SIZE,
-    min_block_size: int = MIN_BLOCK_SIZE,
-    torch_executed_ops: Sequence[str] = set(),
-    pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES,
-    max_aux_streams: Optional[int] = MAX_AUX_STREAMS,
-    version_compatible: bool = VERSION_COMPATIBLE,
-    optimization_level: Optional[int] = OPTIMIZATION_LEVEL,
-    use_python_runtime: Optional[bool] = USE_PYTHON_RUNTIME,
-    **kwargs,
-):
-    """Create torch.compile backend given specified arguments
-
-    Args:
-        precision: Model Layer precision
-        debug: Whether to print out verbose debugging information
-        workspace_size: Workspace TRT is allowed to use for the module (0 is default)
-        min_block_size: Minimum number of operators per TRT-Engine Block
-        torch_executed_ops: Sequence of operations to run in Torch, regardless of converter coverage
-        pass_through_build_failures: Whether to fail on TRT engine build errors (True) or not (False)
-        max_aux_streams: Maximum number of allowed auxiliary TRT streams for each engine
-        version_compatible: Provide version forward-compatibility for engine plan files
-        optimization_level: Builder optimization 0-5, higher levels imply longer build time,
-            searching for more optimization options. TRT defaults to 3
-        use_python_runtime: Whether to strictly use Python runtime or C++ runtime. To auto-select a runtime
-            based on C++ dependency presence (preferentially choosing C++ runtime if available), leave the
-            argument as None
-    Returns:
-        Backend for torch.compile
-    """
-    return partial(
-        torch_tensorrt_backend,
-        debug=debug,
-        precision=precision,
-        workspace_size=workspace_size,
-        min_block_size=min_block_size,
-        torch_executed_ops=torch_executed_ops,
-        pass_through_build_failures=pass_through_build_failures,
-        max_aux_streams=max_aux_streams,
-        version_compatible=version_compatible,
-        optimization_level=optimization_level,
-        use_python_runtime=use_python_runtime,
-        **kwargs,
-    )
+        return trt_module


 def _compile_graph(
@@ -234,7 +166,7 @@ def lower_model(model: torch.nn.Module, inputs: Any, **kwargs):
         [fuse_permute_matmul, fuse_permute_linear]
     )
     lowered_model = graph_optimization_pm(model)
-    if isinstance(lowered_model, torch.fx.GraphModule):
-        ShapeProp(lowered_model).propagate(*inputs)
+    # if isinstance(lowered_model, torch.fx.GraphModule):
+    #     ShapeProp(lowered_model).propagate(*inputs)

     return lowered_model
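A minimal sketch of the resulting call path, assuming a CUDA build and a hypothetical toy module. Entries in enabled_precisions are now mapped to a single torch.dtype before being packed into CompilationSettings:

import torch
import torch_tensorrt

model = torch.nn.Linear(8, 8).eval().cuda()  # hypothetical toy module

# torch.half in enabled_precisions now yields precision=torch.float16,
# which flows into CompilationSettings via the compilation_options dict.
trt_model = torch_tensorrt.dynamo.compile(
    model,
    inputs=[torch.randn(2, 8).cuda()],
    enabled_precisions={torch.half},
)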

py/torch_tensorrt/dynamo/conversion/conversion.py

Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@ def convert_module(
     )
     interpreter_result = interpreter.run(
         workspace_size=settings.workspace_size,
-        lower_precision=settings.precision,
+        precision=settings.precision,
         profiling_verbosity=(
             trt.ProfilingVerbosity.VERBOSE
             if settings.debug

py/torch_tensorrt/dynamo/conversion/trt_interpreter.py

Lines changed: 8 additions & 17 deletions

@@ -19,7 +19,6 @@
 from torch_tensorrt.fx.observer import Observer
 from torch_tensorrt.fx.utils import (
     get_dynamic_dims,
-    LowerPrecision,
     unified_dtype_converter,
     Frameworks,
 )
@@ -98,7 +97,7 @@ def validate_conversion(self):
     def run(
         self,
         workspace_size=0,
-        lower_precision=LowerPrecision.FP16,
+        precision=torch.float32,
         sparse_weights=False,
         disable_tf32=False,
         force_fp32_output=False,
@@ -115,7 +114,7 @@ def run(
         Build TensorRT engine with some configs.
         Args:
             workspace_size: Amount of memory used by TensorRT to store intermediate buffers within an operation.
-            lower_precision: the precision model layers are running on (TensorRT will choose the best perforamnce precision).
+            precision: the precision model layers are running on (TensorRT will choose the best perforamnce precision).
             sparse_weights: allow the builder to examine weights and use optimized functions when weights have suitable sparsity
             force_fp32_output: force output to be fp32
             strict_type_constraints: Usually we should set it to False unless we want to control the precision of certain layer for numeric reasons.
@@ -131,22 +130,14 @@
         """
         TRT_INTERPRETER_CALL_PRE_OBSERVER.observe(self.module)

-        # For float outputs, we set their dtype to fp16 only if lower_precision == LowerPrecision.FP16 and
+        # For float outputs, we set their dtype to fp16 only if precision == torch.float16 and
         # force_fp32_output=False. Overriden by specifying output_dtypes
-        self.output_fp16 = (
-            not force_fp32_output and lower_precision == LowerPrecision.FP16
-        )
+        self.output_fp16 = not force_fp32_output and precision == torch.float16

-        if (
-            lower_precision == LowerPrecision.INT8
-            and not self.builder.platform_has_fast_int8
-        ):
+        if precision == torch.int8 and not self.builder.platform_has_fast_int8:
             raise RuntimeError("Current platform doesn't support fast native int8!")

-        if (
-            lower_precision == LowerPrecision.FP16
-            and not self.builder.platform_has_fast_fp16
-        ):
+        if precision == torch.float16 and not self.builder.platform_has_fast_fp16:
             warnings.warn("Current platform doesn't support fast native fp16!")

         self.input_specs_iter = 0
@@ -190,10 +181,10 @@ def run(
             _LOGGER.info(f"Using optimization level {optimization_level}")
             builder_config.builder_optimization_level = optimization_level

-        if lower_precision == LowerPrecision.FP16:
+        if precision == torch.float16:
             builder_config.set_flag(trt.BuilderFlag.FP16)

-        if lower_precision == LowerPrecision.INT8:
+        if precision == torch.int8:
             builder_config.set_flag(trt.BuilderFlag.INT8)

         if sparse_weights:
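The flag selection above now keys directly off torch dtypes. A hedged sketch of that mapping as a standalone helper (the helper name is illustrative, not part of the library; a tensorrt install is assumed):

import torch
import tensorrt as trt

def _set_precision_flags(builder_config: trt.IBuilderConfig, precision: torch.dtype) -> None:
    # Illustrative helper mirroring the logic above: FP32 needs no flag,
    # while FP16/INT8 enable the corresponding TensorRT builder flags.
    if precision == torch.float16:
        builder_config.set_flag(trt.BuilderFlag.FP16)
    elif precision == torch.int8:
        builder_config.set_flag(trt.BuilderFlag.INT8)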

py/torch_tensorrt/dynamo/runtime/_PythonTRTModule.py renamed to py/torch_tensorrt/dynamo/runtime/_PythonTorchTRTModule.py

Lines changed: 6 additions & 0 deletions

@@ -7,6 +7,12 @@


 class TRTModule(torch.nn.Module):
+    """TRTModule is a PyTorch module which encompasses an arbitrary TensorRT Engine.
+
+    This module is backed by the Torch-TensorRT runtime and is only compatibile with
+    FX / Dynamo / Python deployments. This module cannot be serialized to torchscript via torch.jit.trace for C++ deployment.
+    """
+
     def __init__(
         self, engine=None, input_names=None, output_names=None, cuda_graph_batch_size=-1
     ):

py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@ class TorchTensorRTModule(torch.nn.Module):

     This module is backed by the Torch-TensorRT runtime and is fully compatibile with both
     FX / Python deployments (just ``import torch_tensorrt`` as part of the application) as
-    well as TorchScript / C++ deployments since TRTModule can be passed to ``torch.jit.trace``
+    well as TorchScript / C++ deployments since TorchTensorRTModule can be passed to ``torch.jit.trace``
     and then saved.

     The forward function is simpily forward(*args: torch.Tensor) -> Tuple[torch.Tensor] where
py/torch_tensorrt/dynamo/runtime/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,2 +1,2 @@
-from ._PythonTRTModule import TRTModule
+from ._PythonTorchTRTModule import TRTModule
 from ._TorchTensorRTModule import TorchTensorRTModule
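Net effect on the runtime package: both modules stay exported under the same names, with the Python-runtime TRTModule now living in _PythonTorchTRTModule.py. A small sketch of the distinction the updated docstrings draw:

from torch_tensorrt.dynamo.runtime import TRTModule, TorchTensorRTModule

# TRTModule: Python-runtime engine wrapper; not traceable via torch.jit.trace,
# so it cannot be serialized to TorchScript for C++ deployment.
# TorchTensorRTModule: C++-runtime wrapper; can be passed through
# torch.jit.trace and saved for TorchScript / C++ deployments.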
