fix: Unify export/compile compilation utilities

gs-olive · gs-olive · commit cb2be86e8fd9 · 2023-06-01T15:30:21.000-07:00
- Add support for new TRT 8.6 utilities, including auxiliary streams,
  version compatibility, and optimization levels
- Add support for using TRTModuleNext when compiling with Dynamo compile
- Improve the documentation of features, and improve the version checking
  used for TRT feature compatibility
diff --git a/py/torch_tensorrt/dynamo/backend/__init__.py b/py/torch_tensorrt/dynamo/backend/__init__.py
@@ -4,7 +4,7 @@
 import torch_tensorrt
 from functools import partial
 
-from typing import Any, Sequence
+from typing import Any, Optional, Sequence
 from torch_tensorrt import EngineCapability, Device
 from torch_tensorrt.fx.utils import LowerPrecision
 
@@ -17,6 +17,10 @@
     MAX_WORKSPACE_SIZE,
     MIN_BLOCK_SIZE,
     PASS_THROUGH_BUILD_FAILURES,
+    MAX_AUX_STREAMS,
+    VERSION_COMPATIBLE,
+    OPTIMIZATION_LEVEL,
+    USE_EXPERIMENTAL_RT,
 )
 
 
@@ -45,6 +49,10 @@ def compile(
     min_block_size=MIN_BLOCK_SIZE,
     torch_executed_ops=[],
     torch_executed_modules=[],
+    max_aux_streams=MAX_AUX_STREAMS,
+    version_compatible=VERSION_COMPATIBLE,
+    optimization_level=OPTIMIZATION_LEVEL,
+    use_experimental_rt=USE_EXPERIMENTAL_RT,
     **kwargs,
 ):
     if debug:
@@ -86,6 +94,10 @@ def compile(
         workspace_size=workspace_size,
         min_block_size=min_block_size,
         torch_executed_ops=torch_executed_ops,
+        max_aux_streams=max_aux_streams,
+        version_compatible=version_compatible,
+        optimization_level=optimization_level,
+        use_experimental_rt=use_experimental_rt,
         **kwargs,
     )
 
@@ -109,6 +121,10 @@ def create_backend(
     min_block_size: int = MIN_BLOCK_SIZE,
     torch_executed_ops: Sequence[str] = set(),
     pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES,
+    max_aux_streams: Optional[int] = MAX_AUX_STREAMS,
+    version_compatible: bool = VERSION_COMPATIBLE,
+    optimization_level: Optional[int] = OPTIMIZATION_LEVEL,
+    use_experimental_rt: bool = USE_EXPERIMENTAL_RT,
     **kwargs,
 ):
     """Create torch.compile backend given specified arguments
@@ -117,7 +133,14 @@ def create_backend(
         precision:
         debug: Whether to print out verbose debugging information
         workspace_size: Maximum workspace TRT is allowed to use for the module
-        precision: Model Layer precision
+        min_block_size: Minimum number of operators per TRT-Engine Block
+        torch_executed_ops: Sequence of operations to run in Torch, regardless of converter coverage
+        pass_through_build_failures: Whether to fail on TRT engine build errors (True) or not (False)
+        max_aux_streams: Maximum number of allowed auxiliary TRT streams for each engine
+        version_compatible: Provide version forward-compatibility for engine plan files
+        optimization_level: Builder optimization 0-5, higher levels imply longer build time,
+            searching for more optimization options. TRT defaults to 3
+        use_experimental_rt: Whether to use the new experimental TRTModuleNext for TRT engines
     Returns:
         Backend for torch.compile
     """
@@ -131,6 +154,10 @@ def create_backend(
         min_block_size=min_block_size,
         torch_executed_ops=torch_executed_ops,
         pass_through_build_failures=pass_through_build_failures,
+        max_aux_streams=max_aux_streams,
+        version_compatible=version_compatible,
+        optimization_level=optimization_level,
+        use_experimental_rt=use_experimental_rt,
     )
 
     return partial(
diff --git a/py/torch_tensorrt/dynamo/backend/_defaults.py b/py/torch_tensorrt/dynamo/backend/_defaults.py
@@ -6,3 +6,7 @@
 MAX_WORKSPACE_SIZE = 20 << 30
 MIN_BLOCK_SIZE = 5
 PASS_THROUGH_BUILD_FAILURES = False
+MAX_AUX_STREAMS = None
+VERSION_COMPATIBLE = False
+OPTIMIZATION_LEVEL = None
+USE_EXPERIMENTAL_RT = False
diff --git a/py/torch_tensorrt/dynamo/backend/_settings.py b/py/torch_tensorrt/dynamo/backend/_settings.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import Sequence
+from typing import Optional, Sequence
 
 from torch_tensorrt.fx.utils import LowerPrecision
 from torch_tensorrt.dynamo.backend._defaults import (
@@ -8,6 +8,10 @@
     MAX_WORKSPACE_SIZE,
     MIN_BLOCK_SIZE,
     PASS_THROUGH_BUILD_FAILURES,
+    MAX_AUX_STREAMS,
+    VERSION_COMPATIBLE,
+    OPTIMIZATION_LEVEL,
+    USE_EXPERIMENTAL_RT,
 )
 
 
@@ -19,3 +23,7 @@ class CompilationSettings:
     min_block_size: int = MIN_BLOCK_SIZE
     torch_executed_ops: Sequence[str] = field(default_factory=set)
     pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES
+    max_aux_streams: Optional[int] = MAX_AUX_STREAMS
+    version_compatible: bool = VERSION_COMPATIBLE
+    optimization_level: Optional[int] = OPTIMIZATION_LEVEL
+    use_experimental_rt: bool = USE_EXPERIMENTAL_RT
diff --git a/py/torch_tensorrt/dynamo/backend/backends.py b/py/torch_tensorrt/dynamo/backend/backends.py
@@ -135,6 +135,7 @@ def _compile_module(
             submodule,
             submodule_inputs,
             settings=settings,
+            name=name,
         )
 
         # Replace FX Module with TRT Module
diff --git a/py/torch_tensorrt/dynamo/backend/conversion.py b/py/torch_tensorrt/dynamo/backend/conversion.py
@@ -3,7 +3,7 @@
 from torch_tensorrt.fx.trt_module import TRTModule
 from torch_tensorrt import TRTModuleNext
 from torch_tensorrt.dynamo.backend._settings import CompilationSettings
-from torch_tensorrt.fx.fx2trt import (
+from torch_tensorrt.dynamo.fx_ts_compat.fx2trt import (
     InputTensorSpec,
     TRTInterpreter,
 )
@@ -15,30 +15,48 @@ def convert_module(
     module: torch.fx.GraphModule,
     inputs: Sequence[torch.Tensor],
     settings: CompilationSettings = CompilationSettings(),
+    name: str = "",
 ) -> Union[TRTModuleNext, TRTModule]:
     """Convert an FX module to a TRT module
     Args:
         module: FX GraphModule to convert
         inputs: Sequence of Tensors representing inputs to the module
         settings: Compilation settings
+        name: TRT engine name
     Returns:
         TRTModule or TRTModuleNext
     """
-    interp = TRTInterpreter(
+    interpreter = TRTInterpreter(
         module,
         InputTensorSpec.from_tensors(inputs),
         explicit_batch_dimension=True,
         logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING),
     )
 
-    r = interp.run(
+    interpreter_result = interpreter.run(
         max_workspace_size=settings.workspace_size,
         lower_precision=settings.precision,
         profiling_verbosity=(
             trt.ProfilingVerbosity.VERBOSE
             if settings.debug
             else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
         ),
+        max_aux_streams=settings.max_aux_streams,
+        version_compatible=settings.version_compatible,
+        optimization_level=settings.optimization_level,
     )
 
-    return TRTModule(*r)
+    return (
+        TRTModuleNext(
+            serialized_engine=interpreter_result.engine,
+            name=name,
+            input_binding_names=interpreter_result.input_names,
+            output_binding_names=interpreter_result.output_names,
+        )
+        if settings.use_experimental_rt
+        else TRTModule(
+            engine=interpreter_result.engine,
+            input_names=interpreter_result.input_names,
+            output_names=interpreter_result.output_names,
+        )
+    )
diff --git a/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py b/py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py
@@ -1,6 +1,7 @@
 import logging
 import warnings
 from datetime import datetime
+from packaging import version
 from typing import Any, Callable, Dict, List, NamedTuple, Optional, Sequence
 
 import numpy
@@ -224,14 +225,14 @@ def run(
             cache = builder_config.create_timing_cache(b"")
         builder_config.set_timing_cache(cache, False)
 
-        if trt.__version__ >= "8.2":
+        if version.parse(trt.__version__) >= version.parse("8.2"):
             builder_config.profiling_verbosity = (
                 profiling_verbosity
                 if profiling_verbosity
                 else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
             )
 
-        if trt.__version__ >= "8.6":
+        if version.parse(trt.__version__) >= version.parse("8.6"):
             if max_aux_streams is not None:
                 _LOGGER.info(f"Setting max aux streams to {max_aux_streams}")
                 builder_config.max_aux_streams = max_aux_streams

Original file line number	Diff line number	Diff line change
`@@ -135,6 +135,7 @@ def _compile_module(`
`135`	`135`	`submodule,`
`136`	`136`	`submodule_inputs,`
`137`	`137`	`settings=settings,`
	`138`	`+ name=name,`
`138`	`139`	`)`
`139`	`140`
`140`	`141`	`# Replace FX Module with TRT Module`