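Fix issues from the comments: move the interpreter helper into conversion as interpret_module and expose max_aux_streams, version_compatible and optimization_level on convert_method_to_trt_engine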

zewenli98 · zewenli98 · commit 5a47b1e3846f · 2023-11-29T14:02:27.000-08:00
diff --git a/py/torch_tensorrt/dynamo/__init__.py b/py/torch_tensorrt/dynamo/__init__.py
@@ -7,10 +7,8 @@
 logger = logging.getLogger(__name__)
 
 if version.parse(sanitized_torch_version()) >= version.parse("2.1.dev"):
-    from ._compiler import compile
+    from ._compiler import compile, convert_method_to_trt_engine
     from ._exporter import export
     from ._settings import CompilationSettings
     from ._SourceIR import SourceIR
     from ._tracer import trace
-
-from torch_tensorrt.dynamo._compiler import *  # noqa: F403
diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py
@@ -4,7 +4,6 @@
 import logging
 from typing import Any, List, Optional, Sequence, Set, Tuple, Union
 
-import tensorrt as trt
 import torch
 import torch_tensorrt
 from torch.export import ExportedProgram
@@ -33,13 +32,11 @@
 )
 from torch_tensorrt.dynamo.conversion import (
     CompilationSettings,
+    UnsupportedOperatorException,
     convert_module,
+    interpret_module,
     repair_long_or_double_inputs,
 )
-from torch_tensorrt.dynamo.conversion._TRTInterpreter import (
-    TRTInterpreter,
-    TRTInterpreterResult,
-)
 from torch_tensorrt.dynamo.lowering import apply_lowering_passes
 from torch_tensorrt.dynamo.utils import (
     get_torch_inputs,
@@ -327,51 +324,6 @@ def compile_module(
     return partitioned_module
 
 
-def interpreter(
-    module: torch.fx.GraphModule,
-    inputs: Sequence[Input],
-    settings: CompilationSettings = CompilationSettings(),
-    name: str = "",
-) -> TRTInterpreterResult:
-    torch_inputs = get_torch_inputs(inputs, settings.device)
-    module_outputs = module(*torch_inputs)
-
-    if not isinstance(module_outputs, (list, tuple)):
-        module_outputs = [module_outputs]
-
-    # Int64 outputs can sometimes be generated from within other operators
-    # such as aten.sum - such outputs can be truncated
-    output_dtypes = []
-    for output in module_outputs:
-        if settings.truncate_long_and_double and output.dtype == torch.float64:
-            output_dtypes.append(torch.float32)
-        elif settings.truncate_long_and_double and output.dtype == torch.int64:
-            output_dtypes.append(torch.int32)
-        else:
-            output_dtypes.append(output.dtype)
-
-    interpreter = TRTInterpreter(
-        module,
-        inputs,
-        logger_level=(trt.Logger.VERBOSE if settings.debug else trt.Logger.WARNING),
-        output_dtypes=output_dtypes,
-        compilation_settings=settings,
-    )
-    interpreter_result = interpreter.run(
-        workspace_size=settings.workspace_size,
-        precision=settings.precision,
-        profiling_verbosity=(
-            trt.ProfilingVerbosity.VERBOSE
-            if settings.debug
-            else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
-        ),
-        max_aux_streams=settings.max_aux_streams,
-        version_compatible=settings.version_compatible,
-        optimization_level=settings.optimization_level,
-    )
-    return interpreter_result
-
-
 def convert_method_to_trt_engine(
     module: torch.fx.GraphModule,
     method_name: str = "forward",
@@ -391,6 +343,9 @@ def convert_method_to_trt_engine(
     truncate_long_and_double: int = False,
     calibrator: object = None,
     allow_shape_tensors: bool = False,
+    max_aux_streams: Optional[int] = MAX_AUX_STREAMS,
+    version_compatible: bool = VERSION_COMPATIBLE,
+    optimization_level: Optional[int] = OPTIMIZATION_LEVEL,
 ) -> bytes:
     if debug:
         set_log_level(logger.parent, logging.DEBUG)
@@ -428,15 +383,20 @@ def convert_method_to_trt_engine(
         "device": device,
         "workspace_size": workspace_size,
         "truncate_long_and_double": truncate_long_and_double,
-        "max_aux_streams": MAX_AUX_STREAMS,
-        "version_compatible": VERSION_COMPATIBLE,
-        "optimization_level": OPTIMIZATION_LEVEL,
+        "max_aux_streams": max_aux_streams,
+        "version_compatible": version_compatible,
+        "optimization_level": optimization_level,
     }
 
     settings = CompilationSettings(**compilation_options)
     logger.info("Compilation Settings: %s\n", settings)
-    interpreter_result = interpreter(module, input_list, settings, method_name)
-
+    try:
+        interpreter_result = interpret_module(module, input_list, settings, method_name)
+    except UnsupportedOperatorException:
+        logger.error(
+            f"Conversion of module {module} not currently fully supported or convertible!",
+            exc_info=True,
+        )
     import io
 
     with io.BytesIO() as engine_bytes:
diff --git a/py/torch_tensorrt/dynamo/conversion/__init__.py b/py/torch_tensorrt/dynamo/conversion/__init__.py
@@ -1,5 +1,5 @@
 from . import aten_ops_converters, ops_evaluators, prims_ops_converters
-from ._conversion import convert_module
+from ._conversion import convert_module, interpret_module
 from ._ConversionContext import ConversionContext
 from ._ConverterRegistry import *  # noqa: F403
 from ._TRTInterpreter import *  # noqa: F403
diff --git a/py/torch_tensorrt/dynamo/conversion/_conversion.py b/py/torch_tensorrt/dynamo/conversion/_conversion.py
@@ -3,33 +3,24 @@
 import io
 from typing import Sequence
 
+import tensorrt as trt
 import torch
 from torch_tensorrt._Input import Input
 from torch_tensorrt.dynamo._settings import CompilationSettings
-from torch_tensorrt.dynamo.conversion._TRTInterpreter import TRTInterpreter
+from torch_tensorrt.dynamo.conversion._TRTInterpreter import (
+    TRTInterpreter,
+    TRTInterpreterResult,
+)
 from torch_tensorrt.dynamo.runtime import PythonTorchTensorRTModule, TorchTensorRTModule
 from torch_tensorrt.dynamo.utils import get_torch_inputs
 
-import tensorrt as trt
-
 
-def convert_module(
+def interpret_module(
     module: torch.fx.GraphModule,
     inputs: Sequence[Input],
     settings: CompilationSettings = CompilationSettings(),
     name: str = "",
-) -> PythonTorchTensorRTModule | TorchTensorRTModule:
-    """Convert an FX module to a TRT module
-    Args:
-        module: FX GraphModule to convert
-        inputs: Sequence of Tensors representing inputs to the module
-        settings: Compilation settings
-        name: TRT engine name
-    Returns:
-        _PythonTorchTensorRTModule or TorchTensorRTModule
-    """
-    # Specify module output data types to ensure TRT output types agree with
-    # that of the equivalent Torch module
+) -> TRTInterpreterResult:
     torch_inputs = get_torch_inputs(inputs, settings.device)
     module_outputs = module(*torch_inputs)
 
@@ -66,6 +57,27 @@ def convert_module(
         version_compatible=settings.version_compatible,
         optimization_level=settings.optimization_level,
     )
+    return interpreter_result
+
+
+def convert_module(
+    module: torch.fx.GraphModule,
+    inputs: Sequence[Input],
+    settings: CompilationSettings = CompilationSettings(),
+    name: str = "",
+) -> PythonTorchTensorRTModule | TorchTensorRTModule:
+    """Convert an FX module to a TRT module
+    Args:
+        module: FX GraphModule to convert
+        inputs: Sequence of Tensors representing inputs to the module
+        settings: Compilation settings
+        name: TRT engine name
+    Returns:
+        _PythonTorchTensorRTModule or TorchTensorRTModule
+    """
+    # Specify module output data types to ensure TRT output types agree with
+    # that of the equivalent Torch module
+    interpreter_result = interpret_module(module, inputs, settings, name)
 
     if settings.use_python_runtime:
         return PythonTorchTensorRTModule(