
Commit aa0dda8

fix: Move key functions, fix bugs
- Improve overall functionality, fix bugs
- Move functions into __init__.py
- Improve overall documentation, comments, function header typing, and code organization
1 parent eea3884 commit aa0dda8

4 files changed: +146 additions, -137 deletions

py/torch_tensorrt/dynamo/__init__.py

Lines changed: 119 additions & 2 deletions
@@ -1,2 +1,119 @@
-from _compiler import compile
-from backends import create_backend
+import torch
+import logging
+import torch_tensorrt
+from functools import partial
+
+from typing import Sequence, Any
+from torch_tensorrt import EngineCapability, Device
+from torch_tensorrt.fx.utils import LowerPrecision
+
+from torch_tensorrt.dynamo._settings import CompilationSettings
+from torch_tensorrt.dynamo.backends import tensorrt_backend
+from torch_tensorrt.dynamo._defaults import (
+    PRECISION,
+    DEBUG,
+    MAX_WORKSPACE_SIZE,
+    MAX_NUM_TRT_ENGINES,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+def compile(
+    gm: torch.nn.Module,
+    example_inputs: Sequence[Any],
+    *,
+    device=Device._current_device(),
+    disable_tf32=False,
+    sparse_weights=False,
+    enabled_precisions=set(),
+    refit=False,
+    debug=DEBUG,
+    capability=EngineCapability.default,
+    num_avg_timing_iters=1,
+    workspace_size=MAX_WORKSPACE_SIZE,
+    dla_sram_size=1048576,
+    dla_local_dram_size=1073741824,
+    dla_global_dram_size=536870912,
+    calibrator=None,
+    truncate_long_and_double=False,
+    require_full_compilation=False,
+    min_block_size=3,
+    torch_executed_ops=[],
+    torch_executed_modules=[],
+    **kwargs,
+):
+
+    logger.warn(
+        "The Dynamo backend is an experimental feature, for which only the "
+        + "following arguments are supported: "
+        + "{enabled_precisions, debug, workspace_size, max_num_trt_engines}"
+    )
+
+    if (
+        torch.float16 in enabled_precisions
+        or torch_tensorrt.dtype.half in enabled_precisions
+    ):
+        lower_precision = LowerPrecision.FP16
+    elif (
+        torch.float32 in enabled_precisions
+        or torch_tensorrt.dtype.float in enabled_precisions
+    ):
+        lower_precision = LowerPrecision.FP32
+    elif len(enabled_precisions) == 0:
+        logger.info(f"No precision specified, defaulting to {PRECISION}")
+        lower_precision = PRECISION
+    else:
+        raise ValueError(
+            f"Precision {enabled_precisions} not supported in the Dynamo Path"
+        )
+
+    custom_backend = create_backend(
+        precision=lower_precision,
+        debug=debug,
+        workspace_size=workspace_size,
+        **kwargs,
+    )
+
+    model = torch.compile(gm, backend=custom_backend)
+
+    # Ensure compilation occurs by calling the function with provided inputs
+    model(*example_inputs)
+
+    return model
+
+
+from torch_tensorrt.fx.utils import LowerPrecision
+
+logger = logging.getLogger(__name__)
+
+
+def create_backend(
+    precision: LowerPrecision = PRECISION,
+    debug: bool = DEBUG,
+    workspace_size: int = MAX_WORKSPACE_SIZE,
+    max_num_trt_engines: int = MAX_NUM_TRT_ENGINES,
+    **kwargs,
+):
+    """Create torch.compile backend given specified arguments
+
+    Args:
+        precision:
+        debug: Whether to print out verbose debugging information
+        workspace_size: Maximum workspace TRT is allowed to use for the module
+        precision: Model Layer precision
+    Returns:
+        Backend for torch.compile
+    """
+    settings = CompilationSettings(
+        debug=debug,
+        precision=precision,
+        workspace_size=workspace_size,
+        max_num_trt_engines=max_num_trt_engines,
+    )
+
+    return partial(
+        tensorrt_backend,
+        settings=settings,
+    )
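
For context, a minimal usage sketch of the relocated compile entry point defined above; the model, input shape, and workspace value are hypothetical, and only arguments the warning message lists as supported are exercised:

import torch
import torch_tensorrt.dynamo

# Hypothetical toy model and input -- any traceable nn.Module on a CUDA device would do
model = torch.nn.Sequential(torch.nn.Conv2d(3, 16, 3), torch.nn.ReLU()).eval().cuda()
inputs = [torch.randn(1, 3, 224, 224, device="cuda")]

# compile() wraps the module with the TensorRT backend and runs it once on the
# provided inputs to trigger engine construction
trt_model = torch_tensorrt.dynamo.compile(
    model,
    inputs,
    enabled_precisions={torch.half},  # maps to LowerPrecision.FP16 above
    debug=True,
    workspace_size=1 << 30,  # hypothetical 1 GiB workspace
)

out = trt_model(*inputs)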

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 0 additions & 81 deletions
This file was deleted.

py/torch_tensorrt/dynamo/backends.py

Lines changed: 14 additions & 47 deletions
@@ -1,15 +1,9 @@
+from typing import Sequence
 import torch
-import logging
 import traceback
 from functools import partial
 import torch._dynamo as td

-from torch_tensorrt.dynamo._defaults import (
-    PRECISION,
-    DEBUG,
-    MAX_WORKSPACE_SIZE,
-    MAX_NUM_TRT_ENGINES,
-)
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo.lowering._decompositions import get_decompositions
 from torch_tensorrt.dynamo.lowering._partition import partition, get_submod_inputs
@@ -19,49 +13,14 @@

 from torch._functorch.aot_autograd import aot_module_simplified, make_boxed_compiler

-from torch_tensorrt.fx.utils import LowerPrecision
-
-logger = logging.getLogger(__name__)
-
-
-def create_backend(
-    precision: LowerPrecision = PRECISION,
-    debug: bool = DEBUG,
-    workspace_size: int = MAX_WORKSPACE_SIZE,
-    max_num_trt_engines: int = MAX_NUM_TRT_ENGINES,
-    **kwargs
-):
-    """Create torch.compile backend given specified arguments
-
-    Args:
-        precision:
-        debug: Whether to print out verbose debugging information
-        workspace_size: Maximum workspace TRT is allowed to use for the module
-        precision: Model Layer precision
-    Returns:
-        Backend for torch.compile
-    """
-    settings = CompilationSettings(
-        debug=debug,
-        precision=precision,
-        workspace_size=workspace_size,
-        max_num_trt_engines=max_num_trt_engines,
-    )
-
-    return partial(
-        tensorrt_backend,
-        settings=settings,
-    )
-

 @td.register_backend(name="tensorrt")
 @fake_tensor_unsupported
 def tensorrt_backend(
-    gm: torch.Module,
-    sample_inputs,
+    gm: torch.nn.Module,
+    sample_inputs: Sequence[torch.Tensor],
     settings: CompilationSettings = CompilationSettings(),
 ):
-
     custom_backend = partial(
         fx_dynamo_backend,
         settings=settings,
@@ -80,10 +39,18 @@ def tensorrt_backend(
 @fake_tensor_unsupported
 def fx_dynamo_backend(
     gm: torch.fx.GraphModule,
-    example_inputs,
+    example_inputs: Sequence[torch.Tensor],
     settings: CompilationSettings = CompilationSettings(),
 ):
-    """Helper function to manage translation of FX module to TRT engines"""
+    """Helper function to manage translation of FX module to TRT engines
+
+    Args:
+        module: FX GraphModule to convert
+        inputs: Inputs to the module
+        settings: Compilation settings
+    Returns:
+        Compiled FX GraphModule
+    """
     try:
         trt_compiled = compile_module(
             gm,
@@ -102,7 +69,7 @@ def fx_dynamo_backend(

 def compile_module(
     gm: torch.fx.GraphModule,
-    example_inputs,
+    example_inputs: Sequence[torch.Tensor],
     settings: CompilationSettings = CompilationSettings(),
 ) -> torch.fx.GraphModule:
     """Compile an FX module

py/torch_tensorrt/dynamo/lowering/_partition.py

Lines changed: 13 additions & 7 deletions
@@ -38,15 +38,19 @@ def is_node_supported(

     def print_support_overview(self, num_trt_blocks: Optional[int] = None):
         if num_trt_blocks is not None:
-            print(f"Number of TensorRT-Accelerated Subgraphs: {num_trt_blocks}\n")
+            print(f"\nNumber of TensorRT-Accelerated Subgraphs: {num_trt_blocks}")

-        print("Supported Nodes:")
+        print("\nSupported Nodes:")
         for node_name in self.supported_operators:
-            print(node_name)
+            print("-", node_name)

-        print("\nUnsupported Nodes:")
-        for node_name in self.unsupported_operators:
-            print(node_name)
+        if len(self.unsupported_operators) != 0:
+            print("\nUnsupported Nodes:")
+            for node_name in self.unsupported_operators:
+                print("-", node_name)
+            print("\n")
+        else:
+            print("\nAll Nodes Supported\n")


 def partition(
@@ -88,7 +92,9 @@ def partition(


 def get_submod_inputs(
-    mod: torch.fx.GraphModule, submod: torch.fx.GraphModule, inputs
+    mod: torch.fx.GraphModule,
+    submod: torch.fx.GraphModule,
+    inputs: Sequence[torch.Tensor],
 ) -> Sequence[torch.Tensor]:
     """Helper function to get inputs to a Torch submodule
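
For illustration only, with hypothetical operator names, the reworked print_support_overview now emits output along these lines (each node is prefixed with a dash, and an "All Nodes Supported" message is printed instead when the unsupported list is empty):

Number of TensorRT-Accelerated Subgraphs: 2

Supported Nodes:
- aten.add.Tensor
- aten.relu.default

Unsupported Nodes:
- aten.nonzero.default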
