
Commit 50ba223

Implemented basic pipeline for Refitting (#2886)
1 parent cf9a9bb

4 files changed (+16, -10 lines)


docsrc/py_api/dynamo.rst

Lines changed: 0 additions & 1 deletion

@@ -26,7 +26,6 @@ Functions
 
 .. autofunction:: refit_module_weights
 
-
 Classes
 --------
 
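refit_module_weights, the function documented by the directive above, swaps freshly trained weights into an already compiled TensorRT module without rebuilding the engine. Below is a minimal usage sketch, assuming the torch_tensorrt API of this commit's era; argument names (inputs in particular) have shifted between releases, and MyModel is a hypothetical stand-in.

import torch
import torch_tensorrt

class MyModel(torch.nn.Module):  # hypothetical stand-in model
    def __init__(self) -> None:
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 8, 3)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.conv(x))

inputs = [torch.randn(1, 3, 32, 32).cuda()]

# Build a refittable engine; after this commit the supported spelling is
# make_refitable=True (refit=True only triggers the deprecation path below).
trt_gm = torch_tensorrt.compile(
    MyModel().eval().cuda(), ir="dynamo", inputs=inputs, make_refitable=True
)

# Export a module that carries the new weights, then refit the compiled
# module in place of a full recompilation.
new_model = MyModel().eval().cuda()  # pretend this holds newly trained weights
exp_program = torch.export.export(new_model, tuple(inputs))
refitted_gm = torch_tensorrt.dynamo.refit_module_weights(
    compiled_module=trt_gm, new_weight_module=exp_program, inputs=inputs
)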

examples/dynamo/refit_engine_example.py

Lines changed: 1 addition & 1 deletion

@@ -91,7 +91,7 @@
 print("Refit successfully!")
 
 # %%
-# Alternative Workflow using Python Runtime
+# Alterative Workflow using Python Runtime
 # -----------------------------
 
 # Currently python runtime does not support engine serialization. So the refitting will be done in the same runtime.
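As the context line quoted above notes, the Python runtime could not serialize engines at this point, so the alternative workflow refits and verifies in one process. A short continuation of the previous sketch (same hypothetical names) showing that in-process check:

# Same process, no save/load round trip: run the refitted module directly
# and compare against the PyTorch module that supplied the new weights.
with torch.no_grad():
    expected = new_model(*inputs)
    actual = refitted_gm(*inputs)
    # Loose tolerance: TensorRT kernels need not match PyTorch bit-for-bit.
    print("Refit successful:", torch.allclose(expected, actual, atol=1e-2))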

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 12 additions & 0 deletions

@@ -176,6 +176,18 @@ def compile(
 
     if kwarg_inputs is None:
         kwarg_inputs = {}
+
+    if "refit" in kwargs.keys():
+        warnings.warn(
+            "Refit is deprecated. Please use make_refitable=True if you want to enable refitting of the engine.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        if make_refitable:
+            raise ValueError("Use flag make_refitable only. Flag refit is deprecated.")
+        else:
+            make_refitable = kwargs["refit"]
+
     engine_capability = EngineCapability._from(engine_capability)
 
     if torch_executed_modules is not None and torch_executed_modules:
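The hunk above wires the deprecated refit kwarg into the new make_refitable flag. A self-contained sketch of the same pattern, with compile_sketch as a hypothetical stand-in for the real compile function:

import warnings
from typing import Any

def compile_sketch(make_refitable: bool = False, **kwargs: Any) -> bool:
    # Mirror of the hunk above: accept the legacy "refit" kwarg, warn, and
    # fold it into the replacement flag unless both flags were supplied.
    if "refit" in kwargs:
        warnings.warn(
            "Refit is deprecated. Please use make_refitable=True.",
            DeprecationWarning,
            stacklevel=2,
        )
        if make_refitable:
            raise ValueError("Use flag make_refitable only. Flag refit is deprecated.")
        make_refitable = kwargs["refit"]
    return make_refitable

assert compile_sketch(refit=True) is True           # legacy spelling, warns
assert compile_sketch(make_refitable=True) is True  # new spelling, silent

Failing fast when both spellings are supplied is the safer choice here, since the two flags could otherwise disagree silently.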

py/torch_tensorrt/dynamo/_refit.py

Lines changed: 3 additions & 8 deletions

@@ -6,6 +6,7 @@
 from typing import Any, Sequence, Tuple
 
 import numpy as np
+import tensorrt as trt
 import torch
 from torch.export import ExportedProgram
 from torch_tensorrt._enums import dtype
@@ -42,8 +43,6 @@
 )
 from torch_tensorrt.logging import TRT_LOGGER
 
-import tensorrt as trt
-
 logger = logging.getLogger(__name__)
 
 
@@ -96,16 +95,12 @@ def construct_refit_mapping(
         layer_type: str = layer.type.name
         if layer_type in MODULE_MAP:
             # Cast the parent class to child class to access attributes
-            # For example: ILayer does not have ILayer.kernel/ILayer.bias
+            # For example: ILayer does not have ILayer.kernal/ILayer.bias
             # So we cast it to IConvolutionLayer and access the attributes
             layer.__class__ = MODULE_MAP[layer_type][0]
             for weight_type, weight_name in MODULE_MAP[layer_type][1]:
                 weight = layer.__getattribute__(weight_type).copy()
-                weight_dtype_opt = dtype.try_from(weight.dtype)
-                assert (
-                    weight_dtype_opt is not None
-                ), f"Weights {weight_name} has unsupported dtype {weight.dtype}"
-                weight_dtype = weight_dtype_opt.to(trt.DataType)
+                weight_dtype = dtype.try_from(weight.dtype).to(trt.DataType)
                 weight_map[f"{layer.name} {weight_name}"] = (
                     weight,
                     weight_dtype,
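Two things are worth unpacking in the last hunk. First, collapsing the assert means dtype.try_from, which returns None for unsupported dtypes, is now dereferenced immediately, so an unsupported weight dtype surfaces as an AttributeError instead of the earlier descriptive AssertionError. Second, the __class__ reassignment trick is plain Python; a runnable illustration with hypothetical stand-in classes (not TensorRT types):

class ILayerLike:  # stand-in for trt.ILayer
    def __init__(self) -> None:
        self._params = {"kernel": [1.0, 2.0], "bias": [0.5]}

class IConvolutionLayerLike(ILayerLike):  # stand-in for trt.IConvolutionLayer
    @property
    def kernel(self) -> list:
        return self._params["kernel"]

    @property
    def bias(self) -> list:
        return self._params["bias"]

layer = ILayerLike()
# layer.kernel would raise AttributeError here: the base class lacks the
# accessor, just as ILayer lacks ILayer.kernel/ILayer.bias.
layer.__class__ = IConvolutionLayerLike
print(layer.kernel, layer.bias)  # lookup now resolves through the subclass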
