Commit 53c42b0 (parent: 2518db5)

fix: distinguish engines based on compilation settings in addition to graph structure

Signed-off-by: Naren Dasan <[email protected]>

File tree: 4 files changed (+25, -16 lines)

py/torch_tensorrt/dynamo/_engine_cache.py

Lines changed: 5 additions & 8 deletions
@@ -75,7 +75,6 @@ def get_hash(
         engine_specs_data = pickletools.optimize(engine_specs_data)
         engine_specs_hash = sha256_hash(engine_specs_data)
 
-        # TODO: Super first idea I had hash combination solution @Evan please iterate on this
         hash_val: str = graph_hash_val + input_specs_hash + engine_specs_hash
 
         return hash_val
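
Taken together, `get_hash` now keys the cache on graph structure, input specs, and compilation settings, so two engines built from the same graph with different settings get distinct cache entries. A minimal, self-contained sketch of that combination scheme (the helper names and `pickletools` usage mirror the diff, but `toy_get_hash` is an illustration, not the library's exact code):

import hashlib
import pickle
import pickletools

def sha256_hash(data: bytes) -> str:
    # hex digest used as one component of the cache key
    return hashlib.sha256(data).hexdigest()

def toy_get_hash(graph_repr: bytes, input_specs: object, compilation_settings: object) -> str:
    # hash the graph structure
    graph_hash_val = sha256_hash(graph_repr)
    # hash the input specs (shape ranges, dtypes, ...)
    input_specs_data = pickletools.optimize(pickle.dumps(input_specs))
    input_specs_hash = sha256_hash(input_specs_data)
    # hash the compilation settings, so engines built with different
    # settings no longer collide on the same cache entry
    engine_specs_data = pickletools.optimize(pickle.dumps(compilation_settings))
    engine_specs_hash = sha256_hash(engine_specs_data)
    # concatenate the three digests, as in the diff above
    return graph_hash_val + input_specs_hash + engine_specs_hash
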
@@ -95,6 +94,8 @@ def pack(
             serialized_engine (bytes): serialized TRT engine
             input_names (List[str]): input names of TRT engine
             output_names (List[str]): output names of TRT engine
+            input_specs (Sequence[Input]): input specs of TRT engine
+            compilation_settings (CompilationSettings): compilation settings of TRT engine
             weight_name_map (Optional[Dict[Any, Any]]): weight name map for refitting
 
         Returns:
@@ -121,7 +122,7 @@ def unpack(packed_obj: bytes) -> UnpackedCacheHit:
             packed_obj (bytes): packed blob
 
         Returns:
-            Tuple[bytes, List[str], List[str], CompilationSettings, Optional[Dict[str, Any]]]: serialized engine, input names, output names, CompilationSettings, weight name map
+            Tuple[bytes, List[str], List[str], Sequence[Input], CompilationSettings, Optional[Dict[str, Any]]]: serialized engine, input names, output names, input specs, CompilationSettings, weight name map
         """
         unpacked = pickle.loads(packed_obj)
         return (
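
With `pack` and `unpack` extended this way, the cached blob carries everything needed to validate a hit: the input specs and compilation settings travel alongside the serialized engine. A rough sketch of that round trip (the field names follow the docstring above; the dict layout is an assumption, not the library's actual format):

import pickle
from typing import Any, Dict, List, Optional, Sequence, Tuple

def toy_pack(
    serialized_engine: bytes,
    input_names: List[str],
    output_names: List[str],
    input_specs: Sequence[Any],
    compilation_settings: Any,
    weight_name_map: Optional[Dict[Any, Any]],
) -> bytes:
    # everything needed to validate a cache hit is stored in the blob,
    # including the settings the engine was originally built with
    return pickle.dumps(
        {
            "serialized_engine": serialized_engine,
            "input_names": input_names,
            "output_names": output_names,
            "input_specs": input_specs,
            "compilation_settings": compilation_settings,
            "weight_name_map": weight_name_map,
        }
    )

def toy_unpack(packed_obj: bytes) -> Tuple[Any, ...]:
    u = pickle.loads(packed_obj)
    # six-element tuple, matching the updated unpack docstring
    return (
        u["serialized_engine"],
        u["input_names"],
        u["output_names"],
        u["input_specs"],
        u["compilation_settings"],
        u["weight_name_map"],
    )
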
@@ -283,11 +284,7 @@ def LRU() -> None:
         else:
             LRU()
 
-    def save(
-        self,
-        hash: str,
-        blob: bytes,
-    ) -> None:
+    def save(self, hash: str, blob: bytes, *args: Any, **kwargs: Any) -> None:
         blob_size = len(blob)
         if blob_size > self.total_engine_cache_size:
             _LOGGER.warning(
@@ -324,7 +321,7 @@ def save(
                 f"The size {blob_size} is still larger than the available cache size {self.available_engine_cache_size}."
             )
 
-    def load(self, hash: str) -> Optional[bytes]:
+    def load(self, hash: str, *args: Any, **kwargs: Any) -> Optional[bytes]:
         directory = os.path.join(self.engine_cache_dir, hash)
         if os.path.exists(directory):
             blob_path = os.path.join(directory, "blob.bin")
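
Widening `save` and `load` to accept `*args`/`**kwargs` keeps `BaseEngineCache` subclasses source-compatible when callers pass extra, implementation-specific arguments. A hypothetical in-memory cache showing the shape such a subclass takes (the class itself is invented for illustration; real caches would subclass `BaseEngineCache`):

from typing import Any, Dict, Optional

class InMemoryEngineCache:
    """Hypothetical cache keyed by the combined hash from get_hash."""

    def __init__(self) -> None:
        self.store: Dict[str, bytes] = {}

    def save(self, hash: str, blob: bytes, *args: Any, **kwargs: Any) -> None:
        # extra positional/keyword arguments are accepted (and ignored here)
        # so callers can pass implementation-specific options
        self.store[hash] = blob

    def load(self, hash: str, *args: Any, **kwargs: Any) -> Optional[bytes]:
        # returns None on a cache miss
        return self.store.get(hash)
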

py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py

Lines changed: 2 additions & 2 deletions
@@ -557,7 +557,7 @@ def run(
             )
             assert (
                 setting_compatiblity
-            ), f"Attempted to refit a prebuilt engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})"
+            ), f"Attempted to refit a cached engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})"
 
             for i, e in enumerate(
                 [
@@ -567,7 +567,7 @@ def run(
             ):
                 assert (
                     e
-                ), f"Found that cached engine was built for a different input size (input: {i}, cached size: {cached_engine_input_specs[i]}, new size: {self.input_specs[i]}"
+                ), f"Attempted to refit a cached engine built for a different input size (input: {i}, cached size: {cached_engine_input_specs[i]}, new size: {self.input_specs[i]}"
 
                 _LOGGER.info(
                     "Found the cached engine that corresponds to this graph. It is directly loaded."

py/torch_tensorrt/dynamo/utils.py

Lines changed: 1 addition & 1 deletion
@@ -499,7 +499,7 @@ def parse_dynamo_kwargs(
 
     # If cache_built_engines and reuse_cached_engines are True but custom_engine_cache is not provided,
     # then create a default disk engine cache
-    #
+
     engine_cache = None
     if kwargs.get("cache_built_engines") or kwargs.get("reuse_cached_engines"):
         assert kwargs.get(
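
The comment retained here describes a fallback: build a default disk engine cache only when caching is requested and no custom cache was supplied. A self-contained sketch of that decision logic (the `DiskEngineCacheSketch` stand-in and the cache directory are hypothetical):

from typing import Any, Optional

class DiskEngineCacheSketch:
    """Hypothetical stand-in for the default disk engine cache."""

    def __init__(self, cache_dir: str) -> None:
        self.cache_dir = cache_dir

def resolve_engine_cache(kwargs: dict) -> Optional[Any]:
    # mirror the comment in parse_dynamo_kwargs: only build a cache when
    # caching or reuse is requested, and prefer a user-supplied one
    if not (kwargs.get("cache_built_engines") or kwargs.get("reuse_cached_engines")):
        return None
    custom = kwargs.get("custom_engine_cache")
    return custom if custom is not None else DiskEngineCacheSketch("/tmp/engine_cache")
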

tests/py/dynamo/models/test_engine_cache.py

Lines changed: 17 additions & 5 deletions
@@ -9,7 +9,7 @@
 import torch_tensorrt as torch_trt
 import torchvision.models as models
 from torch.testing._internal.common_utils import TestCase
-from torch_tensorrt.dynamo._defaults import ENGINE_CACHE_DIR
+from torch_tensorrt.dynamo._defaults import TIMING_CACHE_PATH
 from torch_tensorrt.dynamo._engine_cache import BaseEngineCache
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
@@ -160,9 +160,9 @@ def test_engine_settings_is_not_equal(self):
         )
         input_specs2 = (
             torch_trt.Input(
-                min_shape=(1, 3, 300, 300),
-                opt_shape=(100, 3, 300, 300),
-                max_shape=(200, 3, 300, 300),
+                min_shape=(1, 3, 224, 224),
+                opt_shape=(100, 3, 224, 224),
+                max_shape=(200, 3, 224, 224),
             ),
         )
         settings2 = CompilationSettings(
@@ -192,6 +192,10 @@ def test_dynamo_compile_with_default_disk_engine_cache(self):
         if os.path.exists(engine_cache_dir):
             shutil.rmtree(engine_cache_dir)
 
+        def remove_timing_cache(path=TIMING_CACHE_PATH):
+            if os.path.exists(path):
+                os.remove(path)
+
         # The 1st iteration is to measure the compilation time without engine caching
         # The 2nd and 3rd iterations are to measure the compilation time with engine caching.
         # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.
@@ -202,6 +206,8 @@ def test_dynamo_compile_with_default_disk_engine_cache(self):
         start = torch.cuda.Event(enable_timing=True)
         end = torch.cuda.Event(enable_timing=True)
         for i in range(3):
+            remove_timing_cache()
+            torch._dynamo.reset()
             if i == 0:
                 cache_built_engines = False
                 reuse_cached_engines = False
@@ -351,6 +357,10 @@ def test_torch_compile_with_default_disk_engine_cache(self):
         if os.path.exists(engine_cache_dir):
             shutil.rmtree(engine_cache_dir)
 
+        def remove_timing_cache(path=TIMING_CACHE_PATH):
+            if os.path.exists(path):
+                os.remove(path)
+
         # The 1st iteration is to measure the compilation time without engine caching
         # The 2nd and 3rd iterations are to measure the compilation time with engine caching.
         # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.
@@ -361,7 +371,9 @@ def test_torch_compile_with_default_disk_engine_cache(self):
         start = torch.cuda.Event(enable_timing=True)
         end = torch.cuda.Event(enable_timing=True)
         for i in range(3):
-            # remove timing cache and reset dynamo for engine caching messurement
+            # remove timing cache and reset dynamo for engine caching measurement
+            remove_timing_cache()
+            torch._dynamo.reset()
             if i == 0:
                 cache_built_engines = False
                 reuse_cached_engines = False
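
For reference, the caching behavior these tests time is driven by two compile flags. A usage sketch (the model and input shapes are placeholders, and a CUDA device is assumed):

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models

model = models.resnet18().eval().cuda()
inputs = [torch.randn(1, 3, 224, 224).cuda()]

# the first compile populates the default disk engine cache; recompiling
# the same graph with the same input specs and settings can then reuse
# the cached engine instead of rebuilding it
trt_model = torch_trt.compile(
    model,
    ir="dynamo",
    inputs=inputs,
    cache_built_engines=True,
    reuse_cached_engines=True,
)
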
