Update dynamo path example; fix engine cache size check and cache filename parsing

zewenli98 · zewenli98 · commit cc4ee8259384 · 2024-07-16T14:17:27.000-07:00
diff --git a/examples/dynamo/engine_caching_example.py b/examples/dynamo/engine_caching_example.py
@@ -1,10 +1,9 @@
-import time
+import os
 
 import numpy as np
 import torch
 import torch_tensorrt as torch_trt
 import torchvision.models as models
-from torch.fx.experimental.proxy_tensor import maybe_disable_fake_tensor_mode
 
 np.random.seed(0)
 torch.manual_seed(0)
@@ -19,127 +18,47 @@
 min_block_size = 0
 use_python_runtime = False
 torch_executed_ops = {}
+TIMING_CACHE_PATH = "/tmp/timing_cache.bin"
 
 
-def dynamo_path():
-    ############### warmup ###############
-    inputs = [torch.rand(size).to("cuda")]
-    t1 = time.time()
-    trt_gm = torch_trt.dynamo.compile(
-        exp_program,
-        tuple(inputs),
-        use_python_runtime=use_python_runtime,
-        enabled_precisions=enabled_precisions,
-        debug=debug,
-        min_block_size=min_block_size,
-        torch_executed_ops=torch_executed_ops,
-        make_refitable=True,
-        ignore_engine_cache=True,
-    )  # Output is a torch.fx.GraphModule
-    t2 = time.time()
+def remove_timing_cache(path=TIMING_CACHE_PATH):
+    if os.path.exists(path):
+        os.remove(path)
 
-    ############### compile for the first time ###############
-    inputs = [torch.rand(size).to("cuda")]
-    t3 = time.time()
-    trt_gm1 = torch_trt.dynamo.compile(
-        exp_program,
-        tuple(inputs),
-        use_python_runtime=use_python_runtime,
-        enabled_precisions=enabled_precisions,
-        debug=debug,
-        min_block_size=min_block_size,
-        torch_executed_ops=torch_executed_ops,
-        make_refitable=True,
-        ignore_engine_cache=False,
-    )  # Output is a torch.fx.GraphModule
-    t4 = time.time()
-    # Check the output
-    outputs = trt_gm1(*inputs)
-    print("----------> 1st output:", outputs)
 
-    ############### compile for the second time ###############
-    inputs = [torch.rand(size).to("cuda")]
-    t5 = time.time()
-    trt_gm2 = torch_trt.dynamo.compile(
-        exp_program,
-        tuple(inputs),
-        use_python_runtime=use_python_runtime,
-        enabled_precisions=enabled_precisions,
-        debug=debug,
-        min_block_size=min_block_size,
-        torch_executed_ops=torch_executed_ops,
-        make_refitable=True,
-        ignore_engine_cache=False,
-    )  # Output is a torch.fx.GraphModule
-    t6 = time.time()
-    # Check the output
-    outputs = trt_gm2(*inputs)
-    print("----------> 2nd output:", outputs)
+def dynamo_path(iterations=3):
+    outputs = []
+    times = []
+    start = torch.cuda.Event(enable_timing=True)
+    end = torch.cuda.Event(enable_timing=True)
+    for i in range(iterations):
+        inputs = [torch.rand(size).to("cuda")]
+        remove_timing_cache()
+        if i == 0:  # warmup
+            ignore_engine_cache = True
+        else:
+            ignore_engine_cache = False
 
-    print("----------> warmup compilation time:", t2 - t1, "seconds")
-    print("----------> 1st compilation time:", t4 - t3, "seconds")
-    print("----------> 2nd compilation time:", t6 - t5, "seconds")
+        start.record()
+        trt_gm = torch_trt.dynamo.compile(
+            exp_program,
+            tuple(inputs),
+            use_python_runtime=use_python_runtime,
+            enabled_precisions=enabled_precisions,
+            debug=debug,
+            min_block_size=min_block_size,
+            torch_executed_ops=torch_executed_ops,
+            make_refitable=True,
+            ignore_engine_cache=ignore_engine_cache,
+        )
+        end.record()
+        torch.cuda.synchronize()
+        times.append(start.elapsed_time(end))
+        outputs.append(trt_gm(*inputs))
 
-
-def compile_path():
-    inputs = [torch.rand(size).to("cuda")]
-    model = models.resnet18(pretrained=True).eval().to("cuda")
-    t1 = time.time()
-    model = torch.compile(
-        model,
-        backend="tensorrt",
-        options={
-            "use_python_runtime": use_python_runtime,
-            "enabled_precisions": enabled_precisions,
-            "debug": debug,
-            "min_block_size": min_block_size,
-            "torch_executed_ops": torch_executed_ops,
-            "make_refitable": True,
-            "ignore_engine_cache": True,
-        },
-    )
-    t2 = time.time()
-    print("---------->", model(*inputs))
-
-    t3 = time.time()
-    model1 = torch.compile(
-        model,
-        backend="tensorrt",
-        options={
-            "use_python_runtime": use_python_runtime,
-            "enabled_precisions": enabled_precisions,
-            "debug": debug,
-            "min_block_size": min_block_size,
-            "torch_executed_ops": torch_executed_ops,
-            "make_refitable": True,
-            "ignore_engine_cache": False,
-        },
-    )
-    t4 = time.time()
-    print("----------> 1st output:", model1(*inputs))
-
-    t5 = time.time()
-    model2 = torch.compile(
-        model,
-        backend="tensorrt",
-        options={
-            "use_python_runtime": use_python_runtime,
-            "enabled_precisions": enabled_precisions,
-            "debug": debug,
-            "min_block_size": min_block_size,
-            "torch_executed_ops": torch_executed_ops,
-            "make_refitable": True,
-            "ignore_engine_cache": False,
-        },
-    )
-    t6 = time.time()
-    print("----------> 2nd output:", model2(*inputs))
-
-    print("----------> warmup compilation time:", t2 - t1, "seconds")
-    print("----------> 1st compilation time:", t4 - t3, "seconds")
-    print("----------> 2nd compilation time:", t6 - t5, "seconds")
+    print("-----dynamo_path-----> output:", outputs)
+    print("-----dynamo_path-----> compilation time:", times, "seconds")
 
 
 if __name__ == "__main__":
     dynamo_path()
-    compile_path()
diff --git a/py/torch_tensorrt/dynamo/_engine_caching.py b/py/torch_tensorrt/dynamo/_engine_caching.py
@@ -98,7 +98,7 @@ def has_available_cache_size(self, serialized_engine: bytes) -> bool:
         Returns:
             bool: whether the cache has available size for the serialized engine
         """
-        return len(serialized_engine) <= self.available_engine_cache_size
+        return serialized_engine.nbytes <= self.available_engine_cache_size
 
     def clear_cache(self, size: int) -> None:
 
@@ -114,8 +114,8 @@ def save(
         input_names: List[str],
         output_names: List[str],
     ) -> None:
-        serialized_engine_size = len(serialized_engine)
-        if serialized_engine_size <= self.total_engine_cache_size:
+        serialized_engine_size = serialized_engine.nbytes
+        if serialized_engine_size > self.total_engine_cache_size:
             _LOGGER.warning(
                 f"The serialized engine cannot be saved because the size of the engine {serialized_engine_size} is larger than the total cache size {self.total_engine_cache_size}."
             )
@@ -124,13 +124,15 @@ def save(
         if not self.has_available_cache_size(serialized_engine):
             self.clear_cache(serialized_engine_size)
 
-        path = os.path.join(
-            self.engine_cache_dir, f"{hash}/engine_{input_names}_{output_names}.trt"
-        )
-        os.makedirs(os.path.dirname(path), exist_ok=True)
-        with open(path, "wb") as f:
-            f.write(serialized_engine)
-        _LOGGER.info(f"A TRT engine was cached to {path}")
+        if self.has_available_cache_size(serialized_engine):
+            path = os.path.join(
+                self.engine_cache_dir,
+                f"{hash}/engine--{input_names}--{output_names}.trt",
+            )
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            with open(path, "wb") as f:
+                f.write(serialized_engine)
+            _LOGGER.info(f"A TRT engine was cached to {path}")
 
     def load(self, hash: str) -> Tuple[Optional[bytes], List[str], List[str]]:
         directory = os.path.join(self.engine_cache_dir, hash)
@@ -141,7 +143,7 @@ def load(self, hash: str) -> Tuple[Optional[bytes], List[str], List[str]]:
             ), f"There are more than one engine {engine_list} under {directory}."
             path = os.path.join(directory, engine_list[0])
             input_names_str, output_names_str = (
-                engine_list[0].split(".")[0].split("_")[1:]
+                engine_list[0].split(".trt")[0].split("--")[1:]
             )
             input_names = ast.literal_eval(input_names_str)
             output_names = ast.literal_eval(output_names_str)