Fix CI errors: disable engine caching in dynamo tests and create the custom engine cache directory if missing

zewenli98 · zewenli98 · commit e91e766353f1 · 2024-08-28T18:31:35.000-07:00
diff --git a/tests/py/dynamo/conversion/test_bitwise_and_aten.py b/tests/py/dynamo/conversion/test_bitwise_and_aten.py
@@ -141,7 +141,12 @@ def forward(self, lhs_val, rhs_val):
             mod, inputs, dynamic_shapes=({1: dyn_dim}, {0: dyn_dim})
         )
         trt_mod = torch_tensorrt.dynamo.compile(
-            fx_mod, inputs=inputs, enable_precisions={torch.bool}, min_block_size=1
+            fx_mod,
+            inputs=inputs,
+            enable_precisions={torch.bool},
+            min_block_size=1,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
         with torch.no_grad():
             cuda_inputs = []
diff --git a/tests/py/dynamo/conversion/test_embedding_bag_aten.py b/tests/py/dynamo/conversion/test_embedding_bag_aten.py
@@ -484,7 +484,12 @@ def forward(self, weights, indices, offsets, per_sample_weights=None):
             dynamic_shapes["per_sample_weights"] = {}
         fx_mod = torch.export.export(mod, inputs, dynamic_shapes=dynamic_shapes)
         trt_mod = torch_tensorrt.dynamo.compile(
-            fx_mod, inputs=inputs, enable_precisions=torch.float32, min_block_size=1
+            fx_mod,
+            inputs=inputs,
+            enable_precisions=torch.float32,
+            min_block_size=1,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
         # use the inputs with different shape to inference:
         if per_sample_weights is None:
diff --git a/tests/py/dynamo/conversion/test_index_select_aten.py b/tests/py/dynamo/conversion/test_index_select_aten.py
@@ -109,7 +109,12 @@ def forward(self, source_tensor, indice_tensor):
 
         fx_mod = torch.export.export(mod, inputs, dynamic_shapes=dynamic_shapes)
         trt_mod = torch_tensorrt.dynamo.compile(
-            fx_mod, inputs=inputs, enable_precisions=torch.float32, min_block_size=1
+            fx_mod,
+            inputs=inputs,
+            enable_precisions=torch.float32,
+            min_block_size=1,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
         # use different shape of inputs for inference:
         inputs = (source_tensor_1, indice_tensor)
diff --git a/tests/py/dynamo/models/test_dtype_support.py b/tests/py/dynamo/models/test_dtype_support.py
@@ -41,6 +41,8 @@ def forward(self, x):
             truncate_double=True,
             min_block_size=1,
             use_python_runtime=False,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
 
         torch_model_results = mod(in_tensor)
@@ -79,6 +81,8 @@ def forward(self, x):
             truncate_double=True,
             min_block_size=1,
             use_python_runtime=True,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
 
         torch_model_results = mod(in_tensor)
@@ -123,6 +127,8 @@ def forward(self, x):
             truncate_double=False,
             min_block_size=1,
             use_python_runtime=False,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
 
         torch_model_results = mod(in_tensor)
@@ -162,6 +168,8 @@ def forward(self, x):
             truncate_double=False,
             min_block_size=1,
             use_python_runtime=True,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
 
         torch_model_results = mod(in_tensor)
@@ -214,6 +222,8 @@ def forward(self, x):
             enabled_precisions={torch.float, torch.bfloat16, torch.half},
             min_block_size=1,
             use_python_runtime=False,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
 
         torch_model_results = mod(in_tensor)
@@ -252,6 +262,8 @@ def forward(self, x):
             enabled_precisions={torch.float, torch.bfloat16, torch.half},
             min_block_size=1,
             use_python_runtime=True,
+            cache_built_engines=False,
+            reuse_cached_engines=False,
         )
 
         torch_model_results = mod(in_tensor)
@@ -289,6 +301,8 @@ def forward(self, x):
                 debug=True,
                 min_block_size=1,
                 device=device,
+                cache_built_engines=False,
+                reuse_cached_engines=False,
             )
 
             torch_model_results = mod(*inputs)
diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py
@@ -39,6 +39,8 @@ def forward(self, x):
         "ir": ir,
         "pass_through_build_failures": True,
         "min_block_size": 1,
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
     if ir == "torch_compile":
         input_bs4 = torch.randn((4, 3, 224, 224)).to("cuda")
@@ -96,6 +98,8 @@ def forward(self, x):
         "pass_through_build_failures": True,
         "torch_executed_ops": {"torch.ops.aten.abs.default"},
         "min_block_size": 1,
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     if ir == "torch_compile":
@@ -147,6 +151,8 @@ def forward(self, x):
         "ir": ir,
         "pass_through_build_failures": True,
         "min_block_size": 1,
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     if ir == "torch_compile":
@@ -190,6 +196,8 @@ def test_resnet_dynamic(ir):
         "ir": ir,
         "pass_through_build_failures": True,
         "min_block_size": 1,
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     if ir == "torch_compile":
@@ -252,6 +260,8 @@ def forward(self, x):
         "pass_through_build_failures": True,
         "optimization_level": 1,
         "min_block_size": 1,
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     trt_mod = torchtrt.compile(model, **compile_spec)
@@ -284,6 +294,8 @@ def forward(self, x):
         "enabled_precisions": {torch.float},
         "ir": ir,
         "min_block_size": 1,
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
     inputs_bs2 = torch.randn(2, 2, 10).to("cuda")
     if ir == "torch_compile":
@@ -338,6 +350,8 @@ def forward(self, x):
         "pass_through_build_failures": True,
         "min_block_size": 1,
         "torch_executed_ops": {"torch.ops.aten.add.Tensor"},
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     # Compile the model
diff --git a/tests/py/dynamo/models/test_engine_cache.py b/tests/py/dynamo/models/test_engine_cache.py
@@ -21,6 +21,8 @@ def __init__(
         engine_cache_dir: str,
     ) -> None:
         self.engine_cache_dir = engine_cache_dir
+        if not os.path.exists(self.engine_cache_dir):
+            os.makedirs(self.engine_cache_dir, exist_ok=True)
 
     def save(
         self,
@@ -99,18 +101,18 @@ def test_dynamo_compile_with_default_disk_engine_cache(self):
         cos_sim = cosine_similarity(results[0], results[1])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_dynamo_compile_with_default_disk_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         cos_sim = cosine_similarity(results[1], results[2])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_dynamo_compile_with_default_disk_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         assertions.assertTrue(
             times[0] > times[2],
-            msg=f"test_dynamo_compile_with_default_disk_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
+            msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
         )
 
     def test_dynamo_compile_with_custom_engine_cache(self):
@@ -167,18 +169,18 @@ def test_dynamo_compile_with_custom_engine_cache(self):
         cos_sim = cosine_similarity(results[0], results[1])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_dynamo_compile_with_custom_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         cos_sim = cosine_similarity(results[1], results[2])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_dynamo_compile_with_custom_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         assertions.assertTrue(
             times[0] > times[2],
-            msg=f"test_dynamo_compile_with_custom_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
+            msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
         )
 
     def test_torch_compile_with_default_disk_engine_cache(self):
@@ -231,18 +233,18 @@ def test_torch_compile_with_default_disk_engine_cache(self):
         cos_sim = cosine_similarity(results[0], results[1])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_torch_compile_with_default_disk_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         cos_sim = cosine_similarity(results[1], results[2])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_torch_compile_with_default_disk_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         assertions.assertTrue(
             times[0] > times[2],
-            msg=f"test_torch_compile_with_default_disk_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
+            msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
         )
 
     def test_torch_compile_with_custom_engine_cache(self):
@@ -295,16 +297,16 @@ def test_torch_compile_with_custom_engine_cache(self):
         cos_sim = cosine_similarity(results[0], results[1])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_torch_compile_with_custom_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         cos_sim = cosine_similarity(results[1], results[2])
         assertions.assertTrue(
             cos_sim > COSINE_THRESHOLD,
-            msg=f"test_torch_compile_with_custom_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+            msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
         )
 
         assertions.assertTrue(
             times[0] > times[2],
-            msg=f"test_torch_compile_with_custom_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
+            msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
         )
diff --git a/tests/py/dynamo/models/test_export_kwargs_serde.py b/tests/py/dynamo/models/test_export_kwargs_serde.py
@@ -63,6 +63,8 @@ def forward(self, x, b=5, c=None, d=None):
         "optimization_level": 1,
         "min_block_size": 1,
         "ir": "dynamo",
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     exp_program = torch.export.export(model, args=tuple(args), kwargs=kwargs)
@@ -122,6 +124,8 @@ def forward(self, x, b=5, c=None, d=None):
         "optimization_level": 1,
         "min_block_size": 1,
         "ir": "dynamo",
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
@@ -190,6 +194,8 @@ def forward(self, x, b=5, c=None, d=None):
         "optimization_level": 1,
         "min_block_size": 1,
         "ir": "dynamo",
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
@@ -271,6 +277,8 @@ def forward(self, x, b=None, c=None, d=None, e=[]):
         "optimization_level": 1,
         "min_block_size": 1,
         "ir": "dynamo",
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
@@ -358,6 +366,8 @@ def forward(self, x, b=None, c=None, d=None, e=[]):
         "optimization_level": 1,
         "min_block_size": 1,
         "ir": "dynamo",
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
@@ -444,6 +454,8 @@ def forward(self, x, b=None, c=None, d=None, e=[]):
         "optimization_level": 1,
         "min_block_size": 1,
         "ir": "dynamo",
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
@@ -505,6 +517,8 @@ def forward(self, x, b=5, c=None, d=None):
         "optimization_level": 1,
         "min_block_size": 1,
         "ir": "dynamo",
+        "cache_built_engines": False,
+        "reuse_cached_engines": False,
     }
 
     exp_program = torch.export.export(model, args=tuple(args), kwargs=kwargs)
diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py
diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py
diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py

Original file line number	Diff line number	Diff line change
`@@ -141,7 +141,12 @@ def forward(self, lhs_val, rhs_val):`
`141`	`141`	`mod, inputs, dynamic_shapes=({1: dyn_dim}, {0: dyn_dim})`
`142`	`142`	`)`
`143`	`143`	`trt_mod = torch_tensorrt.dynamo.compile(`
`144`		`- fx_mod, inputs=inputs, enable_precisions={torch.bool}, min_block_size=1`
	`144`	`+ fx_mod,`
	`145`	`+ inputs=inputs,`
	`146`	`+ enable_precisions={torch.bool},`
	`147`	`+ min_block_size=1,`
	`148`	`+ cache_built_engines=False,`
	`149`	`+ reuse_cached_engines=False,`
`145`	`150`	`)`
`146`	`151`	`with torch.no_grad():`
`147`	`152`	`cuda_inputs = []`