@@ -41,6 +41,10 @@ def dynamo_path(iterations=3):
41
41
model , args = example_inputs , dynamic_shapes = {"x" : {0 : batch }}
42
42
)
43
43
44
+ # The 1st iteration is to measure the compilation time without engine caching.
45
+ # The 2nd and 3rd iterations are to measure the compilation time with engine caching.
46
+ # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.
47
+ # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.
44
48
for i in range (iterations ):
45
49
inputs = [torch .rand ((100 + i , 3 , 224 , 224 )).to ("cuda" )]
46
50
remove_timing_cache () # remove timing cache for engine caching measurement
@@ -133,6 +137,10 @@ def compile_path(iterations=3):
133
137
start = torch .cuda .Event (enable_timing = True )
134
138
end = torch .cuda .Event (enable_timing = True )
135
139
140
+ # The 1st iteration is to measure the compilation time without engine caching.
141
+ # The 2nd and 3rd iterations are to measure the compilation time with engine caching.
142
+ # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.
143
+ # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.
136
144
for i in range (iterations ):
137
145
inputs = [torch .rand (size ).to ("cuda" )]
138
146
# remove timing cache and reset dynamo for engine caching measurement
0 commit comments