Commit 925c76b

chore: updates to dynamo in perf_run

Signed-off-by: Dheeraj Peri <[email protected]>

1 parent 9131695 · commit 925c76b

File tree

1 file changed: +72 −38 lines changed

tools/perf/perf_run.py

Lines changed: 72 additions & 38 deletions
@@ -17,8 +17,8 @@
 # Importing supported Backends
 import torch
 import torch_tensorrt as torchtrt
-from torch_tensorrt.fx.lower import compile
-from torch_tensorrt.fx.utils import LowerPrecision
+# from torch_tensorrt.fx.lower import compile
+# from torch_tensorrt.fx.utils import LowerPrecision

 import tensorrt as trt
 from utils import (
@@ -134,21 +134,17 @@ def run_torch_tensorrt(
 # Runs inference using FX2TRT backend
 def run_fx2trt(model, input_tensors, params, precision, batch_size):
     print("Running FX2TRT for precision: ", precision, " batch_size : ", batch_size)
-    if precision == "fp32":
-        precision = LowerPrecision.FP32
-    elif precision == "fp16":
+    if precision == "fp16":
         model.half()
         input_tensors = [tensor.half() for tensor in input_tensors]
+
     # Run lowering eager mode benchmark
     start_compile = time.time_ns()
-    model = compile(
+    model = torchtrt.compile(
         model,
-        input_tensors,
-        max_batch_size=batch_size,
-        lower_precision=precision,
-        verbose_log=False,
-        explicit_batch_dimension=True,
+        ir="fx",
+        inputs=input_tensors,
+        enabled_precisions={torch.float16 if precision=="fp16" else torch.float32},
     )
     end_compile = time.time_ns()
     compile_time_ms = (end_compile - start_compile) / 1e6
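For reference, a minimal standalone sketch of the unified entrypoint this hunk switches to. The torchvision model, input shape, and CUDA device are illustrative assumptions, not part of the commit:

```python
# Sketch: compiling through the FX frontend via the unified torch_tensorrt API.
# Assumes a CUDA-enabled install of torch, torchvision, and torch_tensorrt.
import torch
import torch_tensorrt as torchtrt
import torchvision.models as models

model = models.resnet18().eval().cuda()
inputs = [torch.randn(1, 3, 224, 224, device="cuda")]

trt_model = torchtrt.compile(
    model,
    ir="fx",                             # route compilation through FX, as above
    inputs=inputs,
    enabled_precisions={torch.float32},  # {torch.float16} after model.half()
)

with torch.no_grad():
    out = trt_model(*inputs)
```

Selecting precision through `enabled_precisions` replaces the old `lower_precision=LowerPrecision.*` argument, which is why the `torch_tensorrt.fx` imports could be commented out above.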
@@ -173,6 +169,57 @@ def run_fx2trt(model, input_tensors, params, precision, batch_size):

     recordStats("FX-TensorRT", timings, precision, batch_size, compile_time_ms)

+def run_dynamo(model, input_tensors, params, precision, batch_size):
+    dynamo_backend = params["dynamo_backend"]
+    print("Running Dynamo with backend: ", dynamo_backend, " for precision: ", precision, " batch_size : ", batch_size)
+
+    if precision == "fp16":
+        input_tensors = [tensor.half() for tensor in input_tensors]
+
+    fp16_mode = True if precision == "fp16" else False
+    # dynamo_backend_params = {"fp16_mode" : fp16_mode}
+    # model = torch.compile(
+    #     model,
+    #     mode="default",
+    #     dynamic=False,
+    #     fullgraph=False,
+    #     backend=dynamo_backend,
+    #     # **dynamo_backend_params
+    # )
+    import torch._dynamo as dynamo
+    model = dynamo.optimize(dynamo_backend, nopython=True)(model)
+    # Compile and measure the time
+    with torch.no_grad():
+        start_compile = time.time_ns()
+        features = model(*input_tensors)
+        end_compile = time.time_ns()
+        compile_time_ms = (end_compile - start_compile) / 1e6
+        iters = params.get("iterations", 20)
+        # import pdb; pdb.set_trace()
+        print("============= DONE 0 ==================")
+
+        print("============= DONE 1 ==================")
+        # Warm up
+        model = torch._dynamo.run(model)
+        # import pdb; pdb.set_trace()
+
+        exported_model, _ = torch._dynamo.export(model, *input_tensors)
+        for i in range(WARMUP_ITER):
+            print("==== ITER: ", i)
+            features = exported_model(*input_tensors)
+
+        torch.cuda.synchronize()
+        print("============= DONE 2 ==================")
+        timings = []
+        for i in range(iters):
+            start_time = timeit.default_timer()
+            features = exported_model(*input_tensors)
+            torch.cuda.synchronize()
+            end_time = timeit.default_timer()
+            meas_time = end_time - start_time
+            timings.append(meas_time)
+
+    recordStats("Dynamo-" + dynamo_backend, timings, precision, batch_size, compile_time_ms)

 def torch_dtype_from_trt(dtype):
     if dtype == trt.int8:
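To see the measurement pattern `run_dynamo` introduces in isolation, here is a condensed sketch against the PyTorch 2.0-era `torch._dynamo` API this commit targets (later releases steer users toward `torch.compile`/`torch.export`). The toy module, backend name, and iteration count are illustrative assumptions:

```python
# Sketch: compile with TorchDynamo, export the traced graph, then time it.
import timeit

import torch
import torch._dynamo as dynamo

model = torch.nn.Linear(64, 64).eval().cuda()  # assumes a CUDA build
inputs = [torch.randn(8, 64, device="cuda")]

opt_model = dynamo.optimize("inductor", nopython=True)(model)

with torch.no_grad():
    opt_model(*inputs)  # first call triggers compilation (what start_compile times)
    exported_model, _ = dynamo.export(opt_model, *inputs)  # (graph_module, guards)

    timings = []
    for _ in range(20):
        start = timeit.default_timer()
        exported_model(*inputs)
        torch.cuda.synchronize()  # wait for the GPU before reading the clock
        timings.append(timeit.default_timer() - start)

print("mean latency (s):", sum(timings) / len(timings))
```

The `torch.cuda.synchronize()` inside the loop mirrors the hunk: without it, `timeit.default_timer()` would measure only kernel-launch time, not GPU execution.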
@@ -274,7 +321,6 @@ def run(
     truncate_long_and_double=False,
     batch_size=1,
     is_trt_engine=False,
-    use_dynamo=False,
     model_torch=None,
 ):
     for backend in backends:
@@ -307,7 +353,7 @@ def run(
            )
            continue

-        if backend == "all" and not use_dynamo:
+        if backend == "all":
            run_torch(model, input_tensors, params, precision, batch_size)
            run_torch_tensorrt(
                model,
@@ -327,8 +373,9 @@ def run(
                batch_size,
            )
            run_fx2trt(model_torch, input_tensors, params, precision, batch_size)
+            run_dynamo(model_torch, input_tensors, params, precision, batch_size)

-        elif backend == "torchscript" and not use_dynamo:
+        elif backend == "torchscript":
            run_torch(model, input_tensors, params, precision, batch_size)
            run_torch_tensorrt(
                model,
@@ -348,10 +395,10 @@ def run(
                batch_size,
            )

-        elif backend == "torch" and not use_dynamo:
+        elif backend == "torch":
            run_torch(model, input_tensors, params, precision, batch_size)

-        elif backend == "torch_tensorrt" and not use_dynamo:
+        elif backend == "torch_tensorrt":
            run_torch_tensorrt(
                model,
                input_tensors,
@@ -374,6 +421,8 @@ def run(
                is_trt_engine,
                batch_size,
            )
+        elif backend == "dynamo":
+            run_dynamo(model_torch, input_tensors, params, precision, batch_size)


 # Generate report
@@ -500,15 +549,10 @@ def load_torch_model(params):
        action="store_true",
        help="Boolean flag to determine if the user provided model is a TRT engine or not",
    )
-    arg_parser.add_argument(
-        "--dynamo",
-        action="store_true",
-        help="Boolean flag to determine if the user provided model should be compiled with torch._dynamo",
-    )
    arg_parser.add_argument(
        "--dynamo_backend",
        type=str,
-        default="inductor",
+        default="fx2trt",
        help="List of backends to use in Torchdynamo. Select options: inductor|fx2trt",
    )
    arg_parser.add_argument(
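The behavioral consequence of this hunk: the separate `--dynamo` flag is gone, and `--dynamo_backend` now defaults to `fx2trt`. A small self-contained check of the retained argument (the parser fragment mirrors the hunk; the parse calls are illustrative):

```python
# Sketch: the retained --dynamo_backend flag with its new default.
import argparse

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
    "--dynamo_backend",
    type=str,
    default="fx2trt",
    help="List of backends to use in Torchdynamo. Select options: inductor|fx2trt",
)

assert arg_parser.parse_args([]).dynamo_backend == "fx2trt"  # new default
assert arg_parser.parse_args(["--dynamo_backend", "inductor"]).dynamo_backend == "inductor"
```

Selecting the Dynamo path is now done through the backends list (see the new `elif backend == "dynamo"` branch above) rather than a standalone boolean flag.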
@@ -591,8 +635,6 @@ def load_torch_model(params):

    model_name_torch = params["model_torch"]
    model_torch = None
-    use_dynamo = params["dynamo"]
-    dynamo_backend = params["dynamo_backend"]

    # Load TorchScript model, if provided
    if os.path.exists(model_name):
@@ -615,21 +657,12 @@ def load_torch_model(params):
            + "or provide a torch model file"
        )

-    if use_dynamo and (model_torch is None):
+    backends = parse_backends(params["backends"])
+    if "dynamo" in backends and (model_torch is None):
        raise ValueError(
-            "No Pytorch model (nn.Module) is provided for torchdynamo compilation. Please provide a pytorch model"
-        )
-
-    if use_dynamo and model_torch:
-        model_torch = torch.compile(
-            model_torch,
-            "default",
-            dynamic=False,
-            fullgraph=False,
-            backend=dynamo_backend,
+            "No Pytorch model (nn.Module) is provided for torchdynamo compilation. Please provide a pytorch model using --model_torch argument"
        )

-    backends = parse_backends(params["backends"])
    truncate_long_and_double = params["truncate"]
    batch_size = params["batch_size"]
    is_trt_engine = params["is_trt_engine"]
@@ -639,9 +672,11 @@ def load_torch_model(params):
    input_tensors = parse_inputs(
        params["inputs"], precision_to_dtype(precision)
    )
+
    if not is_trt_engine and (precision == "fp16" or precision == "half"):
        # If model is TensorRT serialized engine then model.half will report failure
        model = model.half()
+
    status = run(
        model,
        backends,
@@ -651,7 +686,6 @@ def load_torch_model(params):
        truncate_long_and_double,
        batch_size,
        is_trt_engine,
-        use_dynamo,
        model_torch=model_torch,
    )