# Fetch/export the model checkpoints referenced below into ${MODELS_DIR}.
python hub.py

# Batch sizes swept per model. Large models (ViT-Large, SD-UNet) use a
# reduced sweep to stay within GPU memory.
batch_sizes=(1 2 4 8 16 32 64 128 256)
large_model_batch_sizes=(1 2 4 8 16 32 64)

# Benchmark VGG16 model
echo "Benchmarking VGG16 model"
for bs in "${batch_sizes[@]}"
do
  python perf_run.py --model "${MODELS_DIR}/vgg16_scripted.jit.pt" \
                     --model_torch "${MODELS_DIR}/vgg16_pytorch.pt" \
                     --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
                     --batch_size "${bs}" \
                     --truncate \
                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
                     --report "vgg16_perf_bs${bs}.txt"
done

# Benchmark AlexNet model
echo "Benchmarking AlexNet model"
for bs in "${batch_sizes[@]}"
do
  python perf_run.py --model "${MODELS_DIR}/alexnet_scripted.jit.pt" \
                     --model_torch "${MODELS_DIR}/alexnet_pytorch.pt" \
                     --precision fp16 --inputs="(${bs}, 3, 227, 227)" \
                     --batch_size "${bs}" \
                     --truncate \
                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
                     --report "alexnet_perf_bs${bs}.txt"
done

# Benchmark Resnet50 model
echo "Benchmarking Resnet50 model"
for bs in "${batch_sizes[@]}"
do
  python perf_run.py --model "${MODELS_DIR}/resnet50_scripted.jit.pt" \
                     --model_torch "${MODELS_DIR}/resnet50_pytorch.pt" \
                     --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
                     --batch_size "${bs}" \
                     --truncate \
                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
                     --report "resnet50_perf_bs${bs}.txt"
done

# Benchmark VIT model
echo "Benchmarking VIT model"
for bs in "${batch_sizes[@]}"
do
  python perf_run.py --model "${MODELS_DIR}/vit_scripted.jit.pt" \
                     --model_torch "${MODELS_DIR}/vit_pytorch.pt" \
                     --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
                     --batch_size "${bs}" \
                     --truncate \
                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
                     --report "vit_perf_bs${bs}.txt"
done

# Benchmark VIT Large model
echo "Benchmarking VIT Large model"
for bs in "${large_model_batch_sizes[@]}"
do
  python perf_run.py --model "${MODELS_DIR}/vit_large_scripted.jit.pt" \
                     --model_torch "${MODELS_DIR}/vit_large_pytorch.pt" \
                     --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
                     --truncate \
                     --batch_size "${bs}" \
                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
                     --report "vit_large_perf_bs${bs}.txt"
done

# Benchmark EfficientNet-B0 model
echo "Benchmarking EfficientNet-B0 model"
for bs in "${batch_sizes[@]}"
do
  python perf_run.py --model "${MODELS_DIR}/efficientnet_b0_scripted.jit.pt" \
                     --model_torch "${MODELS_DIR}/efficientnet_b0_pytorch.pt" \
                     --precision fp16 --inputs="(${bs}, 3, 224, 224)" \
                     --batch_size "${bs}" \
                     --truncate \
                     --backends torch,ts_trt,dynamo,torch_compile,inductor \
                     --report "efficientnet_b0_perf_bs${bs}.txt"
done

# Benchmark Stable Diffusion UNet model
# Three inputs per step: latents, timestep, and text-encoder hidden states.
echo "Benchmarking SD UNet model"
for bs in "${large_model_batch_sizes[@]}"
do
  python perf_run.py --model_torch "${MODELS_DIR}/sd_unet_pytorch.pt" \
                     --precision fp16 --inputs="(${bs}, 4, 128, 128)@fp16;(${bs})@fp16;(${bs}, 1, 768)@fp16" \
                     --batch_size "${bs}" \
                     --backends torch_compile \
                     --truncate \
                     --report "sd_unet_perf_bs${bs}.txt"
done
# Benchmark BERT model
@@ -60,7 +104,7 @@ for bs in ${batch_sizes[@]}
60
104
do
61
105
python perf_run.py --model ${MODELS_DIR} /bert_base_uncased_traced.jit.pt \
62
106
--model_torch " bert_base_uncased" \
63
- --precision fp32 --inputs=" (${bs} , 128)@int32;(${bs} , 128)@int32" \
107
+ --precision fp16 --inputs=" (${bs} , 128)@int32;(${bs} , 128)@int32" \
64
108
--batch_size ${bs} \
65
109
--backends torch,ts_trt,dynamo,torch_compile,inductor \
66
110
--truncate \
0 commit comments