Skip to content

Commit c46a6e6

Browse files
committed
[LV] Remove unnecessary getRuntimeVF call when computing vector TC.
Step is already VF * UF, so there is no need to compute that value again via getRuntimeVF; recomputing it may require multiple instructions for scalable VFs.
1 parent 87374a8 commit c46a6e6

25 files changed

+76
-227
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2759,9 +2759,8 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
27592759
if (Cost->foldTailByMasking()) {
27602760
assert(isPowerOf2_32(VF.getKnownMinValue() * UF) &&
27612761
"VF*UF must be a power of 2 when folding tail by masking");
2762-
Value *NumLanes = getRuntimeVF(Builder, Ty, VF * UF);
2763-
TC = Builder.CreateAdd(
2764-
TC, Builder.CreateSub(NumLanes, ConstantInt::get(Ty, 1)), "n.rnd.up");
2762+
TC = Builder.CreateAdd(TC, Builder.CreateSub(Step, ConstantInt::get(Ty, 1)),
2763+
"n.rnd.up");
27652764
}
27662765

27672766
// Now we need to generate the expression for the part of the loop that the

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
99
; CHECK: vector.ph:
1010
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1111
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
12-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
13-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
14-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
12+
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
1513
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1614
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1715
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -102,9 +100,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
102100
; CHECK: vector.ph:
103101
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
104102
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
105-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
106-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
107-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
103+
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
108104
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
109105
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
110106
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -781,9 +781,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
781781
; PRED: vector.ph:
782782
; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
783783
; PRED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
784-
; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
785-
; PRED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
786-
; PRED-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
784+
; PRED-NEXT: [[TMP7:%.*]] = sub i64 [[TMP4]], 1
787785
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP7]]
788786
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
789787
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -157,9 +157,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
157157
; PRED: vector.ph:
158158
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
159159
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
160-
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
161-
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
162-
; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
160+
; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
163161
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP8]]
164162
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
165163
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 12 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -55,9 +55,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
5555
; TFCOMMON-NEXT: entry:
5656
; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
5757
; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
58-
; TFCOMMON-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
59-
; TFCOMMON-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
60-
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
58+
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
6159
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
6260
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
6361
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -85,9 +83,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
8583
; TFA_INTERLEAVE-NEXT: entry:
8684
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
8785
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
88-
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
89-
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
90-
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
86+
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
9187
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
9288
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
9389
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -203,9 +199,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
203199
; TFCOMMON-NEXT: entry:
204200
; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
205201
; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
206-
; TFCOMMON-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
207-
; TFCOMMON-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
208-
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
202+
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
209203
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
210204
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
211205
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -238,9 +232,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
238232
; TFA_INTERLEAVE-NEXT: entry:
239233
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
240234
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
241-
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
242-
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
243-
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
235+
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
244236
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
245237
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
246238
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -381,9 +373,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
381373
; TFCOMMON-NEXT: entry:
382374
; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
383375
; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
384-
; TFCOMMON-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
385-
; TFCOMMON-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
386-
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
376+
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
387377
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
388378
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
389379
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -417,9 +407,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
417407
; TFA_INTERLEAVE-NEXT: entry:
418408
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
419409
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
420-
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
421-
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
422-
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
410+
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
423411
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
424412
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
425413
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -688,9 +676,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
688676
; TFALWAYS-NEXT: entry:
689677
; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
690678
; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
691-
; TFALWAYS-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
692-
; TFALWAYS-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
693-
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
679+
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
694680
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
695681
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
696682
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -718,9 +704,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
718704
; TFFALLBACK-NEXT: entry:
719705
; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
720706
; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
721-
; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
722-
; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
723-
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
707+
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
724708
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
725709
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
726710
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -748,9 +732,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
748732
; TFA_INTERLEAVE-NEXT: entry:
749733
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
750734
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
751-
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
752-
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
753-
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
735+
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
754736
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
755737
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
756738
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -870,9 +852,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
870852
; TFALWAYS-NEXT: entry:
871853
; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
872854
; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
873-
; TFALWAYS-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
874-
; TFALWAYS-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
875-
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
855+
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
876856
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
877857
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
878858
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -907,9 +887,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
907887
; TFFALLBACK-NEXT: entry:
908888
; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
909889
; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
910-
; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
911-
; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
912-
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
890+
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
913891
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
914892
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
915893
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -944,9 +922,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
944922
; TFA_INTERLEAVE-NEXT: entry:
945923
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
946924
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
947-
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
948-
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
949-
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
925+
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
950926
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
951927
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
952928
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -159,9 +159,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
159159
; PRED: vector.ph:
160160
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
161161
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
162-
; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
163-
; PRED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
164-
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
162+
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1
165163
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP5]]
166164
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
167165
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -391,9 +389,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
391389
; PRED: vector.ph:
392390
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
393391
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
394-
; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
395-
; PRED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
396-
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
392+
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1
397393
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP5]]
398394
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
399395
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

Lines changed: 7 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -130,9 +130,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
130130
; CHECK-ORDERED-TF: vector.ph:
131131
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
132132
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
133-
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
134-
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
135-
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
133+
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
136134
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
137135
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
138136
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -390,9 +388,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
390388
; CHECK-ORDERED-TF: vector.ph:
391389
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
392390
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
393-
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
394-
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
395-
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
391+
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
396392
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
397393
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
398394
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -705,9 +701,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
705701
; CHECK-ORDERED-TF: vector.ph:
706702
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
707703
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
708-
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
709-
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
710-
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
704+
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP4]], 1
711705
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP7]]
712706
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
713707
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -967,9 +961,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
967961
; CHECK-ORDERED-TF: vector.ph:
968962
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
969963
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
970-
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
971-
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
972-
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
964+
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1
973965
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP5]]
974966
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
975967
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -1208,9 +1200,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
12081200
; CHECK-ORDERED-TF: vector.ph:
12091201
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
12101202
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
1211-
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1212-
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
1213-
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
1203+
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
12141204
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
12151205
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
12161206
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -1683,9 +1673,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
16831673
; CHECK-ORDERED-TF: vector.ph:
16841674
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
16851675
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
1686-
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1687-
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
1688-
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
1676+
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
16891677
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
16901678
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
16911679
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
@@ -2079,9 +2067,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
20792067
; CHECK-ORDERED-TF: vector.ph:
20802068
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
20812069
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
2082-
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
2083-
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
2084-
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
2070+
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
20852071
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
20862072
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
20872073
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -85,9 +85,7 @@ define void @cost_store_i8(ptr %dst) #0 {
8585
; PRED: vector.ph:
8686
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
8787
; PRED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
88-
; PRED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
89-
; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
90-
; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
88+
; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
9189
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 101, [[TMP4]]
9290
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
9391
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture
66
; CHECK-LABEL: @trip7_i64(
77
; CHECK: = call i64 @llvm.vscale.i64()
88
; CHECK-NEXT: = mul i64
9-
; CHECK: = call i64 @llvm.vscale.i64()
10-
; CHECK-NEXT: = mul i64
119
; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
1210
; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2
1311
; CHECK: vector.body:

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
4747
; CHECK: vector.ph:
4848
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
4949
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
50-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
51-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
52-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
50+
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
5351
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
5452
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
5553
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]

0 commit comments

Comments
 (0)