Skip to content

Commit a9ac22b

Browse files
committed
[LV] Add users for loads to make tests more robust.
Update a few tests to add users to loads so that the loads are not optimized out by future changes. In cases where the unused loads didn't matter for the test, remove them instead.
1 parent ecb1d84 commit a9ac22b

File tree

3 files changed

+41
-32
lines changed

3 files changed

+41
-32
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
88
; the loop, preventing the gep (and consequently the loop induction
99
; update variable) from being classified as 'uniform'.
1010

11-
define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
11+
define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 {
1212
; CHECK-LABEL: @test_no_scalarization(
1313
; CHECK-NEXT: L.entry:
1414
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[IDX:%.*]], 1
@@ -39,21 +39,26 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
3939
; CHECK: vector.body:
4040
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4141
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
42-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]]
43-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 0
44-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i32 0
45-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP14]], align 8
46-
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
47-
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 2
48-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP16]]
49-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
50-
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
51-
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
52-
; CHECK: middle.block:
42+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IDX]], [[INDEX]]
43+
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0
44+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]]
45+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 0
46+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0
47+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP15]], align 8
48+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP12]]
49+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0
50+
; CHECK-NEXT: store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP17]], align 8
5351
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
5452
; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
55-
; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1
56-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 [[TMP20]]
53+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]]
54+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
55+
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
56+
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
57+
; CHECK: middle.block:
58+
; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
59+
; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[TMP21]], 2
60+
; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1
61+
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 [[TMP23]]
5762
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
5863
; CHECK-NEXT: br i1 [[CMP_N]], label [[L_EXIT:%.*]], label [[SCALAR_PH]]
5964
; CHECK: scalar.ph:
@@ -62,12 +67,14 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
6267
; CHECK: L.LoopBody:
6368
; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[L_LOOPBODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
6469
; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i32 [[INDVAR]], 1
65-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]]
66-
; CHECK-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 8
67-
; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]]
68-
; CHECK-NEXT: br i1 [[TMP24]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
70+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]]
71+
; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8
72+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i64, ptr [[B]], i32 [[INDVAR]]
73+
; CHECK-NEXT: store double [[TMP26]], ptr [[GEP_B]], align 8
74+
; CHECK-NEXT: [[TMP27:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]]
75+
; CHECK-NEXT: br i1 [[TMP27]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
6976
; CHECK: L.exit:
70-
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP22]], [[L_LOOPBODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
77+
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP25]], [[L_LOOPBODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
7178
; CHECK-NEXT: store i64 1, ptr [[DOTLCSSA]], align 8
7279
; CHECK-NEXT: ret void
7380
;
@@ -79,6 +86,8 @@ L.LoopBody: ; preds = %L.LoopBody, %L.entr
7986
%indvar.next = add nsw i32 %indvar, 1
8087
%0 = getelementptr i64, ptr %a, i32 %indvar
8188
%1 = load double, ptr %0, align 8
89+
%gep.b = getelementptr i64, ptr %b, i32 %indvar
90+
store double %1, ptr %gep.b
8291
%2 = icmp slt i32 %indvar.next, %n
8392
br i1 %2, label %L.LoopBody, label %L.exit
8493

llvm/test/Transforms/LoopVectorize/X86/pr48340.ll

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
77
%0 = type { i32 }
88
%1 = type { i64 }
99

10-
define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
10+
define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
1111
; CHECK-LABEL: @foo(
1212
; CHECK-NEXT: entry:
1313
; CHECK-NEXT: [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
@@ -40,6 +40,7 @@ define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
4040
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4141
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4242
; CHECK: middle.block:
43+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr> [[WIDE_MASKED_GATHER6]], i32 3
4344
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
4445
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4546
; CHECK: scalar.ph:
@@ -50,9 +51,10 @@ define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
5051
; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i64, ptr [[P2]], i64 128
5152
; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P2]], align 8
5253
; CHECK-NEXT: [[B:%.*]] = icmp eq ptr [[P_INC]], [[P_LAST]]
53-
; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
54+
; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
5455
; CHECK: exit:
55-
; CHECK-NEXT: ret void
56+
; CHECK-NEXT: [[V_LCSSA:%.*]] = phi ptr [ [[V]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
57+
; CHECK-NEXT: ret ptr [[V_LCSSA]]
5658
;
5759
entry:
5860
br label %loop
@@ -65,10 +67,10 @@ loop:
6567
br i1 %b, label %exit, label %loop
6668

6769
exit:
68-
ret void
70+
ret ptr %v
6971
}
7072

71-
define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
73+
define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
7274
; CHECK-LABEL: @bar(
7375
; CHECK-NEXT: entry:
7476
; CHECK-NEXT: [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
@@ -101,6 +103,7 @@ define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
101103
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
102104
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
103105
; CHECK: middle.block:
106+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr> [[WIDE_MASKED_GATHER6]], i32 3
104107
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
105108
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
106109
; CHECK: scalar.ph:
@@ -113,7 +116,8 @@ define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
113116
; CHECK-NEXT: [[B:%.*]] = icmp eq ptr [[P_INC]], [[P_LAST]]
114117
; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
115118
; CHECK: exit:
116-
; CHECK-NEXT: ret void
119+
; CHECK-NEXT: [[V_LCSSA:%.*]] = phi ptr [ [[V]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
120+
; CHECK-NEXT: ret ptr [[V_LCSSA]]
117121
;
118122
entry:
119123
br label %loop
@@ -126,7 +130,7 @@ loop:
126130
br i1 %b, label %exit, label %loop
127131

128132
exit:
129-
ret void
133+
ret ptr %v
130134
}
131135

132136
attributes #0 = { "target-cpu"="skylake" }

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -475,9 +475,8 @@ define void @test_first_order_recurrences_and_induction(ptr %ptr) {
475475
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
476476
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
477477
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]]
478-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
479-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2
480478
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10>
479+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
481480
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4
482481
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
483482
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
@@ -497,7 +496,6 @@ loop:
497496
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
498497
%iv.next = add nuw nsw i64 %iv, 1
499498
%gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %iv
500-
%for.1.next = load i64, ptr %gep.ptr, align 2
501499
%add.1 = add i64 %for.1, 10
502500
store i64 %add.1, ptr %gep.ptr
503501
%exitcond.not = icmp eq i64 %iv.next, 1000
@@ -518,9 +516,8 @@ define void @test_first_order_recurrences_and_induction2(ptr %ptr) {
518516
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
519517
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
520518
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]]
521-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
522-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2
523519
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10>
520+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
524521
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4
525522
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
526523
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
@@ -540,7 +537,6 @@ loop:
540537
%for.1 = phi i64 [ 22, %entry ], [ %iv, %loop ]
541538
%iv.next = add nuw nsw i64 %iv, 1
542539
%gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %iv
543-
%for.1.next = load i64, ptr %gep.ptr, align 2
544540
%add.1 = add i64 %for.1, 10
545541
store i64 %add.1, ptr %gep.ptr
546542
%exitcond.not = icmp eq i64 %iv.next, 1000

0 commit comments

Comments
 (0)