Skip to content

Commit f330bc2

Browse files
committed
[LV] Strengthen loop-invariance checks in isPredicatedInst
Check loop-invariance against SCEV as well.
1 parent 4959a34 commit f330bc2

File tree

4 files changed

+12
-200
lines changed

4 files changed

+12
-200
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2806,8 +2806,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
28062806

28072807
bool LoopAccessInfo::isInvariant(Value *V) const {
28082808
auto *SE = PSE->getSE();
2809-
// TODO: Is this really what we want? Even without FP SCEV, we may want some
2810-
// trivially loop-invariant FP values to be considered invariant.
2809+
if (TheLoop->isLoopInvariant(V))
2810+
return true;
28112811
if (!SE->isSCEVable(V->getType()))
28122812
return false;
28132813
const SCEV *S = SE->getSCEV(V);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3102,14 +3102,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
31023102
// is correct. The easiest form of the latter is to require that all values
31033103
// stored are the same.
31043104
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
3105-
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
3105+
Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
31063106
}
31073107
case Instruction::UDiv:
31083108
case Instruction::SDiv:
31093109
case Instruction::SRem:
31103110
case Instruction::URem:
31113111
// If the divisor is loop-invariant no predication is needed.
3112-
return !TheLoop->isLoopInvariant(I->getOperand(1));
3112+
return !Legal->isInvariant(I->getOperand(1));
31133113
}
31143114
}
31153115

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 2 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -17,126 +17,16 @@ define void @test(ptr %p, i64 %a, i8 %b) {
1717
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
1818
; CHECK-NEXT: br label [[FOR_COND:%.*]]
1919
; CHECK: vector.body:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
20+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
2222
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
2323
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
2424
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
2525
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
2626
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
2727
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
28-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
29-
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
30-
; CHECK: pred.store.if:
31-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP8]], i32 0
32-
; CHECK-NEXT: store i8 [[TMP19]], ptr [[P]], align 1
33-
; CHECK-NEXT: br label [[VECTOR_BODY]]
34-
; CHECK: pred.store.continue:
35-
; CHECK-NEXT: [[CMP_N:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
36-
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
37-
; CHECK: pred.store.if3:
38-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP8]], i32 1
39-
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
40-
; CHECK-NEXT: br label [[SCALAR_PH]]
41-
; CHECK: pred.store.continue4:
42-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
43-
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
44-
; CHECK: pred.store.if5:
45-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP8]], i32 2
46-
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
47-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
48-
; CHECK: pred.store.continue6:
49-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
50-
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
51-
; CHECK: pred.store.if7:
52-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP8]], i32 3
53-
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
54-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]]
55-
; CHECK: pred.store.continue8:
56-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
57-
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
58-
; CHECK: pred.store.if9:
59-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP8]], i32 4
60-
; CHECK-NEXT: store i8 [[TMP18]], ptr [[P]], align 1
61-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
62-
; CHECK: pred.store.continue10:
63-
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
64-
; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
65-
; CHECK: pred.store.if11:
66-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP8]], i32 5
67-
; CHECK-NEXT: store i8 [[TMP20]], ptr [[P]], align 1
68-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
69-
; CHECK: pred.store.continue12:
70-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
71-
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
72-
; CHECK: pred.store.if13:
73-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP8]], i32 6
74-
; CHECK-NEXT: store i8 [[TMP22]], ptr [[P]], align 1
75-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
76-
; CHECK: pred.store.continue14:
77-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
78-
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
79-
; CHECK: pred.store.if15:
80-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP8]], i32 7
81-
; CHECK-NEXT: store i8 [[TMP24]], ptr [[P]], align 1
82-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
83-
; CHECK: pred.store.continue16:
84-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
85-
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
86-
; CHECK: pred.store.if17:
87-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP8]], i32 8
88-
; CHECK-NEXT: store i8 [[TMP26]], ptr [[P]], align 1
89-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
90-
; CHECK: pred.store.continue18:
91-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
92-
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
93-
; CHECK: pred.store.if19:
94-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP8]], i32 9
95-
; CHECK-NEXT: store i8 [[TMP28]], ptr [[P]], align 1
96-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
97-
; CHECK: pred.store.continue20:
98-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
99-
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
100-
; CHECK: pred.store.if21:
101-
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP8]], i32 10
102-
; CHECK-NEXT: store i8 [[TMP30]], ptr [[P]], align 1
103-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
104-
; CHECK: pred.store.continue22:
105-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
106-
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
107-
; CHECK: pred.store.if23:
108-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP8]], i32 11
109-
; CHECK-NEXT: store i8 [[TMP32]], ptr [[P]], align 1
110-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
111-
; CHECK: pred.store.continue24:
112-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
113-
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
114-
; CHECK: pred.store.if25:
115-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP8]], i32 12
116-
; CHECK-NEXT: store i8 [[TMP34]], ptr [[P]], align 1
117-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
118-
; CHECK: pred.store.continue26:
119-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
120-
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
121-
; CHECK: pred.store.if27:
122-
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP8]], i32 13
123-
; CHECK-NEXT: store i8 [[TMP36]], ptr [[P]], align 1
124-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
125-
; CHECK: pred.store.continue28:
126-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
127-
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
128-
; CHECK: pred.store.if29:
129-
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP8]], i32 14
130-
; CHECK-NEXT: store i8 [[TMP38]], ptr [[P]], align 1
131-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
132-
; CHECK: pred.store.continue30:
133-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
134-
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE8]]
135-
; CHECK: pred.store.if31:
13628
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
13729
; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
138-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
139-
; CHECK: pred.store.continue32:
14030
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
14131
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
14232
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll

Lines changed: 6 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -17,42 +17,16 @@ define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) {
1717
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
1818
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1919
; CHECK: [[VECTOR_BODY]]:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE8:.*]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE8]] ]
20+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
2222
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
2323
; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2)
2424
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
2525
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
2626
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
2727
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
28-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
29-
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
30-
; CHECK: [[PRED_STORE_IF]]:
31-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
32-
; CHECK-NEXT: store i8 [[TMP17]], ptr [[P]], align 1
33-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
34-
; CHECK: [[PRED_STORE_CONTINUE]]:
35-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
36-
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
37-
; CHECK: [[PRED_STORE_IF3]]:
38-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
39-
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
40-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
41-
; CHECK: [[PRED_STORE_CONTINUE4]]:
42-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
43-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
44-
; CHECK: [[PRED_STORE_IF5]]:
45-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
46-
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
47-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
48-
; CHECK: [[PRED_STORE_CONTINUE6]]:
49-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
50-
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8]]
51-
; CHECK: [[PRED_STORE_IF7]]:
5228
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
5329
; CHECK-NEXT: store i8 [[TMP9]], ptr [[P]], align 1
54-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
55-
; CHECK: [[PRED_STORE_CONTINUE8]]:
5630
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5731
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
5832
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
@@ -127,43 +101,17 @@ define void @loop_invariant_udiv(ptr %p, i64 %a, i8 %b) {
127101
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
128102
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
129103
; CHECK: [[VECTOR_BODY]]:
130-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE8:.*]] ]
131-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE8]] ]
104+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
105+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
132106
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
133107
; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2)
134108
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
135109
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
136110
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
137111
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
138112
; CHECK-NEXT: [[TMP9:%.*]] = udiv <4 x i8> [[TMP8]], [[BROADCAST_SPLAT]]
139-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
140-
; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
141-
; CHECK: [[PRED_STORE_IF]]:
142-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i8> [[TMP9]], i32 0
143-
; CHECK-NEXT: store i8 [[TMP18]], ptr [[P]], align 1
144-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
145-
; CHECK: [[PRED_STORE_CONTINUE]]:
146-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
147-
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
148-
; CHECK: [[PRED_STORE_IF3]]:
149-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i8> [[TMP9]], i32 1
150-
; CHECK-NEXT: store i8 [[TMP13]], ptr [[P]], align 1
151-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
152-
; CHECK: [[PRED_STORE_CONTINUE4]]:
153-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
154-
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
155-
; CHECK: [[PRED_STORE_IF5]]:
156-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i8> [[TMP9]], i32 2
157-
; CHECK-NEXT: store i8 [[TMP15]], ptr [[P]], align 1
158-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
159-
; CHECK: [[PRED_STORE_CONTINUE6]]:
160-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
161-
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8]]
162-
; CHECK: [[PRED_STORE_IF7]]:
163113
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP9]], i32 3
164114
; CHECK-NEXT: store i8 [[TMP10]], ptr [[P]], align 1
165-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
166-
; CHECK: [[PRED_STORE_CONTINUE8]]:
167115
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
168116
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
169117
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
@@ -240,43 +188,17 @@ define void @loop_invariant_float_store(ptr noalias %p, ptr noalias %q, i64 %a,
240188
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32>
241189
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
242190
; CHECK: [[VECTOR_BODY]]:
243-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE8:.*]] ]
244-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE8]] ]
191+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
192+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
245193
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 8)
246194
; CHECK-NEXT: store float 2.000000e+00, ptr [[Q]], align 4
247195
; CHECK-NEXT: [[TMP5:%.*]] = icmp sge <4 x i32> [[VEC_IND]], splat (i32 2)
248196
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
249197
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
250198
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
251199
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
252-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
253-
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
254-
; CHECK: [[PRED_STORE_IF]]:
255-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
256-
; CHECK-NEXT: store i8 [[TMP17]], ptr [[P]], align 1
257-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
258-
; CHECK: [[PRED_STORE_CONTINUE]]:
259-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
260-
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
261-
; CHECK: [[PRED_STORE_IF3]]:
262-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
263-
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
264-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
265-
; CHECK: [[PRED_STORE_CONTINUE4]]:
266-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
267-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
268-
; CHECK: [[PRED_STORE_IF5]]:
269-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
270-
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
271-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
272-
; CHECK: [[PRED_STORE_CONTINUE6]]:
273-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
274-
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8]]
275-
; CHECK: [[PRED_STORE_IF7]]:
276200
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
277201
; CHECK-NEXT: store i8 [[TMP9]], ptr [[P]], align 1
278-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
279-
; CHECK: [[PRED_STORE_CONTINUE8]]:
280202
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
281203
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
282204
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12

0 commit comments

Comments
 (0)