Skip to content

Commit 8cc0214

Browse files
committed
[LV] Strengthen loop-invariance checks in isPredicatedInst
Check loop-invariance against SCEV as well.
1 parent 3820ddd commit 8cc0214

File tree

4 files changed

+13
-207
lines changed

4 files changed

+13
-207
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2764,8 +2764,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
27642764

27652765
bool LoopAccessInfo::isInvariant(Value *V) const {
27662766
auto *SE = PSE->getSE();
2767-
// TODO: Is this really what we want? Even without FP SCEV, we may want some
2768-
// trivially loop-invariant FP values to be considered invariant.
2767+
if (TheLoop->isLoopInvariant(V))
2768+
return true;
27692769
if (!SE->isSCEVable(V->getType()))
27702770
return false;
27712771
const SCEV *S = SE->getSCEV(V);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3107,14 +3107,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
31073107
// is correct. The easiest form of the latter is to require that all values
31083108
// stored are the same.
31093109
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
3110-
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
3110+
Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
31113111
}
31123112
case Instruction::UDiv:
31133113
case Instruction::SDiv:
31143114
case Instruction::SRem:
31153115
case Instruction::URem:
31163116
// If the divisor is loop-invariant no predication is needed.
3117-
return !TheLoop->isLoopInvariant(I->getOperand(1));
3117+
return !Legal->isInvariant(I->getOperand(1));
31183118
}
31193119
}
31203120

Lines changed: 3 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 4
22
; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
33

44
define void @test(ptr %p, i64 %a, i8 %b) {
@@ -17,126 +17,16 @@ define void @test(ptr %p, i64 %a, i8 %b) {
1717
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
1818
; CHECK-NEXT: br label [[FOR_COND:%.*]]
1919
; CHECK: vector.body:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
20+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND]] ]
21+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND]] ]
2222
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
2323
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
2424
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
2525
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
2626
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
2727
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
28-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
29-
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
30-
; CHECK: pred.store.if:
31-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP8]], i32 0
32-
; CHECK-NEXT: store i8 [[TMP19]], ptr [[P]], align 1
33-
; CHECK-NEXT: br label [[VECTOR_BODY]]
34-
; CHECK: pred.store.continue:
35-
; CHECK-NEXT: [[CMP_N:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
36-
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
37-
; CHECK: pred.store.if3:
38-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP8]], i32 1
39-
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
40-
; CHECK-NEXT: br label [[SCALAR_PH]]
41-
; CHECK: pred.store.continue4:
42-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
43-
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
44-
; CHECK: pred.store.if5:
45-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP8]], i32 2
46-
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
47-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
48-
; CHECK: pred.store.continue6:
49-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
50-
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
51-
; CHECK: pred.store.if7:
52-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP8]], i32 3
53-
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
54-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]]
55-
; CHECK: pred.store.continue8:
56-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
57-
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
58-
; CHECK: pred.store.if9:
59-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP8]], i32 4
60-
; CHECK-NEXT: store i8 [[TMP18]], ptr [[P]], align 1
61-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
62-
; CHECK: pred.store.continue10:
63-
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
64-
; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
65-
; CHECK: pred.store.if11:
66-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP8]], i32 5
67-
; CHECK-NEXT: store i8 [[TMP20]], ptr [[P]], align 1
68-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
69-
; CHECK: pred.store.continue12:
70-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
71-
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
72-
; CHECK: pred.store.if13:
73-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP8]], i32 6
74-
; CHECK-NEXT: store i8 [[TMP22]], ptr [[P]], align 1
75-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
76-
; CHECK: pred.store.continue14:
77-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
78-
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
79-
; CHECK: pred.store.if15:
80-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP8]], i32 7
81-
; CHECK-NEXT: store i8 [[TMP24]], ptr [[P]], align 1
82-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
83-
; CHECK: pred.store.continue16:
84-
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
85-
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
86-
; CHECK: pred.store.if17:
87-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP8]], i32 8
88-
; CHECK-NEXT: store i8 [[TMP26]], ptr [[P]], align 1
89-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
90-
; CHECK: pred.store.continue18:
91-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
92-
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
93-
; CHECK: pred.store.if19:
94-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP8]], i32 9
95-
; CHECK-NEXT: store i8 [[TMP28]], ptr [[P]], align 1
96-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
97-
; CHECK: pred.store.continue20:
98-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
99-
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
100-
; CHECK: pred.store.if21:
101-
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP8]], i32 10
102-
; CHECK-NEXT: store i8 [[TMP30]], ptr [[P]], align 1
103-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
104-
; CHECK: pred.store.continue22:
105-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
106-
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
107-
; CHECK: pred.store.if23:
108-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP8]], i32 11
109-
; CHECK-NEXT: store i8 [[TMP32]], ptr [[P]], align 1
110-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
111-
; CHECK: pred.store.continue24:
112-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
113-
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
114-
; CHECK: pred.store.if25:
115-
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP8]], i32 12
116-
; CHECK-NEXT: store i8 [[TMP34]], ptr [[P]], align 1
117-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
118-
; CHECK: pred.store.continue26:
119-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
120-
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
121-
; CHECK: pred.store.if27:
122-
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP8]], i32 13
123-
; CHECK-NEXT: store i8 [[TMP36]], ptr [[P]], align 1
124-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
125-
; CHECK: pred.store.continue28:
126-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
127-
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
128-
; CHECK: pred.store.if29:
129-
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP8]], i32 14
130-
; CHECK-NEXT: store i8 [[TMP38]], ptr [[P]], align 1
131-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
132-
; CHECK: pred.store.continue30:
133-
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
134-
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE8]]
135-
; CHECK: pred.store.if31:
13628
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
13729
; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
138-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
139-
; CHECK: pred.store.continue32:
14030
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
14131
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
14232
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -193,9 +83,3 @@ for.body: ; preds = %cond.false, %for.co
19383
exit: ; preds = %for.body
19484
ret void
19585
}
196-
;.
197-
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
198-
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
199-
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
200-
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
201-
;.

0 commit comments

Comments
 (0)