Skip to content

Commit 2c8de7c

Browse files
committed
!fixup generalize logic
1 parent 1574791 commit 2c8de7c

File tree

2 files changed

+45
-61
lines changed

2 files changed

+45
-61
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 30 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -3696,47 +3696,48 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I,
36963696
return false;
36973697

36983698
// Can we prove this instruction is safe to unconditionally execute?
3699-
// If not, we must use some form of predication.
3699+
if (I->getOpcode() == Instruction::Call)
3700+
return Legal->isMaskRequired(I);
3701+
3702+
if (isa<LoadInst, StoreInst>(I) && !Legal->isMaskRequired(I))
3703+
return false;
3704+
3705+
// TODO: We can use the loop-preheader as context point here and get
3706+
// context sensitive reasoning
3707+
if (isa<BranchInst, PHINode>(I) || isSafeToSpeculativelyExecute(I))
3708+
return false;
3709+
3710+
// If the instruction was executed conditionally in the original scalar loop,
3711+
// predication is needed.
3712+
if (Legal->blockNeedsPredication(I->getParent()))
3713+
return true;
3714+
3715+
// Tail folding may introduce additional predication, but we're guaranteed to
3716+
// always have at least one active lane. If the instruction in the original
3717+
// scalar loop was executed unconditionally, it may not need predication,
3718+
// depending on its operands.
37003719
switch(I->getOpcode()) {
37013720
default:
3702-
return false;
3721+
llvm_unreachable(
3722+
"instruction should have been considered to not require predication "
3723+
"by earlier checks");
37033724
case Instruction::Load:
3725+
// If the address is loop invariant no predication is needed.
3726+
return !Legal->isInvariant(getLoadStorePointerOperand(I));
37043727
case Instruction::Store: {
3705-
if (!Legal->isMaskRequired(I))
3706-
return false;
3707-
// When we know the load's address is loop invariant and the instruction
3708-
// in the original scalar loop was unconditionally executed then we
3709-
// don't need to mark it as a predicated instruction. Tail folding may
3710-
// introduce additional predication, but we're guaranteed to always have
3711-
// at least one active lane. We call Legal->blockNeedsPredication here
3712-
// because it doesn't query tail-folding. For stores, we need to prove
3728+
// For stores, we need to prove
37133729
// both speculation safety (which follows from the same argument as loads),
37143730
// but also must prove the value being stored is correct. The easiest
37153731
// form of the latter is to require that all values stored are the same.
3716-
if (Legal->isInvariant(getLoadStorePointerOperand(I)) &&
3717-
(isa<LoadInst>(I) ||
3718-
(isa<StoreInst>(I) &&
3719-
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
3720-
!Legal->blockNeedsPredication(I->getParent()))
3721-
return false;
3722-
return true;
3732+
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
3733+
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
37233734
}
37243735
case Instruction::UDiv:
37253736
case Instruction::SDiv:
37263737
case Instruction::SRem:
37273738
case Instruction::URem:
3728-
// When folding the tail, at least one of the lanes must execute
3729-
// unconditionally. If the divisor is loop-invariant no predication is
3730-
// needed, as predication would not prevent the divide-by-0 on the executed
3731-
// lane.
3732-
if (!Legal->blockNeedsPredication(I->getParent()) && TheLoop->isLoopInvariant(I->getOperand(1)))
3733-
return false;
3734-
3735-
// TODO: We can use the loop-preheader as context point here and get
3736-
// context sensitive reasoning
3737-
return !isSafeToSpeculativelyExecute(I);
3738-
case Instruction::Call:
3739-
return Legal->isMaskRequired(I);
3739+
// If the divisor is loop-invariant no predication is needed.
3740+
return !TheLoop->isLoopInvariant(I->getOperand(1));
37403741
}
37413742
}
37423743

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 15 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,9 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
6262
; CHECK-NEXT: pred.store.continue:
6363
; CHECK-NEXT: No successors
6464
; CHECK-NEXT: }
65-
; CHECK-NEXT: Successor(s): loop.2
65+
; CHECK-NEXT: Successor(s): loop.1
6666
; CHECK-EMPTY:
67-
; CHECK-NEXT: loop.2:
67+
; CHECK-NEXT: loop.1:
6868
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
6969
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
7070
; CHECK-NEXT: No successors
@@ -134,8 +134,8 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
134134
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
135135
; CHECK-EMPTY:
136136
; CHECK-NEXT: pred.store.if:
137-
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
138137
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
138+
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
139139
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
140140
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
141141
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
@@ -144,9 +144,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
144144
; CHECK-NEXT: pred.store.continue:
145145
; CHECK-NEXT: No successors
146146
; CHECK-NEXT: }
147-
; CHECK-NEXT: Successor(s): loop.1
147+
; CHECK-NEXT: Successor(s): loop.0
148148
; CHECK-EMPTY:
149-
; CHECK-NEXT: loop.1:
149+
; CHECK-NEXT: loop.0:
150150
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
151151
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
152152
; CHECK-NEXT: No successors
@@ -207,25 +207,8 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
207207
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]>, vp<[[BTC]]>
208208
; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
209209
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next>
210-
; CHECK-NEXT: Successor(s): pred.srem
211-
; CHECK-EMPTY:
212-
; CHECK-NEXT: <xVFxUF> pred.srem: {
213-
; CHECK-NEXT: pred.srem.entry:
214-
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
215-
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
216-
; CHECK-EMPTY:
217-
; CHECK-NEXT: pred.srem.if:
218-
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> (S->V)
219-
; CHECK-NEXT: Successor(s): pred.srem.continue
220-
; CHECK-EMPTY:
221-
; CHECK-NEXT: pred.srem.continue:
222-
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%rem>
223-
; CHECK-NEXT: No successors
224-
; CHECK-NEXT: }
225-
; CHECK-NEXT: Successor(s): loop.0
226-
; CHECK-EMPTY:
227-
; CHECK-NEXT: loop.0:
228-
; CHECK-NEXT: WIDEN ir<%add> = add vp<[[PRED]]>, ir<%recur.next>
210+
; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
211+
; CHECK-NEXT: WIDEN ir<%add> = add ir<%rem>, ir<%recur.next>
229212
; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
230213
; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]>, ir<%and.red.next>, ir<%and.red>
231214
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
@@ -320,8 +303,8 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
320303
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
321304
; CHECK-EMPTY:
322305
; CHECK: pred.store.if:
323-
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
324306
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
307+
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
325308
; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2>
326309
; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
327310
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
@@ -332,9 +315,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
332315
; CHECK: pred.store.continue:
333316
; CHECK-NEXT: No successors
334317
; CHECK-NEXT: }
335-
; CHECK-NEXT: Successor(s): loop.3
318+
; CHECK-NEXT: Successor(s): loop.2
336319
; CHECK-EMPTY:
337-
; CHECK: loop.3:
320+
; CHECK: loop.2:
338321
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
339322
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
340323
; CHECK-NEXT: No successors
@@ -424,9 +407,9 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
424407
; CHECK-NEXT: pred.store.continue:
425408
; CHECK-NEXT: No successors
426409
; CHECK-NEXT: }
427-
; CHECK-NEXT: Successor(s): loop.3
410+
; CHECK-NEXT: Successor(s): loop.2
428411
; CHECK-EMPTY:
429-
; CHECK-NEXT: loop.3:
412+
; CHECK-NEXT: loop.2:
430413
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
431414
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
432415
; CHECK-NEXT: No successors
@@ -497,18 +480,18 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias
497480
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
498481
; CHECK-EMPTY:
499482
; CHECK-NEXT: pred.store.if:
500-
; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x>
501483
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
502484
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]>
485+
; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x>
503486
; CHECK-NEXT: REPLICATE store ir<%val>, ir<%gep.dst>
504487
; CHECK-NEXT: Successor(s): pred.store.continue
505488
; CHECK-EMPTY:
506489
; CHECK-NEXT: pred.store.continue:
507490
; CHECK-NEXT: No successors
508491
; CHECK-NEXT: }
509-
; CHECK-NEXT: Successor(s): loop.1
492+
; CHECK-NEXT: Successor(s): loop.0
510493
; CHECK-EMPTY:
511-
; CHECK-NEXT: loop.1:
494+
; CHECK-NEXT: loop.0:
512495
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]>
513496
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
514497
; CHECK-NEXT: No successors

0 commit comments

Comments
 (0)