Skip to content

Commit 42b9e34

Browse files
committed
[VPlan] Remove ILV::sinkScalarOperands.
Remove the legacy ILV sinkScalarOperands, which is superseded by the sinkScalarOperands VPlan transforms. There are a few cases that aren't handled by VPlan's sinkScalarOperands because the corresponding recipes don't support replication: pointer inductions and blends. We could probably improve this further by allowing replication for more recipes, but I don't think the extra complexity is warranted. Depends on #136021.
1 parent 68c1da5 commit 42b9e34

File tree

6 files changed

+63
-149
lines changed

6 files changed

+63
-149
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -541,10 +541,6 @@ class InnerLoopVectorizer {
541541
protected:
542542
friend class LoopVectorizationPlanner;
543543

544-
/// Iteratively sink the scalarized operands of a predicated instruction into
545-
/// the block that was created for it.
546-
void sinkScalarOperands(Instruction *PredInst);
547-
548544
/// Returns (and creates if needed) the trip count of the widened loop.
549545
Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
550546

@@ -629,9 +625,6 @@ class InnerLoopVectorizer {
629625
/// A list of all bypass blocks. The first block is the entry of the loop.
630626
SmallVector<BasicBlock *, 4> LoopBypassBlocks;
631627

632-
/// Store instructions that were predicated.
633-
SmallVector<Instruction *, 4> PredicatedInstructions;
634-
635628
/// Trip count of the original loop.
636629
Value *TripCount = nullptr;
637630

@@ -2385,15 +2378,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23852378

23862379
// End if-block.
23872380
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
2388-
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
23892381
assert(
23902382
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
23912383
all_of(RepRecipe->operands(),
23922384
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
23932385
"Expected a recipe is either within a region or all of its operands "
23942386
"are defined outside the vectorized region.");
2395-
if (IfPredicateInstr)
2396-
PredicatedInstructions.push_back(Cloned);
23972387
}
23982388

23992389
Value *
@@ -2867,9 +2857,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28672857
if (!State.Plan->getVectorLoopRegion())
28682858
return;
28692859

2870-
for (Instruction *PI : PredicatedInstructions)
2871-
sinkScalarOperands(&*PI);
2872-
28732860
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
28742861
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
28752862
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
@@ -2895,82 +2882,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28952882
VF.getKnownMinValue() * UF);
28962883
}
28972884

2898-
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
2899-
// The basic block and loop containing the predicated instruction.
2900-
auto *PredBB = PredInst->getParent();
2901-
auto *VectorLoop = LI->getLoopFor(PredBB);
2902-
2903-
// Initialize a worklist with the operands of the predicated instruction.
2904-
SetVector<Value *> Worklist(PredInst->op_begin(), PredInst->op_end());
2905-
2906-
// Holds instructions that we need to analyze again. An instruction may be
2907-
// reanalyzed if we don't yet know if we can sink it or not.
2908-
SmallVector<Instruction *, 8> InstsToReanalyze;
2909-
2910-
// Returns true if a given use occurs in the predicated block. Phi nodes use
2911-
// their operands in their corresponding predecessor blocks.
2912-
auto IsBlockOfUsePredicated = [&](Use &U) -> bool {
2913-
auto *I = cast<Instruction>(U.getUser());
2914-
BasicBlock *BB = I->getParent();
2915-
if (auto *Phi = dyn_cast<PHINode>(I))
2916-
BB = Phi->getIncomingBlock(
2917-
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
2918-
return BB == PredBB;
2919-
};
2920-
2921-
// Iteratively sink the scalarized operands of the predicated instruction
2922-
// into the block we created for it. When an instruction is sunk, it's
2923-
// operands are then added to the worklist. The algorithm ends after one pass
2924-
// through the worklist doesn't sink a single instruction.
2925-
bool Changed;
2926-
do {
2927-
// Add the instructions that need to be reanalyzed to the worklist, and
2928-
// reset the changed indicator.
2929-
Worklist.insert_range(InstsToReanalyze);
2930-
InstsToReanalyze.clear();
2931-
Changed = false;
2932-
2933-
while (!Worklist.empty()) {
2934-
auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
2935-
2936-
// We can't sink an instruction if it is a phi node, is not in the loop,
2937-
// may have side effects or may read from memory.
2938-
// TODO: Could do more granular checking to allow sinking
2939-
// a load past non-store instructions.
2940-
if (!I || isa<PHINode>(I) || !VectorLoop->contains(I) ||
2941-
I->mayHaveSideEffects() || I->mayReadFromMemory())
2942-
continue;
2943-
2944-
// If the instruction is already in PredBB, check if we can sink its
2945-
// operands. In that case, VPlan's sinkScalarOperands() succeeded in
2946-
// sinking the scalar instruction I, hence it appears in PredBB; but it
2947-
// may have failed to sink I's operands (recursively), which we try
2948-
// (again) here.
2949-
if (I->getParent() == PredBB) {
2950-
Worklist.insert_range(I->operands());
2951-
continue;
2952-
}
2953-
2954-
// It's legal to sink the instruction if all its uses occur in the
2955-
// predicated block. Otherwise, there's nothing to do yet, and we may
2956-
// need to reanalyze the instruction.
2957-
if (!llvm::all_of(I->uses(), IsBlockOfUsePredicated)) {
2958-
InstsToReanalyze.push_back(I);
2959-
continue;
2960-
}
2961-
2962-
// Move the instruction to the beginning of the predicated block, and add
2963-
// it's operands to the worklist.
2964-
I->moveBefore(PredBB->getFirstInsertionPt());
2965-
Worklist.insert_range(I->operands());
2966-
2967-
// The sinking may have enabled other instructions to be sunk, so we will
2968-
// need to iterate.
2969-
Changed = true;
2970-
}
2971-
} while (Changed);
2972-
}
2973-
29742885
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
29752886
auto Iter = vp_depth_first_deep(Plan.getEntry());
29762887
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -996,28 +996,31 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
996996
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
997997
; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
998998
; TFA_INTERLEAVE: [[VECTOR_BODY]]:
999-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
1000-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE5]] ]
1001-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[PRED_STORE_CONTINUE5]] ]
999+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[TMP19:.*]] ]
1000+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ]
1001+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ]
10021002
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
1003-
; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1004-
; TFA_INTERLEAVE: [[PRED_STORE_IF]]:
10051003
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
1006-
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
1007-
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true
1008-
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00
1009-
; TFA_INTERLEAVE-NEXT: store double [[TMP24]], ptr [[P]], align 8
1010-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]]
1011-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]:
1012-
; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
1013-
; TFA_INTERLEAVE: [[PRED_STORE_IF4]]:
10141004
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
1005+
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
10151006
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fcmp ogt double [[TMP8]], 0.000000e+00
1016-
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true
1007+
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = xor i1 [[TMP6]], true
1008+
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor i1 [[TMP9]], true
1009+
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP18]], i1 false
1010+
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP20]], i1 false
10171011
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00
1018-
; TFA_INTERLEAVE-NEXT: store double [[TMP26]], ptr [[P]], align 8
1019-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]]
1020-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]:
1012+
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP21]], double 1.000000e+00, double 0.000000e+00
1013+
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[TMP26]]
1014+
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor i1 [[ACTIVE_LANE_MASK]], true
1015+
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor i1 [[ACTIVE_LANE_MASK2]], true
1016+
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true
1017+
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true
1018+
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
1019+
; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]]
1020+
; TFA_INTERLEAVE: [[BB18]]:
1021+
; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
1022+
; TFA_INTERLEAVE-NEXT: br label %[[TMP19]]
1023+
; TFA_INTERLEAVE: [[TMP19]]:
10211024
; TFA_INTERLEAVE-NEXT: [[TMP27]] = add i64 [[INDEX]], 2
10221025
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
10231026
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]

llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
88
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
99
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1010
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
11-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP1]]
11+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
12+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
1213
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
1314
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
1415
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
1516
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1617
; CHECK: pred.load.if:
17-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
1818
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
1919
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
2020
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]

0 commit comments

Comments
 (0)