Skip to content

Commit f324d4e

Browse files
author
Valery Dmitriev
committed
[SLP] Improve gather tree nodes matching when users are PHIs.
This is a re-commit of llvm#69392 that also fixes issue llvm#69670, which was uncovered by the prior commit. For delayed gather emission it may be incorrect to use the stub instruction as the insertion point if it is a PHI operand. In that case the insertion point is adjusted to the end of the block, ensuring that the vector code it depends on is emitted earlier.
1 parent 260dbb4 commit f324d4e

File tree

3 files changed

+20
-8
lines changed

3 files changed

+20
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9059,6 +9059,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
90599059
// blocks.
90609060
if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp())) {
90619061
TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
9062+
TEInsertPt = TEInsertBlock->getTerminator();
90629063
} else {
90639064
TEInsertBlock = TEInsertPt->getParent();
90649065
}
@@ -9122,9 +9123,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
91229123
const Instruction *InsertPt =
91239124
UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
91249125
: &getLastInstructionInBundle(UseEI.UserTE);
9125-
if (!UserPHI && TEInsertPt == InsertPt) {
9126-
// If 2 gathers are operands of the same non-PHI entry,
9127-
// compare operands indices, use the earlier one as the base.
9126+
if (TEInsertPt == InsertPt) {
9127+
// If 2 gathers are operands of the same entry (regardless of whether
9128+
// user is PHI or else), compare operands indices, use the earlier one
9129+
// as the base.
91289130
if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx)
91299131
continue;
91309132
// If the user instruction is used for some reason in different
@@ -11250,7 +11252,18 @@ Value *BoUpSLP::vectorizeTree(
1125011252
TE->VectorizedValue = nullptr;
1125111253
auto *UserI =
1125211254
cast<Instruction>(TE->UserTreeIndices.front().UserTE->VectorizedValue);
11253-
Builder.SetInsertPoint(PrevVec);
11255+
// If user is a PHI node, its vector code has to be inserted right before
11256+
// block terminator. Since the node was delayed, there were some unresolved
11257+
// dependencies at the moment when the stub instruction was emitted. In a case
11258+
// when any of these dependencies turns out to be an operand of another PHI, coming
11259+
// from this same block, the position of the stub instruction will become invalid.
11260+
// This is because the source vector that is supposed to feed this gather node was
11261+
// inserted at the end of the block [after stab instruction]. So we need
11262+
// to adjust insertion point again to the end of block.
11263+
if (isa<PHINode>(UserI))
11264+
Builder.SetInsertPoint(PrevVec->getParent()->getTerminator());
11265+
else
11266+
Builder.SetInsertPoint(PrevVec);
1125411267
Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
1125511268
Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false);
1125611269
PrevVec->replaceAllUsesWith(Vec);

llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define void @test() {
1515
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[DOTPRE_PRE]], i32 0
1616
; CHECK-NEXT: br label [[BB1:%.*]]
1717
; CHECK: bb1:
18-
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[BB2:%.*]] ]
18+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[BB2:%.*]] ]
1919
; CHECK-NEXT: br label [[BB2]]
2020
; CHECK: bb2:
2121
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP9:%.*]], [[BB2]] ]
@@ -29,9 +29,8 @@ define void @test() {
2929
; CHECK-NEXT: tail call void @foo(float [[MUL]])
3030
; CHECK-NEXT: [[I2:%.*]] = load float, ptr poison, align 4
3131
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
32-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[I2]], i32 0
32+
; CHECK-NEXT: [[TMP8]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
3333
; CHECK-NEXT: [[TMP9]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
34-
; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
3534
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
3635
;
3736
entry:

llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; YAML: Function: test
99
; YAML: Args:
1010
; YAML: - String: 'Stores SLP vectorized with cost '
11-
; YAML: - Cost: '-3'
11+
; YAML: - Cost: '-6'
1212
; YAML: - String: ' and with tree size '
1313
; YAML: - TreeSize: '14'
1414
; YAML: ...

0 commit comments

Comments
 (0)