Skip to content

Commit f74879c

Browse files
[SLP] Make PHICompare comparator follow strict weak ordering requirement
Reviewers: efriedma-quic; Reviewed By: efriedma-quic; Pull Request: #110529
1 parent c0dfef8 commit f74879c

File tree

3 files changed

+85
-23
lines changed

3 files changed

+85
-23
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5448,6 +5448,33 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
54485448
if (!TE.ReorderIndices.empty())
54495449
return TE.ReorderIndices;
54505450

5451+
SmallVector<Instruction *> UserBVHead(TE.Scalars.size());
5452+
for (auto [I, V] : zip(UserBVHead, TE.Scalars)) {
5453+
if (!V->hasNUsesOrMore(1))
5454+
continue;
5455+
auto *II = dyn_cast<InsertElementInst>(*V->user_begin());
5456+
if (!II)
5457+
continue;
5458+
Instruction *BVHead = nullptr;
5459+
BasicBlock *BB = II->getParent();
5460+
while (II && II->hasOneUse() && II->getParent() == BB) {
5461+
BVHead = II;
5462+
II = dyn_cast<InsertElementInst>(II->getOperand(0));
5463+
}
5464+
I = BVHead;
5465+
}
5466+
5467+
auto CompareByBasicBlocks = [&](BasicBlock *BB1, BasicBlock *BB2) {
5468+
assert(BB1 != BB2 && "Expected different basic blocks.");
5469+
auto *NodeA = DT->getNode(BB1);
5470+
auto *NodeB = DT->getNode(BB2);
5471+
assert(NodeA && "Should only process reachable instructions");
5472+
assert(NodeB && "Should only process reachable instructions");
5473+
assert((NodeA == NodeB) ==
5474+
(NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
5475+
"Different nodes should have different DFS numbers");
5476+
return NodeA->getDFSNumIn() < NodeB->getDFSNumIn();
5477+
};
54515478
auto PHICompare = [&](unsigned I1, unsigned I2) {
54525479
Value *V1 = TE.Scalars[I1];
54535480
Value *V2 = TE.Scalars[I2];
@@ -5459,21 +5486,56 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
54595486
return false;
54605487
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
54615488
auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
5462-
if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
5463-
if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
5464-
if (!areTwoInsertFromSameBuildVector(
5465-
IE1, IE2,
5466-
[](InsertElementInst *II) { return II->getOperand(0); }))
5467-
return I1 < I2;
5489+
if (FirstUserOfPhi1->getParent() != FirstUserOfPhi2->getParent())
5490+
return CompareByBasicBlocks(FirstUserOfPhi1->getParent(),
5491+
FirstUserOfPhi2->getParent());
5492+
auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1);
5493+
auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2);
5494+
auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1);
5495+
auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2);
5496+
if (IE1 && !IE2)
5497+
return true;
5498+
if (!IE1 && IE2)
5499+
return false;
5500+
if (IE1 && IE2) {
5501+
if (UserBVHead[I1] && !UserBVHead[I2])
5502+
return true;
5503+
if (!UserBVHead[I1])
5504+
return false;
5505+
if (UserBVHead[I1] == UserBVHead[I2])
54685506
return getElementIndex(IE1) < getElementIndex(IE2);
5469-
}
5470-
if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
5471-
if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
5472-
if (EE1->getOperand(0) != EE2->getOperand(0))
5473-
return I1 < I2;
5507+
if (UserBVHead[I1]->getParent() != UserBVHead[I2]->getParent())
5508+
return CompareByBasicBlocks(UserBVHead[I1]->getParent(),
5509+
UserBVHead[I2]->getParent());
5510+
return UserBVHead[I1]->comesBefore(UserBVHead[I2]);
5511+
}
5512+
if (EE1 && !EE2)
5513+
return true;
5514+
if (!EE1 && EE2)
5515+
return false;
5516+
if (EE1 && EE2) {
5517+
auto *Inst1 = dyn_cast<Instruction>(EE1->getOperand(0));
5518+
auto *Inst2 = dyn_cast<Instruction>(EE2->getOperand(0));
5519+
auto *P1 = dyn_cast<Argument>(EE1->getOperand(0));
5520+
auto *P2 = dyn_cast<Argument>(EE2->getOperand(0));
5521+
if (!Inst2 && !P2)
5522+
return Inst1 || P1;
5523+
if (EE1->getOperand(0) == EE2->getOperand(0))
54745524
return getElementIndex(EE1) < getElementIndex(EE2);
5525+
if (!Inst1 && Inst2)
5526+
return false;
5527+
if (Inst1 && Inst2) {
5528+
if (Inst1->getParent() != Inst2->getParent())
5529+
return CompareByBasicBlocks(Inst1->getParent(), Inst2->getParent());
5530+
return Inst1->comesBefore(Inst2);
54755531
}
5476-
return I1 < I2;
5532+
if (!P1 && P2)
5533+
return false;
5534+
assert(P1 && P2 &&
5535+
"Expected either instructions or arguments vector operands.");
5536+
return P1->getArgNo() < P2->getArgNo();
5537+
}
5538+
return false;
54775539
};
54785540
SmallDenseMap<unsigned, unsigned, 16> PhiToId;
54795541
SmallVector<unsigned> Phis(TE.Scalars.size());

llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,40 +33,40 @@ define void @test() {
3333
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6
3434
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7
3535
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13
36-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14
37-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15
36+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I67]], i32 14
37+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
3838
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
3939
; CHECK: [[BB77]]:
40-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
40+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
4141
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
4242
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
4343
; CHECK-NEXT: br label %[[BB78:.*]]
4444
; CHECK: [[BB78]]:
4545
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
4646
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
47-
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 7, i32 2, i32 3, i32 0, i32 6, i32 7, i32 7>
47+
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
4848
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
49-
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 7, i32 6, i32 6>
49+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 6, i32 7, i32 7>
5050
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
5151
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
5252
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53-
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 22, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
53+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 23, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5454
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
5555
; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
5656
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
5757
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
5858
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
5959
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
60-
; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
60+
; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
6161
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
6262
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
6363
; CHECK: [[BB167]]:
6464
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
65-
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
65+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
6666
; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1
6767
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13
6868
; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1
69-
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
69+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
7070
; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]]
7171
; CHECK: [[BB184]]:
7272
; CHECK-NEXT: br label %[[BB185:.*]]

llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ define i1 @test() {
1010
; CHECK-NEXT: br label [[ELSE]]
1111
; CHECK: else:
1212
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
13-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
13+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
1414
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0
15-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
15+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 2, i32 1>
1616
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer
1717
; CHECK-NEXT: br label [[ELSE1:%.*]]
1818
; CHECK: else1:

0 commit comments

Comments
 (0)