-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP]Make PHICompare comparator follow weak strict ordering requirement #110529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP]Make PHICompare comparator follow weak strict ordering requirement #110529
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-llvm-transforms
Author: Alexey Bataev (alexey-bataev)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/110529.diff
3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e45fcb2b5c790c..893fef4095b27c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5443,6 +5443,22 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
+ SmallVector<Instruction *> UserBVHead(TE.Scalars.size());
+ for (auto [I, V] : zip(UserBVHead, TE.Scalars)) {
+ if (!V->hasNUsesOrMore(1))
+ continue;
+ auto *II = dyn_cast<InsertElementInst>(*V->user_begin());
+ if (!II)
+ continue;
+ Instruction *BVHead = nullptr;
+ BasicBlock *BB = II->getParent();
+ while (II && II->hasOneUse() && II->getParent() == BB) {
+ BVHead = II;
+ II = dyn_cast<InsertElementInst>(II->getOperand(0));
+ }
+ I = BVHead;
+ }
+
auto PHICompare = [&](unsigned I1, unsigned I2) {
Value *V1 = TE.Scalars[I1];
Value *V2 = TE.Scalars[I2];
@@ -5454,21 +5470,60 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
return false;
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
- if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
- if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
- if (!areTwoInsertFromSameBuildVector(
- IE1, IE2,
- [](InsertElementInst *II) { return II->getOperand(0); }))
- return I1 < I2;
+ if (FirstUserOfPhi1->getParent() != FirstUserOfPhi2->getParent())
+ return DT->dominates(FirstUserOfPhi1->getParent(),
+ FirstUserOfPhi2->getParent());
+ auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1);
+ auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2);
+ auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1);
+ auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2);
+ if (IE1 && !IE2)
+ return true;
+ if (!IE1 && IE2)
+ return false;
+ if (IE1 && IE2) {
+ if (UserBVHead[I1] && !UserBVHead[I2])
+ return true;
+ if (!UserBVHead[I1])
+ return false;
+ if (UserBVHead[I1] == UserBVHead[I2])
return getElementIndex(IE1) < getElementIndex(IE2);
- }
- if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
- if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
- if (EE1->getOperand(0) != EE2->getOperand(0))
- return I1 < I2;
+ if (UserBVHead[I1]->getParent() != UserBVHead[I2]->getParent())
+ return DT->dominates(UserBVHead[I1]->getParent(),
+ UserBVHead[I2]->getParent());
+ return UserBVHead[I1]->comesBefore(UserBVHead[I2]);
+ }
+ if (EE1 && !EE2)
+ return true;
+ if (!EE1 && EE2)
+ return false;
+ if (EE1 && EE2) {
+ if (EE1->getOperand(0) == EE2->getOperand(0))
return getElementIndex(EE1) < getElementIndex(EE2);
+ auto *I1 = dyn_cast<Instruction>(EE1->getOperand(0));
+ if (I1 && !I2)
+ return true;
+ if (!I1 && I2)
+ return false;
+ auto *I2 = dyn_cast<Instruction>(EE2->getOperand(0));
+ if (I1 && I2) {
+ if (I1->getParent() != I2->getParent())
+ return DT->dominates(I1->getParent(), I2->getParent());
+ return I1->comesBefore(I2);
}
- return I1 < I2;
+ auto *P1 = dyn_cast<Argument>(EE1->getOperand(0));
+ auto *P2 = dyn_cast<Argument>(EE2->getOperand(0));
+ if (P1 && !P2)
+ return true;
+ if (!P1 && P2)
+ return false;
+ if (P1 && P2)
+ return P1->getArgNo() < P2->getArgNo();
+ // TODO: add analysis for other value kinds.
+ return EE1->getOperand(0)->getValueID() <
+ EE2->getOperand(0)->getValueID();
+ }
+ return false;
};
DenseMap<unsigned, unsigned> PhiToId;
SmallVector<unsigned> Phis(TE.Scalars.size());
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index dbc4f3d59d4f9b..d6073ea4bbbae6 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -33,40 +33,40 @@ define void @test() {
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I67]], i32 14
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
; CHECK: [[BB77]]:
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 7, i32 2, i32 3, i32 0, i32 6, i32 7, i32 7>
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 7, i32 6, i32 6>
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 6, i32 7, i32 7>
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 22, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 23, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
-; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
+; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
; CHECK: [[BB167]]:
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13
; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]]
; CHECK: [[BB184]]:
; CHECK-NEXT: br label %[[BB185:.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll
index 34c068478c5f5e..d4b737a6bc4211 100644
--- a/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll
@@ -10,9 +10,9 @@ define i1 @test() {
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: br label [[ELSE1:%.*]]
; CHECK: else1:
|
Created using spr 1.3.5
return P1->getArgNo() < P2->getArgNo();
return false;
}
return false;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't quite work... if the first operand isn't an instruction or an argument, we have to always return false. So this needs to happen before the `if (EE1->getOperand(0) == EE2->getOperand(0))` check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it already does this. The `if (EE1->getOperand(0) == EE2->getOperand(0))` check occurs only if both users are extractelements; the argument comparison occurs only if both vector operands are arguments. If the first is extract/argument but the second is not - return true. If the first is not extract/argument - return false. Am I missing something?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suppose you have three extractelements with a constant as the first operand. Two of them have A as the first operand, the other one has B as the first operand. They all need to compare equal, even if the extract indexes are different.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, missed it, fixed
Created using spr 1.3.5
Created using spr 1.3.5
auto *Inst2 = dyn_cast<Instruction>(EE2->getOperand(0));
auto *P1 = dyn_cast<Argument>(EE1->getOperand(0));
auto *P2 = dyn_cast<Argument>(EE2->getOperand(0));
if ((!Inst1 && !Inst2) || (!P1 && !P2))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- if ((!Inst1 && !Inst2) || (!P1 && !P2))
+ if (!Inst2 && !P2)
+ return Inst1 || P1;
Created using spr 1.3.5
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Hi, I bisected a crash back to this commit.
|
Need to check if the block is reachable before comparing phis from it to avoid compiler crash when requesting node. Fixes report in #110529 (comment)
Must be fixed in 0e1ffa3 |
Need to check if the block is reachable before comparing phis from it to avoid compiler crash when requesting node. Fixes report in llvm/llvm-project#110529 (comment)
Yep, thanks. |
Need to check if the block is reachable before comparing phis from it to avoid compiler crash when requesting node. Fixes report in llvm#110529 (comment)
No description provided.