-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP]Change the insertion point for outside-block-used nodes and prevec phi operand gathers #139917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Alexey Bataev (alexey-bataev) ChangesNeed to set the insertion point for (non-schedulable) vector node after Fixes #139728 Patch is 20.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139917.diff 10 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 45cf4e1eac092..04849b86af7ce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16128,16 +16128,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
- all_of(E->Scalars,
- [](Value *V) {
- return isa<PoisonValue>(V) ||
- (!isVectorLikeInstWithConstOps(V) &&
- isUsedOutsideBlock(V));
- }) ||
- (E->isGather() && E->Idx == 0 && all_of(E->Scalars, [](Value *V) {
- return isa<ExtractElementInst, UndefValue>(V) ||
- areAllOperandsNonInsts(V);
- })))
+ all_of(E->Scalars, [](Value *V) {
+ return isa<PoisonValue>(V) ||
+ (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
+ }))
Res = FindLastInst();
else
Res = FindFirstInst();
@@ -16193,7 +16187,7 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
}
if (IsPHI ||
(!E->isGather() && E->State != TreeEntry::SplitVectorize &&
- doesNotNeedToSchedule(E->Scalars)) ||
+ all_of(E->Scalars, areAllOperandsNonInsts)) ||
(GatheredLoadsEntriesFirst.has_value() &&
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
E->getOpcode() == Instruction::Load)) {
@@ -17782,17 +17776,27 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *VecOp = NewPhi->getIncomingValueForBlock(IBB);
NewPhi->addIncoming(VecOp, IBB);
TreeEntry *OpTE = getOperandEntry(E, I);
+ assert(!OpTE->VectorizedValue && "Expected no vectorized value.");
OpTE->VectorizedValue = VecOp;
continue;
}
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- Value *Vec = vectorizeOperand(E, I);
+ const TreeEntry *OpE = getOperandEntry(E, I);
+ Value *Vec;
+ if (OpE->isGather()) {
+ assert(OpE->VectorizedValue && "Expected vectorized value.");
+ Vec = OpE->VectorizedValue;
+ if (auto *IVec = dyn_cast<Instruction>(Vec))
+ Builder.SetInsertPoint(IVec->getNextNonDebugInstruction());
+ } else {
+ Vec = vectorizeOperand(E, I);
+ }
if (VecTy != Vec->getType()) {
- assert((It != MinBWs.end() || getOperandEntry(E, I)->isGather() ||
- MinBWs.contains(getOperandEntry(E, I))) &&
- "Expected item in MinBWs.");
+ assert(
+ (It != MinBWs.end() || OpE->isGather() || MinBWs.contains(OpE)) &&
+ "Expected item in MinBWs.");
Vec = Builder.CreateIntCast(Vec, VecTy, GetOperandSignedness(I));
}
NewPhi->addIncoming(Vec, IBB);
@@ -18690,6 +18694,28 @@ Value *BoUpSLP::vectorizeTree(
(void)vectorizeTree(TE.get());
}
}
+ // Vectorize gather operands of the PHI nodes.
+ for (const std::unique_ptr<TreeEntry> &TE : reverse(VectorizableTree)) {
+ if (TE->isGather() && TE->UserTreeIndex.UserTE &&
+ TE->UserTreeIndex.UserTE->hasState() &&
+ !TE->UserTreeIndex.UserTE->isAltShuffle() &&
+ TE->UserTreeIndex.UserTE->State == TreeEntry::Vectorize &&
+ TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI &&
+ !TE->VectorizedValue) {
+ auto *PH = cast<PHINode>(TE->UserTreeIndex.UserTE->getMainOp());
+ BasicBlock *IBB = PH->getIncomingBlock(TE->UserTreeIndex.EdgeIdx);
+ // If there is the same incoming block earlier - skip, it will be handled
+ // in PHI node.
+ if (TE->UserTreeIndex.EdgeIdx > 0 &&
+ any_of(seq<unsigned>(TE->UserTreeIndex.EdgeIdx), [&](unsigned Idx) {
+ return PH->getIncomingBlock(Idx) == IBB;
+ }))
+ continue;
+ Builder.SetInsertPoint(IBB->getTerminator());
+ Builder.SetCurrentDebugLocation(PH->getDebugLoc());
+ (void)vectorizeTree(TE.get());
+ }
+ }
(void)vectorizeTree(VectorizableTree[0].get());
// Run through the list of postponed gathers and emit them, replacing the temp
// emitted allocas with actual vector instructions.
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index 295a71899c338..d8ddfee3ed28b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -37,19 +37,19 @@ define void @test() {
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
; CHECK: [[BB77]]:
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP17]], <8 x i32> <i32 8, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP16]], float [[I67]], i32 6
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I69]], i32 7
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> [[TMP23]], <8 x i32> <i32 8, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[BB78:.*]]
; CHECK: [[BB78]]:
-; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP23]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP30]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
@@ -58,8 +58,8 @@ define void @test() {
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
-; CHECK-NEXT: [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 5, i32 11, i32 12, i32 10, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 14, i32 15>
+; CHECK-NEXT: [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 5, i32 11, i32 12, i32 10, i32 14, i32 15, i32 poison, i32 poison>
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
; CHECK: [[BB167]]:
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll
index e3c134b068e04..773b9c069569d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvectors-parent-phi-nodes.ll
@@ -5,14 +5,14 @@ define void @test(ptr %0, float %1) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[TMP0:%.*]], float [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[BB8:.*]]
; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ [[TMP15:%.*]], %[[BB8]] ], [ [[TMP5]], [[TMP2:%.*]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP7]], %[[BB8]] ], [ [[TMP4]], [[TMP2]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP7]], %[[BB8]] ], [ [[TMP8]], [[TMP2]] ]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
; CHECK-NEXT: [[TMP12:%.*]] = fmul <4 x float> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
index 2a54ae9a1e749..8ad05694b5322 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
@@ -6,9 +6,9 @@ define i32 @test(i64 %l.549) {
; CHECK-SAME: i64 [[L_549:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV3:%.*]] = sext i32 0 to i64
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[L_549]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[IF_THEN19:.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
index 5b936f65a3221..6fa33671a7b53 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
@@ -7,11 +7,11 @@ define void @test() {
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB5:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], %[[BB1]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 0, i32 0
; CHECK-NEXT: [[TMP3]] = or <2 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[OR3:%.*]] = or i32 [[TMP6]], 0
; CHECK-NEXT: br i1 false, label %[[BB1]], label %[[BB5]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
index 289c6002851d7..5c7dc869395b8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll
@@ -37,18 +37,18 @@ define i32 @test(i32 %s.0) {
; CHECK: [[IF_THEN18:.*]]:
; CHECK-NEXT: br label %[[T]]
; CHECK: [[T]]:
-; CHECK-NEXT: [[TMP30:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0
; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]]
; CHECK: [[IF_END24]]:
-; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP30]], %[[T]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
+; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP19]], %[[T]] ]
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
; CHECK-NEXT: br label %[[O]]
; CHECK: [[O]]:
-; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ]
; CHECK-NEXT: [[TMP23]] = phi <4 x i32> [ [[TMP1]], %[[K]] ], [ [[TMP20]], %[[IF_END24]] ]
+; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP30]], %[[IF_END24]] ]
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ]
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> <i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
new file mode 100644
index 0000000000000..1c482e079bb0f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define i64 @test() {
+; CHECK-LABEL: define i64 @test() {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 0, i32 1
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB5:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP4]] = or <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: br label %[[BB5]]
+; CHECK: [[BB5]]:
+; CHECK-NEXT: br i1 false, label %[[BB6:.*]], label %[[BB1]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP2]], %[[BB5]] ]
+; CHECK-NEXT: ret i64 0
+;
+bb:
+ br label %bb1
+
+bb1:
+ %phi = phi i32 [ 0, %bb ], [ %or, %bb5 ]
+ %phi2 = phi i32 [ 0, %bb ], [ %or4, %bb5 ]
+ %or = or i32 %phi, 0
+ %add = add i32 0, 0
+ %or3 = or i32 %add, %phi2
+ %or4 = or i32 %or3, 0
+ br label %bb5
+
+bb5:
+ br i1 false, label %bb6, label %bb1
+
+bb6:
+ %phi7 = phi i32 [ %or, %bb5 ]
+ %phi8 = phi i32 [ %or3, %bb5 ]
+ ret i64 0
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
index 2612a21b9eedf..a08d0b1ab16cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll
@@ -16,10 +16,10 @@ define i32 @test(i1 %cond) {
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]]
; CHECK-NEXT: [[OR92]] = or i32 1, 0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[OR92]], i32 0
; CHECK-NEXT: [[TMP8]] = xor <2 x i32> [[TMP9]], [[TMP7]]
-; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]]
; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[OP_RDX]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll
index 5aa4dba2b8a1b..f2b9d6329bfdf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-num-operands.ll
@@ -15,8 +15,8 @@ define i64 @Foo(ptr align 8 dereferenceable(344) %0, i64 %1) {
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
; CHECK-NEXT: br label %[[BB16:.*]]
; CHECK: [[BB16]]:
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i64> [ [[TMP11]], [[TMP2:%.*]] ], [ zeroinitializer, %[[TMP25:.*]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index 36dbeed9bbcd5..ebe3eb6b53358 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -234,12 +234,12 @@ define void @test7() {
define void @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> zeroinitializer, i64 2)
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> poison, <2 x float> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP0]], <2 x float> z...
[truncated]
|
Ping! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…es and prevec phi operand gathers Need to set the insertion point for (non-schedulable) vector node after the last instruction in the node to avoid def-use breakage. But it also causes miscompilation with gather/buildvector operands of the phi nodes, used in the same phi only in the block. These nodes supposed to be inserted at the end of the block and after changing the insertion point for the non-schedulable vec block, it also may break def-use dependencies. Need to prevector such nodes, to emit them as early as possible, so the vectorized nodes are inserted before these nodes. Fixes #139728 Reviewers: hiraditya, HanKuanChen, RKSimon Reviewed By: RKSimon Pull Request: llvm/llvm-project#139917
This broke compilation of an older version of Qt. typedef double qreal;
void qRound(double);
struct QMatrix {
qreal _m11, _m12;
qreal _dx, _dy;
};
enum TransformationType { TxScale, TxShear, TxProject = 10 };
QMatrix affine;
qreal affine_3, affine_2, mapToPolygon_w;
struct QTransform {
void mapToPolygon() const;
TransformationType inline_type() const;
};
void QTransform::mapToPolygon() const {
TransformationType t = inline_type();
qreal x[2]{};
qreal x_0, y_1, y_0, x_2, y_2;
if (t <= TxScale) {
y_0 = affine._dy;
qreal h = y_1 = affine._dy;
y_2 = h;
} else
switch (t) {
case TxShear:
case TxProject:
x_0 = affine._dx;
if (t == TxProject)
x[1] *= y_1 *= mapToPolygon_w;
}
switch (t) {
case TxShear:
case TxProject:
x_2 = affine._m11 + affine_2 * 0 + affine._dx;
y_2 = affine._m12 + affine_3 * 0 + affine._dy;
if (t == TxProject) {
x_2 *= 0;
y_2 *= 0;
}
}
qRound(x_0);
qRound(y_0);
qRound(x[1]);
qRound(y_1);
qRound(x_2);
qRound(y_2);
} Compiled like this: $ clang -target x86_64-linux-gnu -c -O2 repro.cpp
Instruction does not dominate all uses!
%22 = fadd <2 x double> %17, %18
%19 = shufflevector <2 x double> %22, <2 x double> poison, <6 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison>
in function _ZNK10QTransform12mapToPolygonEv
fatal error: error in backend: Broken function found, compilation aborted! |
…and prevec phi operand gathers" This reverts commit d79d9b8 to fix a bug reported in #139917 (comment)
…used nodes and prevec phi operand gathers" This reverts commit d79d9b8 to fix a bug reported in llvm/llvm-project#139917 (comment)
…ec phi operand gathers Need to set the insertion point for (non-schedulable) vector node after the last instruction in the node to avoid def-use breakage. But it also causes miscompilation with gather/buildvector operands of the phi nodes, used in the same phi only in the block. These nodes supposed to be inserted at the end of the block and after changing the insertion point for the non-schedulable vec block, it also may break def-use dependencies. Need to prevector such nodes, to emit them as early as possible, so the vectorized nodes are inserted before these nodes. Fixes #139728 Recommit after revert 60fb921 Reviewers: hiraditya, HanKuanChen, RKSimon Reviewed By: RKSimon Pull Request: #139917
…es and prevec phi operand gathers Need to set the insertion point for (non-schedulable) vector node after the last instruction in the node to avoid def-use breakage. But it also causes miscompilation with gather/buildvector operands of the phi nodes, used in the same phi only in the block. These nodes supposed to be inserted at the end of the block and after changing the insertion point for the non-schedulable vec block, it also may break def-use dependencies. Need to prevector such nodes, to emit them as early as possible, so the vectorized nodes are inserted before these nodes. Fixes #139728 Recommit after revert 60fb921 Reviewers: hiraditya, HanKuanChen, RKSimon Reviewed By: RKSimon Pull Request: llvm/llvm-project#139917
…and prevec phi operand gathers" This reverts commit d79d9b8 to fix a bug reported in llvm#139917 (comment)
…ec phi operand gathers Need to set the insertion point for (non-schedulable) vector node after the last instruction in the node to avoid def-use breakage. But it also causes miscompilation with gather/buildvector operands of the phi nodes, used in the same phi only in the block. These nodes supposed to be inserted at the end of the block and after changing the insertion point for the non-schedulable vec block, it also may break def-use dependencies. Need to prevector such nodes, to emit them as early as possible, so the vectorized nodes are inserted before these nodes. Fixes llvm#139728 Recommit after revert 60fb921 Reviewers: hiraditya, HanKuanChen, RKSimon Reviewed By: RKSimon Pull Request: llvm#139917
Need to set the insertion point for (non-schedulable) vector node after
the last instruction in the node to avoid def-use breakage. But it also
causes miscompilation with gather/buildvector operands of the phi nodes,
used in the same phi only in the block.
These nodes supposed to be inserted at the end of the block and after
changing the insertion point for the non-schedulable vec block, it also
may break def-use dependencies. Need to prevector such nodes, to emit
them as early as possible, so the vectorized nodes are inserted before
these nodes.
Fixes #139728