Skip to content

[SLP]Buildvector for alternate instructions with non-profitable gather operands. #84978

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2995,6 +2995,15 @@ class BoUpSLP {
return ScalarToTreeEntry.lookup(V);
}

/// Check that the operand node of alternate node does not generate
/// buildvector sequence. If it is, then probably not worth it to build
/// alternate shuffle, if number of buildvector operands + alternate
/// instruction > than the number of buildvector instructions.
/// \param S the instructions state of the analyzed values.
/// \param VL list of the instructions with alternate opcodes.
bool areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const;

/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
TreeEntry::EntryState getScalarsVectorizationState(
Expand Down Expand Up @@ -5777,6 +5786,117 @@ static bool isAlternateInstruction(const Instruction *I,
const Instruction *AltOp,
const TargetLibraryInfo &TLI);

bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const {
unsigned Opcode0 = S.getOpcode();
unsigned Opcode1 = S.getAltOpcode();
// The opcode mask selects between the two opcodes.
SmallBitVector OpcodeMask(VL.size(), false);
for (unsigned Lane : seq<unsigned>(0, VL.size()))
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
OpcodeMask.set(Lane);
// If this pattern is supported by the target then consider it profitable.
if (TTI->isLegalAltInstr(FixedVectorType::get(S.MainOp->getType(), VL.size()),
Opcode0, Opcode1, OpcodeMask))
return true;
SmallVector<ValueList> Operands;
for (unsigned I : seq<unsigned>(0, S.MainOp->getNumOperands())) {
Operands.emplace_back();
// Prepare the operand vector.
for (Value *V : VL)
Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
}
if (Operands.size() == 2) {
// Try find best operands candidates.
for (unsigned I : seq<unsigned>(0, VL.size() - 1)) {
SmallVector<std::pair<Value *, Value *>> Candidates(3);
Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]);
Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]);
Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]);
std::optional<int> Res = findBestRootPair(Candidates);
switch (Res.value_or(0)) {
case 0:
break;
case 1:
std::swap(Operands[0][I + 1], Operands[1][I + 1]);
break;
case 2:
std::swap(Operands[0][I], Operands[1][I]);
break;
default:
llvm_unreachable("Unexpected index.");
}
}
}
DenseSet<unsigned> UniqueOpcodes;
constexpr unsigned NumAltInsts = 3; // main + alt + shuffle.
unsigned NonInstCnt = 0;
// Estimate number of instructions, required for the vectorized node and for
// the buildvector node.
unsigned UndefCnt = 0;
// Count the number of extra shuffles, required for vector nodes.
unsigned ExtraShuffleInsts = 0;
// Check that operands do not contain same values and create either perfect
// diamond match or shuffled match.
if (Operands.size() == 2) {
// Do not count same operands twice.
if (Operands.front() == Operands.back()) {
Operands.erase(Operands.begin());
} else if (!allConstant(Operands.front()) &&
all_of(Operands.front(), [&](Value *V) {
return is_contained(Operands.back(), V);
})) {
Operands.erase(Operands.begin());
++ExtraShuffleInsts;
}
}
const Loop *L = LI->getLoopFor(S.MainOp->getParent());
// Vectorize node, if:
// 1. at least single operand is constant or splat.
// 2. Operands have many loop invariants (the instructions are not loop
// invariants).
// 3. At least single unique operands is supposed to vectorized.
return none_of(Operands,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment describing this

[&](ArrayRef<Value *> Op) {
if (allConstant(Op) ||
(!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
getSameOpcode(Op, *TLI).MainOp))
return false;
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
if (isa<Constant, ExtractElementInst>(V) ||
getTreeEntry(V) || (L && L->isLoopInvariant(V))) {
if (isa<UndefValue>(V))
++UndefCnt;
continue;
}
auto Res = Uniques.try_emplace(V, 0);
// Found first duplicate - need to add shuffle.
if (!Res.second && Res.first->second == 1)
++ExtraShuffleInsts;
++Res.first->getSecond();
if (auto *I = dyn_cast<Instruction>(V))
UniqueOpcodes.insert(I->getOpcode());
else if (Res.second)
++NonInstCnt;
}
return none_of(Uniques, [&](const auto &P) {
return P.first->hasNUsesOrMore(P.second + 1) &&
none_of(P.first->users(), [&](User *U) {
return getTreeEntry(U) || Uniques.contains(U);
});
});
}) ||
// Do not vectorize node, if estimated number of vector instructions is
// more than estimated number of buildvector instructions. Number of
// vector operands is number of vector instructions + number of vector
// instructions for operands (buildvectors). Number of buildvector
// instructions is just number_of_operands * number_of_scalars.
(UndefCnt < (VL.size() - 1) * S.MainOp->getNumOperands() &&
(UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
NumAltInsts) < S.MainOp->getNumOperands() * VL.size());
}

BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
Expand Down Expand Up @@ -6074,6 +6194,14 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
if (!areAltOperandsProfitable(S, VL)) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Alexey. Can you please put this under an option (let it be true by default)?
Here is the problem: sometime we create LIT tests deliberately short and force to go through via just making slp-threshold very low. We merely need SLP vectorizer to execute specific path even though it could be unprofitable in a test case scenario. But since this new staff does not check cost threshold at all, we observe changing some tests behavior and they no longer serve their purpose. Having the option we could fix that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Valery, you can add this option in the downstream compiler, should not be a very big change.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course we can. That will just adds another piece of divergence for things that should not really have existed from the very beginning. I believe similar problem may exist in llvm-project.

Copy link

@valerydmit valerydmit Apr 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the example (in this patch): gather-move-out-of-loop
The test intent was to check that if we vectorize, we place gather outside of the loop. Note that there is slp-threshold is there exactly for reason to force slp vectorizer to go further. But after this patch this test no longer serves its purpose. Does that sound like enough justification now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feel free to prepare the patch

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll prepare it. Thanks.

LLVM_DEBUG(
dbgs()
<< "SLP: ShuffleVector not vectorized, operands are buildvector and "
"the whole alt sequence is not profitable.\n");
return TreeEntry::NeedToGather;
}

return TreeEntry::Vectorize;
}
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,16 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
; CHECK-NEXT: [[AND95:%.*]] = and i32 [[B_0278]], 1
; CHECK-NEXT: [[SHR96]] = lshr i32 [[A_0279]], 1
; CHECK-NEXT: [[SHR97]] = lshr i32 [[B_0278]], 1
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[AND94]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq <2 x i32> [[TMP23]], zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <2 x i32> [[TMP23]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> [[TMP25]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> poison, i32 [[AND95]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP27]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <2 x i32> [[TMP28]], zeroinitializer
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq <2 x i32> [[TMP28]], zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x i1> [[TMP29]], <2 x i1> [[TMP30]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND94]], 0
; CHECK-NEXT: [[TOBOOL98:%.*]] = icmp ne i32 [[AND95]], 0
; CHECK-NEXT: [[TOBOOL100:%.*]] = icmp eq i32 [[AND94]], 0
; CHECK-NEXT: [[TOBOOL103:%.*]] = icmp eq i32 [[AND95]], 0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL100]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TOBOOL]], i32 1
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL98]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i1> [[TMP25]], i1 [[TOBOOL103]], i32 1
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i1> [[TMP27]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP26]], <4 x i1> [[TMP31]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i1> [[TMP32]] to <4 x i32>
; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP21]], [[TMP33]]
Expand Down Expand Up @@ -149,16 +149,16 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
; CHECK-NEXT: [[AND134:%.*]] = and i32 [[B_1300]], 1
; CHECK-NEXT: [[SHR135]] = lshr i32 [[A_1301]], 1
; CHECK-NEXT: [[SHR136]] = lshr i32 [[B_1300]], 1
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i32> poison, i32 [[AND133]], i32 0
; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <2 x i32> [[TMP39]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <2 x i32> [[TMP40]], zeroinitializer
; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <2 x i32> [[TMP40]], zeroinitializer
; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <2 x i1> [[TMP41]], <2 x i1> [[TMP42]], <4 x i32> <i32 0, i32 3, i32 0, i32 3>
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x i32> poison, i32 [[AND134]], i32 0
; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <2 x i32> [[TMP44]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <2 x i32> [[TMP45]], zeroinitializer
; CHECK-NEXT: [[TMP47:%.*]] = icmp eq <2 x i32> [[TMP45]], zeroinitializer
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <2 x i1> [[TMP46]], <2 x i1> [[TMP47]], <4 x i32> <i32 0, i32 3, i32 3, i32 0>
; CHECK-NEXT: [[TOBOOL137:%.*]] = icmp ne i32 [[AND133]], 0
; CHECK-NEXT: [[TOBOOL139:%.*]] = icmp ne i32 [[AND134]], 0
; CHECK-NEXT: [[TOBOOL144:%.*]] = icmp eq i32 [[AND133]], 0
; CHECK-NEXT: [[TOBOOL147:%.*]] = icmp eq i32 [[AND134]], 0
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL144]], i32 0
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i1> [[TMP40]], i1 [[TOBOOL137]], i32 1
; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i1> [[TMP41]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL139]], i32 0
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP42]], i1 [[TOBOOL147]], i32 1
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i1> [[TMP39]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP43]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i1> [[TMP49]] to <4 x i32>
; CHECK-NEXT: [[TMP51]] = add <4 x i32> [[TMP38]], [[TMP50]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,8 @@ define i64 @wombat() {
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[PHI]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[PHI]] to i64
; CHECK-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]]
; CHECK-NEXT: ret i64 [[OR]]
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
define void @test(i16 %0) {
; CHECK-LABEL: @test(
; CHECK-NEXT: for.body92.preheader:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
; CHECK-NEXT: br label [[FOR_BODY92:%.*]]
; CHECK: for.body92:
; CHECK-NEXT: [[CONV177_I:%.*]] = sext i16 0 to i32
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, i32 [[CONV177_I]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8
; CHECK-NEXT: br label [[FOR_BODY92]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,19 @@ define i64 @foo() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
; CHECK-NEXT: [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ]
; CHECK-NEXT: ret i64 0
; CHECK: bb3:
; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: [[ADD]] = add i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
; CHECK-NEXT: [[TMP9]] = or i64 [[PHI5]], 0
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,22 @@
define void @foo() {
; CHECK-LABEL: define void @foo() {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SHL]], i32 0
; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
; CHECK-NEXT: br label [[BB4]]
; CHECK: bb4:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
; CHECK: bb5:
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ]
; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ [[SHL]], [[BB4]] ]
; CHECK-NEXT: [[PHI7:%.*]] = phi i32 [ [[TMP8]], [[BB4]] ]
; CHECK-NEXT: ret void
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ define void @addsub_and_external_users(ptr %A, ptr %ptr) {
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHUFFLE]], <double 1.100000e+00, double 1.200000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 1.100000e+00, double 1.200000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], <double 1.100000e+00, double 1.200000e+00>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], <double 1.100000e+00, double 1.200000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.100000e+00, double 2.200000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.100000e+00, double 3.200000e+00>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
Expand Down Expand Up @@ -159,10 +159,10 @@ define void @subadd_and_external_users(ptr %A, ptr %ptr) {
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], <double 1.200000e+00, double 1.100000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[SHUFFLE]], <double 1.200000e+00, double 1.100000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.200000e+00, double 2.100000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.200000e+00, double 3.100000e+00>
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[A:%.*]], align 8
Expand Down
Loading