Skip to content

Commit 2d81590

Browse files
committed
[SLP] Make getSameOpcode support different instructions if they have
same semantics.
1 parent 39ac121 commit 2d81590

14 files changed

+313
-137
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 207 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,105 @@ struct InstructionsState {
818818

819819
} // end anonymous namespace
820820

821+
struct InterchangeableInstruction {
822+
unsigned Opcode;
823+
SmallVector<Value *> Ops;
824+
template <class... ArgTypes>
825+
InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
826+
: Opcode(Opcode), Ops{std::forward<decltype(Args)>(Args)...} {}
827+
};
828+
829+
bool operator<(const InterchangeableInstruction &LHS,
830+
const InterchangeableInstruction &RHS) {
831+
return LHS.Opcode < RHS.Opcode;
832+
}
833+
834+
/// \returns a list of interchangeable instructions which \p I can be converted
835+
/// to.
836+
/// e.g.,
837+
/// x << y -> x * (2^y)
838+
/// x << 1 -> x * 2
839+
/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
840+
/// x * 0 -> x & 0
841+
/// x * -1 -> 0 - x
842+
/// TODO: support more patterns
843+
static SmallVector<InterchangeableInstruction, 6>
844+
getInterchangeableInstruction(Instruction *I) {
845+
// PII = Possible Interchangeable Instruction
846+
SmallVector<InterchangeableInstruction, 6> PII;
847+
unsigned Opcode = I->getOpcode();
848+
PII.emplace_back(Opcode, I->operands());
849+
if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
850+
Instruction::Add},
851+
Opcode))
852+
return PII;
853+
Constant *C;
854+
if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
855+
ConstantInt *V = nullptr;
856+
if (auto *CI = dyn_cast<ConstantInt>(C)) {
857+
V = CI;
858+
} else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
859+
if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
860+
V = CI;
861+
}
862+
if (!V)
863+
return PII;
864+
Value *Op0 = I->getOperand(0);
865+
Type *Op1Ty = I->getOperand(1)->getType();
866+
const APInt &Op1Int = V->getValue();
867+
Constant *Zero =
868+
ConstantInt::get(Op1Ty, APInt::getZero(Op1Int.getBitWidth()));
869+
Constant *UnsignedMax =
870+
ConstantInt::get(Op1Ty, APInt::getMaxValue(Op1Int.getBitWidth()));
871+
switch (Opcode) {
872+
case Instruction::Shl: {
873+
PII.emplace_back(Instruction::Mul, Op0,
874+
ConstantInt::get(Op1Ty, 1 << Op1Int.getZExtValue()));
875+
if (Op1Int.isZero()) {
876+
PII.emplace_back(Instruction::Sub, Op0, Zero);
877+
PII.emplace_back(Instruction::Add, Op0, Zero);
878+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
879+
PII.emplace_back(Instruction::Or, Op0, Zero);
880+
}
881+
break;
882+
}
883+
case Instruction::Mul: {
884+
switch (Op1Int.getSExtValue()) {
885+
case 1:
886+
PII.emplace_back(Instruction::Sub, Op0, Zero);
887+
PII.emplace_back(Instruction::Add, Op0, Zero);
888+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
889+
PII.emplace_back(Instruction::Or, Op0, Zero);
890+
break;
891+
case 0:
892+
PII.emplace_back(Instruction::And, Op0, Zero);
893+
break;
894+
case -1:
895+
PII.emplace_back(Instruction::Sub, Zero, Op0);
896+
break;
897+
}
898+
break;
899+
}
900+
case Instruction::Sub:
901+
if (Op1Int.isZero()) {
902+
PII.emplace_back(Instruction::Add, Op0, Zero);
903+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
904+
PII.emplace_back(Instruction::Or, Op0, Zero);
905+
}
906+
break;
907+
case Instruction::Add:
908+
if (Op1Int.isZero()) {
909+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
910+
PII.emplace_back(Instruction::Or, Op0, Zero);
911+
}
912+
break;
913+
}
914+
}
915+
// std::set_intersection requires a sorted range.
916+
sort(PII);
917+
return PII;
918+
}
919+
821920
/// \returns true if \p Opcode is allowed as part of the main/alternate
822921
/// instruction for SLP vectorization.
823922
///
@@ -922,18 +1021,54 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
9221021
if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
9231022
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
9241023
}
1024+
// Currently, this is only used for binary ops.
1025+
// TODO: support all instructions
1026+
SmallVector<InterchangeableInstruction> InterchangeableOpcode =
1027+
getInterchangeableInstruction(cast<Instruction>(VL[BaseIndex]));
1028+
SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
1029+
auto UpdateInterchangeableOpcode =
1030+
[](SmallVector<InterchangeableInstruction> &LHS,
1031+
ArrayRef<InterchangeableInstruction> RHS) {
1032+
SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
1033+
std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
1034+
std::back_inserter(NewInterchangeableOpcode));
1035+
if (NewInterchangeableOpcode.empty())
1036+
return false;
1037+
LHS = std::move(NewInterchangeableOpcode);
1038+
return true;
1039+
};
9251040
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
9261041
auto *I = cast<Instruction>(VL[Cnt]);
9271042
unsigned InstOpcode = I->getOpcode();
9281043
if (IsBinOp && isa<BinaryOperator>(I)) {
929-
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
1044+
SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
1045+
getInterchangeableInstruction(I));
1046+
if (UpdateInterchangeableOpcode(InterchangeableOpcode,
1047+
ThisInterchangeableOpcode))
9301048
continue;
931-
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
932-
isValidForAlternation(Opcode)) {
933-
AltOpcode = InstOpcode;
934-
AltIndex = Cnt;
1049+
if (AlternateInterchangeableOpcode.empty()) {
1050+
InterchangeableOpcode.erase(
1051+
std::remove_if(InterchangeableOpcode.begin(),
1052+
InterchangeableOpcode.end(),
1053+
[](const InterchangeableInstruction &I) {
1054+
return !isValidForAlternation(I.Opcode);
1055+
}),
1056+
InterchangeableOpcode.end());
1057+
ThisInterchangeableOpcode.erase(
1058+
std::remove_if(ThisInterchangeableOpcode.begin(),
1059+
ThisInterchangeableOpcode.end(),
1060+
[](const InterchangeableInstruction &I) {
1061+
return !isValidForAlternation(I.Opcode);
1062+
}),
1063+
ThisInterchangeableOpcode.end());
1064+
if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
1065+
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
1066+
AlternateInterchangeableOpcode = std::move(ThisInterchangeableOpcode);
9351067
continue;
9361068
}
1069+
if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
1070+
ThisInterchangeableOpcode))
1071+
continue;
9371072
} else if (IsCastOp && isa<CastInst>(I)) {
9381073
Value *Op0 = IBase->getOperand(0);
9391074
Type *Ty0 = Op0->getType();
@@ -1027,6 +1162,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
10271162
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
10281163
}
10291164

1165+
if (IsBinOp) {
1166+
auto FindOp =
1167+
[&](const SmallVector<InterchangeableInstruction> &CandidateOp) {
1168+
for (Value *V : VL)
1169+
for (const InterchangeableInstruction &I : CandidateOp)
1170+
if (cast<Instruction>(V)->getOpcode() == I.Opcode)
1171+
return cast<Instruction>(V);
1172+
llvm_unreachable(
1173+
"Cannot find the candidate instruction for InstructionsState.");
1174+
};
1175+
Instruction *MainOp = FindOp(InterchangeableOpcode);
1176+
Instruction *AltOp = AlternateInterchangeableOpcode.empty()
1177+
? MainOp
1178+
: FindOp(AlternateInterchangeableOpcode);
1179+
return InstructionsState(VL[BaseIndex], MainOp, AltOp);
1180+
}
10301181
return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
10311182
cast<Instruction>(VL[AltIndex]));
10321183
}
@@ -2318,24 +2469,41 @@ class BoUpSLP {
23182469
: cast<Instruction>(VL[0])->getNumOperands();
23192470
OpsVec.resize(NumOperands);
23202471
unsigned NumLanes = VL.size();
2321-
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
2472+
InstructionsState S = getSameOpcode(VL, TLI);
2473+
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
23222474
OpsVec[OpIdx].resize(NumLanes);
2323-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
2324-
assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
2325-
// Our tree has just 3 nodes: the root and two operands.
2326-
// It is therefore trivial to get the APO. We only need to check the
2327-
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
2328-
// RHS operand. The LHS operand of both add and sub is never attached
2329-
// to an inversese operation in the linearized form, therefore its APO
2330-
// is false. The RHS is true only if VL[Lane] is an inverse operation.
2331-
2332-
// Since operand reordering is performed on groups of commutative
2333-
// operations or alternating sequences (e.g., +, -), we can safely
2334-
// tell the inverse operations by checking commutativity.
2335-
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
2475+
for (auto [I, V] : enumerate(VL)) {
2476+
assert(isa<Instruction>(V) && "Expected instruction");
2477+
SmallVector<InterchangeableInstruction> IIList =
2478+
getInterchangeableInstruction(cast<Instruction>(V));
2479+
Value *SelectedOp;
2480+
auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
2481+
return II.Opcode == S.MainOp->getOpcode();
2482+
});
2483+
if (Iter == IIList.end()) {
2484+
Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
2485+
return II.Opcode == S.AltOp->getOpcode();
2486+
});
2487+
SelectedOp = S.AltOp;
2488+
} else {
2489+
SelectedOp = S.MainOp;
2490+
}
2491+
assert(Iter != IIList.end() &&
2492+
"Cannot find an interchangeable instruction.");
2493+
// Our tree has just 3 nodes: the root and two operands.
2494+
// It is therefore trivial to get the APO. We only need to check the
2495+
// opcode of V and whether the operand at OpIdx is the LHS or RHS
2496+
// operand. The LHS operand of both add and sub is never attached to an
2497+
// inverse operation in the linearized form, therefore its APO is
2498+
// false. The RHS is true only if V is an inverse operation.
2499+
2500+
// Since operand reordering is performed on groups of commutative
2501+
// operations or alternating sequences (e.g., +, -), we can safely
2502+
// tell the inverse operations by checking commutativity.
2503+
bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
2504+
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
23362505
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
2337-
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
2338-
APO, false};
2506+
OpsVec[OpIdx][I] = {Iter->Ops[OpIdx], APO, false};
23392507
}
23402508
}
23412509
}
@@ -3227,15 +3395,25 @@ class BoUpSLP {
32273395
auto *I0 = cast<Instruction>(Scalars[0]);
32283396
Operands.resize(I0->getNumOperands());
32293397
unsigned NumLanes = Scalars.size();
3230-
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3231-
OpIdx != NumOperands; ++OpIdx) {
3398+
unsigned NumOperands = I0->getNumOperands();
3399+
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
32323400
Operands[OpIdx].resize(NumLanes);
3233-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3234-
auto *I = cast<Instruction>(Scalars[Lane]);
3235-
assert(I->getNumOperands() == NumOperands &&
3236-
"Expected same number of operands");
3237-
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3238-
}
3401+
for (auto [I, V] : enumerate(Scalars)) {
3402+
SmallVector<InterchangeableInstruction> IIList =
3403+
getInterchangeableInstruction(cast<Instruction>(V));
3404+
auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
3405+
return II.Opcode == MainOp->getOpcode();
3406+
});
3407+
if (Iter == IIList.end())
3408+
Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
3409+
return II.Opcode == AltOp->getOpcode();
3410+
});
3411+
assert(Iter != IIList.end() &&
3412+
"Cannot find an interchangeable instruction.");
3413+
assert(Iter->Ops.size() == NumOperands &&
3414+
"Expected same number of operands");
3415+
for (auto [J, Op] : enumerate(Iter->Ops))
3416+
Operands[J][I] = Op;
32393417
}
32403418
}
32413419

llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
314314
;
315315
; POW2-ONLY-LABEL: @store_try_reorder(
316316
; POW2-ONLY-NEXT: entry:
317-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
318-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
319-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
320-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
317+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
318+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
319+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
320+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
321321
; POW2-ONLY-NEXT: ret void
322322
;
323323
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
99
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
1011
; CHECK-NEXT: br label %[[BB:.*]]
1112
; CHECK: [[BB]]:
12-
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
13-
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
14-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
15-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
13+
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
1614
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
15+
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
1716
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
1817
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
1918
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8

llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
324324
;
325325
; POW2-ONLY-LABEL: @store_try_reorder(
326326
; POW2-ONLY-NEXT: entry:
327-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
328-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
329-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
330-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
327+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
328+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
329+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
330+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
331331
; POW2-ONLY-NEXT: ret void
332332
;
333333
entry:

llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
1010
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
1111
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1212
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
13-
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
14-
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
15-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
13+
; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
1614
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
1715
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
1816
; CHECK-NEXT: ret i32 undef

llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,17 @@
44
define void @test(ptr %0, ptr %1, ptr %2) {
55
; CHECK-LABEL: @test(
66
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 4
7-
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
8-
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
9-
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
10-
; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
11-
; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
12-
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
13-
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
14-
; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
15-
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
16-
; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
17-
; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
18-
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
19-
; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
20-
; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
21-
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
22-
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
7+
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
8+
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
9+
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
10+
; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP6]]
11+
; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP8]], [[TMP7]]
12+
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[TMP5]]
13+
; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP10]]
14+
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], zeroinitializer
15+
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], zeroinitializer
16+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
17+
; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP2:%.*]], align 4
2318
; CHECK-NEXT: ret void
2419
;
2520
%4 = load i32, ptr %1, align 4

0 commit comments

Comments
 (0)