Skip to content

Commit 0252d33

Browse files
[SLP]Model single unique value insert + shuffle as splat + select, where profitable
When a single remaining unique scalar has to be inserted into a non-poison vector at a non-zero position:
```
%vec1 = insertelement %vec, %v, pos1
%res = shuffle %vec1, poison, <0, 1, 2,..., pos1, pos1 + 1, ..., pos1, ...>
```
it is better to estimate whether it is more profitable to model it as is, or to model it as:
```
%bv = insertelement poison, %v, 0
%splat = shuffle %bv, poison, <poison, ..., 0, ..., 0, ...>
%res = shuffle %vec, %splat, <0, 1, 2,..., pos1 + VF, pos1 + 1, ...>
```
Reviewers: preames, hiraditya, RKSimon
Reviewed By: preames
Pull Request: #136590
1 parent f010725 commit 0252d33

11 files changed

+120
-38
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 82 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12613,11 +12613,13 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1261312613
}
1261412614
InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
1261512615
/// Finalize emission of the shuffles.
12616-
InstructionCost
12617-
finalize(ArrayRef<int> ExtMask,
12618-
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
12619-
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
12620-
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
12616+
InstructionCost finalize(
12617+
ArrayRef<int> ExtMask,
12618+
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
12619+
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
12620+
function_ref<void(Value *&, SmallVectorImpl<int> &,
12621+
function_ref<Value *(Value *, Value *, ArrayRef<int>)>)>
12622+
Action = {}) {
1262112623
IsFinalized = true;
1262212624
if (Action) {
1262312625
const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
@@ -12629,7 +12631,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
1262912631
assert(VF > 0 &&
1263012632
"Expected vector length for the final value before action.");
1263112633
Value *V = cast<Value *>(Vec);
12632-
Action(V, CommonMask);
12634+
Action(V, CommonMask, [this](Value *V1, Value *V2, ArrayRef<int> Mask) {
12635+
Cost += createShuffle(V1, V2, Mask);
12636+
return V1;
12637+
});
1263312638
InVectors.front() = V;
1263412639
}
1263512640
if (!SubVectors.empty()) {
@@ -16593,11 +16598,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1659316598
/// Finalize emission of the shuffles.
1659416599
/// \param Action the action (if any) to be performed before final applying of
1659516600
/// the \p ExtMask mask.
16596-
Value *
16597-
finalize(ArrayRef<int> ExtMask,
16598-
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
16599-
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
16600-
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
16601+
Value *finalize(
16602+
ArrayRef<int> ExtMask,
16603+
ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
16604+
ArrayRef<int> SubVectorsMask, unsigned VF = 0,
16605+
function_ref<void(Value *&, SmallVectorImpl<int> &,
16606+
function_ref<Value *(Value *, Value *, ArrayRef<int>)>)>
16607+
Action = {}) {
1660116608
IsFinalized = true;
1660216609
if (Action) {
1660316610
Value *Vec = InVectors.front();
@@ -16616,7 +16623,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1661616623
std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), VecVF), 0);
1661716624
Vec = createShuffle(Vec, nullptr, ResizeMask);
1661816625
}
16619-
Action(Vec, CommonMask);
16626+
Action(Vec, CommonMask, [this](Value *V1, Value *V2, ArrayRef<int> Mask) {
16627+
return createShuffle(V1, V2, Mask);
16628+
});
1662016629
InVectors.front() = Vec;
1662116630
}
1662216631
if (!SubVectors.empty()) {
@@ -17278,9 +17287,67 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1727817287
else
1727917288
Res = ShuffleBuilder.finalize(
1728017289
E->ReuseShuffleIndices, SubVectors, SubVectorsMask, E->Scalars.size(),
17281-
[&](Value *&Vec, SmallVectorImpl<int> &Mask) {
17282-
TryPackScalars(NonConstants, Mask, /*IsRootPoison=*/false);
17283-
Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec);
17290+
[&](Value *&Vec, SmallVectorImpl<int> &Mask, auto CreateShuffle) {
17291+
bool IsSplat = isSplat(NonConstants);
17292+
SmallVector<int> BVMask(Mask.size(), PoisonMaskElem);
17293+
TryPackScalars(NonConstants, BVMask, /*IsRootPoison=*/false);
17294+
auto CheckIfSplatIsProfitable = [&]() {
17295+
// Estimate the cost of splatting + shuffle and compare with
17296+
// insert + shuffle.
17297+
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
17298+
Value *V = *find_if_not(NonConstants, IsaPred<UndefValue>);
17299+
if (isa<ExtractElementInst>(V) || isVectorized(V))
17300+
return false;
17301+
InstructionCost SplatCost = TTI->getVectorInstrCost(
17302+
Instruction::InsertElement, VecTy, CostKind, /*Index=*/0,
17303+
PoisonValue::get(VecTy), V);
17304+
SmallVector<int> NewMask(Mask.begin(), Mask.end());
17305+
for (auto [Idx, I] : enumerate(BVMask))
17306+
if (I != PoisonMaskElem)
17307+
NewMask[Idx] = Mask.size();
17308+
SplatCost += ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy,
17309+
NewMask, CostKind);
17310+
InstructionCost BVCost = TTI->getVectorInstrCost(
17311+
Instruction::InsertElement, VecTy, CostKind,
17312+
*find_if(Mask, [](int I) { return I != PoisonMaskElem; }),
17313+
Vec, V);
17314+
// Shuffle required?
17315+
if (count(BVMask, PoisonMaskElem) <
17316+
static_cast<int>(BVMask.size() - 1)) {
17317+
SmallVector<int> NewMask(Mask.begin(), Mask.end());
17318+
for (auto [Idx, I] : enumerate(BVMask))
17319+
if (I != PoisonMaskElem)
17320+
NewMask[Idx] = I;
17321+
BVCost += ::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc,
17322+
VecTy, NewMask, CostKind);
17323+
}
17324+
return SplatCost <= BVCost;
17325+
};
17326+
if (!IsSplat || Mask.size() <= 2 || !CheckIfSplatIsProfitable()) {
17327+
for (auto [Idx, I] : enumerate(BVMask))
17328+
if (I != PoisonMaskElem)
17329+
Mask[Idx] = I;
17330+
Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec);
17331+
} else {
17332+
Value *V = *find_if_not(NonConstants, IsaPred<UndefValue>);
17333+
SmallVector<Value *> Values(NonConstants.size(),
17334+
PoisonValue::get(ScalarTy));
17335+
Values[0] = V;
17336+
Value *BV = ShuffleBuilder.gather(Values, BVMask.size());
17337+
SmallVector<int> SplatMask(BVMask.size(), PoisonMaskElem);
17338+
transform(BVMask, SplatMask.begin(), [](int I) {
17339+
return I == PoisonMaskElem ? PoisonMaskElem : 0;
17340+
});
17341+
if (!ShuffleVectorInst::isIdentityMask(SplatMask, VF))
17342+
BV = CreateShuffle(BV, nullptr, SplatMask);
17343+
for (auto [Idx, I] : enumerate(BVMask))
17344+
if (I != PoisonMaskElem)
17345+
Mask[Idx] = BVMask.size() + Idx;
17346+
Vec = CreateShuffle(Vec, BV, Mask);
17347+
for (auto [Idx, I] : enumerate(Mask))
17348+
if (I != PoisonMaskElem)
17349+
Mask[Idx] = Idx;
17350+
}
1728417351
});
1728517352
} else if (!allConstant(GatheredScalars)) {
1728617353
// Gather unique scalars and all constants.

llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ define void @test() {
3838
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
3939
; CHECK: [[BB77]]:
4040
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
41-
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0
41+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 0
42+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP17]], <8 x i32> <i32 8, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 poison, i32 poison>
4243
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I70]], i32 1
4344
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP14]], float [[I68]], i32 2
4445
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP19]], float [[I66]], i32 3
@@ -48,7 +49,7 @@ define void @test() {
4849
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
4950
; CHECK-NEXT: br label %[[BB78:.*]]
5051
; CHECK: [[BB78]]:
51-
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
52+
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP23]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
5253
; CHECK-NEXT: [[TMP22:%.*]] = phi <8 x float> [ [[TMP21]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
5354
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP22]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
5455
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>

llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,9 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
255255
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_12]], align 1
256256
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
257257
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> [[TMP4]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
258-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11
258+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> poison, i8 [[L_11]], i32 0
259+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
260+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 9, i32 10, i32 27, i32 poison, i32 poison, i32 poison, i32 poison>
259261
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0)
260262
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12)
261263
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1)

llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ define i32 @test() {
1212
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> zeroinitializer, [[TMP1]]
1313
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3>
1414
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
15-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[G_228_PROMOTED166_I1105_I]], i32 7
15+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0
16+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
17+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 23, i32 8, i32 9, i32 10, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1618
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v12i32(<16 x i32> poison, <12 x i32> [[TMP3]], i64 0)
1719
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 26, i32 7, i32 8, i32 9, i32 10, i32 11>
1820
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer

llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ define i32 @test(i64 %l.549) {
99
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
1010
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 0, i32 0
1111
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i32 1
12+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[L_549]], i32 0
13+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
1214
; CHECK-NEXT: br label %[[IF_THEN19:.*]]
1315
; CHECK: [[P:.*]]:
1416
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ zeroinitializer, %[[IF_END29:.*]] ], [ [[TMP13:%.*]], %[[IF_END25:.*]] ]
@@ -23,20 +25,20 @@ define i32 @test(i64 %l.549) {
2325
; CHECK: [[LOR_LHS_FALSE]]:
2426
; CHECK-NEXT: br i1 false, label %[[LAND_LHS_TRUE]], label %[[S]]
2527
; CHECK: [[R]]:
26-
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ]
28+
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ]
2729
; CHECK-NEXT: br i1 false, label %[[S]], label %[[LAND_LHS_TRUE]]
2830
; CHECK: [[LAND_LHS_TRUE]]:
29-
; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP8]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ]
31+
; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP18]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ]
3032
; CHECK-NEXT: br i1 false, label %[[Q]], label %[[S]]
3133
; CHECK: [[S]]:
32-
; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP9]], %[[LAND_LHS_TRUE]] ], [ [[TMP8]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ]
34+
; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP19]], %[[LAND_LHS_TRUE]] ], [ [[TMP18]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ]
3335
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
3436
; CHECK-NEXT: br label %[[IF_THEN19]]
3537
; CHECK: [[IF_THEN19]]:
3638
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP11]], %[[S]] ]
3739
; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
3840
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
39-
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[L_549]], i32 1
41+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 poison, i32 poison>
4042
; CHECK-NEXT: [[TMP16]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP15]], <2 x i64> [[TMP2]], i64 2)
4143
; CHECK-NEXT: br i1 false, label %[[R]], label %[[IF_END25]]
4244
; CHECK: [[IF_END25]]:

llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,18 @@ define i32 @test(i32 %s.0) {
3030
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
3131
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
3232
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 poison>
33-
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[J_4]], i32 7
33+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> poison, i32 [[J_4]], i32 0
34+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i32> [[TMP15]], <8 x i32> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0>
35+
; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
3436
; CHECK-NEXT: br label %[[IF_END24]]
3537
; CHECK: [[IF_THEN18:.*]]:
3638
; CHECK-NEXT: br label %[[T]]
3739
; CHECK: [[T]]:
38-
; CHECK-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
40+
; CHECK-NEXT: [[TMP30:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
3941
; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0
4042
; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]]
4143
; CHECK: [[IF_END24]]:
42-
; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP15]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP16]], %[[T]] ]
44+
; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP30]], %[[T]] ]
4345
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
4446
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
4547
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 6>

llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,23 +124,23 @@ define double @preserve_loop_info(ptr %arg, i1 %arg2) {
124124
; CHECK: outer.header:
125125
; CHECK-NEXT: br label [[INNER:%.*]]
126126
; CHECK: inner:
127-
; CHECK-NEXT: br i1 %arg2, label [[OUTER_LATCH:%.*]], label [[INNER]]
127+
; CHECK-NEXT: br i1 [[ARG2:%.*]], label [[OUTER_LATCH:%.*]], label [[INNER]]
128128
; CHECK: outer.latch:
129-
; CHECK-NEXT: br i1 %arg2, label [[BB:%.*]], label [[OUTER_HEADER]]
129+
; CHECK-NEXT: br i1 [[ARG2]], label [[BB:%.*]], label [[OUTER_HEADER]]
130130
; CHECK: bb:
131131
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr undef, align 8
132132
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP]], i64 0, i64 1
133133
; CHECK-NEXT: br label [[LOOP_3HEADER:%.*]]
134134
; CHECK: loop.3header:
135-
; CHECK-NEXT: br i1 %arg2, label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]]
135+
; CHECK-NEXT: br i1 [[ARG2]], label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]]
136136
; CHECK: bb9:
137137
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x double], ptr [[TMP5]], i64 undef, i64 1
138138
; CHECK-NEXT: store double undef, ptr [[TMP]], align 16
139139
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8
140140
; CHECK-NEXT: store double [[TMP12]], ptr [[TMP7]], align 8
141141
; CHECK-NEXT: br label [[LOOP_3LATCH]]
142142
; CHECK: loop.3latch:
143-
; CHECK-NEXT: br i1 %arg2, label [[BB14:%.*]], label [[LOOP_3HEADER]]
143+
; CHECK-NEXT: br i1 [[ARG2]], label [[BB14:%.*]], label [[LOOP_3HEADER]]
144144
; CHECK: bb14:
145145
; CHECK-NEXT: [[TMP15:%.*]] = call double undef(ptr [[TMP]], ptr [[ARG:%.*]])
146146
; CHECK-NEXT: ret double undef
@@ -189,7 +189,8 @@ define void @gather_sequence_crash(<2 x float> %arg, ptr %arg1, float %arg2, ptr
189189
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[ARG1:%.*]], i32 3
190190
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 poison>
191191
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
192-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[ARG2:%.*]], i32 0
192+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[ARG2:%.*]], i32 0
193+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP6]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
193194
; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], zeroinitializer
194195
; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP8]], align 4
195196
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)