Skip to content

Commit 822a6f7

Browse files
committed
[InstCombine][X86] Fold blendv(x,y,shuffle(bitcast(sext(m)))) -> select(shuffle(m),x,y)
We already handle the blendv(x,y,bitcast(sext(m))) -> select(m,x,y) case; this adds support for peeking through a one-use shuffle of the mask as well. VectorCombine should already have canonicalized the IR to shuffle(bitcast(...)) for us. The particular use case is generic 256/512-bit code that has been split to use target-specific blendv intrinsics (e.g. AVX1 spoofing AVX2 256-bit ops). Fixes #58895.
1 parent 8467cc6 commit 822a6f7

File tree

3 files changed

+175
-184
lines changed

3 files changed

+175
-184
lines changed

llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2694,6 +2694,23 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
26942694
return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
26952695
}
26962696

2697+
// Peek through a one-use shuffle - VectorCombine should have simplified
2698+
// this for cases where we're splitting wider vectors to use blendv
2699+
// intrinsics.
2700+
Value *MaskSrc = nullptr;
2701+
ArrayRef<int> ShuffleMask;
2702+
if (match(Mask, PatternMatch::m_OneUse(PatternMatch::m_Shuffle(
2703+
PatternMatch::m_Value(MaskSrc), PatternMatch::m_Undef(),
2704+
PatternMatch::m_Mask(ShuffleMask))))) {
2705+
// Bail if the shuffle is irregular or contains undefs.
2706+
int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
2707+
if (NumElts < ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
2708+
any_of(ShuffleMask,
2709+
[NumElts](int M) { return M < 0 || M >= NumElts; }))
2710+
break;
2711+
Mask = MaskSrc;
2712+
}
2713+
26972714
// Convert to a vector select if we can bypass casts and find a boolean
26982715
// vector condition value.
26992716
Value *BoolVec;
@@ -2703,11 +2720,26 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
27032720
BoolVec->getType()->getScalarSizeInBits() == 1) {
27042721
auto *MaskTy = cast<FixedVectorType>(Mask->getType());
27052722
auto *OpTy = cast<FixedVectorType>(II.getType());
2723+
unsigned NumMaskElts = MaskTy->getNumElements();
2724+
unsigned NumOperandElts = OpTy->getNumElements();
2725+
2726+
// If we peeked through a shuffle, reapply the shuffle to the bool vector.
2727+
if (MaskSrc) {
2728+
unsigned NumMaskSrcElts =
2729+
cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
2730+
NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
2731+
// Multiple mask bits map to the same operand element - bail out.
2732+
if (NumMaskElts > NumOperandElts)
2733+
break;
2734+
SmallVector<int> ScaledMask;
2735+
if (!llvm::scaleShuffleMaskElts(NumMaskElts, ShuffleMask, ScaledMask))
2736+
break;
2737+
BoolVec = IC.Builder.CreateShuffleVector(BoolVec, ScaledMask);
2738+
MaskTy = FixedVectorType::get(MaskTy->getElementType(), NumMaskElts);
2739+
}
27062740
assert(MaskTy->getPrimitiveSizeInBits() ==
27072741
OpTy->getPrimitiveSizeInBits() &&
27082742
"Not expecting mask and operands with different sizes");
2709-
unsigned NumMaskElts = MaskTy->getNumElements();
2710-
unsigned NumOperandElts = OpTy->getNumElements();
27112743

27122744
if (NumMaskElts == NumOperandElts) {
27132745
return SelectInst::Create(BoolVec, Op1, Op0);

0 commit comments

Comments
 (0)