-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[InstCombine][X86] Fold blendv(x,y,shuffle(bitcast(sext(m)))) -> select(shuffle(m),x,y) #96882
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2800,6 +2800,23 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { | |
return SelectInst::Create(NewSelector, Op1, Op0, "blendv"); | ||
} | ||
|
||
// Peek through a one-use shuffle - VectorCombine should have simplified | ||
// this for cases where we're splitting wider vectors to use blendv | ||
// intrinsics. | ||
Value *MaskSrc = nullptr; | ||
ArrayRef<int> ShuffleMask; | ||
if (match(Mask, PatternMatch::m_OneUse(PatternMatch::m_Shuffle( | ||
PatternMatch::m_Value(MaskSrc), PatternMatch::m_Undef(), | ||
PatternMatch::m_Mask(ShuffleMask))))) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: We should probably just start using |
||
// Bail if the shuffle was irregular or contains undefs. | ||
int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements(); | ||
if (NumElts < ShuffleMask.size() || !isPowerOf2_32(NumElts) || | ||
any_of(ShuffleMask, | ||
[NumElts](int M) { return M < 0 || M >= NumElts; })) | ||
break; | ||
Mask = MaskSrc; | ||
} | ||
|
||
// Convert to a vector select if we can bypass casts and find a boolean | ||
// vector condition value. | ||
Value *BoolVec; | ||
|
@@ -2809,11 +2826,26 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { | |
BoolVec->getType()->getScalarSizeInBits() == 1) { | ||
auto *MaskTy = cast<FixedVectorType>(Mask->getType()); | ||
auto *OpTy = cast<FixedVectorType>(II.getType()); | ||
unsigned NumMaskElts = MaskTy->getNumElements(); | ||
unsigned NumOperandElts = OpTy->getNumElements(); | ||
|
||
// If we peeked through a shuffle, reapply the shuffle to the bool vector. | ||
if (MaskSrc) { | ||
unsigned NumMaskSrcElts = | ||
cast<FixedVectorType>(MaskSrc->getType())->getNumElements(); | ||
NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IR can have different result and argument vector widths (this patch is mainly to help extract subvector style shuffles after all) - only DAG is fixed |
||
// Multiple mask bits maps to the same operand element - bail out. | ||
if (NumMaskElts > NumOperandElts) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isn't the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't handle NumMaskElts > NumOperandElts (IIRC due to endianess issues) - this is an early out so we don't bother creating an unused shuffle instruction |
||
break; | ||
SmallVector<int> ScaledMask; | ||
if (!llvm::scaleShuffleMaskElts(NumMaskElts, ShuffleMask, ScaledMask)) | ||
break; | ||
BoolVec = IC.Builder.CreateShuffleVector(BoolVec, ScaledMask); | ||
MaskTy = FixedVectorType::get(MaskTy->getElementType(), NumMaskElts); | ||
} | ||
assert(MaskTy->getPrimitiveSizeInBits() == | ||
OpTy->getPrimitiveSizeInBits() && | ||
"Not expecting mask and operands with different sizes"); | ||
unsigned NumMaskElts = MaskTy->getNumElements(); | ||
unsigned NumOperandElts = OpTy->getNumElements(); | ||
|
||
if (NumMaskElts == NumOperandElts) { | ||
return SelectInst::Create(BoolVec, Op1, Op0); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we
peekThroughBitcast
onMask
here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll investigate this as a followup, but I've already tried very hard on VectorCombine to make sure we simplify bitcast(shuffle(bitcast(shuffle()))) chains that we would have to peek through