-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] matchBinaryPermuteShuffle - match AVX512 "cross lane" SHLDQ/SRLDQ style patterns using VALIGN #140538
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Very similar to what we do in lowerShuffleAsVALIGN. I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or the test mask was undef) - it can't be used to match zero elements with MaskedVectorIsZero. Noticed while working on #140516 Full diff: https://github.com/llvm/llvm-project/pull/140538.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b6b0d7b86c9c..6f58910e55add 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10096,7 +10096,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
if (Size != (int)ExpectedMask.size())
return false;
assert(llvm::all_of(ExpectedMask,
- [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
+ [Size](int M) {
+ return M == SM_SentinelZero || isInRange(M, 0, 2 * Size);
+ }) &&
"Illegal target shuffle mask");
// Check for out-of-range target shuffle mask indices.
@@ -10119,6 +10121,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
int ExpectedIdx = ExpectedMask[i];
if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
continue;
+ // If we failed to match an expected SM_SentinelZero then early out.
+ if (ExpectedIdx < 0)
+ return false;
if (MaskIdx == SM_SentinelZero) {
// If we need this expected index to be a zero element, then update the
// relevant zero mask and perform the known bits at the end to minimize
@@ -39594,18 +39599,45 @@ static bool matchBinaryPermuteShuffle(
((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
(MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
+ MVT AlignVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits),
+ MaskVT.getSizeInBits() / EltSizeInBits);
if (!isAnyZero(Mask)) {
int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
if (0 < Rotation) {
Shuffle = X86ISD::VALIGN;
- if (EltSizeInBits == 64)
- ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
- else
- ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
+ ShuffleVT = AlignVT;
PermuteImm = Rotation;
return true;
}
}
+ // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
+ unsigned ZeroLo = Zeroable.countr_one();
+ unsigned ZeroHi = Zeroable.countl_one();
+ assert((ZeroLo + ZeroHi) < NumMaskElts && "Zeroable shuffle detected");
+ if (ZeroLo) {
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
+ std::iota(ShiftMask.begin() + ZeroLo, ShiftMask.end(), 0);
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
+ V2 = getZeroVector(AlignVT, Subtarget, DAG, DL);
+ Shuffle = X86ISD::VALIGN;
+ ShuffleVT = AlignVT;
+ PermuteImm = NumMaskElts - ZeroLo;
+ return true;
+ }
+ }
+ if (ZeroHi) {
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
+ std::iota(ShiftMask.begin(), ShiftMask.begin() + NumMaskElts - ZeroHi, ZeroHi);
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
+ V2 = V1;
+ V1 = getZeroVector(AlignVT, Subtarget, DAG, DL);
+ Shuffle = X86ISD::VALIGN;
+ ShuffleVT = AlignVT;
+ PermuteImm = ZeroHi;
+ return true;
+ }
+ }
}
// Attempt to match against PALIGNR byte rotate.
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index b3b90b5f51501..68967c2ce6536 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -812,10 +812,8 @@ define <8 x i64> @combine_vpermt2var_8i64_as_valignq(<8 x i64> %x0, <8 x i64> %x
define <8 x i64> @combine_vpermt2var_8i64_as_valignq_zero(<8 x i64> %x0) {
; CHECK-LABEL: combine_vpermt2var_8i64_as_valignq_zero:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpmovsxbq {{.*#+}} zmm2 = [15,0,1,2,3,4,5,6]
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: valignq {{.*#+}} zmm0 = zmm0[7],zmm1[0,1,2,3,4,5,6]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 15, i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6>, <8 x i64> zeroinitializer, <8 x i64> %x0, i8 -1)
ret <8 x i64> %res0
@@ -825,8 +823,7 @@ define <8 x i64> @combine_vpermt2var_8i64_as_zero_valignq(<8 x i64> %x0) {
; CHECK-LABEL: combine_vpermt2var_8i64_as_zero_valignq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpmovsxbq {{.*#+}} zmm2 = [15,0,1,2,3,4,5,6]
-; CHECK-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: valignq {{.*#+}} zmm0 = zmm1[7],zmm0[0,1,2,3,4,5,6]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 15, i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6>, <8 x i64> %x0, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res0
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
8676e30
to
0e1f9fb
Compare
…DQ patterns using VALIGN Very similar to what we do in lowerShuffleAsVALIGN I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or the test mask was undef) - it can't be used to match zero elements with MaskedVectorIsZero. Noticed while working on llvm#140516
0e1f9fb
to
9a82207
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
…DQ style patterns using VALIGN (llvm#140538) Very similar to what we do in lowerShuffleAsVALIGN I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or the test mask was undef) - it can't be used to match zero elements with MaskedVectorIsZero. Noticed while working on llvm#140516
…DQ style patterns using VALIGN (llvm#140538) Very similar to what we do in lowerShuffleAsVALIGN I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or the test mask was undef) - it can't be used to match zero elements with MaskedVectorIsZero. Noticed while working on llvm#140516
…DQ style patterns using VALIGN (llvm#140538) Very similar to what we do in lowerShuffleAsVALIGN I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or the test mask was undef) - it can't be used to match zero elements with MaskedVectorIsZero. Noticed while working on llvm#140516
Very similar to what we do in lowerShuffleAsVALIGN
I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or the test mask was undef) - it can't be used to match zero elements with MaskedVectorIsZero.
Noticed while working on #140516