Skip to content

Commit ea59d17

Browse files
authored
[X86] getFauxShuffleMask - always match insert_subvector(insert_subvector(undef,sub,0),sub,c) 'subvector splat' patterns (#130115)
The plan is to remove the vXi64 cross lane shuffle constraint entirely, but this special 'splat' case was easy to handle while I fight the remaining regressions.
1 parent 3ccacc4 commit ea59d17

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6153,12 +6153,13 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
61536153
return true;
61546154
}
61556155
// Handle CONCAT(SUB0, SUB1).
6156-
// Limit this to vXi64 vector cases to make the most of cross lane shuffles.
6156+
// Limit to vXi64/splat cases to make the most of cross lane shuffles.
61576157
if (Depth > 0 && InsertIdx == NumSubElts && NumElts == (2 * NumSubElts) &&
6158-
NumBitsPerElt == 64 && Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
6158+
Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
61596159
Src.getOperand(0).isUndef() &&
61606160
Src.getOperand(1).getValueType() == SubVT &&
61616161
Src.getConstantOperandVal(2) == 0 &&
6162+
(NumBitsPerElt == 64 || Src.getOperand(1) == Sub) &&
61626163
SDNode::areOnlyUsersOf({N.getNode(), Src.getNode()}, Sub.getNode())) {
61636164
for (int i = 0; i != (int)NumSubElts; ++i)
61646165
Mask.push_back(i);

llvm/test/CodeGen/X86/vector-partial-undef.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,7 @@ define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
150150
; AVX-LABEL: xor_undef_elts_alt:
151151
; AVX: # %bb.0:
152152
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
153-
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
154-
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
153+
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [2,1,1,0,3,2,0,3]
155154
; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
156155
; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
157156
; AVX-NEXT: retq

0 commit comments

Comments (0)