
Commit 8edff6c

[X86][SSE] Added support for combining bit-shifts with shuffles.
Bit-shifts by a whole number of bytes can be represented as a shuffle mask suitable for combining. Added a 'getFauxShuffleMask' function to allow us to create shuffle masks from other suitable operations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288040 91177308-0d34-0410-b5e6-96231b3b80d8
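As a worked illustration of the idea (not part of the patch): for a whole-byte shift, the per-byte mask can be computed directly from the shift amount, mirroring the loops added in getFauxShuffleMask below. The standalone helper name byteShiftMask and the use of -1 for zeroed bytes (standing in for SM_SentinelZero) are assumptions made for this sketch only.

#include <cstdio>
#include <vector>

// Illustrative stand-in for SM_SentinelZero: a byte known to be zero.
static const int kZero = -1;

// Build a per-byte shuffle mask for a vector logical shift by a whole number
// of bytes within each element. IsLeftShift selects VSHLI-style behaviour,
// otherwise VSRLI-style. Hypothetical standalone sketch, not the LLVM API.
static std::vector<int> byteShiftMask(unsigned NumBytes, unsigned NumBytesPerElt,
                                      unsigned ByteShift, bool IsLeftShift) {
  std::vector<int> Mask(NumBytes, kZero);
  for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
    for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) {
      if (IsLeftShift)
        Mask[i + j] = i + j - ByteShift;   // low bytes of each element become zero
      else
        Mask[i + j - ByteShift] = i + j;   // high bytes of each element become zero
    }
  return Mask;
}

int main() {
  // v2i64 logical right shift by 32 bits == 4-byte shift within each 8-byte element.
  // Expected mask: 4 5 6 7 Z Z Z Z 12 13 14 15 Z Z Z Z
  std::vector<int> M = byteShiftMask(/*NumBytes=*/16, /*NumBytesPerElt=*/8,
                                     /*ByteShift=*/4, /*IsLeftShift=*/false);
  for (int Idx : M)
    Idx == kZero ? std::printf("Z ") : std::printf("%d ", Idx);
  std::printf("\n");
  return 0;
}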
1 parent 2ef5665 commit 8edff6c

File tree

4 files changed: +98 −63 lines

lib/Target/X86/X86ISelLowering.cpp
test/CodeGen/X86/vector-sext.ll
test/CodeGen/X86/vector-shuffle-combining-avx2.ll
test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 57 additions & 5 deletions
@@ -5395,8 +5395,9 @@ static bool setTargetShuffleZeroElements(SDValue N,
   bool IsUnary;
   if (!isTargetShuffle(N.getOpcode()))
     return false;
-  if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Ops,
-                            Mask, IsUnary))
+
+  MVT VT = N.getSimpleValueType();
+  if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
     return false;
 
   SDValue V1 = Ops[0];
@@ -5458,9 +5459,61 @@
     }
   }
 
+  assert(VT.getVectorNumElements() == Mask.size() &&
+         "Different mask size from vector size!");
   return true;
 }
 
+// Attempt to decode ops that could be represented as a shuffle mask.
+// The decoded shuffle mask may contain a different number of elements to the
+// destination value type.
+static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
+                               SmallVectorImpl<SDValue> &Ops) {
+  Mask.clear();
+  Ops.clear();
+
+  MVT VT = N.getSimpleValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  unsigned Opcode = N.getOpcode();
+  switch (Opcode) {
+  case X86ISD::VSHLI:
+  case X86ISD::VSRLI: {
+    uint64_t ShiftVal = N.getConstantOperandVal(1);
+    // Out of range bit shifts are guaranteed to be zero.
+    if (VT.getScalarSizeInBits() <= ShiftVal) {
+      Mask.append(NumElts, SM_SentinelZero);
+      return true;
+    }
+
+    // We can only decode 'whole byte' bit shifts as shuffles.
+    if ((ShiftVal % 8) != 0)
+      break;
+
+    uint64_t ByteShift = ShiftVal / 8;
+    unsigned NumBytes = VT.getSizeInBits() / 8;
+    unsigned NumBytesPerElt = VT.getScalarSizeInBits() / 8;
+    Ops.push_back(N.getOperand(0));
+
+    // Clear mask to all zeros and insert the shifted byte indices.
+    Mask.append(NumBytes, SM_SentinelZero);
+
+    if (X86ISD::VSHLI == Opcode) {
+      for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
+          Mask[i + j] = i + j - ByteShift;
+    } else {
+      for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
+          Mask[i + j - ByteShift] = i + j;
+    }
+    return true;
+  }
+  }
+
+  return false;
+}
+
 /// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
 /// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
 /// remaining input indices in case we now have a unary shuffle and adjust the
@@ -5470,7 +5523,8 @@ static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1,
                                        SmallVectorImpl<int> &Mask) {
   SmallVector<SDValue, 2> Ops;
   if (!setTargetShuffleZeroElements(Op, Mask, Ops))
-    return false;
+    if (!getFauxShuffleMask(Op, Mask, Ops))
+      return false;
 
   int NumElts = Mask.size();
   bool Op0InUse = any_of(Mask, [NumElts](int Idx) {
@@ -26299,8 +26353,6 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
     Ops.push_back(Input1);
   }
 
-  assert(VT.getVectorNumElements() == OpMask.size() &&
-         "Different mask size from vector size!");
   assert(((RootMask.size() > OpMask.size() &&
            RootMask.size() % OpMask.size() == 0) ||
          (OpMask.size() > RootMask.size() &&
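The test updates below show the decoded shift mask being folded into an existing PSHUFB by the shuffle combiner. A minimal sketch of how the two byte masks compose, assuming a simple index-through-mask model (composeMasks and the -1 zero sentinel are illustrative, not LLVM APIs); the printed result reproduces the combined mask checked in combine_psrlq_pshufb in the SSSE3 test:

#include <cstdio>
#include <vector>

static const int kZero = -1; // stand-in for a zeroed byte (SM_SentinelZero)

// Compose two byte-shuffle masks: Outer selects bytes from the result of Inner.
// A byte that Inner already zeroed stays zero through the outer shuffle.
static std::vector<int> composeMasks(const std::vector<int> &Outer,
                                     const std::vector<int> &Inner) {
  std::vector<int> Result(Outer.size(), kZero);
  for (size_t i = 0; i != Outer.size(); ++i)
    if (Outer[i] != kZero)
      Result[i] = Inner[Outer[i]];
  return Result;
}

int main() {
  // Faux mask for a v2i64 logical right shift by 48 bits (ByteShift = 6):
  // the top 6 bytes of each 8-byte element are known zero.
  std::vector<int> Shift = {6, 7, kZero, kZero, kZero, kZero, kZero, kZero,
                            14, 15, kZero, kZero, kZero, kZero, kZero, kZero};
  // Original PSHUFB mask from combine_psrlq_pshufb: byte-reverse each i64 element.
  std::vector<int> Pshufb = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
  // Combined mask, matching the updated check:
  // zero x6, [7,6], zero x6, [15,14]
  std::vector<int> Combined = composeMasks(Pshufb, Shift);
  for (int Idx : Combined)
    Idx == kZero ? std::printf("zero ") : std::printf("%d ", Idx);
  std::printf("\n");
  return 0;
}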

test/CodeGen/X86/vector-sext.ll

Lines changed: 29 additions & 34 deletions
@@ -442,20 +442,18 @@ define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp
 ;
 ; SSSE3-LABEL: sext_16i8_to_4i64:
 ; SSSE3:       # BB#0: # %entry
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT:    movdqa %xmm2, %xmm1
-; SSSE3-NEXT:    psrad $31, %xmm1
-; SSSE3-NEXT:    psrad $24, %xmm2
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSSE3-NEXT:    psrld $16, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
-; SSSE3-NEXT:    psrad $31, %xmm0
+; SSSE3-NEXT:    movdqa %xmm0, %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    psrad $31, %xmm2
+; SSSE3-NEXT:    psrad $24, %xmm0
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,2,u,u,u,3,u,u,u],zero,xmm1[u,u,u],zero
+; SSSE3-NEXT:    movdqa %xmm1, %xmm2
+; SSSE3-NEXT:    psrad $31, %xmm2
 ; SSSE3-NEXT:    psrad $24, %xmm1
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: sext_16i8_to_4i64:
@@ -532,34 +530,31 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp
 ;
 ; SSSE3-LABEL: sext_16i8_to_8i64:
 ; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <u,u,u,2,u,u,u,3,u,u,u,255,u,u,u,255>
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm2
-; SSSE3-NEXT:    psrad $31, %xmm2
+; SSSE3-NEXT:    pshufb %xmm2, %xmm1
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    psrad $31, %xmm0
+; SSSE3-NEXT:    psrad $24, %xmm1
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSSE3-NEXT:    movdqa %xmm0, %xmm4
+; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    psrad $24, %xmm0
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
-; SSSE3-NEXT:    psrld $16, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm2
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT:    pshufb %xmm2, %xmm3
+; SSSE3-NEXT:    movdqa %xmm3, %xmm2
 ; SSSE3-NEXT:    psrad $31, %xmm2
-; SSSE3-NEXT:    psrad $24, %xmm1
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    psrad $24, %xmm3
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
 ; SSSE3-NEXT:    movdqa %xmm2, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    psrad $24, %xmm2
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSSE3-NEXT:    psrld $16, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT:    movdqa %xmm3, %xmm4
-; SSSE3-NEXT:    psrad $31, %xmm4
-; SSSE3-NEXT:    psrad $24, %xmm3
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: sext_16i8_to_8i64:

test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Lines changed: 6 additions & 12 deletions
@@ -599,14 +599,12 @@ define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
 define <32 x i8> @combine_psrlw_pshufb(<16 x i16> %a0) {
 ; X32-LABEL: combine_psrlw_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = zero,ymm0[1],zero,ymm0[3],zero,ymm0[5],zero,ymm0[7],zero,ymm0[9],zero,ymm0[11],zero,ymm0[13],zero,ymm0[15],zero,ymm0[17],zero,ymm0[19],zero,ymm0[21],zero,ymm0[23],zero,ymm0[25],zero,ymm0[27],zero,ymm0[29],zero,ymm0[31]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_psrlw_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = zero,ymm0[1],zero,ymm0[3],zero,ymm0[5],zero,ymm0[7],zero,ymm0[9],zero,ymm0[11],zero,ymm0[13],zero,ymm0[15],zero,ymm0[17],zero,ymm0[19],zero,ymm0[21],zero,ymm0[23],zero,ymm0[25],zero,ymm0[27],zero,ymm0[29],zero,ymm0[31]
 ; X64-NEXT:    retq
   %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   %2 = bitcast <16 x i16> %1 to <32 x i8>
@@ -617,14 +615,12 @@ define <32 x i8> @combine_psrlw_pshufb(<16 x i16> %a0) {
 define <32 x i8> @combine_pslld_pshufb(<8 x i32> %a0) {
 ; X32-LABEL: combine_pslld_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpslld $24, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0],zero,zero,zero,ymm0[4],zero,zero,zero,ymm0[8],zero,zero,zero,ymm0[12],zero,zero,zero,ymm0[16],zero,zero,zero,ymm0[20],zero,zero,zero,ymm0[24],zero,zero,zero,ymm0[28],zero,zero,zero
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_pslld_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpslld $24, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0],zero,zero,zero,ymm0[4],zero,zero,zero,ymm0[8],zero,zero,zero,ymm0[12],zero,zero,zero,ymm0[16],zero,zero,zero,ymm0[20],zero,zero,zero,ymm0[24],zero,zero,zero,ymm0[28],zero,zero,zero
 ; X64-NEXT:    retq
   %1 = shl <8 x i32> %a0, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
   %2 = bitcast <8 x i32> %1 to <32 x i8>
@@ -635,14 +631,12 @@ define <32 x i8> @combine_pslld_pshufb(<8 x i32> %a0) {
 define <32 x i8> @combine_psrlq_pshufb(<4 x i64> %a0) {
 ; X32-LABEL: combine_psrlq_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpsrlq $32, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[7,6,5,4],zero,zero,zero,zero,ymm0[15,14,13,12],zero,zero,zero,zero,ymm0[23,22,21],zero,zero,zero,zero,ymm0[31,30,29,28],zero
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_psrlq_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpsrlq $32, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[7,6,5,4],zero,zero,zero,zero,ymm0[15,14,13,12],zero,zero,zero,zero,ymm0[23,22,21],zero,zero,zero,zero,ymm0[31,30,29,28],zero
 ; X64-NEXT:    retq
   %1 = lshr <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
   %2 = bitcast <4 x i64> %1 to <32 x i8>

test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

Lines changed: 6 additions & 12 deletions
@@ -412,14 +412,12 @@ define <16 x i8> @combine_pshufb_as_unary_unpckhwd(<16 x i8> %a0) {
 define <16 x i8> @combine_psrlw_pshufb(<8 x i16> %a0) {
 ; SSE-LABEL: combine_psrlw_pshufb:
 ; SSE:       # BB#0:
-; SSE-NEXT:    psrlw $8, %xmm0
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_psrlw_pshufb:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX-NEXT:    retq
   %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   %2 = bitcast <8 x i16> %1 to <16 x i8>
@@ -430,14 +428,12 @@ define <16 x i8> @combine_psrlw_pshufb(<8 x i16> %a0) {
 define <16 x i8> @combine_pslld_pshufb(<4 x i32> %a0) {
 ; SSE-LABEL: combine_pslld_pshufb:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pslld $8, %xmm0
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,1,0],zero,xmm0[6,5,4],zero,xmm0[10,9,8],zero,xmm0[14,13,12],zero
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_pslld_pshufb:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpslld $8, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,1,0],zero,xmm0[6,5,4],zero,xmm0[10,9,8],zero,xmm0[14,13,12],zero
 ; AVX-NEXT:    retq
   %1 = shl <4 x i32> %a0, <i32 8, i32 8, i32 8, i32 8>
   %2 = bitcast <4 x i32> %1 to <16 x i8>
@@ -448,14 +444,12 @@ define <16 x i8> @combine_pslld_pshufb(<4 x i32> %a0) {
 define <16 x i8> @combine_psrlq_pshufb(<2 x i64> %a0) {
 ; SSE-LABEL: combine_psrlq_pshufb:
 ; SSE:       # BB#0:
-; SSE-NEXT:    psrlq $48, %xmm0
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[7,6],zero,zero,zero,zero,zero,zero,xmm0[15,14]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_psrlq_pshufb:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[7,6],zero,zero,zero,zero,zero,zero,xmm0[15,14]
 ; AVX-NEXT:    retq
   %1 = lshr <2 x i64> %a0, <i64 48, i64 48>
   %2 = bitcast <2 x i64> %1 to <16 x i8>
