Skip to content

Commit 4ed452b

Browse files
committed
[X86] getFauxShuffleMask - handle insert_subvector(src, bitcast(extract_subvector(sub))) patterns
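Add bitcast handling to the existing insert_subvector(src, extract_subvector(sub)) pattern by peeking through bitcasts on the inserted subvector and rescaling the insert/extract indices to a common element count, and recognise undef src cases so that vector widening patterns can be detected.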
1 parent 2583946 commit 4ed452b

File tree

2 files changed

+45
-24
lines changed

2 files changed

+45
-24
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5646,17 +5646,28 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
56465646
unsigned NumSubElts = SubVT.getVectorNumElements();
56475647
if (!N->isOnlyUserOf(Sub.getNode()))
56485648
return false;
5649+
SDValue SubBC = peekThroughBitcasts(Sub);
56495650
uint64_t InsertIdx = N.getConstantOperandVal(2);
56505651
// Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
5651-
if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5652-
Sub.getOperand(0).getValueType() == VT) {
5653-
uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
5654-
for (int i = 0; i != (int)NumElts; ++i)
5655-
Mask.push_back(i);
5652+
if (SubBC.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5653+
SubBC.getOperand(0).getValueSizeInBits() == NumSizeInBits) {
5654+
uint64_t ExtractIdx = SubBC.getConstantOperandVal(1);
5655+
SDValue SubBCSrc = SubBC.getOperand(0);
5656+
unsigned NumSubSrcBCElts = SubBCSrc.getValueType().getVectorNumElements();
5657+
unsigned MaxElts = std::max(NumElts, NumSubSrcBCElts);
5658+
assert((MaxElts % NumElts) == 0 && (MaxElts % NumSubSrcBCElts) == 0 &&
5659+
"Subvector valuetype mismatch");
5660+
InsertIdx *= (MaxElts / NumElts);
5661+
ExtractIdx *= (MaxElts / NumSubSrcBCElts);
5662+
NumSubElts *= (MaxElts / NumElts);
5663+
bool SrcIsUndef = Src.isUndef();
5664+
for (int i = 0; i != (int)MaxElts; ++i)
5665+
Mask.push_back(SrcIsUndef ? SM_SentinelUndef : i);
56565666
for (int i = 0; i != (int)NumSubElts; ++i)
5657-
Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
5658-
Ops.push_back(Src);
5659-
Ops.push_back(Sub.getOperand(0));
5667+
Mask[InsertIdx + i] = (SrcIsUndef ? 0 : MaxElts) + ExtractIdx + i;
5668+
if (!SrcIsUndef)
5669+
Ops.push_back(Src);
5670+
Ops.push_back(SubBCSrc);
56605671
return true;
56615672
}
56625673
// Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).

llvm/test/CodeGen/X86/vector-trunc-packus.ll

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -873,9 +873,8 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) {
873873
; SSE41-NEXT: pand %xmm5, %xmm0
874874
; SSE41-NEXT: por %xmm4, %xmm0
875875
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
876-
; SSE41-NEXT: packusdw %xmm1, %xmm1
877-
; SSE41-NEXT: packusdw %xmm1, %xmm1
878-
; SSE41-NEXT: movdqa %xmm1, %xmm0
876+
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
877+
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
879878
; SSE41-NEXT: retq
880879
;
881880
; AVX1-LABEL: trunc_packus_v2i64_v2i16:
@@ -887,21 +886,32 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) {
887886
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
888887
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
889888
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
890-
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
891-
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
889+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
890+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
892891
; AVX1-NEXT: retq
893892
;
894-
; AVX2-LABEL: trunc_packus_v2i64_v2i16:
895-
; AVX2: # %bb.0:
896-
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
897-
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
898-
; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
899-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
900-
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
901-
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
902-
; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
903-
; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
904-
; AVX2-NEXT: retq
893+
; AVX2-SLOW-LABEL: trunc_packus_v2i64_v2i16:
894+
; AVX2-SLOW: # %bb.0:
895+
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
896+
; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
897+
; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
898+
; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
899+
; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
900+
; AVX2-SLOW-NEXT: vpand %xmm0, %xmm1, %xmm0
901+
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
902+
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
903+
; AVX2-SLOW-NEXT: retq
904+
;
905+
; AVX2-FAST-LABEL: trunc_packus_v2i64_v2i16:
906+
; AVX2-FAST: # %bb.0:
907+
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} xmm1 = [65535,65535]
908+
; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
909+
; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
910+
; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
911+
; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1
912+
; AVX2-FAST-NEXT: vpand %xmm0, %xmm1, %xmm0
913+
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
914+
; AVX2-FAST-NEXT: retq
905915
;
906916
; AVX512F-LABEL: trunc_packus_v2i64_v2i16:
907917
; AVX512F: # %bb.0:

0 commit comments

Comments
 (0)