Skip to content

Commit 6287b7b

Browse files
committed
[X86] combineEXTRACT_SUBVECTOR - extract 256-bit comparisons if only one subvector is required
If only one subvector extraction will be necessary (e.g. because the other operand is a constant), then extract the source operands and perform the comparison as a 128-bit operation. Ideally DAGCombiner's narrowExtractedVectorBinOp would handle this, but it's tricky to confirm when a target opcode can be safely extracted and performed as a different vector type. Partially improves an outstanding regression in #82290.
1 parent 2703f7e commit 6287b7b

File tree

4 files changed

+35
-16
lines changed

4 files changed

+35
-16
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55789,6 +55789,15 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5578955789
}
5579055790
}
5579155791

55792+
auto IsExtractFree = [](SDValue V) {
55793+
V = peekThroughBitcasts(V);
55794+
if (ISD::isBuildVectorOfConstantSDNodes(V.getNode()))
55795+
return true;
55796+
if (ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()))
55797+
return true;
55798+
return V.isUndef();
55799+
};
55800+
5579255801
// If we're extracting the lowest subvector and we're the only user,
5579355802
// we may be able to perform this with a smaller vector width.
5579455803
unsigned InOpcode = InVec.getOpcode();
@@ -55830,14 +55839,27 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
5583055839
return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
5583155840
}
5583255841
if (IdxVal == 0 && InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() &&
55833-
(VT.is128BitVector() || VT.is256BitVector())) {
55842+
(SizeInBits == 128 || SizeInBits == 256)) {
5583455843
SDValue InVecSrc = InVec.getOperand(0);
5583555844
unsigned Scale = InVecSrc.getValueSizeInBits() / InSizeInBits;
5583655845
SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits);
5583755846
return DAG.getNode(InOpcode, DL, VT, Ext);
5583855847
}
55848+
if ((InOpcode == X86ISD::CMPP || InOpcode == X86ISD::PCMPEQ ||
55849+
InOpcode == X86ISD::PCMPGT) &&
55850+
(IsExtractFree(InVec.getOperand(0)) ||
55851+
IsExtractFree(InVec.getOperand(1))) &&
55852+
SizeInBits == 128) {
55853+
SDValue Ext0 =
55854+
extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
55855+
SDValue Ext1 =
55856+
extractSubVector(InVec.getOperand(1), IdxVal, DAG, DL, SizeInBits);
55857+
if (InOpcode == X86ISD::CMPP)
55858+
return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, InVec.getOperand(2));
55859+
return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1);
55860+
}
5583955861
if (InOpcode == X86ISD::MOVDDUP &&
55840-
(VT.is128BitVector() || VT.is256BitVector())) {
55862+
(SizeInBits == 128 || SizeInBits == 256)) {
5584155863
SDValue Ext0 =
5584255864
extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits);
5584355865
return DAG.getNode(InOpcode, DL, VT, Ext0);

llvm/test/CodeGen/X86/kshift.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -270,11 +270,10 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
270270
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
271271
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
272272
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
273-
; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
274273
; KNL-NEXT: kshiftlw $15, %k0, %k1
275-
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
276-
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
274+
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
277275
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
276+
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
278277
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
279278
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
280279
; KNL-NEXT: kmovw %k0, %eax
@@ -564,14 +563,13 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
564563
; KNL-LABEL: kshiftr_v64i1_63:
565564
; KNL: # %bb.0:
566565
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
567-
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
568-
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
569566
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
567+
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
568+
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
570569
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
571570
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
572571
; KNL-NEXT: kshiftrw $15, %k0, %k1
573-
; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
574-
; KNL-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
572+
; KNL-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
575573
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
576574
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
577575
; KNL-NEXT: kmovw %k0, %eax

llvm/test/CodeGen/X86/pr46455.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
define void @EntryModule(ptr %buffer_table) {
55
; CHECK-LABEL: EntryModule:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
87
; CHECK-NEXT: movq (%rdi), %rax
98
; CHECK-NEXT: movq 24(%rdi), %rcx
10-
; CHECK-NEXT: vcmpneqps (%rax), %ymm0, %ymm0
9+
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
10+
; CHECK-NEXT: vcmpneqps (%rax), %xmm0, %xmm0
1111
; CHECK-NEXT: vpsrld $31, %xmm0, %xmm1
1212
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
1313
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
@@ -16,7 +16,6 @@ define void @EntryModule(ptr %buffer_table) {
1616
; CHECK-NEXT: vpsubd %xmm0, %xmm2, %xmm0
1717
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
1818
; CHECK-NEXT: vmovd %xmm0, (%rcx)
19-
; CHECK-NEXT: vzeroupper
2019
; CHECK-NEXT: retq
2120
entry:
2221
%i1 = load ptr, ptr %buffer_table, align 8

llvm/test/CodeGen/X86/setcc-lowering.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
2121
;
2222
; AVX2-LABEL: pr25080:
2323
; AVX2: # %bb.0: # %entry
24-
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607]
25-
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
26-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27-
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
24+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8388607,8388607,8388607,8388607]
2825
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
26+
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27+
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2929
; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
3030
; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3131
; AVX2-NEXT: vzeroupper

0 commit comments

Comments
 (0)