
Commit 85cf2e8

[X86] combineConcatVectorOps - concatenation of constant subvectors is free.
1 parent 18f1c1a

3 files changed (+210, −205 lines)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 6 deletions
@@ -54378,15 +54378,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
   };
   auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
+    bool AllConstants = true;
+    bool AllSubVectors = true;
     for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
       SDValue Sub = SubOps[I].getOperand(Op);
       unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
-      if (Sub.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
-          Sub.getOperand(0).getValueType() != VT ||
-          Sub.getConstantOperandAPInt(1) != (I * NumSubElts))
-        return false;
-    }
-    return true;
+      SDValue BC = peekThroughBitcasts(Sub);
+      AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
+                      ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
+      AllSubVectors &= Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+                       Sub.getOperand(0).getValueType() == VT &&
+                       Sub.getConstantOperandAPInt(1) == (I * NumSubElts);
+    }
+    return AllConstants || AllSubVectors;
   };
 
   switch (Op0.getOpcode()) {
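
The functional change: IsConcatFree previously returned true only when every operand extracted consecutive subvectors from a single wide source of type VT, so the concatenation just reassembles the original vector. It now also returns true when every operand is (after looking through bitcasts) a build vector of integer or FP constants, since the concatenated result can be materialized as one wide constant-pool load. Below is a minimal standalone C++ sketch of that predicate; SubOp, SubOpKind, and isConcatFree are illustrative stand-ins for this note, not the SelectionDAG API:

    #include <cstddef>
    #include <vector>

    // Hypothetical stand-in for the SelectionDAG operands the real lambda
    // inspects; for illustration only.
    enum class SubOpKind { ConstantVector, ExtractSubvector, Other };

    struct SubOp {
      SubOpKind Kind;
      int SourceId;        // which wide vector an extract reads from
      unsigned ExtractIdx; // element offset of the extract
      unsigned NumElts;    // element count of this subvector
    };

    // Mirrors the new IsConcatFree logic: free if ALL operands are constants
    // (foldable into one wide constant-pool load), or ALL are in-order
    // extracts from the same wide source (the concat is then a no-op).
    bool isConcatFree(const std::vector<SubOp> &SubOps, int WideSourceId) {
      bool AllConstants = true;
      bool AllSubVectors = true;
      for (std::size_t I = 0; I != SubOps.size(); ++I) {
        const SubOp &Sub = SubOps[I];
        AllConstants &= Sub.Kind == SubOpKind::ConstantVector;
        AllSubVectors &= Sub.Kind == SubOpKind::ExtractSubvector &&
                         Sub.SourceId == WideSourceId &&
                         Sub.ExtractIdx == I * Sub.NumElts;
      }
      return AllConstants || AllSubVectors;
    }

    int main() {
      // Two constant halves: free (one wide constant-pool load).
      std::vector<SubOp> Consts = {{SubOpKind::ConstantVector, -1, 0, 4},
                                   {SubOpKind::ConstantVector, -1, 0, 4}};
      // Lo/hi extracts from wide vector #7 at offsets 0 and 4: free (no-op).
      std::vector<SubOp> Extracts = {{SubOpKind::ExtractSubvector, 7, 0, 4},
                                     {SubOpKind::ExtractSubvector, 7, 4, 4}};
      return (isConcatFree(Consts, 7) && isConcatFree(Extracts, 7)) ? 0 : 1;
    }

Note that a mixed operand list (e.g. one constant half and one extracted half) clears both flags and still returns false, so such concatenations are not reported as free. The AVX1 test diffs below show the resulting codegen differences.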

llvm/test/CodeGen/X86/masked_store_trunc_usat.ll

Lines changed: 68 additions & 68 deletions
@@ -531,28 +531,28 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ;
 ; AVX1-LABEL: truncstore_v8i64_v8i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    # xmm4 = mem[0,0]
-; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
-; AVX1-NEXT:    # xmm6 = mem[0,0]
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm7 = [65535,65535]
-; AVX1-NEXT:    # xmm7 = mem[0,0]
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm1, %xmm7, %xmm1
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpxor %xmm3, %xmm6, %xmm7
+; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm7
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm8
+; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm5, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
+; AVX1-NEXT:    vpxor %xmm3, %xmm9, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [65535,65535]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vblendvpd %xmm3, %xmm9, %xmm5, %xmm3
+; AVX1-NEXT:    vblendvpd %xmm8, %xmm1, %xmm5, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm7, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1003,28 +1003,28 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
 ;
 ; AVX1-LABEL: truncstore_v8i64_v8i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    # xmm4 = mem[0,0]
-; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
-; AVX1-NEXT:    # xmm6 = mem[0,0]
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm7 = [255,255]
-; AVX1-NEXT:    # xmm7 = mem[0,0]
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm1, %xmm7, %xmm1
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpxor %xmm3, %xmm6, %xmm7
+; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm7
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm8
+; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm5, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
+; AVX1-NEXT:    vpxor %xmm3, %xmm9, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [255,255]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vblendvpd %xmm3, %xmm9, %xmm5, %xmm3
+; AVX1-NEXT:    vblendvpd %xmm8, %xmm1, %xmm5, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm7, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
@@ -1578,19 +1578,19 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
 ; AVX1-LABEL: truncstore_v4i64_v4i16:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    # xmm4 = mem[0,0]
-; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
-; AVX1-NEXT:    # xmm6 = mem[0,0]
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm7 = [65535,65535]
-; AVX1-NEXT:    # xmm7 = mem[0,0]
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm7, %xmm0
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpxor %xmm3, %xmm6, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [65535,65535]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vblendvpd %xmm3, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
@@ -1860,19 +1860,19 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
 ; AVX1-LABEL: truncstore_v4i64_v4i8:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    # xmm4 = mem[0,0]
-; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
-; AVX1-NEXT:    # xmm6 = mem[0,0]
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm7 = [255,255]
-; AVX1-NEXT:    # xmm7 = mem[0,0]
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm7, %xmm0
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    # xmm3 = mem[0,0]
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpxor %xmm3, %xmm6, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm5 = [255,255]
+; AVX1-NEXT:    # xmm5 = mem[0,0]
+; AVX1-NEXT:    vblendvpd %xmm3, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
