Skip to content

Commit edaac11

Browse files
authored
[X86] combineSelect - attempt to combine with shuffles (#143753)
Before legalization we will convert the select to a vector_shuffle node, but afterward we can try to combine the select into an existing target shuffle chain.
1 parent 6e5a142 commit edaac11

10 files changed

+3610
-3902
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47785,13 +47785,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4778547785
DL, DAG, Subtarget))
4778647786
return V;
4778747787

47788-
// Convert vselects with constant condition into shuffles.
47789-
if (CondConstantVector && DCI.isBeforeLegalizeOps() &&
47790-
(N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV)) {
47788+
if (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV) {
4779147789
SmallVector<int, 64> Mask;
4779247790
if (createShuffleMaskFromVSELECT(Mask, Cond,
47793-
N->getOpcode() == X86ISD::BLENDV))
47794-
return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
47791+
N->getOpcode() == X86ISD::BLENDV)) {
47792+
// Convert vselects with constant condition into shuffles.
47793+
if (DCI.isBeforeLegalizeOps())
47794+
return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
47795+
47796+
// Attempt to combine as shuffle.
47797+
SDValue Op(N, 0);
47798+
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
47799+
return Res;
47800+
}
4779547801
}
4779647802

4779747803
// fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y))

llvm/test/CodeGen/X86/combine-mask-with-shuffle.ll

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,9 @@ define <16 x i32> @combine_mask_with_abs(<16 x i32> %v0) {
6767
define <16 x i32> @combine_mask_with_umin(<16 x i32> %v0) {
6868
; CHECK-LABEL: combine_mask_with_umin:
6969
; CHECK: # %bb.0:
70-
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
71-
; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
72-
; CHECK-NEXT: kmovw %eax, %k1
73-
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
74-
; CHECK-NEXT: vpminud %zmm2, %zmm1, %zmm1
70+
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
71+
; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
72+
; CHECK-NEXT: vpminud %zmm1, %zmm2, %zmm1
7573
; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
7674
; CHECK-NEXT: kmovw %eax, %k1
7775
; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
@@ -88,11 +86,9 @@ define <16 x i32> @combine_mask_with_umin(<16 x i32> %v0) {
8886
define <16 x i32> @combine_mask_with_umax(<16 x i32> %v0) {
8987
; CHECK-LABEL: combine_mask_with_umax:
9088
; CHECK: # %bb.0:
91-
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
92-
; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
93-
; CHECK-NEXT: kmovw %eax, %k1
94-
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
95-
; CHECK-NEXT: vpmaxud %zmm2, %zmm1, %zmm1
89+
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
90+
; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
91+
; CHECK-NEXT: vpmaxud %zmm1, %zmm2, %zmm1
9692
; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
9793
; CHECK-NEXT: kmovw %eax, %k1
9894
; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
@@ -109,11 +105,9 @@ define <16 x i32> @combine_mask_with_umax(<16 x i32> %v0) {
109105
define <16 x i32> @combine_mask_with_smin(<16 x i32> %v0) {
110106
; CHECK-LABEL: combine_mask_with_smin:
111107
; CHECK: # %bb.0:
112-
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
113-
; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
114-
; CHECK-NEXT: kmovw %eax, %k1
115-
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
116-
; CHECK-NEXT: vpminsd %zmm2, %zmm1, %zmm1
108+
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
109+
; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
110+
; CHECK-NEXT: vpminsd %zmm1, %zmm2, %zmm1
117111
; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
118112
; CHECK-NEXT: kmovw %eax, %k1
119113
; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}
@@ -130,11 +124,9 @@ define <16 x i32> @combine_mask_with_smin(<16 x i32> %v0) {
130124
define <16 x i32> @combine_mask_with_smax(<16 x i32> %v0) {
131125
; CHECK-LABEL: combine_mask_with_smax:
132126
; CHECK: # %bb.0:
133-
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
134-
; CHECK-NEXT: movw $-21846, %ax # imm = 0xAAAA
135-
; CHECK-NEXT: kmovw %eax, %k1
136-
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
137-
; CHECK-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
127+
; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
128+
; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
129+
; CHECK-NEXT: vpmaxsd %zmm1, %zmm2, %zmm1
138130
; CHECK-NEXT: movw $-3856, %ax # imm = 0xF0F0
139131
; CHECK-NEXT: kmovw %eax, %k1
140132
; CHECK-NEXT: vpopcntd %zmm0, %zmm1 {%k1}

llvm/test/CodeGen/X86/pr132844.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,11 @@
44
define { ptr, i8 } @PR132844(<4 x ptr> %0, <4 x ptr> %1) {
55
; CHECK-LABEL: PR132844:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
8-
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
9-
; CHECK-NEXT: movb $10, %al
10-
; CHECK-NEXT: kmovd %eax, %k1
11-
; CHECK-NEXT: vinserti64x2 $1, 16, %ymm2, %ymm0 {%k1}
12-
; CHECK-NEXT: vmovdqu %ymm0, {{[0-9]+}}(%rsp)
7+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
8+
; CHECK-NEXT: vinsertf128 $1, 16, %ymm2, %ymm2
9+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
10+
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7]
11+
; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
1312
; CHECK-NEXT: xorl %eax, %eax
1413
; CHECK-NEXT: xorl %edx, %edx
1514
; CHECK-NEXT: vzeroupper

0 commit comments

Comments
 (0)