Commit 0821682

Author: git apple-llvm automerger (committed)

Merge commit 'f7a333401658' from llvm.org/main into next

2 parents 75db9f4 + f7a3334 · commit 0821682

4 files changed: +53 additions, -73 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 29 additions & 0 deletions
@@ -44227,6 +44227,35 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     }
     return false;
   }
+  case X86ISD::BLENDI: {
+    SDValue LHS = Op.getOperand(0);
+    SDValue RHS = Op.getOperand(1);
+    APInt Mask = getBLENDIBlendMask(Op);
+
+    APInt DemandedEltsLHS = OriginalDemandedElts & ~Mask;
+    if (SimplifyDemandedBits(LHS, OriginalDemandedBits, DemandedEltsLHS, Known,
+                             TLO, Depth + 1))
+      return true;
+
+    APInt DemandedEltsRHS = OriginalDemandedElts & Mask;
+    if (SimplifyDemandedBits(RHS, OriginalDemandedBits, DemandedEltsRHS, Known,
+                             TLO, Depth + 1))
+      return true;
+
+    // Attempt to avoid multi-use ops if we don't need anything from them.
+    SDValue NewLHS = SimplifyMultipleUseDemandedBits(
+        LHS, OriginalDemandedBits, DemandedEltsLHS, TLO.DAG, Depth + 1);
+    SDValue NewRHS = SimplifyMultipleUseDemandedBits(
+        RHS, OriginalDemandedBits, DemandedEltsRHS, TLO.DAG, Depth + 1);
+    if (NewLHS || NewRHS) {
+      NewLHS = NewLHS ? NewLHS : LHS;
+      NewRHS = NewRHS ? NewRHS : RHS;
+      return TLO.CombineTo(Op,
+                           TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT,
+                                           NewLHS, NewRHS, Op.getOperand(2)));
+    }
+    break;
+  }
   case X86ISD::BLENDV: {
     SDValue Sel = Op.getOperand(0);
     SDValue LHS = Op.getOperand(1);
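The new BLENDI case uses the blend immediate to split the demanded elements between the two operands: a set bit in the mask selects that element from the second operand, a clear bit from the first. As a minimal standalone sketch of that partition (plain std::bitset rather than LLVM's APInt, with a hypothetical 4-element blend; not LLVM code):

#include <bitset>
#include <iostream>

// Sketch only: models DemandedEltsLHS = Demanded & ~Mask and
// DemandedEltsRHS = Demanded & Mask from the patch, for a hypothetical
// 4-element BLENDI whose immediate selects elements 0 and 2 from RHS.
int main() {
  std::bitset<4> Demanded("1011");  // caller needs elements 0, 1, 3
  std::bitset<4> BlendMask("0101"); // bit set -> element comes from RHS
  std::bitset<4> DemandedLHS = Demanded & ~BlendMask; // elements 1, 3
  std::bitset<4> DemandedRHS = Demanded & BlendMask;  // element 0 only
  std::cout << "LHS " << DemandedLHS << ", RHS " << DemandedRHS << '\n';
  return 0;
}

Narrowing the demand per operand is what lets the later SimplifyMultipleUseDemandedBits calls peel away work that only feeds un-demanded lanes, which is what the test diffs below show.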

llvm/test/CodeGen/X86/combine-movmsk-avx.ll

Lines changed: 11 additions & 57 deletions
@@ -75,39 +75,15 @@ define i1 @movmskps_allof_bitcast_v4f64(<4 x double> %a0) {
 }

 ;
-; TODO - Avoid sign extension ops when just extracting the sign bits.
+; Avoid sign extension ops when just extracting the sign bits.
 ;

 define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
-; VTEST-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX1: # %bb.0:
-; VTEST-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VTEST-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1
-; VTEST-AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; VTEST-AVX1-NEXT: vmovmskpd %ymm0, %eax
-; VTEST-AVX1-NEXT: vzeroupper
-; VTEST-AVX1-NEXT: retq
-;
-; VTEST-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX2: # %bb.0:
-; VTEST-AVX2-NEXT: vmovmskpd %ymm0, %eax
-; VTEST-AVX2-NEXT: vzeroupper
-; VTEST-AVX2-NEXT: retq
-;
-; MOVMSK-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX1: # %bb.0:
-; MOVMSK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; MOVMSK-AVX1-NEXT: vmovmskpd %ymm0, %eax
-; MOVMSK-AVX1-NEXT: vzeroupper
-; MOVMSK-AVX1-NEXT: retq
-;
-; MOVMSK-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX2: # %bb.0:
-; MOVMSK-AVX2-NEXT: vmovmskpd %ymm0, %eax
-; MOVMSK-AVX2-NEXT: vzeroupper
-; MOVMSK-AVX2-NEXT: retq
+; CHECK-LABEL: movmskpd_cmpgt_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovmskpd %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
 %1 = icmp sgt <4 x i64> zeroinitializer, %a0
 %2 = sext <4 x i1> %1 to <4 x i64>
 %3 = bitcast <4 x i64> %2 to <4 x double>
@@ -116,33 +92,11 @@ define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
 }

 define i32 @movmskps_ashr_v8i32(<8 x i32> %a0) {
-; VTEST-AVX1-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX1: # %bb.0:
-; VTEST-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; VTEST-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; VTEST-AVX1-NEXT: vmovmskps %ymm0, %eax
-; VTEST-AVX1-NEXT: vzeroupper
-; VTEST-AVX1-NEXT: retq
-;
-; VTEST-AVX2-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX2: # %bb.0:
-; VTEST-AVX2-NEXT: vmovmskps %ymm0, %eax
-; VTEST-AVX2-NEXT: vzeroupper
-; VTEST-AVX2-NEXT: retq
-;
-; MOVMSK-AVX1-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX1: # %bb.0:
-; MOVMSK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; MOVMSK-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; MOVMSK-AVX1-NEXT: vmovmskps %ymm0, %eax
-; MOVMSK-AVX1-NEXT: vzeroupper
-; MOVMSK-AVX1-NEXT: retq
-;
-; MOVMSK-AVX2-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX2: # %bb.0:
-; MOVMSK-AVX2-NEXT: vmovmskps %ymm0, %eax
-; MOVMSK-AVX2-NEXT: vzeroupper
-; MOVMSK-AVX2-NEXT: retq
+; CHECK-LABEL: movmskps_ashr_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovmskps %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
 %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
 %2 = bitcast <8 x i32> %1 to <8 x float>
 %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
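Both functions collapse to a single vmovmskpd/vmovmskps because the movmsk family reads only each lane's sign bit, and neither the sext of the compare nor the ashr-by-31 changes which sign bits are set; once BLENDI reports its narrowed demanded elements, the extension (and the AVX1 blend emulating it) folds away. A scalar model of the sign-bit argument (my illustration, assuming a hypothetical 4-lane movmsk helper; not LLVM code):

#include <array>
#include <cassert>
#include <cstdint>

// Scalar stand-in for vmovmskpd on 4 x i64 lanes: collect the sign bits.
static uint32_t movmsk4(const std::array<int64_t, 4> &Lanes) {
  uint32_t Mask = 0;
  for (int I = 0; I < 4; ++I)
    Mask |= (uint32_t)(Lanes[I] < 0) << I; // take each lane's sign bit
  return Mask;
}

int main() {
  std::array<int64_t, 4> A = {-7, 3, 0, -1};
  std::array<int64_t, 4> Sext; // sext(icmp sgt 0, a): 0 or -1 per lane
  for (int I = 0; I < 4; ++I)
    Sext[I] = (0 > A[I]) ? -1 : 0;
  // The compare/sext chain doesn't change which sign bits are set.
  assert(movmsk4(Sext) == movmsk4(A));
  return 0;
}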

llvm/test/CodeGen/X86/combine-sdiv.ll

Lines changed: 11 additions & 12 deletions
@@ -2187,15 +2187,14 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
 ; SSE41-NEXT: pxor %xmm0, %xmm0
 ; SSE41-NEXT: pxor %xmm3, %xmm3
 ; SSE41-NEXT: pcmpgtb %xmm1, %xmm3
-; SSE41-NEXT: pxor %xmm4, %xmm4
-; SSE41-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT: paddw %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5],xmm2[6],xmm4[7]
-; SSE41-NEXT: psrlw $8, %xmm2
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,2,2,2,128,2,128]
 ; SSE41-NEXT: psrlw $8, %xmm3
+; SSE41-NEXT: paddw %xmm4, %xmm4
+; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2],xmm2[3,4,5],xmm4[6],xmm2[7]
+; SSE41-NEXT: psrlw $8, %xmm2
 ; SSE41-NEXT: packuswb %xmm3, %xmm2
 ; SSE41-NEXT: paddb %xmm1, %xmm2
 ; SSE41-NEXT: movdqa %xmm2, %xmm0
@@ -2223,15 +2222,15 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3,4,5],xmm4[6],xmm3[7]
-; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,2,2,2,2,128,2,128]
 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2
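The win here comes via the multi-use path: for the lanes the pblendw takes from the sign side, only the high byte of each word survives the trailing psrlw $8, and the high byte of pmovsxbw(x) equals the pcmpgtb-against-zero mask byte, so the zeroed register and the punpcklbw are no longer needed. A quick exhaustive check of that per-byte equivalence (my sketch, not from the commit):

#include <cassert>
#include <cstdint>

int main() {
  for (int B = -128; B <= 127; ++B) {
    int16_t Sext = (int8_t)B;                       // pmovsxbw word lane
    uint8_t HighByte = (uint16_t)Sext >> 8;         // what psrlw $8 keeps
    uint8_t SignMask = (int8_t)B < 0 ? 0xFF : 0x00; // pcmpgtb(0 > x) lane
    assert(HighByte == SignMask);                   // identical for every byte
  }
  return 0;
}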

llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll

Lines changed: 2 additions & 4 deletions
@@ -273,8 +273,7 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
-; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
 ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,16,241,57,27,205,135,187]
@@ -711,8 +710,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm5
-; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm5
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
 ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,16,241,57,27,205,135,187]
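In both hunks the pblendw takes only word 7 from the shifted value, and the following psrlw $8 keeps just the high byte of each word; on those demanded bits (x >> 8) << 7 and x >> 1 agree (both reduce to x >> 9 per 16-bit lane), so the two shifts collapse to one. An exhaustive 16-bit verification of that claim (my sketch):

#include <cassert>
#include <cstdint>

int main() {
  // Per 16-bit lane: after the later psrlw $8 only bits 8..15 matter,
  // and there (x >> 8) << 7 and x >> 1 are both just x >> 9.
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    uint16_t TwoShifts = (uint16_t)(((X & 0xFFFF) >> 8) << 7);
    uint16_t OneShift = (uint16_t)((X & 0xFFFF) >> 1);
    assert((TwoShifts >> 8) == (OneShift >> 8));
  }
  return 0;
}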
