Skip to content

Commit a6d289d

Browse files
authored
[X86] Add checkSignTestSetCCCombine - if X86ISD::CMP/OR is testing for signbits, attempt to test for the signbit source instead. (#97433)
There's a lot more we could do here (including the reverse fold back to X86::COND_S/NS with some other X86ISD nodes), but I wanted to address the MOVMSK issue first. Fixes #66191.
1 parent 1eec81a commit a6d289d

File tree

3 files changed

+117
-83
lines changed

3 files changed

+117
-83
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46433,6 +46433,62 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
4643346433
return LockOp;
4643446434
}
4643546435

46436+
// Check whether we're just testing the signbit, and whether we can simplify
46437+
// this by tracking where the signbit came from.
//
// \param Cmp  The flag-producing value being tested; only X86ISD::CMP and
//             X86ISD::OR are handled, and only when Cmp has a single use.
// \param CC   The condition code applied to Cmp. Only COND_S / COND_NS are
//             handled. On success it is rewritten in place: COND_S becomes
//             COND_NE and COND_NS becomes COND_E.
// \param DAG  Used for the signbit / shift-amount queries and to build the
//             replacement nodes.
// \returns    A new X86ISD::CMP(AND(Src, BitMask), 0) that tests only the bit
//             the signbit originated from, or an empty SDValue() (with CC left
//             unchanged) when the pattern does not match.
46438+
static SDValue checkSignTestSetCCCombine(SDValue Cmp, X86::CondCode &CC,
46439+
SelectionDAG &DAG) {
46440+
// Only sign / not-sign tests are candidates for this fold.
if (CC != X86::COND_S && CC != X86::COND_NS)
46441+
return SDValue();
46442+
46443+
// Bail out if the tested value has any other user.
if (!Cmp.hasOneUse())
46444+
return SDValue();
46445+
46446+
// Src ends up holding the value whose bit actually determines the sign test.
SDValue Src;
46447+
if (Cmp.getOpcode() == X86ISD::CMP) {
46448+
// CMP(X,0) -> signbit test
46449+
if (!isNullConstant(Cmp.getOperand(1)))
46450+
return SDValue();
46451+
Src = Cmp.getOperand(0);
46452+
// Peek through a SRA node as we just need the signbit.
// Note: this is currently a requirement, not an option - anything other than
// a one-use SRA bails out below.
46453+
// TODO: Remove one use limit once sdiv-fix regressions are fixed.
46454+
// TODO: Use SimplifyDemandedBits instead of just SRA?
46455+
if (Src.getOpcode() != ISD::SRA || !Src.hasOneUse())
46456+
return SDValue();
46457+
Src = Src.getOperand(0);
46458+
} else if (Cmp.getOpcode() == X86ISD::OR) {
46459+
// OR(X,Y) -> see if only one operand contributes to the signbit.
// If one operand's signbit is known zero, the signbit of the OR is exactly
// the signbit of the other operand.
46460+
// TODO: XOR(X,Y) -> see if only one operand contributes to the signbit.
46461+
if (DAG.SignBitIsZero(Cmp.getOperand(0)))
46462+
Src = Cmp.getOperand(1);
46463+
else if (DAG.SignBitIsZero(Cmp.getOperand(1)))
46464+
Src = Cmp.getOperand(0);
46465+
else
46466+
return SDValue();
46467+
} else {
46468+
// Any other flag producer is not handled here.
return SDValue();
46469+
}
46470+
46471+
// Replace with a TEST on the MSB.
46472+
SDLoc DL(Cmp);
46473+
MVT SrcVT = Src.getSimpleValueType();
46474+
// Start with a mask selecting only the signbit of Src's scalar type.
APInt BitMask = APInt::getSignMask(SrcVT.getScalarSizeInBits());
46475+
46476+
// If Src came from a SHL (probably from an expanded SIGN_EXTEND_INREG), then
46477+
// peek through and adjust the TEST bit.
// The signbit of (X << C) is bit (MSB - C) of X, so shifting the mask right by
// C tests the same bit on the unshifted value (e.g. an 8-bit value shifted
// left by 6 is tested with mask 0x02).
46478+
if (Src.getOpcode() == ISD::SHL) {
46479+
// Only adjust when getValidShiftAmount() yields an amount; otherwise Src and
// BitMask are left as they are and the SHL itself is tested.
if (std::optional<uint64_t> ShiftAmt = DAG.getValidShiftAmount(Src)) {
46480+
Src = Src.getOperand(0);
46481+
BitMask.lshrInPlace(*ShiftAmt);
46482+
}
46483+
}
46484+
46485+
// Isolate the single bit of interest.
SDValue Mask = DAG.getNode(ISD::AND, DL, SrcVT, Src,
46486+
DAG.getConstant(BitMask, DL, SrcVT));
46487+
// "sign set" becomes "masked bit != 0"; "sign clear" becomes "masked bit == 0".
CC = CC == X86::COND_S ? X86::COND_NE : X86::COND_E;
46488+
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Mask,
46489+
DAG.getConstant(0, DL, SrcVT));
46490+
}
}
46491+
4643646492
// Check whether a boolean test is testing a boolean value generated by
4643746493
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
4643846494
// code.
@@ -47072,6 +47128,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
4707247128
if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
4707347129
return Flags;
4707447130

47131+
if (SDValue R = checkSignTestSetCCCombine(EFLAGS, CC, DAG))
47132+
return R;
47133+
4707547134
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
4707647135
return R;
4707747136

llvm/test/CodeGen/X86/is_fpclass-fp80.ll

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -363,19 +363,18 @@ define i1 @is_posnormal_f80(x86_fp80 %x) {
363363
; CHECK-32-NEXT: pushl %esi
364364
; CHECK-32-NEXT: .cfi_def_cfa_offset 8
365365
; CHECK-32-NEXT: .cfi_offset %esi, -8
366-
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
367-
; CHECK-32-NEXT: movswl %dx, %ecx
368-
; CHECK-32-NEXT: sarl $15, %ecx
369366
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
367+
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
368+
; CHECK-32-NEXT: movl %ecx, %edx
370369
; CHECK-32-NEXT: andl $32767, %edx # imm = 0x7FFF
371370
; CHECK-32-NEXT: decl %edx
372371
; CHECK-32-NEXT: movzwl %dx, %edx
373372
; CHECK-32-NEXT: xorl %esi, %esi
374373
; CHECK-32-NEXT: cmpl $32766, %edx # imm = 0x7FFE
375374
; CHECK-32-NEXT: sbbl %esi, %esi
376375
; CHECK-32-NEXT: setb %dl
377-
; CHECK-32-NEXT: testl %ecx, %ecx
378-
; CHECK-32-NEXT: setns %cl
376+
; CHECK-32-NEXT: testl $32768, %ecx # imm = 0x8000
377+
; CHECK-32-NEXT: sete %cl
379378
; CHECK-32-NEXT: shrl $31, %eax
380379
; CHECK-32-NEXT: andb %cl, %al
381380
; CHECK-32-NEXT: andb %dl, %al
@@ -411,19 +410,18 @@ define i1 @is_negnormal_f80(x86_fp80 %x) {
411410
; CHECK-32-NEXT: pushl %esi
412411
; CHECK-32-NEXT: .cfi_def_cfa_offset 8
413412
; CHECK-32-NEXT: .cfi_offset %esi, -8
414-
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
415-
; CHECK-32-NEXT: movswl %dx, %ecx
416-
; CHECK-32-NEXT: sarl $15, %ecx
417413
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
414+
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
415+
; CHECK-32-NEXT: movl %ecx, %edx
418416
; CHECK-32-NEXT: andl $32767, %edx # imm = 0x7FFF
419417
; CHECK-32-NEXT: decl %edx
420418
; CHECK-32-NEXT: movzwl %dx, %edx
421419
; CHECK-32-NEXT: xorl %esi, %esi
422420
; CHECK-32-NEXT: cmpl $32766, %edx # imm = 0x7FFE
423421
; CHECK-32-NEXT: sbbl %esi, %esi
424422
; CHECK-32-NEXT: setb %dl
425-
; CHECK-32-NEXT: testl %ecx, %ecx
426-
; CHECK-32-NEXT: sets %cl
423+
; CHECK-32-NEXT: testl $32768, %ecx # imm = 0x8000
424+
; CHECK-32-NEXT: setne %cl
427425
; CHECK-32-NEXT: shrl $31, %eax
428426
; CHECK-32-NEXT: andb %cl, %al
429427
; CHECK-32-NEXT: andb %dl, %al
@@ -543,24 +541,23 @@ define i1 @is_negsubnormal_f80(x86_fp80 %x) {
543541
; CHECK-32-NEXT: .cfi_def_cfa_offset 12
544542
; CHECK-32-NEXT: .cfi_offset %esi, -12
545543
; CHECK-32-NEXT: .cfi_offset %edi, -8
546-
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
547-
; CHECK-32-NEXT: movswl %cx, %eax
548-
; CHECK-32-NEXT: sarl $15, %eax
549-
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi
550544
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edi
545+
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
546+
; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
547+
; CHECK-32-NEXT: movl %eax, %ecx
551548
; CHECK-32-NEXT: andl $32767, %ecx # imm = 0x7FFF
552-
; CHECK-32-NEXT: xorl %edx, %edx
553-
; CHECK-32-NEXT: addl $-1, %esi
554-
; CHECK-32-NEXT: adcl $-1, %edi
555-
; CHECK-32-NEXT: adcl $-1, %ecx
549+
; CHECK-32-NEXT: xorl %esi, %esi
550+
; CHECK-32-NEXT: addl $-1, %edi
556551
; CHECK-32-NEXT: adcl $-1, %edx
557-
; CHECK-32-NEXT: cmpl $-1, %esi
558-
; CHECK-32-NEXT: sbbl $2147483647, %edi # imm = 0x7FFFFFFF
552+
; CHECK-32-NEXT: adcl $-1, %ecx
553+
; CHECK-32-NEXT: adcl $-1, %esi
554+
; CHECK-32-NEXT: cmpl $-1, %edi
555+
; CHECK-32-NEXT: sbbl $2147483647, %edx # imm = 0x7FFFFFFF
559556
; CHECK-32-NEXT: sbbl $0, %ecx
560-
; CHECK-32-NEXT: sbbl $0, %edx
557+
; CHECK-32-NEXT: sbbl $0, %esi
561558
; CHECK-32-NEXT: setb %cl
562-
; CHECK-32-NEXT: testl %eax, %eax
563-
; CHECK-32-NEXT: sets %al
559+
; CHECK-32-NEXT: testl $32768, %eax # imm = 0x8000
560+
; CHECK-32-NEXT: setne %al
564561
; CHECK-32-NEXT: andb %cl, %al
565562
; CHECK-32-NEXT: popl %esi
566563
; CHECK-32-NEXT: .cfi_def_cfa_offset 8

llvm/test/CodeGen/X86/movmsk-bittest.ll

Lines changed: 38 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,16 @@ define i32 @movmsk_slt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
3737
; SSE: # %bb.0:
3838
; SSE-NEXT: movl %edi, %eax
3939
; SSE-NEXT: movmskpd %xmm0, %ecx
40-
; SSE-NEXT: shlb $6, %cl
41-
; SSE-NEXT: sarb $6, %cl
42-
; SSE-NEXT: cmovnsl %esi, %eax
40+
; SSE-NEXT: testb $2, %cl
41+
; SSE-NEXT: cmovel %esi, %eax
4342
; SSE-NEXT: retq
4443
;
4544
; AVX-LABEL: movmsk_slt_v2i64_1:
4645
; AVX: # %bb.0:
4746
; AVX-NEXT: movl %edi, %eax
4847
; AVX-NEXT: vmovmskpd %xmm0, %ecx
49-
; AVX-NEXT: shlb $6, %cl
50-
; AVX-NEXT: sarb $6, %cl
51-
; AVX-NEXT: cmovnsl %esi, %eax
48+
; AVX-NEXT: testb $2, %cl
49+
; AVX-NEXT: cmovel %esi, %eax
5250
; AVX-NEXT: retq
5351
%cmp = icmp slt <2 x i64> %v, zeroinitializer
5452
%msk = bitcast <2 x i1> %cmp to i2
@@ -62,18 +60,16 @@ define i32 @movmsk_sgt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
6260
; SSE: # %bb.0:
6361
; SSE-NEXT: movl %edi, %eax
6462
; SSE-NEXT: movmskpd %xmm0, %ecx
65-
; SSE-NEXT: shlb $6, %cl
66-
; SSE-NEXT: sarb $6, %cl
67-
; SSE-NEXT: cmovsl %esi, %eax
63+
; SSE-NEXT: testb $2, %cl
64+
; SSE-NEXT: cmovnel %esi, %eax
6865
; SSE-NEXT: retq
6966
;
7067
; AVX-LABEL: movmsk_sgt_v2i64_1:
7168
; AVX: # %bb.0:
7269
; AVX-NEXT: movl %edi, %eax
7370
; AVX-NEXT: vmovmskpd %xmm0, %ecx
74-
; AVX-NEXT: shlb $6, %cl
75-
; AVX-NEXT: sarb $6, %cl
76-
; AVX-NEXT: cmovsl %esi, %eax
71+
; AVX-NEXT: testb $2, %cl
72+
; AVX-NEXT: cmovnel %esi, %eax
7773
; AVX-NEXT: retq
7874
%cmp = icmp slt <2 x i64> %v, zeroinitializer
7975
%msk = bitcast <2 x i1> %cmp to i2
@@ -111,18 +107,16 @@ define i32 @movmsk_slt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
111107
; SSE: # %bb.0:
112108
; SSE-NEXT: movl %edi, %eax
113109
; SSE-NEXT: movmskps %xmm0, %ecx
114-
; SSE-NEXT: shlb $4, %cl
115-
; SSE-NEXT: sarb $4, %cl
116-
; SSE-NEXT: cmovnsl %esi, %eax
110+
; SSE-NEXT: testb $8, %cl
111+
; SSE-NEXT: cmovel %esi, %eax
117112
; SSE-NEXT: retq
118113
;
119114
; AVX-LABEL: movmsk_slt_v4i32_3:
120115
; AVX: # %bb.0:
121116
; AVX-NEXT: movl %edi, %eax
122117
; AVX-NEXT: vmovmskps %xmm0, %ecx
123-
; AVX-NEXT: shlb $4, %cl
124-
; AVX-NEXT: sarb $4, %cl
125-
; AVX-NEXT: cmovnsl %esi, %eax
118+
; AVX-NEXT: testb $8, %cl
119+
; AVX-NEXT: cmovel %esi, %eax
126120
; AVX-NEXT: retq
127121
%cmp = icmp slt <4 x i32> %v, zeroinitializer
128122
%msk = bitcast <4 x i1> %cmp to i4
@@ -136,18 +130,16 @@ define i32 @movmsk_sgt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
136130
; SSE: # %bb.0:
137131
; SSE-NEXT: movl %edi, %eax
138132
; SSE-NEXT: movmskps %xmm0, %ecx
139-
; SSE-NEXT: shlb $4, %cl
140-
; SSE-NEXT: sarb $4, %cl
141-
; SSE-NEXT: cmovsl %esi, %eax
133+
; SSE-NEXT: testb $8, %cl
134+
; SSE-NEXT: cmovnel %esi, %eax
142135
; SSE-NEXT: retq
143136
;
144137
; AVX-LABEL: movmsk_sgt_v4i32_3:
145138
; AVX: # %bb.0:
146139
; AVX-NEXT: movl %edi, %eax
147140
; AVX-NEXT: vmovmskps %xmm0, %ecx
148-
; AVX-NEXT: shlb $4, %cl
149-
; AVX-NEXT: sarb $4, %cl
150-
; AVX-NEXT: cmovsl %esi, %eax
141+
; AVX-NEXT: testb $8, %cl
142+
; AVX-NEXT: cmovnel %esi, %eax
151143
; AVX-NEXT: retq
152144
%cmp = icmp slt <4 x i32> %v, zeroinitializer
153145
%msk = bitcast <4 x i1> %cmp to i4
@@ -256,20 +248,17 @@ define i32 @movmsk_slt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
256248
; SSE-LABEL: movmsk_slt_v4i64_3:
257249
; SSE: # %bb.0:
258250
; SSE-NEXT: movl %edi, %eax
259-
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
260-
; SSE-NEXT: movmskps %xmm0, %ecx
261-
; SSE-NEXT: shlb $4, %cl
262-
; SSE-NEXT: sarb $4, %cl
263-
; SSE-NEXT: cmovnsl %esi, %eax
251+
; SSE-NEXT: movmskps %xmm1, %ecx
252+
; SSE-NEXT: testb $8, %cl
253+
; SSE-NEXT: cmovel %esi, %eax
264254
; SSE-NEXT: retq
265255
;
266256
; AVX-LABEL: movmsk_slt_v4i64_3:
267257
; AVX: # %bb.0:
268258
; AVX-NEXT: movl %edi, %eax
269259
; AVX-NEXT: vmovmskpd %ymm0, %ecx
270-
; AVX-NEXT: shlb $4, %cl
271-
; AVX-NEXT: sarb $4, %cl
272-
; AVX-NEXT: cmovnsl %esi, %eax
260+
; AVX-NEXT: testb $8, %cl
261+
; AVX-NEXT: cmovel %esi, %eax
273262
; AVX-NEXT: vzeroupper
274263
; AVX-NEXT: retq
275264
%cmp = icmp slt <4 x i64> %v, zeroinitializer
@@ -283,20 +272,17 @@ define i32 @movmsk_sgt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
283272
; SSE-LABEL: movmsk_sgt_v4i64_3:
284273
; SSE: # %bb.0:
285274
; SSE-NEXT: movl %edi, %eax
286-
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
287-
; SSE-NEXT: movmskps %xmm0, %ecx
288-
; SSE-NEXT: shlb $4, %cl
289-
; SSE-NEXT: sarb $4, %cl
290-
; SSE-NEXT: cmovsl %esi, %eax
275+
; SSE-NEXT: movmskps %xmm1, %ecx
276+
; SSE-NEXT: testb $8, %cl
277+
; SSE-NEXT: cmovnel %esi, %eax
291278
; SSE-NEXT: retq
292279
;
293280
; AVX-LABEL: movmsk_sgt_v4i64_3:
294281
; AVX: # %bb.0:
295282
; AVX-NEXT: movl %edi, %eax
296283
; AVX-NEXT: vmovmskpd %ymm0, %ecx
297-
; AVX-NEXT: shlb $4, %cl
298-
; AVX-NEXT: sarb $4, %cl
299-
; AVX-NEXT: cmovsl %esi, %eax
284+
; AVX-NEXT: testb $8, %cl
285+
; AVX-NEXT: cmovnel %esi, %eax
300286
; AVX-NEXT: vzeroupper
301287
; AVX-NEXT: retq
302288
%cmp = icmp slt <4 x i64> %v, zeroinitializer
@@ -487,22 +473,18 @@ define i32 @movmsk_slt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
487473
; SSE-LABEL: movmsk_slt_v32i8_31:
488474
; SSE: # %bb.0:
489475
; SSE-NEXT: movl %edi, %eax
490-
; SSE-NEXT: pmovmskb %xmm0, %ecx
491-
; SSE-NEXT: pmovmskb %xmm1, %edx
492-
; SSE-NEXT: shll $16, %edx
493-
; SSE-NEXT: orl %ecx, %edx
494-
; SSE-NEXT: cmovnsl %esi, %eax
476+
; SSE-NEXT: pmovmskb %xmm1, %ecx
477+
; SSE-NEXT: testl $32768, %ecx # imm = 0x8000
478+
; SSE-NEXT: cmovel %esi, %eax
495479
; SSE-NEXT: retq
496480
;
497481
; AVX1-LABEL: movmsk_slt_v32i8_31:
498482
; AVX1: # %bb.0:
499483
; AVX1-NEXT: movl %edi, %eax
500-
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
501484
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
502-
; AVX1-NEXT: vpmovmskb %xmm0, %edx
503-
; AVX1-NEXT: shll $16, %edx
504-
; AVX1-NEXT: orl %ecx, %edx
505-
; AVX1-NEXT: cmovnsl %esi, %eax
485+
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
486+
; AVX1-NEXT: testl $32768, %ecx # imm = 0x8000
487+
; AVX1-NEXT: cmovel %esi, %eax
506488
; AVX1-NEXT: vzeroupper
507489
; AVX1-NEXT: retq
508490
;
@@ -534,22 +516,18 @@ define i32 @movmsk_sgt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
534516
; SSE-LABEL: movmsk_sgt_v32i8_31:
535517
; SSE: # %bb.0:
536518
; SSE-NEXT: movl %edi, %eax
537-
; SSE-NEXT: pmovmskb %xmm0, %ecx
538-
; SSE-NEXT: pmovmskb %xmm1, %edx
539-
; SSE-NEXT: shll $16, %edx
540-
; SSE-NEXT: orl %ecx, %edx
541-
; SSE-NEXT: cmovsl %esi, %eax
519+
; SSE-NEXT: pmovmskb %xmm1, %ecx
520+
; SSE-NEXT: testl $32768, %ecx # imm = 0x8000
521+
; SSE-NEXT: cmovnel %esi, %eax
542522
; SSE-NEXT: retq
543523
;
544524
; AVX1-LABEL: movmsk_sgt_v32i8_31:
545525
; AVX1: # %bb.0:
546526
; AVX1-NEXT: movl %edi, %eax
547-
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
548527
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
549-
; AVX1-NEXT: vpmovmskb %xmm0, %edx
550-
; AVX1-NEXT: shll $16, %edx
551-
; AVX1-NEXT: orl %ecx, %edx
552-
; AVX1-NEXT: cmovsl %esi, %eax
528+
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
529+
; AVX1-NEXT: testl $32768, %ecx # imm = 0x8000
530+
; AVX1-NEXT: cmovnel %esi, %eax
553531
; AVX1-NEXT: vzeroupper
554532
; AVX1-NEXT: retq
555533
;

0 commit comments

Comments
 (0)