Skip to content

Commit 34cef43

Browse files
committed
[SelectionDAG]: Deduce KnownNeverZero from SMIN and SMAX
1 parent 7cafbc2 commit 34cef43

File tree

2 files changed

+41
-21
lines changed

2 files changed

+41
-21
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5362,10 +5362,38 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
53625362
return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
53635363
isKnownNeverZero(Op.getOperand(0), Depth + 1);
53645364

5365-
// TODO for smin/smax: If either operand is known negative/positive
5365+
// For smin/smax: If either operand is known negative/strictly positive
53665366
// respectively we don't need the other to be known at all.
5367-
case ISD::SMAX:
5368-
case ISD::SMIN:
5367+
case ISD::SMAX: {
5368+
KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
5369+
if (Op1.isStrictlyPositive())
5370+
return true;
5371+
5372+
KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
5373+
if (Op0.isStrictlyPositive())
5374+
return true;
5375+
5376+
if (Op1.isNonZero() && Op0.isNonZero())
5377+
return true;
5378+
5379+
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
5380+
isKnownNeverZero(Op.getOperand(0), Depth + 1);
5381+
}
5382+
case ISD::SMIN: {
5383+
KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
5384+
if (Op1.isNegative())
5385+
return true;
5386+
5387+
KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
5388+
if (Op0.isNegative())
5389+
return true;
5390+
5391+
if (Op1.isNonZero() && Op0.isNonZero())
5392+
return true;
5393+
5394+
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
5395+
isKnownNeverZero(Op.getOperand(0), Depth + 1);
5396+
}
53695397
case ISD::UMIN:
53705398
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
53715399
isKnownNeverZero(Op.getOperand(0), Depth + 1);

llvm/test/CodeGen/X86/known-never-zero.ll

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -462,19 +462,15 @@ define i32 @smin_known_zero(i32 %x, i32 %y) {
462462
; X86-NEXT: cmpl $-54, %eax
463463
; X86-NEXT: movl $-54, %ecx
464464
; X86-NEXT: cmovll %eax, %ecx
465-
; X86-NEXT: bsfl %ecx, %ecx
466-
; X86-NEXT: movl $32, %eax
467-
; X86-NEXT: cmovnel %ecx, %eax
465+
; X86-NEXT: rep bsfl %ecx, %eax
468466
; X86-NEXT: retl
469467
;
470468
; X64-LABEL: smin_known_zero:
471469
; X64: # %bb.0:
472470
; X64-NEXT: cmpl $-54, %edi
473471
; X64-NEXT: movl $-54, %eax
474472
; X64-NEXT: cmovll %edi, %eax
475-
; X64-NEXT: bsfl %eax, %ecx
476-
; X64-NEXT: movl $32, %eax
477-
; X64-NEXT: cmovnel %ecx, %eax
473+
; X64-NEXT: rep bsfl %eax, %eax
478474
; X64-NEXT: retq
479475
%z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
480476
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -508,9 +504,9 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
508504
; X86-NEXT: por %xmm2, %xmm0
509505
; X86-NEXT: pcmpeqd %xmm1, %xmm1
510506
; X86-NEXT: paddd %xmm0, %xmm1
511-
; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
512-
; X86-NEXT: pxor %xmm1, %xmm0
513-
; X86-NEXT: pcmpgtd %xmm1, %xmm0
507+
; X86-NEXT: pand %xmm1, %xmm0
508+
; X86-NEXT: pxor %xmm1, %xmm1
509+
; X86-NEXT: pcmpeqd %xmm1, %xmm0
514510
; X86-NEXT: psrld $31, %xmm0
515511
; X86-NEXT: retl
516512
;
@@ -519,10 +515,10 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
519515
; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
520516
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
521517
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
522-
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
523-
; X64-NEXT: vpminud %xmm1, %xmm0, %xmm1
518+
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
519+
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
524520
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
525-
; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
521+
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
526522
; X64-NEXT: retq
527523
%z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
528524
%r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
@@ -603,19 +599,15 @@ define i32 @smax_maybe_zero(i32 %x, i32 %y) {
603599
; X86-NEXT: cmpl $55, %eax
604600
; X86-NEXT: movl $54, %ecx
605601
; X86-NEXT: cmovgel %eax, %ecx
606-
; X86-NEXT: bsfl %ecx, %ecx
607-
; X86-NEXT: movl $32, %eax
608-
; X86-NEXT: cmovnel %ecx, %eax
602+
; X86-NEXT: rep bsfl %ecx, %eax
609603
; X86-NEXT: retl
610604
;
611605
; X64-LABEL: smax_maybe_zero:
612606
; X64: # %bb.0:
613607
; X64-NEXT: cmpl $55, %edi
614608
; X64-NEXT: movl $54, %eax
615609
; X64-NEXT: cmovgel %edi, %eax
616-
; X64-NEXT: bsfl %eax, %ecx
617-
; X64-NEXT: movl $32, %eax
618-
; X64-NEXT: cmovnel %ecx, %eax
610+
; X64-NEXT: rep bsfl %eax, %eax
619611
; X64-NEXT: retq
620612
%z = call i32 @llvm.smax.i32(i32 %x, i32 54)
621613
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)

0 commit comments

Comments
 (0)