Skip to content

Commit 238c1e2

Browse files
committed
[SelectionDAG]: Deduce KnownNeverZero from SMIN and SMAX
1 parent 5a2bacf commit 238c1e2

File tree

2 files changed

+41
-21
lines changed

2 files changed

+41
-21
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5362,10 +5362,38 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
53625362
return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
53635363
isKnownNeverZero(Op.getOperand(0), Depth + 1);
53645364

5365-
// TODO for smin/smax: If either operand is known negative/positive
5365+
// For smin/smax: If either operand is known negative/positive
53665366
// respectively, the result is known non-zero and we don't need the other to be known at all.
5367-
case ISD::SMAX:
5368-
case ISD::SMIN:
5367+
case ISD::SMAX: {
5368+
KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
5369+
if (Op1.isStrictlyPositive())
5370+
return true;
5371+
5372+
KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
5373+
if (Op0.isStrictlyPositive())
5374+
return true;
5375+
5376+
if (Op1.isNonZero() && Op0.isNonZero())
5377+
return true;
5378+
5379+
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
5380+
isKnownNeverZero(Op.getOperand(0), Depth + 1);
5381+
}
5382+
case ISD::SMIN: {
5383+
KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
5384+
if (Op1.isNegative())
5385+
return true;
5386+
5387+
KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
5388+
if (Op0.isNegative())
5389+
return true;
5390+
5391+
if (Op1.isNonZero() && Op0.isNonZero())
5392+
return true;
5393+
5394+
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
5395+
isKnownNeverZero(Op.getOperand(0), Depth + 1);
5396+
}
53695397
case ISD::UMIN:
53705398
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
53715399
isKnownNeverZero(Op.getOperand(0), Depth + 1);

llvm/test/CodeGen/X86/known-never-zero.ll

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -452,19 +452,15 @@ define i32 @smin_known_zero(i32 %x, i32 %y) {
452452
; X86-NEXT: cmpl $-54, %eax
453453
; X86-NEXT: movl $-54, %ecx
454454
; X86-NEXT: cmovll %eax, %ecx
455-
; X86-NEXT: bsfl %ecx, %ecx
456-
; X86-NEXT: movl $32, %eax
457-
; X86-NEXT: cmovnel %ecx, %eax
455+
; X86-NEXT: rep bsfl %ecx, %eax
458456
; X86-NEXT: retl
459457
;
460458
; X64-LABEL: smin_known_zero:
461459
; X64: # %bb.0:
462460
; X64-NEXT: cmpl $-54, %edi
463461
; X64-NEXT: movl $-54, %eax
464462
; X64-NEXT: cmovll %edi, %eax
465-
; X64-NEXT: bsfl %eax, %ecx
466-
; X64-NEXT: movl $32, %eax
467-
; X64-NEXT: cmovnel %ecx, %eax
463+
; X64-NEXT: rep bsfl %eax, %eax
468464
; X64-NEXT: retq
469465
%z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
470466
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -482,9 +478,9 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
482478
; X86-NEXT: por %xmm2, %xmm0
483479
; X86-NEXT: pcmpeqd %xmm1, %xmm1
484480
; X86-NEXT: paddd %xmm0, %xmm1
485-
; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
486-
; X86-NEXT: pxor %xmm1, %xmm0
487-
; X86-NEXT: pcmpgtd %xmm1, %xmm0
481+
; X86-NEXT: pand %xmm1, %xmm0
482+
; X86-NEXT: pxor %xmm1, %xmm1
483+
; X86-NEXT: pcmpeqd %xmm1, %xmm0
488484
; X86-NEXT: psrld $31, %xmm0
489485
; X86-NEXT: retl
490486
;
@@ -493,10 +489,10 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
493489
; X64-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
494490
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
495491
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
496-
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
497-
; X64-NEXT: vpminud %xmm1, %xmm0, %xmm1
492+
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
493+
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
498494
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
499-
; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
495+
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
500496
; X64-NEXT: retq
501497
%z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
502498
%r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
@@ -577,19 +573,15 @@ define i32 @smax_maybe_zero(i32 %x, i32 %y) {
577573
; X86-NEXT: cmpl $55, %eax
578574
; X86-NEXT: movl $54, %ecx
579575
; X86-NEXT: cmovgel %eax, %ecx
580-
; X86-NEXT: bsfl %ecx, %ecx
581-
; X86-NEXT: movl $32, %eax
582-
; X86-NEXT: cmovnel %ecx, %eax
576+
; X86-NEXT: rep bsfl %ecx, %eax
583577
; X86-NEXT: retl
584578
;
585579
; X64-LABEL: smax_maybe_zero:
586580
; X64: # %bb.0:
587581
; X64-NEXT: cmpl $55, %edi
588582
; X64-NEXT: movl $54, %eax
589583
; X64-NEXT: cmovgel %edi, %eax
590-
; X64-NEXT: bsfl %eax, %ecx
591-
; X64-NEXT: movl $32, %eax
592-
; X64-NEXT: cmovnel %ecx, %eax
584+
; X64-NEXT: rep bsfl %eax, %eax
593585
; X64-NEXT: retq
594586
%z = call i32 @llvm.smax.i32(i32 %x, i32 54)
595587
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)

0 commit comments

Comments
 (0)