Skip to content

Commit b279ca2

Browse files
committed
[DAG] visitCTPOP - CTPOP(SHIFT(X)) -> CTPOP(X) iff the shift doesn't affect any non-zero bits
If the source is being (logically) shifted, but the shift doesn't move any set (non-zero) bits out of the value, then we can call CTPOP on the shift source directly.
1 parent 2df652a commit b279ca2

File tree

2 files changed

+20
-5
lines changed

2 files changed

+20
-5
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11144,6 +11144,23 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
1114411144
if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
1114511145
return C;
1114611146

11147+
// If the source is being shifted, but doesn't affect any active bits,
11148+
// then we can call CTPOP on the shift source directly.
11149+
if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11150+
if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11151+
const APInt &Amt = AmtC->getAPIntValue();
11152+
if (Amt.ult(NumBits)) {
11153+
KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11154+
if ((N0.getOpcode() == ISD::SRL &&
11155+
Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11156+
(N0.getOpcode() == ISD::SHL &&
11157+
Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11158+
return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11159+
}
11160+
}
11161+
}
11162+
}
11163+
1114711164
// If the upper bits are known to be zero, then see if it's profitable to
1114811165
// only count the lower bits.
1114911166
if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {

llvm/test/CodeGen/X86/ctpop-mask.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,6 @@ define i64 @ctpop_shifted_mask8(i64 %x) nounwind readnone {
549549
; X86-POPCOUNT-LABEL: ctpop_shifted_mask8:
550550
; X86-POPCOUNT: # %bb.0:
551551
; X86-POPCOUNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
552-
; X86-POPCOUNT-NEXT: shll $8, %eax
553552
; X86-POPCOUNT-NEXT: popcntl %eax, %eax
554553
; X86-POPCOUNT-NEXT: xorl %edx, %edx
555554
; X86-POPCOUNT-NEXT: retl
@@ -663,12 +662,11 @@ define i64 @ctpop_shifted_mask16(i64 %x) nounwind readnone {
663662
; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
664663
; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
665664
; X86-NO-POPCOUNT-NEXT: andl $524280, %eax # imm = 0x7FFF8
666-
; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
667-
; X86-NO-POPCOUNT-NEXT: andl $21845, %ecx # imm = 0x5555
668-
; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
665+
; X86-NO-POPCOUNT-NEXT: shrl %ecx
666+
; X86-NO-POPCOUNT-NEXT: andl $87380, %ecx # imm = 0x15554
669667
; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
670668
; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
671-
; X86-NO-POPCOUNT-NEXT: andl $858993459, %ecx # imm = 0x33333333
669+
; X86-NO-POPCOUNT-NEXT: andl $858993456, %ecx # imm = 0x33333330
672670
; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
673671
; X86-NO-POPCOUNT-NEXT: andl $858993459, %eax # imm = 0x33333333
674672
; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax

0 commit comments

Comments
 (0)