Skip to content

Commit 9410019

Browse files
authored
[X86] Add i8 CTPOP lowering using i32 MUL (#79989)
This is the first basic proposal in #79823 - we can investigate improving support for other widths if we can find further use cases.
1 parent ffb3589 commit 9410019

File tree

3 files changed

+367
-383
lines changed

3 files changed

+367
-383
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
427427
// on the dest that popcntl hasn't had since Cannon Lake.
428428
setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
429429
} else {
430-
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
430+
setOperationAction(ISD::CTPOP , MVT::i8 , Custom);
431431
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
432432
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
433433
if (Subtarget.is64Bit())
@@ -30989,12 +30989,12 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
3098930989

3099030990
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
3099130991
// updated cost models in X86TTIImpl::getIntrinsicInstrCost.
30992-
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
30992+
static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL,
30993+
const X86Subtarget &Subtarget,
3099330994
SelectionDAG &DAG) {
3099430995
MVT VT = Op.getSimpleValueType();
3099530996
assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
3099630997
"Unknown CTPOP type to handle");
30997-
SDLoc DL(Op.getNode());
3099830998
SDValue Op0 = Op.getOperand(0);
3099930999

3100031000
// TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions.
@@ -31035,9 +31035,27 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
3103531035

3103631036
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
3103731037
SelectionDAG &DAG) {
31038-
assert(Op.getSimpleValueType().isVector() &&
31038+
MVT VT = Op.getSimpleValueType();
31039+
SDLoc DL(Op);
31040+
31041+
// i8 CTPOP - assuming i32 MUL is efficient, use a multiply-mask-multiply sequence.
31042+
if (VT == MVT::i8) {
31043+
SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);
31044+
Op = DAG.getZExtOrTrunc(Op.getOperand(0), DL, MVT::i32);
31045+
Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op,
31046+
DAG.getConstant(0x08040201U, DL, MVT::i32));
31047+
Op = DAG.getNode(ISD::SRL, DL, MVT::i32, Op,
31048+
DAG.getShiftAmountConstant(3, MVT::i32, DL));
31049+
Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op, Mask11);
31050+
Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op, Mask11);
31051+
Op = DAG.getNode(ISD::SRL, DL, MVT::i32, Op,
31052+
DAG.getShiftAmountConstant(28, MVT::i32, DL));
31053+
return DAG.getZExtOrTrunc(Op, DL, VT);
31054+
}
31055+
31056+
assert(VT.isVector() &&
3103931057
"We only do custom lowering for vector population count.");
31040-
return LowerVectorCTPOP(Op, Subtarget, DAG);
31058+
return LowerVectorCTPOP(Op, DL, Subtarget, DAG);
3104131059
}
3104231060

3104331061
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {

llvm/test/CodeGen/X86/ctpop-combine.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -88,20 +88,13 @@ define i8 @test4(i8 %x) nounwind readnone {
8888
;
8989
; NO-POPCOUNT-LABEL: test4:
9090
; NO-POPCOUNT: # %bb.0:
91-
; NO-POPCOUNT-NEXT: movl %edi, %ecx
92-
; NO-POPCOUNT-NEXT: andb $127, %cl
93-
; NO-POPCOUNT-NEXT: shrb %dil
94-
; NO-POPCOUNT-NEXT: andb $21, %dil
95-
; NO-POPCOUNT-NEXT: subb %dil, %cl
96-
; NO-POPCOUNT-NEXT: movl %ecx, %eax
97-
; NO-POPCOUNT-NEXT: andb $51, %al
98-
; NO-POPCOUNT-NEXT: shrb $2, %cl
99-
; NO-POPCOUNT-NEXT: andb $51, %cl
100-
; NO-POPCOUNT-NEXT: addb %al, %cl
101-
; NO-POPCOUNT-NEXT: movl %ecx, %eax
102-
; NO-POPCOUNT-NEXT: shrb $4, %al
103-
; NO-POPCOUNT-NEXT: addb %cl, %al
104-
; NO-POPCOUNT-NEXT: andb $15, %al
91+
; NO-POPCOUNT-NEXT: andl $127, %edi
92+
; NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
93+
; NO-POPCOUNT-NEXT: shrl $3, %eax
94+
; NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
95+
; NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
96+
; NO-POPCOUNT-NEXT: shrl $28, %eax
97+
; NO-POPCOUNT-NEXT: # kill: def $al killed $al killed $eax
10598
; NO-POPCOUNT-NEXT: retq
10699
%x2 = and i8 %x, 127
107100
%count = tail call i8 @llvm.ctpop.i8(i8 %x2)

0 commit comments

Comments
 (0)