Skip to content

Commit cc6f8c3

Browse files
committed
[SDAG] Use shifts if ISD::MUL is illegal when lowering ISD::CTPOP
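When ISD::MUL is not legal for the type, sum the bytes with a shift-and-add sequence instead of multiplying by 0x0101..., which avoids the libcall an illegal multiply would otherwise be lowered to. Fixes #86205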
1 parent e6f63a9 commit cc6f8c3

File tree

11 files changed

+1106
-1274
lines changed

11 files changed

+1106
-1274
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8709,11 +8709,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
87098709
DAG.getConstant(0xFF, dl, VT));
87108710
}
87118711

8712-
// v = (v * 0x01010101...) >> (Len - 8)
8713-
SDValue Mask01 =
8714-
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8715-
return DAG.getNode(ISD::SRL, dl, VT,
8716-
DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
8712+
SDValue V;
8713+
if (isOperationLegalOrCustomOrPromote(ISD::MUL, VT)) {
8714+
// v = (v * 0x01010101...) >> (Len - 8)
8715+
SDValue Mask01 =
8716+
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8717+
V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
8718+
} else {
8719+
V = Op;
8720+
SDValue ShiftC = DAG.getConstant(8, dl, VT);
8721+
for (unsigned I = 8; I < Len; I += 8) {
8722+
V = DAG.getNode(ISD::ADD, dl, VT, Op,
8723+
DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
8724+
}
8725+
}
8726+
return DAG.getNode(ISD::SRL, dl, VT, V,
87178727
DAG.getConstant(Len - 8, dl, ShVT));
87188728
}
87198729

llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,9 +285,12 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
285285
; LA64-NEXT: lu12i.w $a1, 61680
286286
; LA64-NEXT: ori $a1, $a1, 3855
287287
; LA64-NEXT: and $a0, $a0, $a1
288-
; LA64-NEXT: lu12i.w $a1, 4112
289-
; LA64-NEXT: ori $a1, $a1, 257
290-
; LA64-NEXT: mul.d $a0, $a0, $a1
288+
; LA64-NEXT: slli.d $a1, $a0, 8
289+
; LA64-NEXT: add.d $a1, $a0, $a1
290+
; LA64-NEXT: slli.d $a1, $a1, 8
291+
; LA64-NEXT: add.d $a1, $a0, $a1
292+
; LA64-NEXT: slli.d $a1, $a1, 8
293+
; LA64-NEXT: add.d $a0, $a0, $a1
291294
; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24
292295
; LA64-NEXT: ret
293296
%1 = call i32 @llvm.ctpop.i32(i32 %a)

0 commit comments

Comments
 (0)