Skip to content

Commit 8f9c994

Browse files
committed
[AArch64] Improve non-SVE popcount for 32-bit and 64-bit using udot
1 parent fb86cb7 commit 8f9c994

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9800,6 +9800,26 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
98009800
Val = DAG.getBitcast(VT8Bit, Val);
98019801
Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
98029802

9803+
if (Subtarget->hasDotProd() && VT.getScalarSizeInBits() != 16) {
9804+
EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
9805+
SDValue Zeros = DAG.getSplatBuildVector(
9806+
DT, DL, DAG.getConstant(0, DL, DT.getScalarType()));
9807+
SDValue Ones =
9808+
DAG.getSplatBuildVector(VT8Bit, DL, DAG.getConstant(1, DL, MVT::i8));
9809+
9810+
if (VT == MVT::v2i64) {
9811+
Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
9812+
Val = DAG.getNode(AArch64ISD::UADDLP, DL, VT, Val);
9813+
} else if (VT == MVT::v2i32) {
9814+
Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
9815+
} else if (VT == MVT::v4i32) {
9816+
Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
9817+
} else {
9818+
llvm_unreachable("Unexpected type for custom ctpop lowering");
9819+
}
9820+
9821+
return Val;
9822+
}
98039823
// Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
98049824
unsigned EltSize = 8;
98059825
unsigned NumElts = VT.is64BitVector() ? 8 : 16;

0 commit comments

Comments
 (0)