Skip to content

Commit 3dd6016

Browse files
committed
[AArch64][GlobalISel] Improve non-SVE popcount for 32-bit and 64-bit elements using udot
1 parent f3005d5 commit 3dd6016

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "AArch64LegalizerInfo.h"
1515
#include "AArch64RegisterBankInfo.h"
1616
#include "AArch64Subtarget.h"
17+
#include "MCTargetDesc/AArch64MCTargetDesc.h"
1718
#include "llvm/ADT/STLExtras.h"
1819
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1920
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -1904,6 +1905,31 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
19041905
auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
19051906

19061907
// Sum across lanes.
1908+
1909+
if (ST->hasDotProd() && Ty.getNumElements() >= 2 &&
1910+
Ty.getScalarSizeInBits() != 16) {
1911+
LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
1912+
auto Zeros = MIRBuilder.buildConstant(Ty, 0);
1913+
auto Ones = MIRBuilder.buildConstant(VTy, 1);
1914+
MachineInstrBuilder SUM;
1915+
1916+
if (Ty == LLT::fixed_vector(2, 64)) {
1917+
auto UDOT =
1918+
MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1919+
SUM = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
1920+
} else if (Ty == LLT::fixed_vector(4, 32)) {
1921+
SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1922+
} else if (Ty == LLT::fixed_vector(2, 32)) {
1923+
SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1924+
} else {
1925+
llvm_unreachable("unexpected vector shape");
1926+
}
1927+
1928+
SUM->getOperand(0).setReg(Dst);
1929+
MI.eraseFromParent();
1930+
return true;
1931+
}
1932+
19071933
Register HSum = CTPOP.getReg(0);
19081934
unsigned Opc;
19091935
SmallVector<LLT> HAddTys;

0 commit comments

Comments
 (0)