Skip to content

Commit 18b7e73

Browse files
committed
[AArch64][GlobalISel] Improve non-SVE popcount for 32-bit and 64-bit using udot
1 parent f056770 commit 18b7e73

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "AArch64LegalizerInfo.h"
1515
#include "AArch64RegisterBankInfo.h"
1616
#include "AArch64Subtarget.h"
17+
#include "MCTargetDesc/AArch64MCTargetDesc.h"
1718
#include "llvm/ADT/STLExtras.h"
1819
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1920
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -1908,6 +1909,31 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
19081909
auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
19091910

19101911
// Sum across lanes.
1912+
1913+
if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
1914+
Ty.getScalarSizeInBits() != 16) {
1915+
LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
1916+
auto Zeros = MIRBuilder.buildConstant(Dt, 0);
1917+
auto Ones = MIRBuilder.buildConstant(VTy, 1);
1918+
MachineInstrBuilder SUM;
1919+
1920+
if (Ty == LLT::fixed_vector(2, 64)) {
1921+
auto UDOT =
1922+
MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1923+
SUM = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
1924+
} else if (Ty == LLT::fixed_vector(4, 32)) {
1925+
SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1926+
} else if (Ty == LLT::fixed_vector(2, 32)) {
1927+
SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
1928+
} else {
1929+
llvm_unreachable("unexpected vector shape");
1930+
}
1931+
1932+
SUM->getOperand(0).setReg(Dst);
1933+
MI.eraseFromParent();
1934+
return true;
1935+
}
1936+
19111937
Register HSum = CTPOP.getReg(0);
19121938
unsigned Opc;
19131939
SmallVector<LLT> HAddTys;

0 commit comments

Comments
 (0)