|
14 | 14 | #include "AArch64LegalizerInfo.h"
|
15 | 15 | #include "AArch64RegisterBankInfo.h"
|
16 | 16 | #include "AArch64Subtarget.h"
|
| 17 | +#include "MCTargetDesc/AArch64MCTargetDesc.h" |
17 | 18 | #include "llvm/ADT/STLExtras.h"
|
18 | 19 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
|
19 | 20 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
|
@@ -1904,6 +1905,31 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
|
1904 | 1905 | auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
|
1905 | 1906 |
|
1906 | 1907 | // Sum across lanes.
|
| 1908 | + |
| 1909 | + if (ST->hasDotProd() && Ty.getNumElements() >= 2 && |
| 1910 | + Ty.getScalarSizeInBits() != 16) { |
| 1911 | + LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty; |
| 1912 | + auto Zeros = MIRBuilder.buildConstant(Dt, 0); |
| 1913 | + auto Ones = MIRBuilder.buildConstant(VTy, 1); |
| 1914 | + MachineInstrBuilder SUM; |
| 1915 | + |
| 1916 | + if (Ty == LLT::fixed_vector(2, 64)) { |
| 1917 | + auto UDOT = |
| 1918 | + MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP}); |
| 1919 | + SUM = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT}); |
| 1920 | + } else if (Ty == LLT::fixed_vector(4, 32)) { |
| 1921 | + SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP}); |
| 1922 | + } else if (Ty == LLT::fixed_vector(2, 32)) { |
| 1923 | + SUM = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP}); |
| 1924 | + } else { |
| 1925 | + llvm_unreachable("unexpected vector shape"); |
| 1926 | + } |
| 1927 | + |
| 1928 | + SUM->getOperand(0).setReg(Dst); |
| 1929 | + MI.eraseFromParent(); |
| 1930 | + return true; |
| 1931 | + } |
| 1932 | + |
1907 | 1933 | Register HSum = CTPOP.getReg(0);
|
1908 | 1934 | unsigned Opc;
|
1909 | 1935 | SmallVector<LLT> HAddTys;
|
|
0 commit comments