@@ -20,6 +20,7 @@
 #include "MCTargetDesc/AArch64InstPrinter.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -1107,23 +1108,83 @@ bool AArch64RegisterInfo::getRegAllocationHints(
   // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
   // instructions over reducing the number of clobbered callee-save registers,
   // so we add the strided registers as a hint.
+  const MachineInstr *TupleInst = nullptr;
   unsigned RegID = MRI.getRegClass(VirtReg)->getID();
   // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
   if ((RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
        RegID == AArch64::ZPR4StridedOrContiguousRegClassID) &&
-      any_of(MRI.use_nodbg_instructions(VirtReg), [](const MachineInstr &Use) {
-        return Use.getOpcode() ==
-                   AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
-               Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
+      any_of(MRI.use_nodbg_instructions(VirtReg), [&TupleInst](
+                                                      const MachineInstr &Use) {
+        bool IsTuple =
+            Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
+            Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
+        TupleInst = &Use;
+        return IsTuple;
       })) {
-    const TargetRegisterClass *StridedRC =
-        RegID == AArch64::ZPR2StridedOrContiguousRegClassID
-            ? &AArch64::ZPR2StridedRegClass
-            : &AArch64::ZPR4StridedRegClass;
+    unsigned LdOps = TupleInst->getNumOperands() - 1;
+    const TargetRegisterClass *StridedRC = LdOps == 2
+                                               ? &AArch64::ZPR2StridedRegClass
+                                               : &AArch64::ZPR4StridedRegClass;
 
+    SmallVector<MCPhysReg, 4> StridedOrder;
     for (MCPhysReg Reg : Order)
       if (StridedRC->contains(Reg))
-        Hints.push_back(Reg);
+        StridedOrder.push_back(Reg);
+
+    int OpIdx = TupleInst->findRegisterUseOperandIdx(VirtReg, this);
+    if (OpIdx == -1)
+      return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
+                                                       MF, VRM);
+
+    unsigned TupleID =
+        MRI.getRegClass(TupleInst->getOperand(0).getReg())->getID();
+    bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
+                    TupleID == AArch64::ZPR4Mul4RegClassID;
+
+    if (OpIdx == 1) {
+      for (unsigned I = 0; I < StridedOrder.size(); ++I) {
+        MCPhysReg Reg = StridedOrder[I];
+        unsigned FirstReg = getSubReg(Reg, AArch64::zsub0);
+
+        // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
+        // register of the first load should be a multiple of 2 or 4.
+        if (IsMulZPR &&
+            (getSubReg(Reg, AArch64::zsub0) - AArch64::Z0) % LdOps != 0)
+          continue;
+        // Skip this register if it has any live intervals assigned.
+        if (Matrix->isPhysRegUsed(Reg))
+          continue;
+
+        bool CanAssign = true;
+        for (unsigned Next = 1; Next < LdOps; ++Next) {
+          // Ensure we can assign enough registers from the list for all loads.
+          if (I + Next >= StridedOrder.size()) {
+            CanAssign = false;
+            break;
+          }
+          // Ensure the subsequent registers are not live and that the starting
+          // sub-registers are sequential.
+          MCPhysReg NextReg = StridedOrder[I + Next];
+          if (Matrix->isPhysRegUsed(NextReg) ||
+              (getSubReg(NextReg, AArch64::zsub0) != FirstReg + Next)) {
+            CanAssign = false;
+            break;
+          }
+        }
+        if (CanAssign)
+          Hints.push_back(Reg);
+      }
+    } else if (VRM->hasPhys(TupleInst->getOperand(1).getReg())) {
+      // This is not the first load in the sequence. Find the register
+      // assigned to the first and match to a strided reg in the list.
+      MCPhysReg FirstLoadPhysReg =
+          VRM->getPhys(TupleInst->getOperand(1).getReg());
+      for (unsigned I = 0; I < StridedOrder.size(); ++I) {
+        if (StridedOrder[I] == FirstLoadPhysReg &&
+            (I + (OpIdx - 1) < StridedOrder.size()))
+          Hints.push_back(StridedOrder[I + (OpIdx - 1)]);
+      }
+    }
 
     return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
                                                      VRM);
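Note on the OpIdx == 1 branch: it chooses where the first load of the sequence should land. Below is a rough standalone model of that filter, not LLVM code: plain integers stand in for the index of each strided tuple's first Z sub-register, and the free list is a hypothetical allocation order following the SME2 strided pattern, where x4 first sub-registers come from Z0-Z3 and Z16-Z19.

// Standalone model of the first-load candidate filter above.
// Assumptions: each strided tuple is represented only by the index of
// its first Z sub-register, and FreeFirstSubReg is a hypothetical
// allocation order containing only tuples with no live intervals.
#include <cstdio>
#include <vector>

int main() {
  const unsigned LdOps = 4;   // X4 pseudo; 2 for the X2 form
  const bool IsMulZPR = true; // destination tuple is in ZPR4Mul4
  std::vector<unsigned> FreeFirstSubReg = {0, 1, 2, 3, 16, 17, 18, 19};

  for (size_t I = 0; I < FreeFirstSubReg.size(); ++I) {
    unsigned FirstReg = FreeFirstSubReg[I];
    // ZPRMul classes require the first load's register to be a
    // multiple of the number of loads (2 or 4).
    if (IsMulZPR && FirstReg % LdOps != 0)
      continue;
    // The other LdOps - 1 loads need free tuples whose first
    // sub-registers follow on sequentially: FirstReg+1, FirstReg+2, ...
    bool CanAssign = true;
    for (unsigned Next = 1; Next < LdOps; ++Next)
      if (I + Next >= FreeFirstSubReg.size() ||
          FreeFirstSubReg[I + Next] != FirstReg + Next)
        CanAssign = false;
    if (CanAssign)
      std::printf("hint first load at z%u\n", FirstReg); // prints z0, z16
  }
  return 0;
}

With this input the model hints z0 and z16: the multiple-of-LdOps check discards z1-z3 and z17-z19 as starting points, and both surviving candidates have their three sequential neighbours free.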
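For the remaining loads (OpIdx > 1), the else branch waits until the first load has been assigned a physical register and then hints the tuple sitting OpIdx - 1 places further along the strided allocation order; this relies on that order listing tuples with consecutive first sub-registers next to each other. A sketch of the lookup under the same stand-in representation, with hypothetical FirstLoadPhysReg and OpIdx values:

// Standalone model of the follow-up-load hint above. Assumptions as in
// the previous sketch; the concrete values below are illustrative only.
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> StridedOrder = {0, 1, 2, 3, 16, 17, 18, 19};
  const unsigned FirstLoadPhysReg = 16; // register assigned to load 1
  const unsigned OpIdx = 3;             // allocating the third load

  // Find the first load's tuple in the order and hint the entry
  // OpIdx - 1 positions later, mirroring StridedOrder[I + (OpIdx - 1)].
  for (size_t I = 0; I < StridedOrder.size(); ++I)
    if (StridedOrder[I] == FirstLoadPhysReg &&
        I + (OpIdx - 1) < StridedOrder.size())
      std::printf("hint load %u at z%u\n", OpIdx,
                  StridedOrder[I + (OpIdx - 1)]); // prints "load 3 at z18"
  return 0;
}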