Skip to content

Commit 81c3d47

Browse files
- Rewrote the search for an operand that already has a register assigned
- Added an early exit if the function does not have SME or is not in streaming mode
- Moved the loop over uses of VirtReg into the if-block checking the RegID
- Removed GetRegStartingAt
1 parent 7635e0e commit 81c3d47

File tree

3 files changed

+225
-229
lines changed

3 files changed

+225
-229
lines changed

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 77 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,11 @@ bool AArch64RegisterInfo::getRegAllocationHints(
11001100
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
11011101
const MachineRegisterInfo &MRI = MF.getRegInfo();
11021102

1103+
auto &ST = MF.getSubtarget<AArch64Subtarget>();
1104+
if (!ST.hasSME() || !ST.isStreaming())
1105+
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
1106+
VRM);
1107+
11031108
// The SVE calling convention preserves registers Z8-Z23. As a result, there
11041109
// are no ZPR2Strided or ZPR4Strided registers that do not overlap with the
11051110
// callee-saved registers and so by default these will be pushed to the back
@@ -1109,94 +1114,82 @@ bool AArch64RegisterInfo::getRegAllocationHints(
11091114
// instructions over reducing the number of clobbered callee-save registers,
11101115
// so we add the strided registers as a hint.
11111116
unsigned RegID = MRI.getRegClass(VirtReg)->getID();
1112-
// Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
1113-
for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
1114-
if ((RegID != AArch64::ZPR2StridedOrContiguousRegClassID &&
1115-
RegID != AArch64::ZPR4StridedOrContiguousRegClassID) ||
1116-
(Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
1117-
Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO))
1118-
continue;
1119-
1120-
unsigned LdOps = Use.getNumOperands() - 1;
1121-
const TargetRegisterClass *StridedRC = LdOps == 2
1122-
? &AArch64::ZPR2StridedRegClass
1123-
: &AArch64::ZPR4StridedRegClass;
1124-
1125-
SmallVector<MCPhysReg, 4> StridedOrder;
1126-
for (MCPhysReg Reg : Order)
1127-
if (StridedRC->contains(Reg))
1128-
StridedOrder.push_back(Reg);
1129-
1130-
auto GetRegStartingAt = [&](MCPhysReg FirstReg) -> MCPhysReg {
1131-
for (MCPhysReg Strided : StridedOrder)
1132-
if (getSubReg(Strided, AArch64::zsub0) == FirstReg)
1133-
return Strided;
1134-
return (MCPhysReg)AArch64::NoRegister;
1135-
};
1136-
1137-
int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
1138-
assert(OpIdx != -1 && "Expected operand index from register use.");
1139-
1140-
unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
1141-
bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
1142-
TupleID == AArch64::ZPR4Mul4RegClassID;
1143-
1144-
unsigned AssignedOp = 0;
1145-
if (!any_of(make_range(Use.operands_begin() + 1, Use.operands_end()),
1146-
[&](const MachineOperand &Op) {
1147-
if (!VRM->hasPhys(Op.getReg()))
1148-
return false;
1149-
AssignedOp = Op.getOperandNo();
1150-
return true;
1151-
})) {
1152-
// There are no registers already assigned to any of the pseudo operands.
1153-
// Look for a valid starting register for the group.
1154-
for (unsigned I = 0; I < StridedOrder.size(); ++I) {
1155-
MCPhysReg Reg = StridedOrder[I];
1156-
unsigned FirstReg = getSubReg(Reg, AArch64::zsub0);
1157-
1158-
// If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
1159-
// register of the first load should be a multiple of 2 or 4.
1160-
if (IsMulZPR && (FirstReg - AArch64::Z0) % LdOps != 0)
1161-
continue;
1162-
// Skip this register if it has any live intervals assigned.
1163-
if (Matrix->isPhysRegUsed(Reg))
1164-
continue;
1165-
1166-
// Look for registers in StridedOrder which start with sub-registers
1167-
// following sequentially from FirstReg. If all are found and none are
1168-
// already live, add Reg to Hints.
1169-
MCPhysReg RegToAssign = Reg;
1170-
for (unsigned Next = 1; Next < LdOps; ++Next) {
1171-
MCPhysReg Strided = GetRegStartingAt(FirstReg + Next);
1172-
if (Strided == AArch64::NoRegister ||
1173-
Matrix->isPhysRegUsed(Strided)) {
1174-
RegToAssign = AArch64::NoRegister;
1175-
break;
1117+
if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
1118+
RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
1119+
1120+
// Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
1121+
for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
1122+
if (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
1123+
Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
1124+
continue;
1125+
1126+
unsigned LdOps = Use.getNumOperands() - 1;
1127+
const TargetRegisterClass *StridedRC =
1128+
LdOps == 2 ? &AArch64::ZPR2StridedRegClass
1129+
: &AArch64::ZPR4StridedRegClass;
1130+
1131+
SmallVector<MCPhysReg, 4> StridedOrder;
1132+
for (MCPhysReg Reg : Order)
1133+
if (StridedRC->contains(Reg))
1134+
StridedOrder.push_back(Reg);
1135+
1136+
int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
1137+
assert(OpIdx != -1 && "Expected operand index from register use.");
1138+
1139+
unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
1140+
bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
1141+
TupleID == AArch64::ZPR4Mul4RegClassID;
1142+
1143+
const MachineOperand *AssignedRegOp = llvm::find_if(
1144+
make_range(Use.operands_begin() + 1, Use.operands_end()),
1145+
[&VRM](const MachineOperand &Op) {
1146+
return VRM->hasPhys(Op.getReg());
1147+
});
1148+
1149+
if (AssignedRegOp == Use.operands_end()) {
1150+
// There are no registers already assigned to any of the pseudo
1151+
// operands. Look for a valid starting register for the group.
1152+
for (unsigned I = 0; I < StridedOrder.size(); ++I) {
1153+
MCPhysReg Reg = StridedOrder[I];
1154+
SmallVector<MCPhysReg> Regs;
1155+
unsigned FirstStridedReg = Reg - OpIdx + 1;
1156+
1157+
// If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
1158+
// register of the first load should be a multiple of 2 or 4.
1159+
unsigned FirstSubReg = getSubReg(FirstStridedReg, AArch64::zsub0);
1160+
if (IsMulZPR && (FirstSubReg - AArch64::Z0) % LdOps != 0)
1161+
continue;
1162+
1163+
for (unsigned Op = 0; Op < LdOps; ++Op) {
1164+
if (!is_contained(StridedOrder, FirstStridedReg + Op) ||
1165+
getSubReg(FirstStridedReg + Op, AArch64::zsub0) !=
1166+
FirstSubReg + Op)
1167+
break;
1168+
Regs.push_back(FirstStridedReg + Op);
11761169
}
1177-
if (Next == (unsigned)OpIdx - 1)
1178-
RegToAssign = Strided;
1170+
1171+
if (Regs.size() == LdOps && all_of(Regs, [&](MCPhysReg R) {
1172+
return !Matrix->isPhysRegUsed(R);
1173+
}))
1174+
Hints.push_back(FirstStridedReg + OpIdx - 1);
11791175
}
1180-
if (RegToAssign != AArch64::NoRegister)
1181-
Hints.push_back(RegToAssign);
1176+
} else {
1177+
// At least one operand already has a physical register assigned.
1178+
// Find the starting sub-register of this and use it to work out the
1179+
// correct strided register to suggest based on the current op index.
1180+
MCPhysReg TargetStartReg =
1181+
getSubReg(VRM->getPhys(AssignedRegOp->getReg()), AArch64::zsub0) +
1182+
(OpIdx - AssignedRegOp->getOperandNo());
1183+
1184+
for (unsigned I = 0; I < StridedOrder.size(); ++I)
1185+
if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
1186+
Hints.push_back(StridedOrder[I]);
11821187
}
1183-
} else {
1184-
// At least one operand already has a physical register assigned.
1185-
// Find the starting sub-register of this and use it to work out the
1186-
// correct strided register to suggest based on the current op index.
1187-
MCPhysReg TargetStartReg =
1188-
getSubReg(VRM->getPhys(Use.getOperand(AssignedOp).getReg()),
1189-
AArch64::zsub0) +
1190-
(OpIdx - AssignedOp);
1191-
1192-
for (unsigned I = 0; I < StridedOrder.size(); ++I)
1193-
if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
1194-
Hints.push_back(StridedOrder[I]);
1195-
}
11961188

1197-
if (!Hints.empty())
1198-
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
1199-
MF, VRM);
1189+
if (!Hints.empty())
1190+
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
1191+
MF, VRM);
1192+
}
12001193
}
12011194

12021195
for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {

0 commit comments

Comments (0)