
Commit 68e65db

[AArch64][SME] Make getRegAllocationHints stricter for multi-vector loads.
getRegAllocationHints looks for ZPR2StridedOrContiguous load instructions that are used by FORM_TRANSPOSED_REG_TUPLE pseudos and adds all strided registers from this class to the list of hints. This patch changes getRegAllocationHints to restrict this list:

- If the pseudo uses the ZPRMul class, the first load must begin with a register that is a multiple of 2 or 4.
- Only add a strided register to the list if it does not already have any live intervals.
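To make the two rules concrete, here is a minimal, self-contained sketch of the filtering logic, assuming plain integer indices stand in for the first Z register of each strided tuple and a simple set stands in for the live-register matrix; the helper name candidateStarts is hypothetical, and this is a model of the rules rather than the LLVM API:

#include <cstdio>
#include <set>
#include <vector>

// Hypothetical model of the stricter hinting, not the LLVM API: returns the
// first-register indices worth hinting for the first load of a
// FORM_TRANSPOSED_REG_TUPLE_X<N> sequence.
std::vector<int> candidateStarts(const std::vector<int> &StridedOrder,
                                 const std::set<int> &LiveFirstRegs,
                                 unsigned LdOps, bool IsMulZPR) {
  std::vector<int> Hints;
  for (size_t I = 0; I < StridedOrder.size(); ++I) {
    int First = StridedOrder[I];
    // Rule 1: for the ZPRMul tuple classes, the first load must start at a
    // register index that is a multiple of the number of loads (2 or 4).
    if (IsMulZPR && First % static_cast<int>(LdOps) != 0)
      continue;
    // Rule 2: all LdOps sequential starts must exist in the order and must
    // not already have live intervals assigned.
    bool CanAssign = true;
    for (unsigned Next = 0; Next < LdOps; ++Next) {
      if (I + Next >= StridedOrder.size() ||
          StridedOrder[I + Next] != First + static_cast<int>(Next) ||
          LiveFirstRegs.count(StridedOrder[I + Next])) {
        CanAssign = false;
        break;
      }
    }
    if (CanAssign)
      Hints.push_back(First);
  }
  return Hints;
}

int main() {
  // First registers Z0..Z7 of the available strided tuples, with Z2 live.
  std::vector<int> Order = {0, 1, 2, 3, 4, 5, 6, 7};
  for (int Hint : candidateStarts(Order, /*LiveFirstRegs=*/{2},
                                  /*LdOps=*/4, /*IsMulZPR=*/true))
    std::printf("Z%d\n", Hint); // prints only "Z4"
  return 0;
}

With candidates Z0..Z7, four loads, and Z2 already live, only Z4 survives: Z0 is a multiple of 4, but its run Z0-Z3 collides with the live Z2.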
1 parent f9d4ac8 commit 68e65db

4 files changed (+511, -435 lines)

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 70 additions & 9 deletions
@@ -20,6 +20,7 @@
 #include "MCTargetDesc/AArch64InstPrinter.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -1107,23 +1108,83 @@ bool AArch64RegisterInfo::getRegAllocationHints(
   // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
   // instructions over reducing the number of clobbered callee-save registers,
   // so we add the strided registers as a hint.
+  const MachineInstr *TupleInst = nullptr;
   unsigned RegID = MRI.getRegClass(VirtReg)->getID();
   // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
   if ((RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
        RegID == AArch64::ZPR4StridedOrContiguousRegClassID) &&
-      any_of(MRI.use_nodbg_instructions(VirtReg), [](const MachineInstr &Use) {
-        return Use.getOpcode() ==
-                   AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
-               Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
+      any_of(MRI.use_nodbg_instructions(VirtReg), [&TupleInst](
+                                                      const MachineInstr &Use) {
+        bool IsTuple =
+            Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
+            Use.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
+        TupleInst = &Use;
+        return IsTuple;
       })) {
-    const TargetRegisterClass *StridedRC =
-        RegID == AArch64::ZPR2StridedOrContiguousRegClassID
-            ? &AArch64::ZPR2StridedRegClass
-            : &AArch64::ZPR4StridedRegClass;
+    unsigned LdOps = TupleInst->getNumOperands() - 1;
+    const TargetRegisterClass *StridedRC = LdOps == 2
+                                               ? &AArch64::ZPR2StridedRegClass
+                                               : &AArch64::ZPR4StridedRegClass;

+    SmallVector<MCPhysReg, 4> StridedOrder;
     for (MCPhysReg Reg : Order)
       if (StridedRC->contains(Reg))
-        Hints.push_back(Reg);
+        StridedOrder.push_back(Reg);
+
+    int OpIdx = TupleInst->findRegisterUseOperandIdx(VirtReg, this);
+    if (OpIdx == -1)
+      return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
+                                                       MF, VRM);
+
+    unsigned TupleID =
+        MRI.getRegClass(TupleInst->getOperand(0).getReg())->getID();
+    bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
+                    TupleID == AArch64::ZPR4Mul4RegClassID;
+
+    if (OpIdx == 1) {
+      for (unsigned I = 0; I < StridedOrder.size(); ++I) {
+        MCPhysReg Reg = StridedOrder[I];
+        unsigned FirstReg = getSubReg(Reg, AArch64::zsub0);
+
+        // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
+        // register of the first load should be a multiple of 2 or 4.
+        if (IsMulZPR &&
+            (getSubReg(Reg, AArch64::zsub0) - AArch64::Z0) % LdOps != 0)
+          continue;
+        // Skip this register if it has any live intervals assigned.
+        if (Matrix->isPhysRegUsed(Reg))
+          continue;
+
+        bool CanAssign = true;
+        for (unsigned Next = 1; Next < LdOps; ++Next) {
+          // Ensure we can assign enough registers from the list for all loads.
+          if (I + Next >= StridedOrder.size()) {
+            CanAssign = false;
+            break;
+          }
+          // Ensure the subsequent registers are not live and that the starting
+          // sub-registers are sequential.
+          MCPhysReg NextReg = StridedOrder[I + Next];
+          if (Matrix->isPhysRegUsed(NextReg) ||
+              (getSubReg(NextReg, AArch64::zsub0) != FirstReg + Next)) {
+            CanAssign = false;
+            break;
+          }
+        }
+        if (CanAssign)
+          Hints.push_back(Reg);
+      }
+    } else if (VRM->hasPhys(TupleInst->getOperand(1).getReg())) {
+      // This is not the first load in the sequence. Find the register
+      // assigned to the first and match to a strided reg in the list.
+      MCPhysReg FirstLoadPhysReg =
+          VRM->getPhys(TupleInst->getOperand(1).getReg());
+      for (unsigned I = 0; I < StridedOrder.size(); ++I) {
+        if (StridedOrder[I] == FirstLoadPhysReg &&
+            (I + (OpIdx - 1) < StridedOrder.size()))
+          Hints.push_back(StridedOrder[I + (OpIdx - 1)]);
+      }
+    }

     return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
                                                      VRM);
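The else-if arm above covers the loads after the first: once the first load has a physical register, each later load is hinted to the strided register OpIdx - 1 places after it in the allocation order. Below is a reduced model of that lookup, under the same hypothetical integer-index assumptions as the sketch in the commit message; hintForLaterLoad is an invented name, not an LLVM function.

#include <vector>

// FirstLoadPhysReg is the strided register already assigned to the first
// load; OpIdx is this load's operand index on the tuple pseudo (1-based,
// so the first load is operand 1).
int hintForLaterLoad(const std::vector<int> &StridedOrder,
                     int FirstLoadPhysReg, unsigned OpIdx) {
  for (size_t I = 0; I < StridedOrder.size(); ++I)
    if (StridedOrder[I] == FirstLoadPhysReg &&
        I + (OpIdx - 1) < StridedOrder.size())
      return StridedOrder[I + (OpIdx - 1)];
  return -1; // no hint; fall back to the default allocation order
}

For example, if the first load landed on the tuple starting at Z4, the third load (OpIdx == 3) is hinted to the tuple starting at Z6, keeping the group sequential.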
