@@ -1100,6 +1100,11 @@ bool AArch64RegisterInfo::getRegAllocationHints(
     const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
 
+  auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  if (!ST.hasSME() || !ST.isStreaming())
+    return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
+                                                     VRM);
+
   // The SVE calling convention preserves registers Z8-Z23. As a result, there
   // are no ZPR2Strided or ZPR4Strided registers that do not overlap with the
   // callee-saved registers and so by default these will be pushed to the back
@@ -1109,94 +1114,82 @@ bool AArch64RegisterInfo::getRegAllocationHints(
   // instructions over reducing the number of clobbered callee-save registers,
   // so we add the strided registers as a hint.
   unsigned RegID = MRI.getRegClass(VirtReg)->getID();
-  // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
-  for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
-    if ((RegID != AArch64::ZPR2StridedOrContiguousRegClassID &&
-         RegID != AArch64::ZPR4StridedOrContiguousRegClassID) ||
-        (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
-         Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO))
-      continue;
-
-    unsigned LdOps = Use.getNumOperands() - 1;
-    const TargetRegisterClass *StridedRC = LdOps == 2
-                                               ? &AArch64::ZPR2StridedRegClass
-                                               : &AArch64::ZPR4StridedRegClass;
-
-    SmallVector<MCPhysReg, 4> StridedOrder;
-    for (MCPhysReg Reg : Order)
-      if (StridedRC->contains(Reg))
-        StridedOrder.push_back(Reg);
-
-    auto GetRegStartingAt = [&](MCPhysReg FirstReg) -> MCPhysReg {
-      for (MCPhysReg Strided : StridedOrder)
-        if (getSubReg(Strided, AArch64::zsub0) == FirstReg)
-          return Strided;
-      return (MCPhysReg)AArch64::NoRegister;
-    };
-
-    int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
-    assert(OpIdx != -1 && "Expected operand index from register use.");
-
-    unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
-    bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
-                    TupleID == AArch64::ZPR4Mul4RegClassID;
-
-    unsigned AssignedOp = 0;
-    if (!any_of(make_range(Use.operands_begin() + 1, Use.operands_end()),
-                [&](const MachineOperand &Op) {
-                  if (!VRM->hasPhys(Op.getReg()))
-                    return false;
-                  AssignedOp = Op.getOperandNo();
-                  return true;
-                })) {
-      // There are no registers already assigned to any of the pseudo operands.
-      // Look for a valid starting register for the group.
-      for (unsigned I = 0; I < StridedOrder.size(); ++I) {
-        MCPhysReg Reg = StridedOrder[I];
-        unsigned FirstReg = getSubReg(Reg, AArch64::zsub0);
-
-        // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
-        // register of the first load should be a multiple of 2 or 4.
-        if (IsMulZPR && (FirstReg - AArch64::Z0) % LdOps != 0)
-          continue;
-        // Skip this register if it has any live intervals assigned.
-        if (Matrix->isPhysRegUsed(Reg))
-          continue;
-
-        // Look for registers in StridedOrder which start with sub-registers
-        // following sequentially from FirstReg. If all are found and none are
-        // already live, add Reg to Hints.
-        MCPhysReg RegToAssign = Reg;
-        for (unsigned Next = 1; Next < LdOps; ++Next) {
-          MCPhysReg Strided = GetRegStartingAt(FirstReg + Next);
-          if (Strided == AArch64::NoRegister ||
-              Matrix->isPhysRegUsed(Strided)) {
-            RegToAssign = AArch64::NoRegister;
-            break;
+  if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
+      RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
+
+    // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
+    for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) {
+      if (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
+          Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
+        continue;
+
+      unsigned LdOps = Use.getNumOperands() - 1;
+      const TargetRegisterClass *StridedRC =
+          LdOps == 2 ? &AArch64::ZPR2StridedRegClass
+                     : &AArch64::ZPR4StridedRegClass;
+
+      SmallVector<MCPhysReg, 4> StridedOrder;
+      for (MCPhysReg Reg : Order)
+        if (StridedRC->contains(Reg))
+          StridedOrder.push_back(Reg);
+
+      int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this);
+      assert(OpIdx != -1 && "Expected operand index from register use.");
+
+      unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID();
+      bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
+                      TupleID == AArch64::ZPR4Mul4RegClassID;
+
+      const MachineOperand *AssignedRegOp = llvm::find_if(
+          make_range(Use.operands_begin() + 1, Use.operands_end()),
+          [&VRM](const MachineOperand &Op) {
+            return VRM->hasPhys(Op.getReg());
+          });
+
+      if (AssignedRegOp == Use.operands_end()) {
+        // There are no registers already assigned to any of the pseudo
+        // operands. Look for a valid starting register for the group.
+        for (unsigned I = 0; I < StridedOrder.size(); ++I) {
+          MCPhysReg Reg = StridedOrder[I];
+          SmallVector<MCPhysReg> Regs;
+          unsigned FirstStridedReg = Reg - OpIdx + 1;
+
+          // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
+          // register of the first load should be a multiple of 2 or 4.
+          unsigned FirstSubReg = getSubReg(FirstStridedReg, AArch64::zsub0);
+          if (IsMulZPR && (FirstSubReg - AArch64::Z0) % LdOps != 0)
+            continue;
+
+          for (unsigned Op = 0; Op < LdOps; ++Op) {
+            if (!is_contained(StridedOrder, FirstStridedReg + Op) ||
+                getSubReg(FirstStridedReg + Op, AArch64::zsub0) !=
+                    FirstSubReg + Op)
+              break;
+            Regs.push_back(FirstStridedReg + Op);
           }
-          if (Next == (unsigned)OpIdx - 1)
-            RegToAssign = Strided;
+
+          if (Regs.size() == LdOps && all_of(Regs, [&](MCPhysReg R) {
+                return !Matrix->isPhysRegUsed(R);
+              }))
+            Hints.push_back(FirstStridedReg + OpIdx - 1);
         }
-        if (RegToAssign != AArch64::NoRegister)
-          Hints.push_back(RegToAssign);
+      } else {
+        // At least one operand already has a physical register assigned.
+        // Find the starting sub-register of this and use it to work out the
+        // correct strided register to suggest based on the current op index.
+        MCPhysReg TargetStartReg =
+            getSubReg(VRM->getPhys(AssignedRegOp->getReg()), AArch64::zsub0) +
+            (OpIdx - AssignedRegOp->getOperandNo());
+
+        for (unsigned I = 0; I < StridedOrder.size(); ++I)
+          if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
+            Hints.push_back(StridedOrder[I]);
       }
-    } else {
-      // At least one operand already has a physical register assigned.
-      // Find the starting sub-register of this and use it to work out the
-      // correct strided register to suggest based on the current op index.
-      MCPhysReg TargetStartReg =
-          getSubReg(VRM->getPhys(Use.getOperand(AssignedOp).getReg()),
-                    AArch64::zsub0) +
-          (OpIdx - AssignedOp);
-
-      for (unsigned I = 0; I < StridedOrder.size(); ++I)
-        if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg)
-          Hints.push_back(StridedOrder[I]);
-    }
 
-    if (!Hints.empty())
-      return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
-                                                       MF, VRM);
+      if (!Hints.empty())
+        return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
+                                                         MF, VRM);
+    }
   }
 
   for (MachineInstr &MI : MRI.def_instructions(VirtReg)) {
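Note on the rewritten group search: the new code works back from the operand index to the start of a strided group. If VirtReg is operand OpIdx of the FORM_TRANSPOSED_REG_TUPLE pseudo (operand 0 is the def, so uses start at 1), the group containing a candidate Reg begins at Reg - OpIdx + 1, and a hint is only pushed when all LdOps members of the group appear in the strided allocation order with consecutive zsub0 sub-registers and none is already live. The following is a minimal standalone sketch of that selection logic, collapsing the strided tuple registers and their zsub0 sub-registers into plain integers; pickHint, StridedOrder, and Free are illustrative stand-ins, not LLVM APIs.

#include <algorithm>
#include <vector>

// Toy model of the group search in the patch: registers are plain ints
// standing in for the zsub0 start of each strided tuple (Z0..Z31), Free
// plays the role of !LiveRegMatrix::isPhysRegUsed, and StridedOrder is the
// allocation order filtered to the strided register class.
int pickHint(const std::vector<int> &StridedOrder,
             const std::vector<bool> &Free, int OpIdx, int LdOps,
             bool IsMulZPR) {
  auto InOrder = [&](int R) {
    return std::find(StridedOrder.begin(), StridedOrder.end(), R) !=
           StridedOrder.end();
  };
  for (int Reg : StridedOrder) {
    // Work back from this operand's index to the start of its group.
    int First = Reg - OpIdx + 1;
    // ZPRMul tuple classes require the group to start at a multiple of 2/4.
    if (First < 0 || (IsMulZPR && First % LdOps != 0))
      continue;
    // The group is usable only if every member exists in the strided order
    // and none of them is already assigned.
    bool Usable = true;
    for (int Op = 0; Op < LdOps; ++Op)
      if (First + Op >= (int)Free.size() || !InOrder(First + Op) ||
          !Free[First + Op])
        Usable = false;
    if (Usable)
      return First + OpIdx - 1; // the group member backing this operand
  }
  return -1; // no hint found
}

For example, with StridedOrder modelling the zsub0 starts of the ZPR2Strided tuples ({0..7, 16..23}), OpIdx = 2 and LdOps = 2, candidate Reg = 1 gives First = 0; if registers 0 and 1 are both free the returned hint is 1, mirroring Hints.push_back(FirstStridedReg + OpIdx - 1) in the patch. The else branch of the patch (some operand already assigned) is plain offset arithmetic: the assigned operand's zsub0 plus (OpIdx - AssignedRegOp->getOperandNo()).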