@@ -1108,20 +1108,16 @@ bool AArch64RegisterInfo::getRegAllocationHints(
1108
1108
// FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
1109
1109
// instructions over reducing the number of clobbered callee-save registers,
1110
1110
// so we add the strided registers as a hint.
1111
- const MachineInstr *TupleInst = nullptr ;
1112
1111
unsigned RegID = MRI.getRegClass (VirtReg)->getID ();
1113
1112
// Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
1114
- if ((RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
1115
- RegID == AArch64::ZPR4StridedOrContiguousRegClassID) &&
1116
- any_of (MRI.use_nodbg_instructions (VirtReg), [&TupleInst](
1117
- const MachineInstr &Use) {
1118
- bool IsTuple =
1119
- Use.getOpcode () == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
1120
- Use.getOpcode () == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
1121
- TupleInst = &Use;
1122
- return IsTuple;
1123
- })) {
1124
- unsigned LdOps = TupleInst->getNumOperands () - 1 ;
1113
+ for (const MachineInstr &Use : MRI.use_nodbg_instructions (VirtReg)) {
1114
+ if ((RegID != AArch64::ZPR2StridedOrContiguousRegClassID &&
1115
+ RegID != AArch64::ZPR4StridedOrContiguousRegClassID) ||
1116
+ (Use.getOpcode () != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
1117
+ Use.getOpcode () != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO))
1118
+ continue ;
1119
+
1120
+ unsigned LdOps = Use.getNumOperands () - 1 ;
1125
1121
const TargetRegisterClass *StridedRC = LdOps == 2
1126
1122
? &AArch64::ZPR2StridedRegClass
1127
1123
: &AArch64::ZPR4StridedRegClass;
@@ -1131,63 +1127,76 @@ bool AArch64RegisterInfo::getRegAllocationHints(
1131
1127
if (StridedRC->contains (Reg))
1132
1128
StridedOrder.push_back (Reg);
1133
1129
1134
- int OpIdx = TupleInst->findRegisterUseOperandIdx (VirtReg, this );
1135
- if (OpIdx == -1 )
1136
- return TargetRegisterInfo::getRegAllocationHints (VirtReg, Order, Hints,
1137
- MF, VRM);
1130
// Helper: scan StridedOrder and return the first entry whose zsub0
// sub-register equals FirstReg; yields AArch64::NoRegister when no entry
// in StridedOrder starts at FirstReg.
+ auto GetRegStartingAt = [&](MCPhysReg FirstReg) -> MCPhysReg {
1131
+ for (MCPhysReg Strided : StridedOrder)
1132
+ if (getSubReg (Strided, AArch64::zsub0) == FirstReg)
1133
+ return Strided;
1134
+ return (MCPhysReg)AArch64::NoRegister; // no match found in StridedOrder
1135
+ };
1136
+
1137
+ int OpIdx = Use.findRegisterUseOperandIdx (VirtReg, this );
1138
+ assert (OpIdx != -1 && " Expected operand index from register use." );
1138
1139
1139
- unsigned TupleID =
1140
- MRI.getRegClass (TupleInst->getOperand (0 ).getReg ())->getID ();
1140
+ unsigned TupleID = MRI.getRegClass (Use.getOperand (0 ).getReg ())->getID ();
1141
1141
bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
1142
1142
TupleID == AArch64::ZPR4Mul4RegClassID;
1143
1143
1144
- if (OpIdx == 1 ) {
1144
+ unsigned AssignedOp = 0 ;
1145
+ if (!any_of (make_range (Use.operands_begin () + 1 , Use.operands_end ()),
1146
+ [&](const MachineOperand &Op) {
1147
+ if (!VRM->hasPhys (Op.getReg ()))
1148
+ return false ;
1149
+ AssignedOp = Op.getOperandNo ();
1150
+ return true ;
1151
+ })) {
1152
+ // There are no registers already assigned to any of the pseudo operands.
1153
+ // Look for a valid starting register for the group.
1145
1154
for (unsigned I = 0 ; I < StridedOrder.size (); ++I) {
1146
1155
MCPhysReg Reg = StridedOrder[I];
1147
1156
unsigned FirstReg = getSubReg (Reg, AArch64::zsub0);
1148
1157
1149
1158
// If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
1150
1159
// register of the first load should be a multiple of 2 or 4.
1151
- if (IsMulZPR &&
1152
- (getSubReg (Reg, AArch64::zsub0) - AArch64::Z0) % LdOps != 0 )
1160
+ if (IsMulZPR && (FirstReg - AArch64::Z0) % LdOps != 0 )
1153
1161
continue ;
1154
1162
// Skip this register if it has any live intervals assigned.
1155
1163
if (Matrix->isPhysRegUsed (Reg))
1156
1164
continue ;
1157
1165
1158
- bool CanAssign = true ;
1166
+ // Look for registers in StridedOrder which start with sub-registers
1167
+ // following sequentially from FirstReg. If all are found and none are
1168
+ // already live, add Reg to Hints.
1169
+ MCPhysReg RegToAssign = Reg;
1159
1170
for (unsigned Next = 1 ; Next < LdOps; ++Next) {
1160
- // Ensure we can assign enough registers from the list for all loads.
1161
- if (I + Next >= StridedOrder.size ()) {
1162
- CanAssign = false ;
1163
- break ;
1164
- }
1165
- // Ensure the subsequent registers are not live and that the starting
1166
- // sub-registers are sequential.
1167
- MCPhysReg NextReg = StridedOrder[I + Next];
1168
- if (Matrix->isPhysRegUsed (NextReg) ||
1169
- (getSubReg (NextReg, AArch64::zsub0) != FirstReg + Next)) {
1170
- CanAssign = false ;
1171
+ MCPhysReg Strided = GetRegStartingAt (FirstReg + Next);
1172
+ if (Strided == AArch64::NoRegister ||
1173
+ Matrix->isPhysRegUsed (Strided)) {
1174
+ RegToAssign = AArch64::NoRegister;
1171
1175
break ;
1172
1176
}
1177
+ if (Next == (unsigned )OpIdx - 1 )
1178
+ RegToAssign = Strided;
1173
1179
}
1174
- if (CanAssign)
1175
- Hints.push_back (Reg);
1176
- }
1177
- } else if (VRM->hasPhys (TupleInst->getOperand (1 ).getReg ())) {
1178
- // This is not the first load in the sequence. Find the register
1179
- // assigned to the first and match to a strided reg in the list.
1180
- MCPhysReg FirstLoadPhysReg =
1181
- VRM->getPhys (TupleInst->getOperand (1 ).getReg ());
1182
- for (unsigned I = 0 ; I < StridedOrder.size (); ++I) {
1183
- if (StridedOrder[I] == FirstLoadPhysReg &&
1184
- (I + (OpIdx - 1 ) < StridedOrder.size ()))
1185
- Hints.push_back (StridedOrder[I + (OpIdx - 1 )]);
1180
+ if (RegToAssign != AArch64::NoRegister)
1181
+ Hints.push_back (RegToAssign);
1186
1182
}
1183
+ } else {
1184
+ // At least one operand already has a physical register assigned.
1185
+ // Find the starting sub-register of this and use it to work out the
1186
+ // correct strided register to suggest based on the current op index.
1187
+ MCPhysReg TargetStartReg =
1188
+ getSubReg (VRM->getPhys (Use.getOperand (AssignedOp).getReg ()),
1189
+ AArch64::zsub0) +
1190
+ (OpIdx - AssignedOp);
1191
+
1192
+ for (unsigned I = 0 ; I < StridedOrder.size (); ++I)
1193
+ if (getSubReg (StridedOrder[I], AArch64::zsub0) == TargetStartReg)
1194
+ Hints.push_back (StridedOrder[I]);
1187
1195
}
1188
1196
1189
- return TargetRegisterInfo::getRegAllocationHints (VirtReg, Order, Hints, MF,
1190
- VRM);
1197
+ if (!Hints.empty ())
1198
+ return TargetRegisterInfo::getRegAllocationHints (VirtReg, Order, Hints,
1199
+ MF, VRM);
1191
1200
}
1192
1201
1193
1202
for (MachineInstr &MI : MRI.def_instructions (VirtReg)) {
0 commit comments