@@ -2992,19 +2992,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
2992
2992
2993
2993
MachineBasicBlock *
2994
2994
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
2995
- MachineInstr &MI,
2996
- MachineBasicBlock *BB , bool HasTile ) const {
2995
+ MachineInstr &MI, MachineBasicBlock *BB,
2996
+ bool HasTile , bool HasZPROut ) const {
2997
2997
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2998
2998
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2999
2999
unsigned StartIdx = 0;
3000
3000
3001
- if (HasTile) {
3002
- MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
3003
- MIB.addReg(BaseReg + MI.getOperand(0).getImm());
3004
- StartIdx = 1;
3005
- } else
3006
- MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);
3007
-
3001
+ if (HasZPROut) {
3002
+ if (HasTile) {
3003
+ MIB.add(MI.getOperand(0)); // Output ZPR
3004
+ MIB.addReg(BaseReg + MI.getOperand(1).getImm(),
3005
+ RegState::Define); // Output ZA Tile
3006
+ MIB.addReg(BaseReg + MI.getOperand(1).getImm()); // Input Za Tile
3007
+ StartIdx = 2;
3008
+ }
3009
+ } else {
3010
+ if (HasTile) {
3011
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
3012
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm());
3013
+ StartIdx = 1;
3014
+ } else
3015
+ MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);
3016
+ }
3008
3017
for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
3009
3018
MIB.add(MI.getOperand(I));
3010
3019
@@ -3113,17 +3122,59 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
3113
3122
TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
3114
3123
switch (SMEMatrixType) {
3115
3124
case (AArch64::SMEMatrixArray):
3116
- return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false);
3125
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false,
3126
+ /*HasZPROut*/ false);
3117
3127
case (AArch64::SMEMatrixTileB):
3118
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, /*HasTile*/ true);
3128
+ switch (MI.getOpcode()) {
3129
+ case AArch64::MOVAZ_2ZMI_H_B_PSEUDO:
3130
+ case AArch64::MOVAZ_2ZMI_V_B_PSEUDO:
3131
+ case AArch64::MOVAZ_4ZMI_H_B_PSEUDO:
3132
+ case AArch64::MOVAZ_4ZMI_V_B_PSEUDO:
3133
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB,
3134
+ /*HasTile*/ true, /*HasZPROut*/ true);
3135
+ default:
3136
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB,
3137
+ /*HasTile*/ true, /*HasZPROut*/ false);
3138
+ }
3119
3139
case (AArch64::SMEMatrixTileH):
3120
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, /*HasTile*/ true);
3140
+ switch (MI.getOpcode()) {
3141
+ case AArch64::MOVAZ_2ZMI_H_H_PSEUDO:
3142
+ case AArch64::MOVAZ_2ZMI_V_H_PSEUDO:
3143
+ case AArch64::MOVAZ_4ZMI_H_H_PSEUDO:
3144
+ case AArch64::MOVAZ_4ZMI_V_H_PSEUDO:
3145
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB,
3146
+ /*HasTile*/ true, /*HasZPROut*/ true);
3147
+ default:
3148
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB,
3149
+ /*HasTile*/ true, /*HasZPROut*/ false);
3150
+ }
3121
3151
case (AArch64::SMEMatrixTileS):
3122
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, /*HasTile*/ true);
3152
+ switch (MI.getOpcode()) {
3153
+ case AArch64::MOVAZ_2ZMI_H_S_PSEUDO:
3154
+ case AArch64::MOVAZ_2ZMI_V_S_PSEUDO:
3155
+ case AArch64::MOVAZ_4ZMI_H_S_PSEUDO:
3156
+ case AArch64::MOVAZ_4ZMI_V_S_PSEUDO:
3157
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB,
3158
+ /*HasTile*/ true, /*HasZPROut*/ true);
3159
+ default:
3160
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB,
3161
+ /*HasTile*/ true, /*HasZPROut*/ false);
3162
+ }
3123
3163
case (AArch64::SMEMatrixTileD):
3124
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, /*HasTile*/ true);
3164
+ switch (MI.getOpcode()) {
3165
+ case AArch64::MOVAZ_2ZMI_H_D_PSEUDO:
3166
+ case AArch64::MOVAZ_2ZMI_V_D_PSEUDO:
3167
+ case AArch64::MOVAZ_4ZMI_H_D_PSEUDO:
3168
+ case AArch64::MOVAZ_4ZMI_V_D_PSEUDO:
3169
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB,
3170
+ /*HasTile*/ true, /*HasZPROut*/ true);
3171
+ default:
3172
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB,
3173
+ /*HasTile*/ true, /*HasZPROut*/ false);
3174
+ }
3125
3175
case (AArch64::SMEMatrixTileQ):
3126
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true);
3176
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true,
3177
+ /*HasZPROut*/ false);
3127
3178
}
3128
3179
}
3129
3180
@@ -3195,38 +3246,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
3195
3246
return EmitZero(MI, BB);
3196
3247
case AArch64::ZERO_T_PSEUDO:
3197
3248
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3198
- case AArch64::MOVAZ_2ZMI_H_B_PSEUDO:
3199
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_B, AArch64::ZAB0, MI, BB);
3200
- case AArch64::MOVAZ_2ZMI_H_H_PSEUDO:
3201
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_H, AArch64::ZAH0, MI, BB);
3202
- case AArch64::MOVAZ_2ZMI_H_S_PSEUDO:
3203
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_S, AArch64::ZAS0, MI, BB);
3204
- case AArch64::MOVAZ_2ZMI_H_D_PSEUDO:
3205
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_D, AArch64::ZAD0, MI, BB);
3206
- case AArch64::MOVAZ_2ZMI_V_B_PSEUDO:
3207
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_B, AArch64::ZAB0, MI, BB);
3208
- case AArch64::MOVAZ_2ZMI_V_H_PSEUDO:
3209
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_H, AArch64::ZAH0, MI, BB);
3210
- case AArch64::MOVAZ_2ZMI_V_S_PSEUDO:
3211
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_S, AArch64::ZAS0, MI, BB);
3212
- case AArch64::MOVAZ_2ZMI_V_D_PSEUDO:
3213
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_D, AArch64::ZAD0, MI, BB);
3214
- case AArch64::MOVAZ_4ZMI_H_B_PSEUDO:
3215
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_B, AArch64::ZAB0, MI, BB);
3216
- case AArch64::MOVAZ_4ZMI_H_H_PSEUDO:
3217
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_H, AArch64::ZAH0, MI, BB);
3218
- case AArch64::MOVAZ_4ZMI_H_S_PSEUDO:
3219
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_S, AArch64::ZAS0, MI, BB);
3220
- case AArch64::MOVAZ_4ZMI_H_D_PSEUDO:
3221
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_D, AArch64::ZAD0, MI, BB);
3222
- case AArch64::MOVAZ_4ZMI_V_B_PSEUDO:
3223
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_B, AArch64::ZAB0, MI, BB);
3224
- case AArch64::MOVAZ_4ZMI_V_H_PSEUDO:
3225
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_H, AArch64::ZAH0, MI, BB);
3226
- case AArch64::MOVAZ_4ZMI_V_S_PSEUDO:
3227
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_S, AArch64::ZAS0, MI, BB);
3228
- case AArch64::MOVAZ_4ZMI_V_D_PSEUDO:
3229
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_D, AArch64::ZAD0, MI, BB);
3230
3249
}
3231
3250
}
3232
3251
0 commit comments