@@ -2965,19 +2965,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
2965
2965
2966
2966
MachineBasicBlock *
2967
2967
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
2968
- MachineInstr &MI,
2969
- MachineBasicBlock *BB , bool HasTile ) const {
2968
+ MachineInstr &MI, MachineBasicBlock *BB,
2969
+ bool HasTile , bool HasZPROut ) const {
2970
2970
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2971
2971
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2972
2972
unsigned StartIdx = 0;
2973
2973
2974
- if (HasTile) {
2975
- MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
2976
- MIB.addReg(BaseReg + MI.getOperand(0).getImm());
2977
- StartIdx = 1;
2978
- } else
2979
- MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);
2980
-
2974
+ if (HasZPROut) {
2975
+ if (HasTile) {
2976
+ MIB.add(MI.getOperand(0)); // Output ZPR
2977
+ MIB.addReg(BaseReg + MI.getOperand(1).getImm(),
2978
+ RegState::Define); // Output ZA Tile
2979
+ MIB.addReg(BaseReg + MI.getOperand(1).getImm()); // Input ZA Tile
2980
+ StartIdx = 2;
2981
+ }
2982
+ } else {
2983
+ if (HasTile) {
2984
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
2985
+ MIB.addReg(BaseReg + MI.getOperand(0).getImm());
2986
+ StartIdx = 1;
2987
+ } else
2988
+ MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg);
2989
+ }
2981
2990
for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
2982
2991
MIB.add(MI.getOperand(I));
2983
2992
@@ -3012,17 +3021,59 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
3012
3021
TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask;
3013
3022
switch (SMEMatrixType) {
3014
3023
case (AArch64::SMEMatrixArray):
3015
- return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false);
3024
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false,
3025
+ /*HasZPROut*/ false);
3016
3026
case (AArch64::SMEMatrixTileB):
3017
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, /*HasTile*/ true);
3027
+ switch (MI.getOpcode()) {
3028
+ case AArch64::MOVAZ_2ZMI_H_B_PSEUDO:
3029
+ case AArch64::MOVAZ_2ZMI_V_B_PSEUDO:
3030
+ case AArch64::MOVAZ_4ZMI_H_B_PSEUDO:
3031
+ case AArch64::MOVAZ_4ZMI_V_B_PSEUDO:
3032
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB,
3033
+ /*HasTile*/ true, /*HasZPROut*/ true);
3034
+ default:
3035
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB,
3036
+ /*HasTile*/ true, /*HasZPROut*/ false);
3037
+ }
3018
3038
case (AArch64::SMEMatrixTileH):
3019
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, /*HasTile*/ true);
3039
+ switch (MI.getOpcode()) {
3040
+ case AArch64::MOVAZ_2ZMI_H_H_PSEUDO:
3041
+ case AArch64::MOVAZ_2ZMI_V_H_PSEUDO:
3042
+ case AArch64::MOVAZ_4ZMI_H_H_PSEUDO:
3043
+ case AArch64::MOVAZ_4ZMI_V_H_PSEUDO:
3044
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB,
3045
+ /*HasTile*/ true, /*HasZPROut*/ true);
3046
+ default:
3047
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB,
3048
+ /*HasTile*/ true, /*HasZPROut*/ false);
3049
+ }
3020
3050
case (AArch64::SMEMatrixTileS):
3021
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, /*HasTile*/ true);
3051
+ switch (MI.getOpcode()) {
3052
+ case AArch64::MOVAZ_2ZMI_H_S_PSEUDO:
3053
+ case AArch64::MOVAZ_2ZMI_V_S_PSEUDO:
3054
+ case AArch64::MOVAZ_4ZMI_H_S_PSEUDO:
3055
+ case AArch64::MOVAZ_4ZMI_V_S_PSEUDO:
3056
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB,
3057
+ /*HasTile*/ true, /*HasZPROut*/ true);
3058
+ default:
3059
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB,
3060
+ /*HasTile*/ true, /*HasZPROut*/ false);
3061
+ }
3022
3062
case (AArch64::SMEMatrixTileD):
3023
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, /*HasTile*/ true);
3063
+ switch (MI.getOpcode()) {
3064
+ case AArch64::MOVAZ_2ZMI_H_D_PSEUDO:
3065
+ case AArch64::MOVAZ_2ZMI_V_D_PSEUDO:
3066
+ case AArch64::MOVAZ_4ZMI_H_D_PSEUDO:
3067
+ case AArch64::MOVAZ_4ZMI_V_D_PSEUDO:
3068
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB,
3069
+ /*HasTile*/ true, /*HasZPROut*/ true);
3070
+ default:
3071
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB,
3072
+ /*HasTile*/ true, /*HasZPROut*/ false);
3073
+ }
3024
3074
case (AArch64::SMEMatrixTileQ):
3025
- return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true);
3075
+ return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true,
3076
+ /*HasZPROut*/ false);
3026
3077
}
3027
3078
}
3028
3079
@@ -3091,38 +3142,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
3091
3142
return EmitZero(MI, BB);
3092
3143
case AArch64::ZERO_T_PSEUDO:
3093
3144
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3094
- case AArch64::MOVAZ_2ZMI_H_B_PSEUDO:
3095
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_B, AArch64::ZAB0, MI, BB);
3096
- case AArch64::MOVAZ_2ZMI_H_H_PSEUDO:
3097
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_H, AArch64::ZAH0, MI, BB);
3098
- case AArch64::MOVAZ_2ZMI_H_S_PSEUDO:
3099
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_S, AArch64::ZAS0, MI, BB);
3100
- case AArch64::MOVAZ_2ZMI_H_D_PSEUDO:
3101
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_H_D, AArch64::ZAD0, MI, BB);
3102
- case AArch64::MOVAZ_2ZMI_V_B_PSEUDO:
3103
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_B, AArch64::ZAB0, MI, BB);
3104
- case AArch64::MOVAZ_2ZMI_V_H_PSEUDO:
3105
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_H, AArch64::ZAH0, MI, BB);
3106
- case AArch64::MOVAZ_2ZMI_V_S_PSEUDO:
3107
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_S, AArch64::ZAS0, MI, BB);
3108
- case AArch64::MOVAZ_2ZMI_V_D_PSEUDO:
3109
- return EmitTileMovaz(AArch64::MOVAZ_2ZMI_V_D, AArch64::ZAD0, MI, BB);
3110
- case AArch64::MOVAZ_4ZMI_H_B_PSEUDO:
3111
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_B, AArch64::ZAB0, MI, BB);
3112
- case AArch64::MOVAZ_4ZMI_H_H_PSEUDO:
3113
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_H, AArch64::ZAH0, MI, BB);
3114
- case AArch64::MOVAZ_4ZMI_H_S_PSEUDO:
3115
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_S, AArch64::ZAS0, MI, BB);
3116
- case AArch64::MOVAZ_4ZMI_H_D_PSEUDO:
3117
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_H_D, AArch64::ZAD0, MI, BB);
3118
- case AArch64::MOVAZ_4ZMI_V_B_PSEUDO:
3119
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_B, AArch64::ZAB0, MI, BB);
3120
- case AArch64::MOVAZ_4ZMI_V_H_PSEUDO:
3121
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_H, AArch64::ZAH0, MI, BB);
3122
- case AArch64::MOVAZ_4ZMI_V_S_PSEUDO:
3123
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_S, AArch64::ZAS0, MI, BB);
3124
- case AArch64::MOVAZ_4ZMI_V_D_PSEUDO:
3125
- return EmitTileMovaz(AArch64::MOVAZ_4ZMI_V_D, AArch64::ZAD0, MI, BB);
3126
3145
}
3127
3146
}
3128
3147
0 commit comments