Skip to content

Commit 74e3dcd

Browse files
- Removed post-isel hook for FORM_TRANSPOSED_REG_TUPLE
- Added visitRegSequence to the AArch64MIPeepholeOpt pass to create the pseudo if a REG_SEQUENCE matches the pattern
- Removed uses of FORM_TRANSPOSED_REG_TUPLE from SME2 multiclasses
- Added tests for every other multiclass which can now use the pseudo
1 parent 1f66364 commit 74e3dcd

File tree

9 files changed

+713
-208
lines changed

9 files changed

+713
-208
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -8787,51 +8787,6 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
87878787
return ZExtBool;
87888788
}
87898789

8790-
// The FORM_TRANSPOSED_REG_TUPLE pseudo should only be used if the
8791-
// input operands are copy nodes where the source register is in a
8792-
// StridedOrContiguous class. For example:
8793-
//
8794-
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
8795-
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
8796-
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
8797-
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
8798-
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
8799-
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
8800-
// %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
8801-
//
8802-
// Returns true when every input of a FORM_TRANSPOSED_REG_TUPLE pseudo is
// defined by a COPY whose source is a virtual register in one of the
// ZPR{2,4}StridedOrContiguous classes, and all of those copies read the same
// sub-register index. Returns false as soon as one operand fails the pattern.
bool shouldUseFormStridedPseudo(MachineInstr &MI) {
  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();

  assert((MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
          MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) &&
         "Unexpected opcode.");

  MCRegister SubReg = MCRegister::NoRegister;
  // Operand 0 is the tuple being defined; the inputs start at operand 1.
  for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
    MachineOperand &MO = MI.getOperand(I);
    assert(MO.isReg() && "Unexpected operand to FORM_TRANSPOSED_REG_TUPLE");

    // The input must have exactly one definition, and it must be a COPY.
    MachineOperand *Def = MRI.getOneDef(MO.getReg());
    if (!Def || !Def->getParent()->isCopy())
      return false;

    // Remember the sub-register index read by the first copy; every other
    // copy has to read the same index.
    const MachineOperand &CopySrc = Def->getParent()->getOperand(1);
    unsigned OpSubReg = CopySrc.getSubReg();
    if (SubReg == MCRegister::NoRegister)
      SubReg = OpSubReg;

    // Validate the copy source *before* touching it: getOneDef() may return
    // null, and the register class is only queried for virtual registers.
    MachineOperand *CopySrcOp = MRI.getOneDef(CopySrc.getReg());
    if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg ||
        CopySrcOp->getReg().isPhysical())
      return false;

    const TargetRegisterClass *CopySrcClass =
        MRI.getRegClass(CopySrcOp->getReg());
    if (CopySrcClass != &AArch64::ZPR2StridedOrContiguousRegClass &&
        CopySrcClass != &AArch64::ZPR4StridedOrContiguousRegClass)
      return false;
  }

  return true;
}
8834-
88358790
void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
88368791
SDNode *Node) const {
88378792
// Live-in physreg copies that are glued to SMSTART are applied as
@@ -8857,27 +8812,6 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
88578812
}
88588813
}
88598814

8860-
if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
8861-
MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) {
8862-
// If input values to the FORM_TRANSPOSED_REG_TUPLE pseudo aren't copies
8863-
// from a StridedOrContiguous class, fall back on REG_SEQUENCE node.
8864-
if (shouldUseFormStridedPseudo(MI))
8865-
return;
8866-
8867-
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8868-
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
8869-
TII->get(TargetOpcode::REG_SEQUENCE),
8870-
MI.getOperand(0).getReg());
8871-
8872-
for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
8873-
MIB.add(MI.getOperand(I));
8874-
MIB.addImm(AArch64::zsub0 + (I - 1));
8875-
}
8876-
8877-
MI.eraseFromParent();
8878-
return;
8879-
}
8880-
88818815
// Add an implicit use of 'VG' for ADDXri/SUBXri, which are instructions that
88828816
// have nothing to do with VG, were it not that they are used to materialise a
88838817
// frame-address. If they contain a frame-index to a scalable vector, this

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171

7272
#include "AArch64ExpandImm.h"
7373
#include "AArch64InstrInfo.h"
74+
#include "AArch64Subtarget.h"
7475
#include "MCTargetDesc/AArch64AddressingModes.h"
7576
#include "llvm/CodeGen/MachineDominators.h"
7677
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -137,6 +138,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
137138
bool visitFMOVDr(MachineInstr &MI);
138139
bool visitUBFMXri(MachineInstr &MI);
139140
bool visitCopy(MachineInstr &MI);
141+
bool visitRegSequence(MachineInstr &MI);
140142
bool runOnMachineFunction(MachineFunction &MF) override;
141143

142144
StringRef getPassName() const override {
@@ -835,6 +837,85 @@ bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
835837
return true;
836838
}
837839

840+
// Using the FORM_TRANSPOSED_REG_TUPLE pseudo can improve register allocation
841+
// of multi-vector intrinsics. However, the pseudo should only be emitted if
842+
// the input registers of the REG_SEQUENCE are copy nodes where the source
843+
// register is in a StridedOrContiguous class. For example:
844+
//
845+
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
846+
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
847+
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
848+
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
849+
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
850+
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
851+
// %9:zpr2mul2 = REG_SEQUENCE %5:zpr, %subreg.zsub0, %8:zpr, %subreg.zsub1
852+
//
853+
// -> %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
854+
//
855+
// Rewrites a REG_SEQUENCE that builds a ZPR2/ZPR4 (or Mul2/Mul4) tuple into
// the matching FORM_TRANSPOSED_REG_TUPLE_X{2,4}_PSEUDO when every input is a
// COPY of the same sub-register of a ZPR{2,4}StridedOrContiguous virtual
// register. Returns true if MI was replaced (and erased), false otherwise.
bool AArch64MIPeepholeOpt::visitRegSequence(MachineInstr &MI) {
  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();

  // Only attempt the rewrite for streaming functions with SME available.
  auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
  if (!ST.hasSME() || !ST.isStreaming())
    return false;

  // The result must be one of the ZPR tuple classes.
  switch (MRI.getRegClass(MI.getOperand(0).getReg())->getID()) {
  case AArch64::ZPR2RegClassID:
  case AArch64::ZPR4RegClassID:
  case AArch64::ZPR2Mul2RegClassID:
  case AArch64::ZPR4Mul4RegClassID:
    break;
  default:
    return false;
  }

  // Operand 0 is the register defined by the REG_SEQUENCE. Each operand pair
  // after this consists of a vreg + subreg index, so for example a sequence
  // of 2 registers will have a total of 5 operands.
  const unsigned NumOps = MI.getNumOperands();
  if (NumOps != 5 && NumOps != 9)
    return false;

  // Sub-register index read by the first input's COPY; all inputs must match.
  unsigned SubReg = 0;
  for (unsigned I = 1; I < NumOps; I += 2) {
    const MachineOperand &MO = MI.getOperand(I);
    if (!MO.isReg())
      return false;

    // The pseudo has no sub-register index operands: its Nth input implicitly
    // lands in zsub<N>. Only rewrite when the REG_SEQUENCE lists its inputs in
    // exactly that order, otherwise the tuple elements would be permuted.
    const MachineOperand &SubIdx = MI.getOperand(I + 1);
    if (!SubIdx.isImm() ||
        SubIdx.getImm() != static_cast<int64_t>(AArch64::zsub0 + I / 2))
      return false;

    // The input must have exactly one definition, and it must be a COPY.
    MachineOperand *Def = MRI.getOneDef(MO.getReg());
    if (!Def || !Def->getParent()->isCopy())
      return false;

    const MachineOperand &CopySrc = Def->getParent()->getOperand(1);
    const unsigned OpSubReg = CopySrc.getSubReg();
    if (I == 1)
      SubReg = OpSubReg;
    else if (OpSubReg != SubReg)
      return false;

    // The copy source must be a singly-defined virtual register; the
    // register class can only be queried for virtual registers.
    MachineOperand *CopySrcOp = MRI.getOneDef(CopySrc.getReg());
    if (!CopySrcOp || !CopySrcOp->isReg() || CopySrcOp->getReg().isPhysical())
      return false;

    const TargetRegisterClass *CopySrcClass =
        MRI.getRegClass(CopySrcOp->getReg());
    if (CopySrcClass != &AArch64::ZPR2StridedOrContiguousRegClass &&
        CopySrcClass != &AArch64::ZPR4StridedOrContiguousRegClass)
      return false;
  }

  const unsigned Opc = NumOps == 5
                           ? AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO
                           : AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;

  // Build the pseudo in front of MI, reusing its result register and
  // forwarding only the register inputs (sub-register indices are dropped).
  MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
                                    TII->get(Opc), MI.getOperand(0).getReg());
  for (unsigned I = 1; I < NumOps; I += 2)
    MIB.addReg(MI.getOperand(I).getReg());

  MI.eraseFromParent();
  return true;
}
918+
838919
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
839920
if (skipFunction(MF.getFunction()))
840921
return false;
@@ -926,6 +1007,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
9261007
case AArch64::COPY:
9271008
Changed |= visitCopy(MI);
9281009
break;
1010+
case AArch64::REG_SEQUENCE:
1011+
Changed |= visitRegSequence(MI);
1012+
break;
9291013
}
9301014
}
9311015
}

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,12 @@ def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO :
4949
Pseudo<(outs ZPR2:$tup),
5050
(ins ZPR:$zn0, ZPR:$zn1), []>, Sched<[]>{
5151
let hasSideEffects = 0;
52-
let hasPostISelHook = 1;
5352
}
5453

5554
def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
5655
Pseudo<(outs ZPR4:$tup),
5756
(ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>, Sched<[]>{
5857
let hasSideEffects = 0;
59-
let hasPostISelHook = 1;
6058
}
6159

6260
def SPILL_PPR_TO_ZPR_SLOT_PSEUDO :
@@ -178,28 +176,28 @@ class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, O
178176
class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
179177
ValueType vt, ComplexPattern tileslice>
180178
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm),
181-
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2),
179+
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
182180
zpr_ty:$Zm)>;
183181
class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
184182
ValueType vt, ComplexPattern tileslice>
185183
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
186184
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
187185
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
188-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
186+
(REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
189187
zpr_ty:$Zm)>;
190188

191189
class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
192190
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
193191
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
194-
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2),
195-
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zm1, vt:$Zm2))>;
192+
(REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
193+
(REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
196194

197195
class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
198196
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
199197
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4),
200198
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
201-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
202-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4))>;
199+
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
200+
(REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>;
203201

204202
class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
205203
Operand imm_ty, ComplexPattern tileslice>
@@ -211,44 +209,44 @@ class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic
211209
Operand imm_ty, ComplexPattern tileslice>
212210
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
213211
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
214-
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>;
212+
(REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>;
215213

216214
class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
217215
Operand imm_ty, ComplexPattern tileslice>
218216
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
219217
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
220218
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
221-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
219+
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
222220
zpr_ty:$Zm, imm_ty:$i)>;
223221

224222
class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
225223
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
226-
(!cast<Instruction>(name) (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO in_vt:$Zn1, in_vt:$Zn2), imm_ty:$i)>;
224+
(!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
227225

228226
class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
229227
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))),
230-
(!cast<Instruction>(name) (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4),
228+
(!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
231229
imm_ty:$i)>;
232230

233231
class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
234232
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
235-
(!cast<Instruction>(name) (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4))>;
233+
(!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
236234

237235
class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
238236
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
239-
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2))>;
237+
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
240238

241239
class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
242240
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
243-
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4))>;
241+
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
244242

245243
class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
246244
: Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
247-
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2))>;
245+
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
248246

249247
class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
250248
: Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
251-
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4))>;
249+
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
252250

253251
class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
254252
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
@@ -2460,7 +2458,7 @@ multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op, ValueType in_vt, SDP
24602458
let Uses = [FPMR, FPCR];
24612459
}
24622460
def : Pat<(nxv16i8 (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
2463-
(!cast<Instruction>(NAME) (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO in_vt:$Zn1, in_vt:$Zn2))>;
2461+
(!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
24642462
}
24652463

24662464
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,

0 commit comments

Comments
 (0)