Skip to content

Commit 29b6365

Browse files
- Removed post-isel hook for FORM_TRANSPOSED_REG_TUPLE
- Added visitRegSequence to the AArch64MIPeepholeOpt pass to create the pseudo if a REG_SEQUENCE matches the pattern
- Removed uses of FORM_TRANSPOSED_REG_TUPLE from SME2 multiclasses
- Added tests for every other multiclass which can now use the pseudo
1 parent 18b490f commit 29b6365

File tree

9 files changed

+713
-208
lines changed

9 files changed

+713
-208
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -8750,51 +8750,6 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
87508750
return ZExtBool;
87518751
}
87528752

8753-
// The FORM_TRANSPOSED_REG_TUPLE pseudo should only be used if the
8754-
// input operands are copy nodes where the source register is in a
8755-
// StridedOrContiguous class. For example:
8756-
//
8757-
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
8758-
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
8759-
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
8760-
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
8761-
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
8762-
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
8763-
// %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
8764-
//
8765-
bool shouldUseFormStridedPseudo(MachineInstr &MI) {
8766-
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
8767-
8768-
assert((MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
8769-
MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) &&
8770-
"Unexpected opcode.");
8771-
8772-
MCRegister SubReg = MCRegister::NoRegister;
8773-
for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
8774-
MachineOperand &MO = MI.getOperand(I);
8775-
assert(MO.isReg() && "Unexpected operand to FORM_TRANSPOSED_REG_TUPLE");
8776-
8777-
MachineOperand *Def = MRI.getOneDef(MO.getReg());
8778-
if (!Def || !Def->getParent()->isCopy())
8779-
return false;
8780-
8781-
const MachineOperand &CopySrc = Def->getParent()->getOperand(1);
8782-
unsigned OpSubReg = CopySrc.getSubReg();
8783-
if (SubReg == MCRegister::NoRegister)
8784-
SubReg = OpSubReg;
8785-
8786-
MachineOperand *CopySrcOp = MRI.getOneDef(CopySrc.getReg());
8787-
const TargetRegisterClass *CopySrcClass =
8788-
MRI.getRegClass(CopySrcOp->getReg());
8789-
if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg ||
8790-
(CopySrcClass != &AArch64::ZPR2StridedOrContiguousRegClass &&
8791-
CopySrcClass != &AArch64::ZPR4StridedOrContiguousRegClass))
8792-
return false;
8793-
}
8794-
8795-
return true;
8796-
}
8797-
87988753
void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
87998754
SDNode *Node) const {
88008755
// Live-in physreg copies that are glued to SMSTART are applied as
@@ -8820,27 +8775,6 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
88208775
}
88218776
}
88228777

8823-
if (MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO ||
8824-
MI.getOpcode() == AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) {
8825-
// If input values to the FORM_TRANSPOSED_REG_TUPLE pseudo aren't copies
8826-
// from a StridedOrContiguous class, fall back on REG_SEQUENCE node.
8827-
if (shouldUseFormStridedPseudo(MI))
8828-
return;
8829-
8830-
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
8831-
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
8832-
TII->get(TargetOpcode::REG_SEQUENCE),
8833-
MI.getOperand(0).getReg());
8834-
8835-
for (unsigned I = 1; I < MI.getNumOperands(); ++I) {
8836-
MIB.add(MI.getOperand(I));
8837-
MIB.addImm(AArch64::zsub0 + (I - 1));
8838-
}
8839-
8840-
MI.eraseFromParent();
8841-
return;
8842-
}
8843-
88448778
// Add an implicit use of 'VG' for ADDXri/SUBXri, which are instructions that
88458779
// have nothing to do with VG, were it not that they are used to materialise a
88468780
// frame-address. If they contain a frame-index to a scalable vector, this

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171

7272
#include "AArch64ExpandImm.h"
7373
#include "AArch64InstrInfo.h"
74+
#include "AArch64Subtarget.h"
7475
#include "MCTargetDesc/AArch64AddressingModes.h"
7576
#include "llvm/CodeGen/MachineDominators.h"
7677
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -137,6 +138,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
137138
bool visitFMOVDr(MachineInstr &MI);
138139
bool visitUBFMXri(MachineInstr &MI);
139140
bool visitCopy(MachineInstr &MI);
141+
bool visitRegSequence(MachineInstr &MI);
140142
bool runOnMachineFunction(MachineFunction &MF) override;
141143

142144
StringRef getPassName() const override {
@@ -835,6 +837,85 @@ bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
835837
return true;
836838
}
837839

840+
// Using the FORM_TRANSPOSED_REG_TUPLE pseudo can improve register allocation
841+
// of multi-vector intrinsics. However, the pseudo should only be emitted if
842+
// the input registers of the REG_SEQUENCE are copy nodes where the source
843+
// register is in a StridedOrContiguous class. For example:
844+
//
845+
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
846+
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
847+
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
848+
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
849+
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
850+
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
851+
// %9:zpr2mul2 = REG_SEQUENCE %5:zpr, %subreg.zsub0, %8:zpr, %subreg.zsub1
852+
//
853+
// -> %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
854+
//
855+
bool AArch64MIPeepholeOpt::visitRegSequence(MachineInstr &MI) {
856+
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
857+
858+
auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
859+
if (!ST.hasSME() || !ST.isStreaming())
860+
return false;
861+
862+
switch (MRI.getRegClass(MI.getOperand(0).getReg())->getID()) {
863+
case AArch64::ZPR2RegClassID:
864+
case AArch64::ZPR4RegClassID:
865+
case AArch64::ZPR2Mul2RegClassID:
866+
case AArch64::ZPR4Mul4RegClassID:
867+
break;
868+
default:
869+
return false;
870+
}
871+
872+
// The first operand is the register defined by the REG_SEQUENCE.
873+
// Each operand pair after this consists of a vreg + subreg index, so
874+
// for example a sequence of 2 registers will have a total of 5 operands.
875+
if (MI.getNumOperands() != 5 && MI.getNumOperands() != 9)
876+
return false;
877+
878+
MCRegister SubReg = MCRegister::NoRegister;
879+
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
880+
MachineOperand &MO = MI.getOperand(I);
881+
882+
if (!MI.getOperand(I).isReg())
883+
return false;
884+
885+
MachineOperand *Def = MRI.getOneDef(MO.getReg());
886+
if (!Def || !Def->getParent()->isCopy())
887+
return false;
888+
889+
const MachineOperand &CopySrc = Def->getParent()->getOperand(1);
890+
unsigned OpSubReg = CopySrc.getSubReg();
891+
if (SubReg == MCRegister::NoRegister)
892+
SubReg = OpSubReg;
893+
894+
MachineOperand *CopySrcOp = MRI.getOneDef(CopySrc.getReg());
895+
if (!CopySrcOp || !CopySrcOp->isReg() || OpSubReg != SubReg ||
896+
CopySrcOp->getReg().isPhysical())
897+
return false;
898+
899+
const TargetRegisterClass *CopySrcClass =
900+
MRI.getRegClass(CopySrcOp->getReg());
901+
if (CopySrcClass != &AArch64::ZPR2StridedOrContiguousRegClass &&
902+
CopySrcClass != &AArch64::ZPR4StridedOrContiguousRegClass)
903+
return false;
904+
}
905+
906+
unsigned Opc = MI.getNumOperands() == 5
907+
? AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO
908+
: AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;
909+
910+
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
911+
TII->get(Opc), MI.getOperand(0).getReg());
912+
for (unsigned I = 1; I < MI.getNumOperands(); I += 2)
913+
MIB.addReg(MI.getOperand(I).getReg());
914+
915+
MI.eraseFromParent();
916+
return true;
917+
}
918+
838919
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
839920
if (skipFunction(MF.getFunction()))
840921
return false;
@@ -926,6 +1007,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
9261007
case AArch64::COPY:
9271008
Changed |= visitCopy(MI);
9281009
break;
1010+
case AArch64::REG_SEQUENCE:
1011+
Changed |= visitRegSequence(MI);
1012+
break;
9291013
}
9301014
}
9311015
}

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,12 @@ def FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO :
4949
Pseudo<(outs ZPR2:$tup),
5050
(ins ZPR:$zn0, ZPR:$zn1), []>, Sched<[]>{
5151
let hasSideEffects = 0;
52-
let hasPostISelHook = 1;
5352
}
5453

5554
def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
5655
Pseudo<(outs ZPR4:$tup),
5756
(ins ZPR:$zn0, ZPR:$zn1, ZPR:$zn2, ZPR:$zn3), []>, Sched<[]>{
5857
let hasSideEffects = 0;
59-
let hasPostISelHook = 1;
6058
}
6159

6260
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
@@ -164,28 +162,28 @@ class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, O
164162
class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
165163
ValueType vt, ComplexPattern tileslice>
166164
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm),
167-
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2),
165+
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
168166
zpr_ty:$Zm)>;
169167
class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
170168
ValueType vt, ComplexPattern tileslice>
171169
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
172170
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
173171
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
174-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
172+
(REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
175173
zpr_ty:$Zm)>;
176174

177175
class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
178176
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
179177
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
180-
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2),
181-
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zm1, vt:$Zm2))>;
178+
(REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
179+
(REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
182180

183181
class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
184182
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
185183
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4),
186184
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
187-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
188-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4))>;
185+
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
186+
(REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>;
189187

190188
class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
191189
Operand imm_ty, ComplexPattern tileslice>
@@ -197,44 +195,44 @@ class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic
197195
Operand imm_ty, ComplexPattern tileslice>
198196
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
199197
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
200-
(FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1,vt:$Zn2), zpr_ty:$Zm, imm_ty:$i)>;
198+
(REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>;
201199

202200
class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
203201
Operand imm_ty, ComplexPattern tileslice>
204202
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
205203
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
206204
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
207-
(FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
205+
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
208206
zpr_ty:$Zm, imm_ty:$i)>;
209207

210208
class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
211209
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
212-
(!cast<Instruction>(name) (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO in_vt:$Zn1, in_vt:$Zn2), imm_ty:$i)>;
210+
(!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
213211

214212
class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
215213
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))),
216-
(!cast<Instruction>(name) (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4),
214+
(!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
217215
imm_ty:$i)>;
218216

219217
class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
220218
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
221-
(!cast<Instruction>(name) (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4))>;
219+
(!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
222220

223221
class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
224222
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
225-
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2))>;
223+
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
226224

227225
class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
228226
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
229-
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4))>;
227+
(!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
230228

231229
class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
232230
: Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
233-
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO vt:$Zn1, vt:$Zn2))>;
231+
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
234232

235233
class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
236234
: Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
237-
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4))>;
235+
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
238236

239237
class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
240238
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
@@ -2446,7 +2444,7 @@ multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op, ValueType in_vt, SDP
24462444
let Uses = [FPMR, FPCR];
24472445
}
24482446
def : Pat<(nxv16i8 (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
2449-
(!cast<Instruction>(NAME) (FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO in_vt:$Zn1, in_vt:$Zn2))>;
2447+
(!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
24502448
}
24512449

24522450
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,

0 commit comments

Comments
 (0)