Skip to content

Commit 02dc7a2

Browse files
- Move visitRegSequence from AArch64MIPeepholeOpt to SMEPeepholeOpt
1 parent 74e3dcd commit 02dc7a2

File tree

2 files changed

+84
-84
lines changed

2 files changed

+84
-84
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 0 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@
7171

7272
#include "AArch64ExpandImm.h"
7373
#include "AArch64InstrInfo.h"
74-
#include "AArch64Subtarget.h"
7574
#include "MCTargetDesc/AArch64AddressingModes.h"
7675
#include "llvm/CodeGen/MachineDominators.h"
7776
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -138,7 +137,6 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
138137
bool visitFMOVDr(MachineInstr &MI);
139138
bool visitUBFMXri(MachineInstr &MI);
140139
bool visitCopy(MachineInstr &MI);
141-
bool visitRegSequence(MachineInstr &MI);
142140
bool runOnMachineFunction(MachineFunction &MF) override;
143141

144142
StringRef getPassName() const override {
@@ -837,85 +835,6 @@ bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
837835
return true;
838836
}
839837

840-
// Using the FORM_TRANSPOSED_REG_TUPLE pseudo can improve register allocation
841-
// of multi-vector intrinsics. However, the pseudo should only be emitted if
842-
// the input registers of the REG_SEQUENCE are copy nodes where the source
843-
// register is in a StridedOrContiguous class. For example:
844-
//
845-
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
846-
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
847-
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
848-
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
849-
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
850-
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
851-
// %9:zpr2mul2 = REG_SEQUENCE %5:zpr, %subreg.zsub0, %8:zpr, %subreg.zsub1
852-
//
853-
// -> %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
854-
//
855-
bool AArch64MIPeepholeOpt::visitRegSequence(MachineInstr &MI) {
  // Attempts to replace the REG_SEQUENCE `MI` with a
  // FORM_TRANSPOSED_REG_TUPLE_X2/X4 pseudo. Returns true when MI was replaced
  // (and erased), false when it was left untouched.
  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();

  // The rewrite is only attempted for streaming functions with SME.
  const auto &Subtarget = MI.getMF()->getSubtarget<AArch64Subtarget>();
  if (!(Subtarget.hasSME() && Subtarget.isStreaming()))
    return false;

  // Only REG_SEQUENCEs that define one of these ZPR tuple classes qualify.
  const unsigned DstClassID =
      MRI.getRegClass(MI.getOperand(0).getReg())->getID();
  const bool DefinesZPRTuple = DstClassID == AArch64::ZPR2RegClassID ||
                               DstClassID == AArch64::ZPR4RegClassID ||
                               DstClassID == AArch64::ZPR2Mul2RegClassID ||
                               DstClassID == AArch64::ZPR4Mul4RegClassID;
  if (!DefinesZPRTuple)
    return false;

  // Operand 0 is the register defined by the REG_SEQUENCE; it is followed by
  // (vreg, subreg index) pairs. Two inputs therefore give 5 operands and four
  // inputs give 9.
  const unsigned NumOps = MI.getNumOperands();
  const bool IsX2 = NumOps == 5;
  if (!IsX2 && NumOps != 9)
    return false;

  // Sub-register index read by the feeding COPYs; latched from the first COPY
  // whose index is non-zero and compared against every COPY.
  MCRegister CommonSubReg = MCRegister::NoRegister;
  for (unsigned Idx = 1; Idx < NumOps; Idx += 2) {
    MachineOperand &SeqInput = MI.getOperand(Idx);
    if (!SeqInput.isReg())
      return false;

    // The input must have exactly one definition, and it must be a COPY.
    MachineOperand *InputDef = MRI.getOneDef(SeqInput.getReg());
    if (!InputDef)
      return false;
    MachineInstr *CopyMI = InputDef->getParent();
    if (!CopyMI->isCopy())
      return false;

    const MachineOperand &CopySrc = CopyMI->getOperand(1);
    unsigned CopySubReg = CopySrc.getSubReg();
    if (CommonSubReg == MCRegister::NoRegister)
      CommonSubReg = CopySubReg;

    // The COPY source needs a single definition of its own, must agree on the
    // sub-register index, and must not be a physical register.
    MachineOperand *TupleDef = MRI.getOneDef(CopySrc.getReg());
    if (!TupleDef || !TupleDef->isReg())
      return false;
    if (CopySubReg != CommonSubReg)
      return false;
    if (TupleDef->getReg().isPhysical())
      return false;

    // Finally, the COPY source has to live in a StridedOrContiguous class.
    const TargetRegisterClass *TupleRC = MRI.getRegClass(TupleDef->getReg());
    const bool IsStridedOrContiguous =
        TupleRC == &AArch64::ZPR2StridedOrContiguousRegClass ||
        TupleRC == &AArch64::ZPR4StridedOrContiguousRegClass;
    if (!IsStridedOrContiguous)
      return false;
  }

  const unsigned PseudoOpc = IsX2
                                 ? AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO
                                 : AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;

  // Emit the pseudo in front of MI, defining the same register and taking the
  // REG_SEQUENCE inputs in their original operand order.
  MachineInstrBuilder Builder =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(PseudoOpc),
              MI.getOperand(0).getReg());
  for (unsigned Idx = 1; Idx < NumOps; Idx += 2)
    Builder.addReg(MI.getOperand(Idx).getReg());

  MI.eraseFromParent();
  return true;
}
918-
919838
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
920839
if (skipFunction(MF.getFunction()))
921840
return false;
@@ -1007,9 +926,6 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
1007926
case AArch64::COPY:
1008927
Changed |= visitCopy(MI);
1009928
break;
1010-
case AArch64::REG_SEQUENCE:
1011-
Changed |= visitRegSequence(MI);
1012-
break;
1013929
}
1014930
}
1015931
}

llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct SMEPeepholeOpt : public MachineFunctionPass {
4545

4646
bool optimizeStartStopPairs(MachineBasicBlock &MBB,
4747
bool &HasRemovedAllSMChanges) const;
48+
bool visitRegSequence(MachineInstr &MI);
4849
};
4950

5051
char SMEPeepholeOpt::ID = 0;
@@ -225,6 +226,83 @@ bool SMEPeepholeOpt::optimizeStartStopPairs(
225226
return Changed;
226227
}
227228

229+
// Using the FORM_TRANSPOSED_REG_TUPLE pseudo can improve register allocation
230+
// of multi-vector intrinsics. However, the pseudo should only be emitted if
231+
// the input registers of the REG_SEQUENCE are copy nodes where the source
232+
// register is in a StridedOrContiguous class. For example:
233+
//
234+
// %3:zpr2stridedorcontiguous = LD1B_2Z_IMM_PSEUDO ..
235+
// %4:zpr = COPY %3.zsub1:zpr2stridedorcontiguous
236+
// %5:zpr = COPY %3.zsub0:zpr2stridedorcontiguous
237+
// %6:zpr2stridedorcontiguous = LD1B_2Z_PSEUDO ..
238+
// %7:zpr = COPY %6.zsub1:zpr2stridedorcontiguous
239+
// %8:zpr = COPY %6.zsub0:zpr2stridedorcontiguous
240+
// %9:zpr2mul2 = REG_SEQUENCE %5:zpr, %subreg.zsub0, %8:zpr, %subreg.zsub1
241+
//
242+
// -> %9:zpr2mul2 = FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO %5:zpr, %8:zpr
243+
//
244+
bool SMEPeepholeOpt::visitRegSequence(MachineInstr &MI) {
  // Attempts to replace the REG_SEQUENCE `MI` with a
  // FORM_TRANSPOSED_REG_TUPLE_X2/X4 pseudo. Returns true when MI was replaced
  // (and erased), false when it was left untouched.
  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();

  // Only REG_SEQUENCEs that define one of these ZPR tuple classes qualify.
  switch (MRI.getRegClass(MI.getOperand(0).getReg())->getID()) {
  case AArch64::ZPR2RegClassID:
  case AArch64::ZPR4RegClassID:
  case AArch64::ZPR2Mul2RegClassID:
  case AArch64::ZPR4Mul4RegClassID:
    break;
  default:
    return false;
  }

  // Operand 0 is the register defined by the REG_SEQUENCE. Each operand pair
  // after it consists of a vreg + subreg index, so a sequence of 2 registers
  // has 5 operands in total and a sequence of 4 registers has 9.
  const unsigned NumOperands = MI.getNumOperands();
  if (NumOperands != 5 && NumOperands != 9)
    return false;

  // Sub-register index read by the first feeding COPY. It is a sub-register
  // *index*, not a register, so it is kept as a plain unsigned and latched on
  // the first iteration rather than by comparing against a "no register"
  // sentinel: with a sentinel, a first COPY without a sub-register index
  // would leave the value unset and let a later, different index slip through.
  unsigned SubReg = 0;
  for (unsigned I = 1; I < NumOperands; I += 2) {
    const MachineOperand &MO = MI.getOperand(I);
    if (!MO.isReg())
      return false;

    // The input must have exactly one definition, and it must be a COPY.
    MachineOperand *Def = MRI.getOneDef(MO.getReg());
    if (!Def || !Def->getParent()->isCopy())
      return false;

    // Every feeding COPY must read the same sub-register index.
    const MachineOperand &CopySrc = Def->getParent()->getOperand(1);
    const unsigned OpSubReg = CopySrc.getSubReg();
    if (I == 1)
      SubReg = OpSubReg;
    else if (OpSubReg != SubReg)
      return false;

    // getRegClass() is only meaningful for virtual registers, so reject
    // anything else before querying it. The source must also have a single
    // definition.
    const Register SrcReg = CopySrc.getReg();
    if (!SrcReg.isVirtual() || !MRI.getOneDef(SrcReg))
      return false;

    const TargetRegisterClass *CopySrcClass = MRI.getRegClass(SrcReg);
    if (CopySrcClass != &AArch64::ZPR2StridedOrContiguousRegClass &&
        CopySrcClass != &AArch64::ZPR4StridedOrContiguousRegClass)
      return false;
  }

  const unsigned Opc = NumOperands == 5
                           ? AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO
                           : AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO;

  const TargetInstrInfo *TII =
      MI.getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();

  // NOTE(review): the inputs are forwarded positionally; this presumes the
  // REG_SEQUENCE lists its sub-register indices in ascending order and that
  // the inputs carry no sub-register index of their own -- confirm.
  MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
                                    TII->get(Opc), MI.getOperand(0).getReg());
  for (unsigned I = 1; I < NumOperands; I += 2)
    MIB.addReg(MI.getOperand(I).getReg());

  MI.eraseFromParent();
  return true;
}
305+
228306
INITIALIZE_PASS(SMEPeepholeOpt, "aarch64-sme-peephole-opt",
229307
"SME Peephole Optimization", false, false)
230308

@@ -247,6 +325,12 @@ bool SMEPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
247325
bool BlockHasAllSMChangesRemoved;
248326
Changed |= optimizeStartStopPairs(MBB, BlockHasAllSMChangesRemoved);
249327
FunctionHasAllSMChangesRemoved |= BlockHasAllSMChangesRemoved;
328+
329+
if (MF.getSubtarget<AArch64Subtarget>().isStreaming()) {
330+
for (MachineInstr &MI : make_early_inc_range(MBB))
331+
if (MI.getOpcode() == AArch64::REG_SEQUENCE)
332+
Changed |= visitRegSequence(MI);
333+
}
250334
}
251335

252336
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

0 commit comments

Comments
 (0)