Skip to content

Commit 37c4ac8

Browse files
Baptiste Saleil authored and
Ahsan Saghir committed
[PowerPC] Accumulator/Unprimed Accumulator register copy, spill and restore
This patch adds support for accumulator/unprimed accumulator register copy, spill and restore for MMA.
Authored By: Baptiste Saleil
Reviewed By: #powerpc, bsaleil, amyk
Differential Revision: https://reviews.llvm.org/D90616
1 parent c8a0e27 commit 37c4ac8

File tree

7 files changed

+794
-15
lines changed

7 files changed

+794
-15
lines changed

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,33 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
13611361
Opc = PPC::CROR;
13621362
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
13631363
Opc = PPC::EVOR;
1364-
else
1364+
else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1365+
PPC::UACCRCRegClass.contains(DestReg)) &&
1366+
(PPC::ACCRCRegClass.contains(SrcReg) ||
1367+
PPC::UACCRCRegClass.contains(SrcReg))) {
1368+
// If primed, de-prime the source register, copy the individual registers
1369+
// and prime the destination if needed. The vector subregisters are
1370+
// vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1371+
// source is primed, we need to re-prime it after the copy as well.
1372+
PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1373+
bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1374+
bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1375+
MCRegister VSLSrcReg =
1376+
PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1377+
MCRegister VSLDestReg =
1378+
PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1379+
if (SrcPrimed)
1380+
BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1381+
for (unsigned Idx = 0; Idx < 4; Idx++)
1382+
BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1383+
.addReg(VSLSrcReg + Idx)
1384+
.addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1385+
if (DestPrimed)
1386+
BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1387+
if (SrcPrimed && !KillSrc)
1388+
BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1389+
return;
1390+
} else
13651391
llvm_unreachable("Impossible reg-to-reg copy");
13661392

13671393
const MCInstrDesc &MCID = get(Opc);
@@ -1372,7 +1398,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
13721398
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
13731399
}
13741400

1375-
static unsigned getSpillIndex(const TargetRegisterClass *RC) {
1401+
unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
13761402
int OpcodeIndex = 0;
13771403

13781404
if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
@@ -1401,6 +1427,18 @@ static unsigned getSpillIndex(const TargetRegisterClass *RC) {
14011427
OpcodeIndex = SOK_VectorFloat4Spill;
14021428
} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
14031429
OpcodeIndex = SOK_SpillToVSR;
1430+
} else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1431+
assert(Subtarget.pairedVectorMemops() &&
1432+
"Register unexpected when paired memops are disabled.");
1433+
OpcodeIndex = SOK_AccumulatorSpill;
1434+
} else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1435+
assert(Subtarget.pairedVectorMemops() &&
1436+
"Register unexpected when paired memops are disabled.");
1437+
OpcodeIndex = SOK_UAccumulatorSpill;
1438+
} else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1439+
assert(Subtarget.pairedVectorMemops() &&
1440+
"Register unexpected when paired memops are disabled.");
1441+
OpcodeIndex = SOK_PairedVecSpill;
14041442
} else {
14051443
llvm_unreachable("Unknown regclass!");
14061444
}
@@ -2799,7 +2837,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
27992837
}
28002838

28012839
unsigned PPCInstrInfo::getSpillTarget() const {
2802-
return Subtarget.hasP9Vector() ? 1 : 0;
2840+
// With P10, we may need to spill paired vector registers or accumulator
2841+
// registers. MMA implies paired vectors, so we can just check that.
2842+
bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
2843+
return IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 1 : 0;
28032844
}
28042845

28052846
const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const {

llvm/lib/Target/PowerPC/PPCInstrInfo.h

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,52 +123,72 @@ enum SpillOpcodeKey {
123123
SOK_VectorFloat8Spill,
124124
SOK_VectorFloat4Spill,
125125
SOK_SpillToVSR,
126+
SOK_PairedVecSpill,
127+
SOK_AccumulatorSpill,
128+
SOK_UAccumulatorSpill,
126129
SOK_SPESpill,
127130
SOK_LastOpcodeSpill // This must be last on the enum.
128131
};
129132

130133
// Define list of load and store spill opcodes.
134+
// Placeholder for spill-table slots that have no load/store opcode on a given
// subtarget (for example, the paired-vector and accumulator slots in the
// Pwr8/Pwr9 tables, or the SPE slot in the Pwr9/Pwr10 tables).
#define NoInstr PPC::INSTRUCTION_LIST_END
131135
#define Pwr8LoadOpcodes \
132136
{ \
133137
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
134138
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \
135-
PPC::SPILLTOVSR_LD, PPC::EVLDD \
139+
PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD \
136140
}
137141

138142
#define Pwr9LoadOpcodes \
139143
{ \
140144
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
141145
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
142-
PPC::DFLOADf32, PPC::SPILLTOVSR_LD \
146+
PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr \
147+
}
148+
149+
// Reload opcodes for the third row of LoadOpcodesForSpill. Entries are
// positional and must stay in SpillOpcodeKey order: the paired-vector,
// accumulator and unprimed-accumulator slots hold LXVP, RESTORE_ACC and
// RESTORE_UACC, and the trailing NoInstr fills the SOK_SPESpill slot.
#define Pwr10LoadOpcodes \
150+
{ \
151+
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
152+
PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
153+
PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \
154+
PPC::RESTORE_UACC, NoInstr \
143155
}
144156

145157
#define Pwr8StoreOpcodes \
146158
{ \
147159
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
148160
PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \
149-
PPC::SPILLTOVSR_ST, PPC::EVSTDD \
161+
PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD \
150162
}
151163

152164
#define Pwr9StoreOpcodes \
153165
{ \
154166
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
155167
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
156-
PPC::SPILLTOVSR_ST \
168+
PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr \
169+
}
170+
171+
// Spill (store) opcodes for the third row of StoreOpcodesForSpill. Entries are
// positional and must stay in SpillOpcodeKey order: the paired-vector,
// accumulator and unprimed-accumulator slots hold STXVP, SPILL_ACC and
// SPILL_UACC, and the trailing NoInstr fills the SOK_SPESpill slot.
#define Pwr10StoreOpcodes \
172+
{ \
173+
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
174+
PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
175+
PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \
176+
NoInstr \
156177
}
158178

159179
// Initialize arrays for load and store spill opcodes on supported subtargets.
160180
#define StoreOpcodesForSpill \
161-
{ Pwr8StoreOpcodes, Pwr9StoreOpcodes }
181+
{ Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes }
162182
#define LoadOpcodesForSpill \
163-
{ Pwr8LoadOpcodes, Pwr9LoadOpcodes }
183+
{ Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes }
164184

165185
class PPCSubtarget;
166186
class PPCInstrInfo : public PPCGenInstrInfo {
167187
PPCSubtarget &Subtarget;
168188
const PPCRegisterInfo RI;
169-
const unsigned StoreSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
189+
const unsigned StoreSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
170190
StoreOpcodesForSpill;
171-
const unsigned LoadSpillOpcodesArray[2][SOK_LastOpcodeSpill] =
191+
const unsigned LoadSpillOpcodesArray[3][SOK_LastOpcodeSpill] =
172192
LoadOpcodesForSpill;
173193

174194
void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill,
@@ -226,6 +246,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
226246
unsigned getSpillTarget() const;
227247
const unsigned *getStoreOpcodesForSpillArray() const;
228248
const unsigned *getLoadOpcodesForSpillArray() const;
249+
unsigned getSpillIndex(const TargetRegisterClass *RC) const;
229250
int16_t getFMAOpIdxInfo(unsigned Opcode) const;
230251
void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
231252
SmallVectorImpl<MachineInstr *> &InsInstrs,

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,18 @@ let Predicates = [MMA] in {
12941294
XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
12951295
"xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
12961296
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
1297+
// Spill/restore pseudo instructions for the acc and uacc operand classes.
// Each takes a memrix16 memory operand. PPCRegisterInfo::eliminateFrameIndex
// hands the SPILL_* pseudos to lowerACCSpilling and the RESTORE_* pseudos to
// lowerACCRestore, which replace them with real instructions.
// NOTE(review): the store pseudos do not clear hasSideEffects while the load
// pseudos do; confirm this asymmetry is intended.
let mayStore = 1 in {
1298+
def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
1299+
"#SPILL_ACC", []>;
1300+
def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
1301+
"#SPILL_UACC", []>;
1302+
}
1303+
let mayLoad = 1, hasSideEffects = 0 in {
1304+
def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
1305+
"#RESTORE_ACC", []>;
1306+
def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
1307+
"#RESTORE_UACC", []>;
1308+
}
12971309
}
12981310

12991311
let Predicates = [MMA, PrefixInstrs] in {

llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,21 @@ MaxCRBitSpillDist("ppc-max-crbit-spill-dist",
7575
"spill on ppc"),
7676
cl::Hidden, cl::init(100));
7777

78+
// Copies/moves of physical accumulators are expensive operations
79+
// that should be avoided whenever possible. MMA instructions are
80+
// meant to be used in performance-sensitive computational kernels.
81+
// This option is provided, at least for the time being, to give the
82+
// user a tool to detect this expensive operation and either rework
83+
// their code or report a compiler bug if that turns out to be the
84+
// cause.
// The option is only defined in builds where NDEBUG is not defined. It is
// hidden and defaults to false; when set, the accumulator copy, spill and
// restore hooks in this file print a message and dump the basic block.
85+
#ifndef NDEBUG
86+
static cl::opt<bool>
87+
ReportAccMoves("ppc-report-acc-moves",
88+
cl::desc("Emit information about accumulator register spills "
89+
"and copies"),
90+
cl::Hidden, cl::init(false));
91+
#endif
92+
7893
static unsigned offsetMinAlignForOpcode(unsigned OpC);
7994

8095
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
@@ -936,6 +951,109 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
936951
MBB.erase(II);
937952
}
938953

954+
/// Debug aid for accumulator register copies. When NDEBUG is not defined and
/// the ppc-report-acc-moves option is set, prints whether the copy goes
/// from/to an "acc" (register in ACCRC) or "uacc" register and dumps \p MBB.
/// In NDEBUG builds this function does nothing.
void PPCRegisterInfo::emitAccCopyInfo(MachineBasicBlock &MBB,
                                      MCRegister DestReg, MCRegister SrcReg) {
#ifndef NDEBUG
  if (!ReportAccMoves)
    return;

  // Anything that is not in ACCRC is reported as "uacc".
  const char *const SrcKind =
      PPC::ACCRCRegClass.contains(SrcReg) ? "acc" : "uacc";
  const char *const DestKind =
      PPC::ACCRCRegClass.contains(DestReg) ? "acc" : "uacc";

  dbgs() << "Emitting copy from " << SrcKind << " to " << DestKind << ":\n";
  MBB.dump();
#endif
}
967+
968+
/// Debug aid for accumulator spills and restores. When NDEBUG is not defined
/// and the ppc-report-acc-moves option is set, prints whether an "acc"
/// (\p IsPrimed) or "uacc" register is being spilled or restored
/// (\p IsRestore) and dumps \p MBB. In NDEBUG builds this does nothing.
static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed,
                                    bool IsRestore) {
#ifndef NDEBUG
  if (!ReportAccMoves)
    return;

  const char *const RegKind = IsPrimed ? "acc" : "uacc";
  const char *const Action = IsRestore ? "restore" : "spill";

  dbgs() << "Emitting " << RegKind << " register " << Action << ":\n";
  MBB.dump();
#endif
}
980+
981+
/// lowerACCSpilling - Generate the code for spilling the accumulator register.
982+
/// Similarly to other spills/reloads that use pseudo-ops, we do not actually
983+
/// eliminate the FrameIndex here nor compute the stack offset. We simply
984+
/// create a real instruction with an FI and rely on eliminateFrameIndex to
985+
/// handle the FI elimination.
986+
void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II,
987+
unsigned FrameIndex) const {
988+
MachineInstr &MI = *II; // SPILL_ACC <SrcReg>, <offset>
989+
MachineBasicBlock &MBB = *MI.getParent();
990+
MachineFunction &MF = *MBB.getParent();
991+
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
992+
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
993+
DebugLoc DL = MI.getDebugLoc();
994+
Register SrcReg = MI.getOperand(0).getReg();
995+
bool IsKilled = MI.getOperand(0).isKill();
996+
997+
bool IsPrimed = PPC::ACCRCRegClass.contains(SrcReg);
998+
Register Reg =
999+
PPC::VSRp0 + (SrcReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2;
1000+
bool IsLittleEndian = Subtarget.isLittleEndian();
1001+
1002+
emitAccSpillRestoreInfo(MBB, IsPrimed, false);
1003+
1004+
// De-prime the register being spilled, create two stores for the pair
1005+
// subregisters accounting for endianness and then re-prime the register if
1006+
// it isn't killed. This uses the Offset parameter to addFrameReference() to
1007+
// adjust the offset of the store that is within the 64-byte stack slot.
1008+
if (IsPrimed)
1009+
BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1010+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
1011+
.addReg(Reg, getKillRegState(IsKilled)),
1012+
FrameIndex, IsLittleEndian ? 32 : 0);
1013+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
1014+
.addReg(Reg + 1, getKillRegState(IsKilled)),
1015+
FrameIndex, IsLittleEndian ? 0 : 32);
1016+
if (IsPrimed && !IsKilled)
1017+
BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1018+
1019+
// Discard the pseudo instruction.
1020+
MBB.erase(II);
1021+
}
1022+
1023+
/// lowerACCRestore - Generate the code to restore the accumulator register.
1024+
void PPCRegisterInfo::lowerACCRestore(MachineBasicBlock::iterator II,
1025+
unsigned FrameIndex) const {
1026+
MachineInstr &MI = *II; // <DestReg> = RESTORE_ACC <offset>
1027+
MachineBasicBlock &MBB = *MI.getParent();
1028+
MachineFunction &MF = *MBB.getParent();
1029+
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
1030+
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1031+
DebugLoc DL = MI.getDebugLoc();
1032+
1033+
Register DestReg = MI.getOperand(0).getReg();
1034+
assert(MI.definesRegister(DestReg) &&
1035+
"RESTORE_ACC does not define its destination");
1036+
1037+
bool IsPrimed = PPC::ACCRCRegClass.contains(DestReg);
1038+
Register Reg =
1039+
PPC::VSRp0 + (DestReg - (IsPrimed ? PPC::ACC0 : PPC::UACC0)) * 2;
1040+
bool IsLittleEndian = Subtarget.isLittleEndian();
1041+
1042+
emitAccSpillRestoreInfo(MBB, IsPrimed, true);
1043+
1044+
// Create two loads for the pair subregisters accounting for endianness and
1045+
// then prime the accumulator register being restored.
1046+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg),
1047+
FrameIndex, IsLittleEndian ? 32 : 0);
1048+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), Reg + 1),
1049+
FrameIndex, IsLittleEndian ? 0 : 32);
1050+
if (IsPrimed)
1051+
BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), DestReg).addReg(DestReg);
1052+
1053+
// Discard the pseudo instruction.
1054+
MBB.erase(II);
1055+
}
1056+
9391057
bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
9401058
Register Reg, int &FrameIdx) const {
9411059
// For the nonvolatile condition registers (CR2, CR3, CR4) return true to
@@ -1067,6 +1185,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
10671185
} else if (OpC == PPC::RESTORE_CRBIT) {
10681186
lowerCRBitRestore(II, FrameIndex);
10691187
return;
1188+
} else if (OpC == PPC::SPILL_ACC || OpC == PPC::SPILL_UACC) {
1189+
lowerACCSpilling(II, FrameIndex);
1190+
return;
1191+
} else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) {
1192+
lowerACCRestore(II, FrameIndex);
1193+
return;
10701194
}
10711195

10721196
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).

llvm/lib/Target/PowerPC/PPCRegisterInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,14 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
120120
void lowerCRBitRestore(MachineBasicBlock::iterator II,
121121
unsigned FrameIndex) const;
122122

123+
void lowerACCSpilling(MachineBasicBlock::iterator II,
124+
unsigned FrameIndex) const;
125+
void lowerACCRestore(MachineBasicBlock::iterator II,
126+
unsigned FrameIndex) const;
127+
128+
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg,
129+
MCRegister SrcReg);
130+
123131
bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
124132
int &FrameIdx) const override;
125133
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,

0 commit comments

Comments
 (0)