Skip to content

Commit b675a76

Browse files
committed
Merge memtag instructions with adjacent stack slots.
Summary: Detect a run of memory tagging instructions for adjacent stack frame slots, and replace them with a shorter instruction sequence:
* replace STG + STG with ST2G
* replace STGloop + STGloop with STGloop

This code needs to run when stack slot offsets are already known, but before FrameIndex operands in STG instructions are eliminated; that's the reason for the new hook in PrologEpilogInserter.

This change modifies the STGloop and STZGloop pseudos to take the size as an immediate integer operand, and the base address as a FI operand when possible. This is needed to simplify recognizing an STGloop instruction as operating on a stack slot post-regalloc.

This improves memtag code size by ~0.25%, and it looks like an additional ~0.1% is possible by rearranging the stack frame such that consecutive STG instructions reference adjacent slots (patch pending).

Reviewers: pcc, ostannard
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70286
1 parent ba181d0 commit b675a76

13 files changed

+808
-43
lines changed

llvm/include/llvm/CodeGen/TargetFrameLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,13 @@ class TargetFrameLowering {
309309
RegScavenger *RS = nullptr) const {
310310
}
311311

312+
/// processFunctionBeforeFrameIndicesReplaced - This method is called
313+
/// immediately before MO_FrameIndex operands are eliminated, but after the
314+
/// frame is finalized. This method is optional.
315+
virtual void
316+
processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
317+
RegScavenger *RS = nullptr) const {}
318+
312319
virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const {
313320
report_fatal_error("WinEH not implemented for this target");
314321
}

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
259259
for (auto &I : EntryDbgValues)
260260
I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
261261

262+
// Allow the target machine to make final modifications to the function
263+
// after the frame layout is finalized, but before MO_FrameIndex operands are replaced.
264+
TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS);
265+
262266
// Replace all MO_FrameIndex operands with physical register references
263267
// and actual offsets.
264268
//

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,22 +349,38 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
349349
MachineBasicBlock::iterator &NextMBBI) {
350350
MachineInstr &MI = *MBBI;
351351
DebugLoc DL = MI.getDebugLoc();
352-
Register SizeReg = MI.getOperand(2).getReg();
353-
Register AddressReg = MI.getOperand(3).getReg();
352+
Register SizeReg = MI.getOperand(0).getReg();
353+
Register AddressReg = MI.getOperand(1).getReg();
354354

355355
MachineFunction *MF = MBB.getParent();
356356

357357
bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
358-
const unsigned OpCode =
358+
const unsigned OpCode1 =
359+
ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
360+
const unsigned OpCode2 =
359361
ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
360362

363+
unsigned Size = MI.getOperand(2).getImm();
364+
assert(Size > 0 && Size % 16 == 0);
365+
if (Size % (16 * 2) != 0) {
366+
BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
367+
.addReg(AddressReg)
368+
.addReg(AddressReg)
369+
.addImm(1);
370+
Size -= 16;
371+
}
372+
MachineBasicBlock::iterator I =
373+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
374+
.addImm(Size);
375+
expandMOVImm(MBB, I, 64);
376+
361377
auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
362378
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
363379

364380
MF->insert(++MBB.getIterator(), LoopBB);
365381
MF->insert(++LoopBB->getIterator(), DoneBB);
366382

367-
BuildMI(LoopBB, DL, TII->get(OpCode))
383+
BuildMI(LoopBB, DL, TII->get(OpCode2))
368384
.addDef(AddressReg)
369385
.addReg(AddressReg)
370386
.addReg(AddressReg)

0 commit comments

Comments
 (0)