Skip to content

Commit edef42d

Browse files
committed
Find dead register to use to prevent save-reload
1 parent 589c7ab commit edef42d

File tree

1 file changed

+48
-15
lines changed

1 file changed

+48
-15
lines changed

llvm/lib/Target/X86/X86LowerTileCopy.cpp

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "X86InstrBuilder.h"
2121
#include "X86InstrInfo.h"
2222
#include "X86Subtarget.h"
23+
#include "llvm/CodeGen/LiveRegUnits.h"
2324
#include "llvm/CodeGen/MachineBasicBlock.h"
2425
#include "llvm/CodeGen/MachineFrameInfo.h"
2526
#include "llvm/CodeGen/MachineFunction.h"
@@ -90,22 +91,52 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
9091
unsigned Size = TRI->getSpillSize(X86::TILERegClass);
9192
Align Alignment = TRI->getSpillAlign(X86::TILERegClass);
9293
int TileSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
93-
// Allocate stack slot for stride register
94-
Size = TRI->getSpillSize(X86::GR64RegClass);
95-
Alignment = TRI->getSpillAlign(X86::GR64RegClass);
96-
int StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
9794

98-
// TODO: Pick a killed regiter to avoid save/reload. There is problem
95+
int StrideSS = 0;
96+
97+
LiveRegUnits UsedRegs(*TRI);
98+
UsedRegs.addLiveOuts(MBB);
99+
100+
auto InstUpToMI = MBB.end();
101+
while (InstUpToMI != MI)
102+
// The pre-decrement is on purpose here.
103+
// We want to have the liveness right before I.
104+
UsedRegs.stepBackward(*--InstUpToMI);
105+
106+
// Look for a temporary register to use.
107+
BitVector GR64Regs =
108+
TRI->getAllocatableSet(MF, TRI->getRegClass(X86::GR64RegClassID));
109+
// Pick a killed register to avoid save/reload. There is problem
99110
// to get live interval in this stage.
100111
Register GR64Cand = X86::RAX;
101112

113+
// Find the first allocatable GR64 register that is available
114+
bool found = false;
115+
for (auto RegT : GR64Regs.set_bits()) {
116+
if (UsedRegs.available(RegT)) {
117+
GR64Cand = RegT;
118+
break;
119+
}
120+
}
121+
102122
const DebugLoc &DL = MI.getDebugLoc();
103-
// mov %rax (%sp)
104-
BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
105-
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)), StrideSS)
106-
.addReg(GR64Cand);
107-
// mov 64 %rax
108-
BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
123+
if (found) {
124+
// mov 64 %reg
125+
BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
126+
} else {
127+
// Allocate stack slot for stride register
128+
Size = TRI->getSpillSize(X86::GR64RegClass);
129+
Alignment = TRI->getSpillAlign(X86::GR64RegClass);
130+
StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
131+
132+
// mov %reg (%sp)
133+
BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
134+
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)),
135+
StrideSS)
136+
.addReg(GR64Cand);
137+
// mov 64 %reg
138+
BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
139+
}
109140
// tilestored %tmm, (%sp, %idx)
110141
#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
111142
unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
@@ -120,10 +151,12 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
120151
#undef GET_EGPR_IF_ENABLED
121152
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
122153
TileSS);
123-
// restore %rax
124-
// mov (%sp) %rax
125-
addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand),
126-
StrideSS);
154+
if (!found) {
155+
// restore %reg
156+
// mov (%sp) %reg
157+
addFrameReference(
158+
BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand), StrideSS);
159+
}
127160
MI.eraseFromParent();
128161
Changed = true;
129162
}

0 commit comments

Comments
 (0)