Skip to content

Commit 8d86cf4

Browse files
committed
[X86] Refine CLD insertion to trigger only when the direction flag is used
Rather than trying to enumerate every instruction that is affected by the direction flag, we can instead use findRegisterUseOperand to detect any instruction that uses DF.
1 parent 5e5b656 commit 8d86cf4

File tree

1 file changed

+24
-41
lines changed

1 file changed

+24
-41
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 24 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,34 +1418,6 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
14181418
return !isWin64Prologue(MF) && MF.needsFrameMoves();
14191419
}
14201420

1421-
/// Return true if an opcode is part of the REP group of instructions
1422-
static bool isOpcodeRep(unsigned Opcode) {
1423-
switch (Opcode) {
1424-
case X86::REPNE_PREFIX:
1425-
case X86::REP_MOVSB_32:
1426-
case X86::REP_MOVSB_64:
1427-
case X86::REP_MOVSD_32:
1428-
case X86::REP_MOVSD_64:
1429-
case X86::REP_MOVSQ_32:
1430-
case X86::REP_MOVSQ_64:
1431-
case X86::REP_MOVSW_32:
1432-
case X86::REP_MOVSW_64:
1433-
case X86::REP_PREFIX:
1434-
case X86::REP_STOSB_32:
1435-
case X86::REP_STOSB_64:
1436-
case X86::REP_STOSD_32:
1437-
case X86::REP_STOSD_64:
1438-
case X86::REP_STOSQ_32:
1439-
case X86::REP_STOSQ_64:
1440-
case X86::REP_STOSW_32:
1441-
case X86::REP_STOSW_64:
1442-
return true;
1443-
default:
1444-
break;
1445-
}
1446-
return false;
1447-
}
1448-
14491421
/// emitPrologue - Push callee-saved registers onto the stack, which
14501422
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
14511423
/// space for local variables. Also emit labels used by the exception handler to
@@ -2223,34 +2195,45 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
22232195
// in each prologue of interrupt handler function.
22242196
//
22252197
// Create "cld" instruction only in these cases:
2226-
// 1. The interrupt handling function uses any of the "rep" instructions.
2198+
// 1. If DF is used by any instruction (exempting PUSHF, as the purpose is to
2199+
// save eflags).
22272200
// 2. Interrupt handling function calls another function.
2228-
// 3. If there are any inline asm blocks, as we do not know what they do
2201+
// 3. If there are any inline asm blocks, as the ABI expects DF to be cleared
2202+
// unless manually set otherwise.
22292203
//
2230-
// TODO: We should also emit cld if we detect the use of std, but as of now,
2231-
// the compiler does not even emit that instruction or even define it, so in
2232-
// practice, this would only happen with inline asm, which we cover anyway.
22332204
if (Fn.getCallingConv() == CallingConv::X86_INTR) {
22342205
bool NeedsCLD = false;
22352206

22362207
for (const MachineBasicBlock &B : MF) {
22372208
for (const MachineInstr &MI : B) {
2238-
if (MI.isCall()) {
2209+
if (MI.isInlineAsm()) {
22392210
NeedsCLD = true;
22402211
break;
22412212
}
22422213

2243-
if (isOpcodeRep(MI.getOpcode())) {
2214+
if (MI.isCall()) {
22442215
NeedsCLD = true;
22452216
break;
22462217
}
22472218

2248-
if (MI.isInlineAsm()) {
2249-
// TODO: Parse asm for rep instructions or call sites?
2250-
// For now, let's play it safe and emit a cld instruction
2251-
// just in case.
2252-
NeedsCLD = true;
2253-
break;
2219+
if (MI.findRegisterUseOperand(X86::DF)) {
2220+
// Because pushing EFLAGS (to be popped later) reads DF, the instruction
2221+
// counts as a use, but we ignore it because its only purpose is to
2222+
// save EFLAGS to the stack.
2223+
switch (MI.getOpcode()) {
2224+
case X86::PUSHF16:
2225+
case X86::PUSHF32:
2226+
case X86::PUSHF64:
2227+
case X86::PUSHFS16:
2228+
case X86::PUSHFS32:
2229+
case X86::PUSHFS64:
2230+
break;
2231+
default:
2232+
NeedsCLD = true;
2233+
break;
2234+
}
2235+
if (NeedsCLD)
2236+
break;
22542237
}
22552238
}
22562239
}

0 commit comments

Comments
 (0)