@@ -1418,34 +1418,6 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1418
1418
return !isWin64Prologue (MF) && MF.needsFrameMoves ();
1419
1419
}
1420
1420
1421
- /// Return true if an opcode is part of the REP group of instructions.
1422
- static bool isOpcodeRep (unsigned Opcode) {
1423
- switch (Opcode) {
1424
- case X86::REPNE_PREFIX:
1425
- case X86::REP_MOVSB_32:
1426
- case X86::REP_MOVSB_64:
1427
- case X86::REP_MOVSD_32:
1428
- case X86::REP_MOVSD_64:
1429
- case X86::REP_MOVSQ_32:
1430
- case X86::REP_MOVSQ_64:
1431
- case X86::REP_MOVSW_32:
1432
- case X86::REP_MOVSW_64:
1433
- case X86::REP_PREFIX:
1434
- case X86::REP_STOSB_32:
1435
- case X86::REP_STOSB_64:
1436
- case X86::REP_STOSD_32:
1437
- case X86::REP_STOSD_64:
1438
- case X86::REP_STOSQ_32:
1439
- case X86::REP_STOSQ_64:
1440
- case X86::REP_STOSW_32:
1441
- case X86::REP_STOSW_64:
1442
- return true ;
1443
- default :
1444
- break ;
1445
- }
1446
- return false ;
1447
- }
1448
-
1449
1421
/// emitPrologue - Push callee-saved registers onto the stack, which
1450
1422
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
1451
1423
/// space for local variables. Also emit labels used by the exception handler to
@@ -2223,35 +2195,54 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
2223
2195
// in each prologue of interrupt handler function.
2224
2196
//
2225
2197
// Create "cld" instruction only in these cases:
2226
- // 1. The interrupt handling function uses any of the "rep" instructions.
2198
+ // 1. If DF is used by any instruction (exempting PUSHF, as the purpose is to
2199
+ // save eflags).
2227
2200
// 2. Interrupt handling function calls another function.
2228
- // 3. If there are any inline asm blocks, as we do not know what they do
2201
+ // 3. If there are any inline asm blocks, as the ABI expects DF to be cleared
2202
+ // unless manually set otherwise.
2229
2203
//
2230
- // TODO: We should also emit cld if we detect the use of std, but as of now,
2231
- // the compiler does not even emit that instruction or even define it, so in
2232
- // practice, this would only happen with inline asm, which we cover anyway.
2233
2204
if (Fn.getCallingConv () == CallingConv::X86_INTR) {
2234
2205
bool NeedsCLD = false ;
2235
2206
2236
2207
for (const MachineBasicBlock &B : MF) {
2237
2208
for (const MachineInstr &MI : B) {
2238
- if (MI.isCall ()) {
2209
+ if (MI.isInlineAsm ()) {
2239
2210
NeedsCLD = true ;
2240
2211
break ;
2241
2212
}
2242
2213
2243
- if (isOpcodeRep (MI.getOpcode ())) {
2244
- NeedsCLD = true ;
2214
+ if (MI.findRegisterDefOperand (X86::DF)) {
2215
+ // We do not need CLD because we clobber DF anyway before the flag is
2216
+ // even used.
2217
+ // FIXME: Is this even possible? Only cld and std can do this.
2218
+ NeedsCLD = false ;
2245
2219
break ;
2246
2220
}
2247
2221
2248
- if (MI.isInlineAsm ()) {
2249
- // TODO: Parse asm for rep instructions or call sites?
2250
- // For now, let's play it safe and emit a cld instruction
2251
- // just in case.
2222
+ if (MI.isCall ()) {
2252
2223
NeedsCLD = true ;
2253
2224
break ;
2254
2225
}
2226
+
2227
+ if (MI.findRegisterUseOperand (X86::DF)) {
2228
+ // Pushing EFLAGS (to be popped back later) counts as a use of DF, but
2229
+ // we ignore such instructions because their only purpose is to save
2230
+ // EFLAGS to the stack.
2231
+ switch (MI.getOpcode ()) {
2232
+ case X86::PUSHF16:
2233
+ case X86::PUSHF32:
2234
+ case X86::PUSHF64:
2235
+ case X86::PUSHFS16:
2236
+ case X86::PUSHFS32:
2237
+ case X86::PUSHFS64:
2238
+ break ;
2239
+ default :
2240
+ NeedsCLD = true ;
2241
+ break ;
2242
+ }
2243
+ if (NeedsCLD)
2244
+ break ;
2245
+ }
2255
2246
}
2256
2247
}
2257
2248
0 commit comments