Skip to content

[X86] Resolve FIXME: Create cld only when needed #82415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 63 additions & 4 deletions llvm/lib/Target/X86/X86FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,34 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions
static bool isOpcodeRep(unsigned Opcode) {
switch (Opcode) {
case X86::REPNE_PREFIX:
case X86::REP_MOVSB_32:
case X86::REP_MOVSB_64:
case X86::REP_MOVSD_32:
case X86::REP_MOVSD_64:
case X86::REP_MOVSQ_32:
case X86::REP_MOVSQ_64:
case X86::REP_MOVSW_32:
case X86::REP_MOVSW_64:
case X86::REP_PREFIX:
case X86::REP_STOSB_32:
case X86::REP_STOSB_64:
case X86::REP_STOSD_32:
case X86::REP_STOSD_64:
case X86::REP_STOSQ_32:
case X86::REP_STOSQ_64:
case X86::REP_STOSW_32:
case X86::REP_STOSW_64:
return true;
default:
break;
}
return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
Expand Down Expand Up @@ -2194,13 +2222,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
// in each prologue of interrupt handler function.
//
// FIXME: Create "cld" instruction only in these cases:
// Create "cld" instruction only in these cases:
// 1. The interrupt handling function uses any of the "rep" instructions.
// 2. Interrupt handling function calls another function.
// 3. If there are any inline asm blocks, as we do not know what they do
//
if (Fn.getCallingConv() == CallingConv::X86_INTR)
BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
.setMIFlag(MachineInstr::FrameSetup);
// TODO: We should also emit cld if we detect the use of std, but as of now,
// the compiler does not even emit that instruction or even define it, so in
// practice, this would only happen with inline asm, which we cover anyway.
if (Fn.getCallingConv() == CallingConv::X86_INTR) {
bool NeedsCLD = false;

for (const MachineBasicBlock &B : MF) {
for (const MachineInstr &MI : B) {
if (MI.isCall()) {
NeedsCLD = true;
break;
}

if (isOpcodeRep(MI.getOpcode())) {
NeedsCLD = true;
break;
}

if (MI.isInlineAsm()) {
// TODO: Parse asm for rep instructions or call sites?
// For now, let's play it safe and emit a cld instruction
// just in case.
NeedsCLD = true;
break;
}
}
}

if (NeedsCLD) {
BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
.setMIFlag(MachineInstr::FrameSetup);
}
}

// At this point we know if the function has WinCFI or not.
MF.setHasWinCFI(HasWinCFI);
Expand Down
10 changes: 0 additions & 10 deletions llvm/test/CodeGen/X86/x86-32-intrcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: fldt f80
; CHECK-NEXT: fld1
; CHECK-NEXT: faddp %st, %st(1)
Expand All @@ -163,7 +162,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
; CHECK0-NEXT: pushl %ebp
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: fldt f80
; CHECK0-NEXT: fld1
; CHECK0-NEXT: faddp %st, %st(1)
Expand All @@ -188,7 +186,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: leal 20(%ebp), %eax
; CHECK-NEXT: leal 4(%ebp), %ecx
; CHECK-NEXT: movl %ecx, sink_address
Expand All @@ -206,7 +203,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: leal 4(%ebp), %ecx
; CHECK0-NEXT: movl %ecx, %eax
; CHECK0-NEXT: addl $16, %eax
Expand Down Expand Up @@ -234,7 +230,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: movl 4(%ebp), %eax
; CHECK-NEXT: leal 24(%ebp), %ecx
; CHECK-NEXT: leal 8(%ebp), %edx
Expand All @@ -257,7 +252,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 8(%ebp), %edx
; CHECK0-NEXT: movl %edx, %ecx
Expand Down Expand Up @@ -288,7 +282,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: leal 4(%ebp), %eax
; CHECK-NEXT: movl %eax, sink_address
; CHECK-NEXT: leal -4(%ebp), %esp
Expand All @@ -303,7 +296,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 4(%ebp), %eax
; CHECK0-NEXT: movl %eax, sink_address
Expand Down Expand Up @@ -358,7 +350,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: cld
; CHECK-NEXT: movl 4(%ebp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: leal -4(%ebp), %esp
Expand All @@ -374,7 +365,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-32, %esp
; CHECK0-NEXT: subl $32, %esp
; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: movl %eax, (%esp)
; CHECK0-NEXT: leal -4(%ebp), %esp
Expand Down
8 changes: 0 additions & 8 deletions llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,24 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_empty(ptr nocapture byval(%s
; CHECK-USER-LABEL: test_uintr_isr_cc_empty:
; CHECK-USER: # %bb.0: # %entry
; CHECK-USER-NEXT: pushq %rax
; CHECK-USER-NEXT: cld
; CHECK-USER-NEXT: addq $16, %rsp
; CHECK-USER-NEXT: uiret
;
; CHECK0-USER-LABEL: test_uintr_isr_cc_empty:
; CHECK0-USER: # %bb.0: # %entry
; CHECK0-USER-NEXT: pushq %rax
; CHECK0-USER-NEXT: cld
; CHECK0-USER-NEXT: addq $16, %rsp
; CHECK0-USER-NEXT: uiret
;
; CHECK-KERNEL-LABEL: test_uintr_isr_cc_empty:
; CHECK-KERNEL: # %bb.0: # %entry
; CHECK-KERNEL-NEXT: pushq %rax
; CHECK-KERNEL-NEXT: cld
; CHECK-KERNEL-NEXT: addq $16, %rsp
; CHECK-KERNEL-NEXT: iretq
;
; CHECK0-KERNEL-LABEL: test_uintr_isr_cc_empty:
; CHECK0-KERNEL: # %bb.0: # %entry
; CHECK0-KERNEL-NEXT: pushq %rax
; CHECK0-KERNEL-NEXT: cld
; CHECK0-KERNEL-NEXT: addq $16, %rsp
; CHECK0-KERNEL-NEXT: iretq
entry:
Expand Down Expand Up @@ -75,7 +71,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK-USER-NEXT: pushq %rax
; CHECK-USER-NEXT: pushq %rdx
; CHECK-USER-NEXT: pushq %rcx
; CHECK-USER-NEXT: cld
; CHECK-USER-NEXT: movq 32(%rsp), %rax
; CHECK-USER-NEXT: movq 40(%rsp), %rcx
; CHECK-USER-NEXT: movq 48(%rsp), %rdx
Expand All @@ -96,7 +91,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK0-USER-NEXT: pushq %rax
; CHECK0-USER-NEXT: pushq %rdx
; CHECK0-USER-NEXT: pushq %rcx
; CHECK0-USER-NEXT: cld
; CHECK0-USER-NEXT: movq 32(%rsp), %rax
; CHECK0-USER-NEXT: leaq 40(%rsp), %rcx
; CHECK0-USER-NEXT: movq (%rcx), %rdx
Expand All @@ -118,7 +112,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK-KERNEL-NEXT: pushq %rax
; CHECK-KERNEL-NEXT: pushq %rdx
; CHECK-KERNEL-NEXT: pushq %rcx
; CHECK-KERNEL-NEXT: cld
; CHECK-KERNEL-NEXT: movq 32(%rsp), %rax
; CHECK-KERNEL-NEXT: movq 40(%rsp), %rcx
; CHECK-KERNEL-NEXT: movq 48(%rsp), %rdx
Expand All @@ -139,7 +132,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK0-KERNEL-NEXT: pushq %rax
; CHECK0-KERNEL-NEXT: pushq %rdx
; CHECK0-KERNEL-NEXT: pushq %rcx
; CHECK0-KERNEL-NEXT: cld
; CHECK0-KERNEL-NEXT: movq 32(%rsp), %rax
; CHECK0-KERNEL-NEXT: leaq 40(%rsp), %rcx
; CHECK0-KERNEL-NEXT: movq (%rcx), %rdx
Expand Down
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/X86/x86-64-intrcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
; CHECK: movq %[[R1]], sink_address
Expand All @@ -136,7 +135,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
Expand Down Expand Up @@ -164,7 +162,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
; CHECK: movq %[[R1]], sink_address(%rip)
entry:
Expand Down