Skip to content

Commit 0a54b36

Browse files
authored
[X86] Resolve FIXME: Create cld only when needed (#82415)
Only emit cld in the prologue of an interrupt handler when the handler uses rep instructions, calls another function, or contains inline asm.
1 parent 6287b7b commit 0a54b36

File tree

4 files changed

+63
-25
lines changed

4 files changed

+63
-25
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,6 +1418,34 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
14181418
return !isWin64Prologue(MF) && MF.needsFrameMoves();
14191419
}
14201420

1421+
/// Return true if an opcode is part of the REP group of instructions
1422+
static bool isOpcodeRep(unsigned Opcode) {
1423+
switch (Opcode) {
1424+
case X86::REPNE_PREFIX:
1425+
case X86::REP_MOVSB_32:
1426+
case X86::REP_MOVSB_64:
1427+
case X86::REP_MOVSD_32:
1428+
case X86::REP_MOVSD_64:
1429+
case X86::REP_MOVSQ_32:
1430+
case X86::REP_MOVSQ_64:
1431+
case X86::REP_MOVSW_32:
1432+
case X86::REP_MOVSW_64:
1433+
case X86::REP_PREFIX:
1434+
case X86::REP_STOSB_32:
1435+
case X86::REP_STOSB_64:
1436+
case X86::REP_STOSD_32:
1437+
case X86::REP_STOSD_64:
1438+
case X86::REP_STOSQ_32:
1439+
case X86::REP_STOSQ_64:
1440+
case X86::REP_STOSW_32:
1441+
case X86::REP_STOSW_64:
1442+
return true;
1443+
default:
1444+
break;
1445+
}
1446+
return false;
1447+
}
1448+
14211449
/// emitPrologue - Push callee-saved registers onto the stack, which
14221450
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
14231451
/// space for local variables. Also emit labels used by the exception handler to
@@ -2194,13 +2222,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
21942222
// flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
21952223
// in each prologue of interrupt handler function.
21962224
//
2197-
// FIXME: Create "cld" instruction only in these cases:
2225+
// Create "cld" instruction only in these cases:
21982226
// 1. The interrupt handling function uses any of the "rep" instructions.
21992227
// 2. Interrupt handling function calls another function.
2228+
// 3. If there are any inline asm blocks, as we do not know what they do
22002229
//
2201-
if (Fn.getCallingConv() == CallingConv::X86_INTR)
2202-
BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2203-
.setMIFlag(MachineInstr::FrameSetup);
2230+
// TODO: We should also emit cld if we detect the use of std, but as of now,
2231+
// the compiler does not even emit that instruction or even define it, so in
2232+
// practice, this would only happen with inline asm, which we cover anyway.
2233+
if (Fn.getCallingConv() == CallingConv::X86_INTR) {
2234+
bool NeedsCLD = false;
2235+
2236+
for (const MachineBasicBlock &B : MF) {
2237+
for (const MachineInstr &MI : B) {
2238+
if (MI.isCall()) {
2239+
NeedsCLD = true;
2240+
break;
2241+
}
2242+
2243+
if (isOpcodeRep(MI.getOpcode())) {
2244+
NeedsCLD = true;
2245+
break;
2246+
}
2247+
2248+
if (MI.isInlineAsm()) {
2249+
// TODO: Parse asm for rep instructions or call sites?
2250+
// For now, let's play it safe and emit a cld instruction
2251+
// just in case.
2252+
NeedsCLD = true;
2253+
break;
2254+
}
2255+
}
2256+
}
2257+
2258+
if (NeedsCLD) {
2259+
BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2260+
.setMIFlag(MachineInstr::FrameSetup);
2261+
}
2262+
}
22042263

22052264
// At this point we know if the function has WinCFI or not.
22062265
MF.setHasWinCFI(HasWinCFI);

llvm/test/CodeGen/X86/x86-32-intrcc.ll

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
149149
; CHECK-NEXT: pushl %ebp
150150
; CHECK-NEXT: movl %esp, %ebp
151151
; CHECK-NEXT: andl $-16, %esp
152-
; CHECK-NEXT: cld
153152
; CHECK-NEXT: fldt f80
154153
; CHECK-NEXT: fld1
155154
; CHECK-NEXT: faddp %st, %st(1)
@@ -163,7 +162,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
163162
; CHECK0-NEXT: pushl %ebp
164163
; CHECK0-NEXT: movl %esp, %ebp
165164
; CHECK0-NEXT: andl $-16, %esp
166-
; CHECK0-NEXT: cld
167165
; CHECK0-NEXT: fldt f80
168166
; CHECK0-NEXT: fld1
169167
; CHECK0-NEXT: faddp %st, %st(1)
@@ -188,7 +186,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
188186
; CHECK-NEXT: pushl %ecx
189187
; CHECK-NEXT: pushl %eax
190188
; CHECK-NEXT: andl $-16, %esp
191-
; CHECK-NEXT: cld
192189
; CHECK-NEXT: leal 20(%ebp), %eax
193190
; CHECK-NEXT: leal 4(%ebp), %ecx
194191
; CHECK-NEXT: movl %ecx, sink_address
@@ -206,7 +203,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
206203
; CHECK0-NEXT: pushl %ecx
207204
; CHECK0-NEXT: pushl %eax
208205
; CHECK0-NEXT: andl $-16, %esp
209-
; CHECK0-NEXT: cld
210206
; CHECK0-NEXT: leal 4(%ebp), %ecx
211207
; CHECK0-NEXT: movl %ecx, %eax
212208
; CHECK0-NEXT: addl $16, %eax
@@ -234,7 +230,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
234230
; CHECK-NEXT: pushl %ecx
235231
; CHECK-NEXT: pushl %eax
236232
; CHECK-NEXT: andl $-16, %esp
237-
; CHECK-NEXT: cld
238233
; CHECK-NEXT: movl 4(%ebp), %eax
239234
; CHECK-NEXT: leal 24(%ebp), %ecx
240235
; CHECK-NEXT: leal 8(%ebp), %edx
@@ -257,7 +252,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
257252
; CHECK0-NEXT: pushl %ecx
258253
; CHECK0-NEXT: pushl %eax
259254
; CHECK0-NEXT: andl $-16, %esp
260-
; CHECK0-NEXT: cld
261255
; CHECK0-NEXT: movl 4(%ebp), %eax
262256
; CHECK0-NEXT: leal 8(%ebp), %edx
263257
; CHECK0-NEXT: movl %edx, %ecx
@@ -288,7 +282,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
288282
; CHECK-NEXT: movl %esp, %ebp
289283
; CHECK-NEXT: pushl %eax
290284
; CHECK-NEXT: andl $-16, %esp
291-
; CHECK-NEXT: cld
292285
; CHECK-NEXT: leal 4(%ebp), %eax
293286
; CHECK-NEXT: movl %eax, sink_address
294287
; CHECK-NEXT: leal -4(%ebp), %esp
@@ -303,7 +296,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
303296
; CHECK0-NEXT: movl %esp, %ebp
304297
; CHECK0-NEXT: pushl %eax
305298
; CHECK0-NEXT: andl $-16, %esp
306-
; CHECK0-NEXT: cld
307299
; CHECK0-NEXT: movl 4(%ebp), %eax
308300
; CHECK0-NEXT: leal 4(%ebp), %eax
309301
; CHECK0-NEXT: movl %eax, sink_address
@@ -358,7 +350,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
358350
; CHECK-NEXT: pushl %eax
359351
; CHECK-NEXT: andl $-32, %esp
360352
; CHECK-NEXT: subl $32, %esp
361-
; CHECK-NEXT: cld
362353
; CHECK-NEXT: movl 4(%ebp), %eax
363354
; CHECK-NEXT: movl %eax, (%esp)
364355
; CHECK-NEXT: leal -4(%ebp), %esp
@@ -374,7 +365,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
374365
; CHECK0-NEXT: pushl %eax
375366
; CHECK0-NEXT: andl $-32, %esp
376367
; CHECK0-NEXT: subl $32, %esp
377-
; CHECK0-NEXT: cld
378368
; CHECK0-NEXT: movl 4(%ebp), %eax
379369
; CHECK0-NEXT: movl %eax, (%esp)
380370
; CHECK0-NEXT: leal -4(%ebp), %esp

llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,28 +21,24 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_empty(ptr nocapture byval(%s
2121
; CHECK-USER-LABEL: test_uintr_isr_cc_empty:
2222
; CHECK-USER: # %bb.0: # %entry
2323
; CHECK-USER-NEXT: pushq %rax
24-
; CHECK-USER-NEXT: cld
2524
; CHECK-USER-NEXT: addq $16, %rsp
2625
; CHECK-USER-NEXT: uiret
2726
;
2827
; CHECK0-USER-LABEL: test_uintr_isr_cc_empty:
2928
; CHECK0-USER: # %bb.0: # %entry
3029
; CHECK0-USER-NEXT: pushq %rax
31-
; CHECK0-USER-NEXT: cld
3230
; CHECK0-USER-NEXT: addq $16, %rsp
3331
; CHECK0-USER-NEXT: uiret
3432
;
3533
; CHECK-KERNEL-LABEL: test_uintr_isr_cc_empty:
3634
; CHECK-KERNEL: # %bb.0: # %entry
3735
; CHECK-KERNEL-NEXT: pushq %rax
38-
; CHECK-KERNEL-NEXT: cld
3936
; CHECK-KERNEL-NEXT: addq $16, %rsp
4037
; CHECK-KERNEL-NEXT: iretq
4138
;
4239
; CHECK0-KERNEL-LABEL: test_uintr_isr_cc_empty:
4340
; CHECK0-KERNEL: # %bb.0: # %entry
4441
; CHECK0-KERNEL-NEXT: pushq %rax
45-
; CHECK0-KERNEL-NEXT: cld
4642
; CHECK0-KERNEL-NEXT: addq $16, %rsp
4743
; CHECK0-KERNEL-NEXT: iretq
4844
entry:
@@ -75,7 +71,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
7571
; CHECK-USER-NEXT: pushq %rax
7672
; CHECK-USER-NEXT: pushq %rdx
7773
; CHECK-USER-NEXT: pushq %rcx
78-
; CHECK-USER-NEXT: cld
7974
; CHECK-USER-NEXT: movq 32(%rsp), %rax
8075
; CHECK-USER-NEXT: movq 40(%rsp), %rcx
8176
; CHECK-USER-NEXT: movq 48(%rsp), %rdx
@@ -96,7 +91,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
9691
; CHECK0-USER-NEXT: pushq %rax
9792
; CHECK0-USER-NEXT: pushq %rdx
9893
; CHECK0-USER-NEXT: pushq %rcx
99-
; CHECK0-USER-NEXT: cld
10094
; CHECK0-USER-NEXT: movq 32(%rsp), %rax
10195
; CHECK0-USER-NEXT: leaq 40(%rsp), %rcx
10296
; CHECK0-USER-NEXT: movq (%rcx), %rdx
@@ -118,7 +112,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
118112
; CHECK-KERNEL-NEXT: pushq %rax
119113
; CHECK-KERNEL-NEXT: pushq %rdx
120114
; CHECK-KERNEL-NEXT: pushq %rcx
121-
; CHECK-KERNEL-NEXT: cld
122115
; CHECK-KERNEL-NEXT: movq 32(%rsp), %rax
123116
; CHECK-KERNEL-NEXT: movq 40(%rsp), %rcx
124117
; CHECK-KERNEL-NEXT: movq 48(%rsp), %rdx
@@ -139,7 +132,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
139132
; CHECK0-KERNEL-NEXT: pushq %rax
140133
; CHECK0-KERNEL-NEXT: pushq %rdx
141134
; CHECK0-KERNEL-NEXT: pushq %rcx
142-
; CHECK0-KERNEL-NEXT: cld
143135
; CHECK0-KERNEL-NEXT: movq 32(%rsp), %rax
144136
; CHECK0-KERNEL-NEXT: leaq 40(%rsp), %rcx
145137
; CHECK0-KERNEL-NEXT: movq (%rcx), %rdx

llvm/test/CodeGen/X86/x86-64-intrcc.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
114114
; CHECK: # %bb.0: # %entry
115115
; CHECK-NEXT: pushq %rbp
116116
; CHECK-NEXT: movq %rsp, %rbp
117-
; CHECK: cld
118117
; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
119118
; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
120119
; CHECK: movq %[[R1]], sink_address
@@ -136,7 +135,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
136135
; CHECK-NEXT: pushq %rax
137136
; CHECK-NEXT: pushq %rbp
138137
; CHECK-NEXT: movq %rsp, %rbp
139-
; CHECK: cld
140138
; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
141139
; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
142140
; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
@@ -164,7 +162,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
164162
; CHECK-NEXT: pushq %rax
165163
; CHECK-NEXT: pushq %rbp
166164
; CHECK-NEXT: movq %rsp, %rbp
167-
; CHECK: cld
168165
; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
169166
; CHECK: movq %[[R1]], sink_address(%rip)
170167
entry:

0 commit comments

Comments
 (0)