Skip to content

Commit c089f02

Browse files
committed
[X86] Don't setup and teardown memory for a musttail call
Summary: musttail calls should not require allocating extra stack for arguments. Updates to arguments passed in memory should happen in place before the epilogue. This bug was mostly a missed optimization, unless inalloca was used and store to push conversion fired. If a reserved call frame was used for an inalloca musttail call, the call setup and teardown instructions would be deleted, and SP adjustments would be inserted in the prologue and epilogue. You can see these are removed from several test cases in this change. In the case where the stack frame was not reserved, i.e. call frame optimization fires and turns argument stores into pushes, then the imbalanced call frame setup instructions created for inalloca calls become a problem. They remain in the instruction stream, resulting in a call setup that allocates zero bytes (expected for inalloca), and a call teardown that deallocates the inalloca pack. This deallocation was unbalanced, leading to subsequent crashes. Reviewers: hans Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71097
1 parent a7bdab2 commit c089f02

File tree

5 files changed

+50
-76
lines changed

5 files changed

+50
-76
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3808,7 +3808,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
38083808
"the only memory argument");
38093809
}
38103810

3811-
if (!IsSibcall)
3811+
if (!IsSibcall && !IsMustTail)
38123812
Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
38133813
NumBytes - NumBytesToPush, dl);
38143814

@@ -4093,7 +4093,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
40934093
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
40944094
SmallVector<SDValue, 8> Ops;
40954095

4096-
if (!IsSibcall && isTailCall) {
4096+
if (!IsSibcall && isTailCall && !IsMustTail) {
40974097
Chain = DAG.getCALLSEQ_END(Chain,
40984098
DAG.getIntPtrConstant(NumBytesToPop, dl, true),
40994099
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);

llvm/test/CodeGen/X86/cfguard-checks.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,7 @@ entry:
171171

172172
; X64-LABEL: func_cf_tail
173173
; X64: leaq target_func(%rip), %rax
174-
; X64: movq __guard_dispatch_icall_fptr(%rip), %rcx
175-
; X64: rex64 jmpq *%rcx # TAILCALL
174+
; X64: rex64 jmpq *__guard_dispatch_icall_fptr(%rip) # TAILCALL
176175
; X64-NOT: callq
177176
}
178177

@@ -205,7 +204,6 @@ entry:
205204
; X64: movq (%rcx), %rax
206205
; X64-NEXT: movq 8(%rax), %rax
207206
; X64-NEXT: movq __guard_dispatch_icall_fptr(%rip), %rdx
208-
; X64-NEXT: addq $40, %rsp
209207
; X64-NEXT: rex64 jmpq *%rdx # TAILCALL
210208
; X64-NOT: callq
211209
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc %s -o - | FileCheck %s
3+
4+
; Previously, we would accidentally leave behind SP adjustments to setup a call
5+
; frame for the musttail call target, and SP adjustments would end up
6+
; unbalanced. Reported as https://crbug.com/1026882.
7+
8+
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
9+
target triple = "i386-pc-windows-msvc19.16.0"
10+
11+
; 20 bytes of memory.
12+
%struct.Args = type { i32, i32, i32, i32, i32 }
13+
14+
declare dso_local x86_thiscallcc void @methodWithVtorDisp(i8* nocapture readonly, <{ %struct.Args }>* inalloca)
15+
16+
define dso_local x86_thiscallcc void @methodWithVtorDisp_thunk(i8* %0, <{ %struct.Args }>* inalloca %1) #0 {
17+
; CHECK-LABEL: methodWithVtorDisp_thunk:
18+
; CHECK: # %bb.0:
19+
; CHECK-NEXT: pushl %esi
20+
; CHECK-NEXT: movl %ecx, %esi
21+
; CHECK-NEXT: subl -4(%ecx), %esi
22+
; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
23+
; CHECK-NEXT: pushl $_methodWithVtorDisp_thunk
24+
; CHECK-NEXT: calll ___cyg_profile_func_exit
25+
; CHECK-NEXT: addl $8, %esp
26+
; CHECK-NEXT: movl %esi, %ecx
27+
; CHECK-NEXT: popl %esi
28+
; CHECK-NEXT: jmp _methodWithVtorDisp # TAILCALL
29+
%3 = getelementptr inbounds i8, i8* %0, i32 -4
30+
%4 = bitcast i8* %3 to i32*
31+
%5 = load i32, i32* %4, align 4
32+
%6 = sub i32 0, %5
33+
%7 = getelementptr i8, i8* %0, i32 %6
34+
musttail call x86_thiscallcc void @methodWithVtorDisp(i8* %7, <{ %struct.Args }>* inalloca nonnull %1)
35+
ret void
36+
}
37+
38+
attributes #0 = { nounwind optsize "instrument-function-exit-inlined"="__cyg_profile_func_exit" }

llvm/test/CodeGen/X86/musttail-tailcc.ll

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ declare tailcc i32 @tailcallee(i32 %a1, i32 %a2)
99
define tailcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
1010
; X64-LABEL: tailcaller:
1111
; X64: # %bb.0: # %entry
12-
; X64-NEXT: pushq %rax
13-
; X64-NEXT: popq %rax
1412
; X64-NEXT: jmp tailcallee # TAILCALL
1513
;
1614
; X32-LABEL: tailcaller:
@@ -26,8 +24,6 @@ declare tailcc i8* @alias_callee()
2624
define tailcc noalias i8* @noalias_caller() nounwind {
2725
; X64-LABEL: noalias_caller:
2826
; X64: # %bb.0:
29-
; X64-NEXT: pushq %rax
30-
; X64-NEXT: popq %rax
3127
; X64-NEXT: jmp alias_callee # TAILCALL
3228
;
3329
; X32-LABEL: noalias_caller:
@@ -42,8 +38,6 @@ declare tailcc noalias i8* @noalias_callee()
4238
define tailcc i8* @alias_caller() nounwind {
4339
; X64-LABEL: alias_caller:
4440
; X64: # %bb.0:
45-
; X64-NEXT: pushq %rax
46-
; X64-NEXT: popq %rax
4741
; X64-NEXT: jmp noalias_callee # TAILCALL
4842
;
4943
; X32-LABEL: alias_caller:
@@ -56,25 +50,17 @@ define tailcc i8* @alias_caller() nounwind {
5650
define tailcc void @void_test(i32, i32, i32, i32) {
5751
; X64-LABEL: void_test:
5852
; X64: # %bb.0: # %entry
59-
; X64-NEXT: pushq %rax
60-
; X64-NEXT: .cfi_def_cfa_offset 16
61-
; X64-NEXT: popq %rax
62-
; X64-NEXT: .cfi_def_cfa_offset 8
6353
; X64-NEXT: jmp void_test # TAILCALL
6454
;
6555
; X32-LABEL: void_test:
6656
; X32: # %bb.0: # %entry
6757
; X32-NEXT: pushl %esi
6858
; X32-NEXT: .cfi_def_cfa_offset 8
69-
; X32-NEXT: subl $8, %esp
70-
; X32-NEXT: .cfi_def_cfa_offset 16
7159
; X32-NEXT: .cfi_offset %esi, -8
7260
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
7361
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
7462
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
7563
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
76-
; X32-NEXT: addl $8, %esp
77-
; X32-NEXT: .cfi_def_cfa_offset 8
7864
; X32-NEXT: popl %esi
7965
; X32-NEXT: .cfi_def_cfa_offset 4
8066
; X32-NEXT: jmp void_test # TAILCALL
@@ -86,25 +72,17 @@ define tailcc void @void_test(i32, i32, i32, i32) {
8672
define tailcc i1 @i1test(i32, i32, i32, i32) {
8773
; X64-LABEL: i1test:
8874
; X64: # %bb.0: # %entry
89-
; X64-NEXT: pushq %rax
90-
; X64-NEXT: .cfi_def_cfa_offset 16
91-
; X64-NEXT: popq %rax
92-
; X64-NEXT: .cfi_def_cfa_offset 8
9375
; X64-NEXT: jmp i1test # TAILCALL
9476
;
9577
; X32-LABEL: i1test:
9678
; X32: # %bb.0: # %entry
9779
; X32-NEXT: pushl %esi
9880
; X32-NEXT: .cfi_def_cfa_offset 8
99-
; X32-NEXT: subl $8, %esp
100-
; X32-NEXT: .cfi_def_cfa_offset 16
10181
; X32-NEXT: .cfi_offset %esi, -8
10282
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
10383
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
10484
; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
10585
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
106-
; X32-NEXT: addl $8, %esp
107-
; X32-NEXT: .cfi_def_cfa_offset 8
10886
; X32-NEXT: popl %esi
10987
; X32-NEXT: .cfi_def_cfa_offset 4
11088
; X32-NEXT: jmp i1test # TAILCALL

llvm/test/CodeGen/X86/musttail-varargs.ll

Lines changed: 9 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -246,12 +246,13 @@ define void @f_thunk(i8* %this, ...) {
246246
; X86-NOSSE-NEXT: movl %esp, %ebp
247247
; X86-NOSSE-NEXT: pushl %esi
248248
; X86-NOSSE-NEXT: andl $-16, %esp
249-
; X86-NOSSE-NEXT: subl $48, %esp
249+
; X86-NOSSE-NEXT: subl $32, %esp
250250
; X86-NOSSE-NEXT: movl 8(%ebp), %esi
251251
; X86-NOSSE-NEXT: leal 12(%ebp), %eax
252-
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
253-
; X86-NOSSE-NEXT: movl %esi, (%esp)
252+
; X86-NOSSE-NEXT: movl %eax, (%esp)
253+
; X86-NOSSE-NEXT: pushl %esi
254254
; X86-NOSSE-NEXT: calll _get_f
255+
; X86-NOSSE-NEXT: addl $4, %esp
255256
; X86-NOSSE-NEXT: movl %esi, 8(%ebp)
256257
; X86-NOSSE-NEXT: leal -4(%ebp), %esp
257258
; X86-NOSSE-NEXT: popl %esi
@@ -264,17 +265,18 @@ define void @f_thunk(i8* %this, ...) {
264265
; X86-SSE-NEXT: movl %esp, %ebp
265266
; X86-SSE-NEXT: pushl %esi
266267
; X86-SSE-NEXT: andl $-16, %esp
267-
; X86-SSE-NEXT: subl $96, %esp
268+
; X86-SSE-NEXT: subl $80, %esp
268269
; X86-SSE-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
269270
; X86-SSE-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
270-
; X86-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
271+
; X86-SSE-NEXT: movaps %xmm0, (%esp) # 16-byte Spill
271272
; X86-SSE-NEXT: movl 8(%ebp), %esi
272273
; X86-SSE-NEXT: leal 12(%ebp), %eax
273274
; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
274-
; X86-SSE-NEXT: movl %esi, (%esp)
275+
; X86-SSE-NEXT: pushl %esi
275276
; X86-SSE-NEXT: calll _get_f
277+
; X86-SSE-NEXT: addl $4, %esp
276278
; X86-SSE-NEXT: movl %esi, 8(%ebp)
277-
; X86-SSE-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
279+
; X86-SSE-NEXT: movaps (%esp), %xmm0 # 16-byte Reload
278280
; X86-SSE-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
279281
; X86-SSE-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
280282
; X86-SSE-NEXT: leal -4(%ebp), %esp
@@ -300,38 +302,21 @@ define void @f_thunk(i8* %this, ...) {
300302
define void @g_thunk(i8* %fptr_i8, ...) {
301303
; LINUX-LABEL: g_thunk:
302304
; LINUX: # %bb.0:
303-
; LINUX-NEXT: pushq %rax
304-
; LINUX-NEXT: .cfi_def_cfa_offset 16
305-
; LINUX-NEXT: popq %r11
306-
; LINUX-NEXT: .cfi_def_cfa_offset 8
307305
; LINUX-NEXT: jmpq *%rdi # TAILCALL
308306
;
309307
; LINUX-X32-LABEL: g_thunk:
310308
; LINUX-X32: # %bb.0:
311-
; LINUX-X32-NEXT: pushq %rax
312-
; LINUX-X32-NEXT: .cfi_def_cfa_offset 16
313309
; LINUX-X32-NEXT: movl %edi, %r11d
314-
; LINUX-X32-NEXT: addl $8, %esp
315-
; LINUX-X32-NEXT: .cfi_def_cfa_offset 8
316310
; LINUX-X32-NEXT: jmpq *%r11 # TAILCALL
317311
;
318312
; WINDOWS-LABEL: g_thunk:
319313
; WINDOWS: # %bb.0:
320-
; WINDOWS-NEXT: subq $40, %rsp
321-
; WINDOWS-NEXT: .seh_stackalloc 40
322-
; WINDOWS-NEXT: .seh_endprologue
323-
; WINDOWS-NEXT: addq $40, %rsp
324314
; WINDOWS-NEXT: rex64 jmpq *%rcx # TAILCALL
325-
; WINDOWS-NEXT: .seh_handlerdata
326-
; WINDOWS-NEXT: .text
327-
; WINDOWS-NEXT: .seh_endproc
328315
;
329316
; X86-LABEL: g_thunk:
330317
; X86: # %bb.0:
331-
; X86-NEXT: pushl %eax
332318
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
333319
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
334-
; X86-NEXT: popl %ecx
335320
; X86-NEXT: jmpl *%eax # TAILCALL
336321
%fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
337322
musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
@@ -347,78 +332,53 @@ define void @g_thunk(i8* %fptr_i8, ...) {
347332
define void @h_thunk(%struct.Foo* %this, ...) {
348333
; LINUX-LABEL: h_thunk:
349334
; LINUX: # %bb.0:
350-
; LINUX-NEXT: pushq %rax
351-
; LINUX-NEXT: .cfi_def_cfa_offset 16
352335
; LINUX-NEXT: cmpb $1, (%rdi)
353336
; LINUX-NEXT: jne .LBB2_2
354337
; LINUX-NEXT: # %bb.1: # %then
355338
; LINUX-NEXT: movq 8(%rdi), %r11
356-
; LINUX-NEXT: addq $8, %rsp
357-
; LINUX-NEXT: .cfi_def_cfa_offset 8
358339
; LINUX-NEXT: jmpq *%r11 # TAILCALL
359340
; LINUX-NEXT: .LBB2_2: # %else
360-
; LINUX-NEXT: .cfi_def_cfa_offset 16
361341
; LINUX-NEXT: movq 16(%rdi), %r11
362342
; LINUX-NEXT: movl $42, {{.*}}(%rip)
363-
; LINUX-NEXT: addq $8, %rsp
364-
; LINUX-NEXT: .cfi_def_cfa_offset 8
365343
; LINUX-NEXT: jmpq *%r11 # TAILCALL
366344
;
367345
; LINUX-X32-LABEL: h_thunk:
368346
; LINUX-X32: # %bb.0:
369-
; LINUX-X32-NEXT: pushq %rax
370-
; LINUX-X32-NEXT: .cfi_def_cfa_offset 16
371347
; LINUX-X32-NEXT: cmpb $1, (%edi)
372348
; LINUX-X32-NEXT: jne .LBB2_2
373349
; LINUX-X32-NEXT: # %bb.1: # %then
374350
; LINUX-X32-NEXT: movl 4(%edi), %r11d
375-
; LINUX-X32-NEXT: addl $8, %esp
376-
; LINUX-X32-NEXT: .cfi_def_cfa_offset 8
377351
; LINUX-X32-NEXT: jmpq *%r11 # TAILCALL
378352
; LINUX-X32-NEXT: .LBB2_2: # %else
379-
; LINUX-X32-NEXT: .cfi_def_cfa_offset 16
380353
; LINUX-X32-NEXT: movl 8(%edi), %r11d
381354
; LINUX-X32-NEXT: movl $42, {{.*}}(%rip)
382-
; LINUX-X32-NEXT: addl $8, %esp
383-
; LINUX-X32-NEXT: .cfi_def_cfa_offset 8
384355
; LINUX-X32-NEXT: jmpq *%r11 # TAILCALL
385356
;
386357
; WINDOWS-LABEL: h_thunk:
387358
; WINDOWS: # %bb.0:
388-
; WINDOWS-NEXT: subq $40, %rsp
389-
; WINDOWS-NEXT: .seh_stackalloc 40
390-
; WINDOWS-NEXT: .seh_endprologue
391359
; WINDOWS-NEXT: cmpb $1, (%rcx)
392360
; WINDOWS-NEXT: jne .LBB2_2
393361
; WINDOWS-NEXT: # %bb.1: # %then
394362
; WINDOWS-NEXT: movq 8(%rcx), %rax
395-
; WINDOWS-NEXT: addq $40, %rsp
396363
; WINDOWS-NEXT: rex64 jmpq *%rax # TAILCALL
397364
; WINDOWS-NEXT: .LBB2_2: # %else
398365
; WINDOWS-NEXT: movq 16(%rcx), %rax
399366
; WINDOWS-NEXT: movl $42, {{.*}}(%rip)
400-
; WINDOWS-NEXT: addq $40, %rsp
401367
; WINDOWS-NEXT: rex64 jmpq *%rax # TAILCALL
402-
; WINDOWS-NEXT: .seh_handlerdata
403-
; WINDOWS-NEXT: .text
404-
; WINDOWS-NEXT: .seh_endproc
405368
;
406369
; X86-LABEL: h_thunk:
407370
; X86: # %bb.0:
408-
; X86-NEXT: pushl %eax
409371
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
410372
; X86-NEXT: cmpb $1, (%eax)
411373
; X86-NEXT: jne LBB2_2
412374
; X86-NEXT: # %bb.1: # %then
413375
; X86-NEXT: movl 4(%eax), %ecx
414376
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
415-
; X86-NEXT: popl %eax
416377
; X86-NEXT: jmpl *%ecx # TAILCALL
417378
; X86-NEXT: LBB2_2: # %else
418379
; X86-NEXT: movl 8(%eax), %ecx
419380
; X86-NEXT: movl $42, _g
420381
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
421-
; X86-NEXT: popl %eax
422382
; X86-NEXT: jmpl *%ecx # TAILCALL
423383
%cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
424384
%cond = load i1, i1* %cond_p

0 commit comments

Comments
 (0)