Skip to content

Commit 6f53ae6

Browse files
authored
[X86] Properly chain PROBED_ALLOCA / SEG_ALLOCA (#116508)
These nodes should appear between CALLSEQ_START / CALLSEQ_END. Previously, they could be scheduled after CALLSEQ_END because the nodes did not produce a chain result, so the chain was never updated. The change in the test is due to the X86 call frame optimizer pass bailing out for a particular call when CALLSEQ_START / CALLSEQ_END are not in the same basic block. This happens because SEG_ALLOCA is expanded into a sequence of basic blocks early. The pass didn't bail out before because the closing CALLSEQ_END was scheduled before SEG_ALLOCA, in the same basic block as CALLSEQ_START. While here, simplify the creation of these nodes: allocating a virtual register and copying `Size` into it was unnecessary.
1 parent 61726ad commit 6f53ae6

File tree

2 files changed

+14
-22
lines changed

2 files changed

+14
-22
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25270,13 +25270,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
2527025270
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
2527125271
const Align StackAlign = TFI.getStackAlign();
2527225272
if (hasInlineStackProbe(MF)) {
25273-
MachineRegisterInfo &MRI = MF.getRegInfo();
25274-
25275-
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
25276-
Register Vreg = MRI.createVirtualRegister(AddrRegClass);
25277-
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
25278-
Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
25279-
DAG.getRegister(Vreg, SPTy));
25273+
Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, {SPTy, MVT::Other},
25274+
{Chain, Size});
25275+
Chain = Result.getValue(1);
2528025276
} else {
2528125277
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
2528225278
Chain = SP.getValue(1);
@@ -25288,8 +25284,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
2528825284
DAG.getSignedConstant(~(Alignment->value() - 1ULL), dl, VT));
2528925285
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
2529025286
} else if (SplitStack) {
25291-
MachineRegisterInfo &MRI = MF.getRegInfo();
25292-
2529325287
if (Is64Bit) {
2529425288
// The 64-bit implementation of segmented stacks needs to clobber both r10 and
2529525289
// r11. This makes it impossible to use it along with nested parameters.
@@ -25301,11 +25295,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
2530125295
}
2530225296
}
2530325297

25304-
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
25305-
Register Vreg = MRI.createVirtualRegister(AddrRegClass);
25306-
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
25307-
Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
25308-
DAG.getRegister(Vreg, SPTy));
25298+
Result =
25299+
DAG.getNode(X86ISD::SEG_ALLOCA, dl, {SPTy, MVT::Other}, {Chain, Size});
25300+
Chain = Result.getValue(1);
2530925301
} else {
2531025302
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2531125303
Chain = DAG.getNode(X86ISD::DYN_ALLOCA, dl, NodeTys, Chain, Size);

llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ define i32 @test_basic(i32 %l) #0 {
2424
; X86-NEXT: pushl %eax
2525
; X86-NEXT: .cfi_offset %esi, -12
2626
; X86-NEXT: movl 8(%ebp), %esi
27+
; X86-NEXT: movl %esp, %eax
2728
; X86-NEXT: leal 15(,%esi,4), %ecx
2829
; X86-NEXT: andl $-16, %ecx
29-
; X86-NEXT: movl %esp, %eax
3030
; X86-NEXT: subl %ecx, %eax
3131
; X86-NEXT: cmpl %eax, %gs:48
3232
; X86-NEXT: jg .LBB0_4
@@ -39,17 +39,17 @@ define i32 @test_basic(i32 %l) #0 {
3939
; X86-NEXT: calll __morestack_allocate_stack_space
4040
; X86-NEXT: addl $16, %esp
4141
; X86-NEXT: .LBB0_5:
42-
; X86-NEXT: subl $8, %esp
43-
; X86-NEXT: pushl %esi
44-
; X86-NEXT: pushl %eax
42+
; X86-NEXT: subl $16, %esp
43+
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
44+
; X86-NEXT: movl %eax, (%esp)
4545
; X86-NEXT: calll dummy_use@PLT
4646
; X86-NEXT: addl $16, %esp
4747
; X86-NEXT: testl %esi, %esi
4848
; X86-NEXT: je .LBB0_6
4949
; X86-NEXT: # %bb.8: # %false
5050
; X86-NEXT: decl %esi
51-
; X86-NEXT: subl $12, %esp
52-
; X86-NEXT: pushl %esi
51+
; X86-NEXT: subl $16, %esp
52+
; X86-NEXT: movl %esi, (%esp)
5353
; X86-NEXT: calll test_basic@PLT
5454
; X86-NEXT: jmp .LBB0_7
5555
; X86-NEXT: .LBB0_6: # %true
@@ -83,10 +83,10 @@ define i32 @test_basic(i32 %l) #0 {
8383
; X64-NEXT: pushq %rax
8484
; X64-NEXT: .cfi_offset %rbx, -24
8585
; X64-NEXT: movl %edi, %ebx
86-
; X64-NEXT: movl %edi, %eax
86+
; X64-NEXT: movq %rsp, %rdi
87+
; X64-NEXT: movl %ebx, %eax
8788
; X64-NEXT: leaq 15(,%rax,4), %rax
8889
; X64-NEXT: andq $-16, %rax
89-
; X64-NEXT: movq %rsp, %rdi
9090
; X64-NEXT: subq %rax, %rdi
9191
; X64-NEXT: cmpq %rdi, %fs:112
9292
; X64-NEXT: jg .LBB0_4

0 commit comments

Comments
 (0)