[X86] Properly chain PROBED_ALLOCA / SEG_ALLOCA #116508
Conversation
@llvm/pr-subscribers-backend-x86

Author: Sergei Barannikov (s-barannikov)

Changes:
These nodes should appear between CALLSEQ_START / CALLSEQ_END. Previously, they could be scheduled after CALLSEQ_END because the nodes didn't update the chain.

The change in a test is due to the X86 call frame optimizer pass bailing out for a particular call when CALLSEQ_START / CALLSEQ_END are not in the same basic block. This happens because SEG_ALLOCA is expanded into a sequence of basic blocks early. It didn't bail out before because the closing CALLSEQ_END was scheduled before SEG_ALLOCA, in the same basic block as CALLSEQ_START.

While here, simplify creation of these nodes: allocating a virtual register and copying `Size` into it were unnecessary.

Full diff: https://github.com/llvm/llvm-project/pull/116508.diff

2 Files Affected:
- llvm/lib/Target/X86/X86ISelLowering.cpp
- llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c984c5b6da8735..61c24d148757ac 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25270,13 +25270,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Align StackAlign = TFI.getStackAlign();
if (hasInlineStackProbe(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
- Register Vreg = MRI.createVirtualRegister(AddrRegClass);
- Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
- Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
- DAG.getRegister(Vreg, SPTy));
+ Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, {SPTy, MVT::Other},
+ {Chain, Size});
+ Chain = Result.getValue(1);
} else {
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
@@ -25288,8 +25284,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
DAG.getSignedConstant(~(Alignment->value() - 1ULL), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
} else if (SplitStack) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
if (Is64Bit) {
// The 64 bit implementation of segmented stacks needs to clobber both r10
// r11. This makes it impossible to use it along with nested parameters.
@@ -25301,11 +25295,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
}
- const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
- Register Vreg = MRI.createVirtualRegister(AddrRegClass);
- Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
- Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
- DAG.getRegister(Vreg, SPTy));
+ Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, {SPTy, MVT::Other},
+ {Chain, Size});
+ Chain = Result.getValue(1);
} else {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::DYN_ALLOCA, dl, NodeTys, Chain, Size);
diff --git a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
index 4fb061e1bb764e..e8359cb088dc38 100644
--- a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -24,9 +24,9 @@ define i32 @test_basic(i32 %l) #0 {
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl %esp, %eax
; X86-NEXT: leal 15(,%esi,4), %ecx
; X86-NEXT: andl $-16, %ecx
-; X86-NEXT: movl %esp, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmpl %eax, %gs:48
; X86-NEXT: jg .LBB0_4
@@ -39,17 +39,17 @@ define i32 @test_basic(i32 %l) #0 {
; X86-NEXT: calll __morestack_allocate_stack_space
; X86-NEXT: addl $16, %esp
; X86-NEXT: .LBB0_5:
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll dummy_use@PLT
; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %esi, %esi
; X86-NEXT: je .LBB0_6
; X86-NEXT: # %bb.8: # %false
; X86-NEXT: decl %esi
-; X86-NEXT: subl $12, %esp
-; X86-NEXT: pushl %esi
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl %esi, (%esp)
; X86-NEXT: calll test_basic@PLT
; X86-NEXT: jmp .LBB0_7
; X86-NEXT: .LBB0_6: # %true
@@ -83,10 +83,10 @@ define i32 @test_basic(i32 %l) #0 {
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_offset %rbx, -24
; X64-NEXT: movl %edi, %ebx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movq %rsp, %rdi
+; X64-NEXT: movl %ebx, %eax
; X64-NEXT: leaq 15(,%rax,4), %rax
; X64-NEXT: andq $-16, %rax
-; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: subq %rax, %rdi
; X64-NEXT: cmpq %rdi, %fs:112
; X64-NEXT: jg .LBB0_4
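For context, the core of the change visible in the X86ISelLowering.cpp hunks above is switching these nodes from a copy-into-virtual-register idiom to a chained two-result node. A minimal sketch of the before/after, reusing the names from the diff (`Chain`, `Size`, `Result`, `SPTy`, `dl`, `DAG`) rather than a standalone excerpt:

```cpp
// Old pattern (removed): Size was copied into a fresh virtual register and
// the node read it back via a Register operand. Because the node produced
// no chain result, nothing forced it to stay before CALLSEQ_END.
//   Register Vreg = MRI.createVirtualRegister(getRegClassFor(SPTy));
//   Chain  = DAG.getCopyToReg(Chain, dl, Vreg, Size);
//   Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
//                        DAG.getRegister(Vreg, SPTy));

// New pattern: the node takes Size directly and also produces an MVT::Other
// result. Threading that chain result back into Chain keeps the alloca
// ordered between CALLSEQ_START and CALLSEQ_END.
Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, {SPTy, MVT::Other},
                     {Chain, Size});
Chain = Result.getValue(1); // value 0: allocated address, value 1: chain
```

The same transformation applies to SEG_ALLOCA. The test churn follows from the description: once CALLSEQ_END can end up in a different basic block than CALLSEQ_START, the X86 call frame optimizer no longer folds the argument stores into pushes, so the test now expects explicit subl/movl sequences instead of pushl.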
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from 8afbeb7 to a51c0f4.