[llvm][clang] Allocate a new stack instead of spawning a new thread to get more stack space #133173
Conversation
@llvm/pr-subscribers-llvm-support @llvm/pr-subscribers-clang

Author: Michael Spencer (Bigcheese)

Changes

Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult as how the threads are connected is difficult to recover.

This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it this allocates a new stack and moves to it using assembly. Doing split stacks like this actually runs on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this only enables it there for now.

Full diff: https://github.com/llvm/llvm-project/pull/133173.diff

12 Files Affected:
diff --git a/clang/include/clang/Basic/Stack.h b/clang/include/clang/Basic/Stack.h
index 30ebd94aedd1f..9674b9d9b62c3 100644
--- a/clang/include/clang/Basic/Stack.h
+++ b/clang/include/clang/Basic/Stack.h
@@ -27,7 +27,10 @@ namespace clang {
/// Call this once on each thread, as soon after starting the thread as
/// feasible, to note the approximate address of the bottom of the stack.
- void noteBottomOfStack();
+ ///
+ /// \param ForceSet set to true if you know the call is near the bottom of a
+ /// new stack. Used for split stacks.
+ void noteBottomOfStack(bool ForceSet = false);
/// Determine whether the stack is nearly exhausted.
bool isStackNearlyExhausted();
diff --git a/clang/lib/Basic/Stack.cpp b/clang/lib/Basic/Stack.cpp
index aa15d8e66950f..8cbb84943f8d3 100644
--- a/clang/lib/Basic/Stack.cpp
+++ b/clang/lib/Basic/Stack.cpp
@@ -13,33 +13,13 @@
#include "clang/Basic/Stack.h"
#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/Support/ProgramStack.h"
-#ifdef _MSC_VER
-#include <intrin.h> // for _AddressOfReturnAddress
-#endif
+static LLVM_THREAD_LOCAL uintptr_t BottomOfStack = 0;
-static LLVM_THREAD_LOCAL void *BottomOfStack = nullptr;
-
-static void *getStackPointer() {
-#if __GNUC__ || __has_builtin(__builtin_frame_address)
- return __builtin_frame_address(0);
-#elif defined(_MSC_VER)
- return _AddressOfReturnAddress();
-#else
- char CharOnStack = 0;
- // The volatile store here is intended to escape the local variable, to
- // prevent the compiler from optimizing CharOnStack into anything other
- // than a char on the stack.
- //
- // Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19.
- char *volatile Ptr = &CharOnStack;
- return Ptr;
-#endif
-}
-
-void clang::noteBottomOfStack() {
- if (!BottomOfStack)
- BottomOfStack = getStackPointer();
+void clang::noteBottomOfStack(bool ForceSet) {
+ if (!BottomOfStack || ForceSet)
+ BottomOfStack = llvm::getStackPointer();
}
bool clang::isStackNearlyExhausted() {
@@ -51,7 +31,8 @@ bool clang::isStackNearlyExhausted() {
if (!BottomOfStack)
return false;
- intptr_t StackDiff = (intptr_t)getStackPointer() - (intptr_t)BottomOfStack;
+ intptr_t StackDiff =
+ (intptr_t)llvm::getStackPointer() - (intptr_t)BottomOfStack;
size_t StackUsage = (size_t)std::abs(StackDiff);
// If the stack pointer has a surprising value, we do not understand this
@@ -66,9 +47,12 @@ bool clang::isStackNearlyExhausted() {
void clang::runWithSufficientStackSpaceSlow(llvm::function_ref<void()> Diag,
llvm::function_ref<void()> Fn) {
llvm::CrashRecoveryContext CRC;
- CRC.RunSafelyOnThread([&] {
- noteBottomOfStack();
+ // Preserve the BottomOfStack in case RunSafelyOnNewStack uses split stacks.
+ uintptr_t PrevBottom = BottomOfStack;
+ CRC.RunSafelyOnNewStack([&] {
+ noteBottomOfStack(true);
Diag();
Fn();
}, DesiredStackSize);
+ BottomOfStack = PrevBottom;
}
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 4e13b6ced252f..0d6616a022001 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -1276,7 +1276,7 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc,
// Execute the action to actually build the module in-place. Use a separate
// thread so that we get a stack large enough.
- bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnThread(
+ bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack(
[&]() {
GenerateModuleFromModuleMapAction Action;
Instance.ExecuteAction(Action);
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 15ae04f5a6913..8982b75b9abeb 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -21,6 +21,7 @@ if (ANDROID OR CYGWIN OR CMAKE_SYSTEM_NAME MATCHES "AIX|DragonFly|FreeBSD|Haiku|
set(HAVE_MACH_MACH_H 0)
set(HAVE_MALLOC_MALLOC_H 0)
set(HAVE_PTHREAD_H 1)
+ set(HAVE_SYS_RESOURCE_H 1)
set(HAVE_SYS_MMAN_H 1)
set(HAVE_SYSEXITS_H 1)
set(HAVE_UNISTD_H 1)
@@ -28,6 +29,7 @@ elseif (APPLE)
set(HAVE_MACH_MACH_H 1)
set(HAVE_MALLOC_MALLOC_H 1)
set(HAVE_PTHREAD_H 1)
+ set(HAVE_SYS_RESOURCE_H 1)
set(HAVE_SYS_MMAN_H 1)
set(HAVE_SYSEXITS_H 1)
set(HAVE_UNISTD_H 1)
@@ -35,6 +37,7 @@ elseif (PURE_WINDOWS)
set(HAVE_MACH_MACH_H 0)
set(HAVE_MALLOC_MALLOC_H 0)
set(HAVE_PTHREAD_H 0)
+ set(HAVE_SYS_RESOURCE_H 0)
set(HAVE_SYS_MMAN_H 0)
set(HAVE_SYSEXITS_H 0)
set(HAVE_UNISTD_H 0)
@@ -44,6 +47,7 @@ elseif (ZOS)
set(HAVE_MACH_MACH_H 0)
set(HAVE_MALLOC_MALLOC_H 0)
set(HAVE_PTHREAD_H 1)
+ set(HAVE_SYS_RESOURCE_H 1)
set(HAVE_SYS_MMAN_H 1)
set(HAVE_SYSEXITS_H 0)
set(HAVE_UNISTD_H 1)
@@ -52,6 +56,7 @@ else()
check_include_file(mach/mach.h HAVE_MACH_MACH_H)
check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H)
check_include_file(pthread.h HAVE_PTHREAD_H)
+ check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H)
check_include_file(sys/mman.h HAVE_SYS_MMAN_H)
check_include_file(sysexits.h HAVE_SYSEXITS_H)
check_include_file(unistd.h HAVE_UNISTD_H)
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index 835201f2a45b0..12fdd20f9901c 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -150,6 +150,9 @@
/* Have pthread_rwlock_init */
#cmakedefine HAVE_PTHREAD_RWLOCK_INIT ${HAVE_PTHREAD_RWLOCK_INIT}
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H}
+
/* Define to 1 if you have the `sbrk' function. */
#cmakedefine HAVE_SBRK ${HAVE_SBRK}
diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h
index 26ddf97b3ef02..31293d6715757 100644
--- a/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -97,6 +97,9 @@ class CrashRecoveryContext {
return RunSafelyOnThread([&]() { Fn(UserData); }, RequestedStackSize);
}
+ bool RunSafelyOnNewStack(function_ref<void()>,
+ unsigned RequestedStackSize = 0);
+
/// Explicitly trigger a crash recovery in the current process, and
/// return failure from RunSafely(). This function does not return.
[[noreturn]] void HandleExit(int RetCode);
diff --git a/llvm/include/llvm/Support/ProgramStack.h b/llvm/include/llvm/Support/ProgramStack.h
new file mode 100644
index 0000000000000..cc8fe98d7a8d1
--- /dev/null
+++ b/llvm/include/llvm/Support/ProgramStack.h
@@ -0,0 +1,45 @@
+//===--- ProgramStack.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PROGRAMSTACK_H
+#define LLVM_SUPPORT_PROGRAMSTACK_H
+
+#include "llvm/ADT/STLFunctionalExtras.h"
+
+namespace llvm {
+
+/// \returns an address close to the current value of the stack pointer.
+///
+/// The value is not guaranteed to point to anything specific. It can be used to
+/// estimate how much stack space has been used since the previous call.
+uintptr_t getStackPointer();
+
+/// \returns the default stack size for this platform.
+///
+/// Based on \p RLIMIT_STACK or the equivalent.
+unsigned getDefaultStackSize();
+
+/// Runs Fn on a new stack of at least the given size.
+///
+/// \param StackSize requested stack size. A size of 0 uses the default stack
+/// size of the platform.
+///
+/// The preferred implementation is split stacks on platforms that have a good
+/// debugging experience for them. On other platforms a new thread is used.
+void runOnNewStack(unsigned StackSize, function_ref<void()> Fn);
+
+template <typename R, typename... Ts>
+R runOnNewStack(unsigned StackSize, function_ref<R(Ts...)> Fn, Ts &&...Args) {
+ std::optional<R> Ret;
+ runOnNewStack(StackSize, [&]() { Ret = Fn(std::forward<Ts>(Args)...); });
+ return std::move(*Ret);
+}
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_PROGRAMSTACK_H
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 2754c97fce6c1..8e4503a1fc84f 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -294,6 +294,7 @@ add_llvm_component_library(LLVMSupport
Path.cpp
Process.cpp
Program.cpp
+ ProgramStack.cpp
RWMutex.cpp
Signals.cpp
Threading.cpp
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index f53aea177d612..ca0c8744a398c 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -10,6 +10,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ExitCodes.h"
+#include "llvm/Support/ProgramStack.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/thread.h"
#include <cassert>
@@ -523,3 +524,21 @@ bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
CRC->setSwitchedThread();
return Info.Result;
}
+
+bool CrashRecoveryContext::RunSafelyOnNewStack(function_ref<void()> Fn,
+ unsigned RequestedStackSize) {
+ // If crash recovery is disabled, do nothing.
+ if (gCrashRecoveryEnabled) {
+ assert(!Impl && "Crash recovery context already initialized!");
+ CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
+ Impl = CRCI;
+
+ CRCI->ValidJumpBuffer = true;
+ if (setjmp(CRCI->JumpBuffer) != 0) {
+ return false;
+ }
+ }
+
+ runOnNewStack(RequestedStackSize, Fn);
+ return true;
+}
diff --git a/llvm/lib/Support/ProgramStack.cpp b/llvm/lib/Support/ProgramStack.cpp
new file mode 100644
index 0000000000000..3a48e86062133
--- /dev/null
+++ b/llvm/lib/Support/ProgramStack.cpp
@@ -0,0 +1,115 @@
+//===--- RunOnNewStack.cpp - Crash Recovery -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ProgramStack.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+
+#ifdef HAVE_SYS_RESOURCE_H
+# include <sys/resource.h>
+#endif
+
+#ifdef _MSC_VER
+# include <intrin.h> // for _AddressOfReturnAddress
+#endif
+
+// Currently only Apple AArch64 is known to support split stacks in the debugger
+// and other tooling.
+#if defined(__APPLE__) && defined(__aarch64__) && \
+ LLVM_HAS_CPP_ATTRIBUTE(gnu::naked) && __has_extension(gnu_asm)
+# define LLVM_HAS_SPLIT_STACKS
+# define LLVM_HAS_SPLIT_STACKS_AARCH64
+#include <sys/mman.h>
+#endif
+
+#ifndef LLVM_HAS_SPLIT_STACKS
+# include "llvm/Support/thread.h"
+#endif
+
+using namespace llvm;
+
+uintptr_t llvm::getStackPointer() {
+#if __GNUC__ || __has_builtin(__builtin_frame_address)
+ return (uintptr_t)__builtin_frame_address(0);
+#elif defined(_MSC_VER)
+ return (uintptr_t)_AddressOfReturnAddress();
+#else
+ char CharOnStack = 0;
+ // The volatile store here is intended to escape the local variable, to
+ // prevent the compiler from optimizing CharOnStack into anything other
+ // than a char on the stack.
+ //
+ // Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19.
+ char *volatile Ptr = &CharOnStack;
+ return (uintptr_t)Ptr;
+#endif
+}
+
+unsigned llvm::getDefaultStackSize() {
+#ifdef HAVE_SYS_RESOURCE_H
+ rlimit RL;
+ getrlimit(RLIMIT_STACK, &RL);
+ return RL.rlim_cur;
+#else
+ // 8MiB seems good.
+ return 8 << 20;
+#endif
+}
+
+namespace {
+#ifdef LLVM_HAS_SPLIT_STACKS_AARCH64
+[[gnu::naked]] void runOnNewStackImpl(void *Stack, void (*Fn)(void *),
+ void *Ctx) {
+ __asm__ volatile(
+ "mov x16, sp\n\t"
+ "sub x0, x0, #0x20\n\t" // subtract space from stack
+ "stp xzr, x16, [x0, #0x00]\n\t" // save old sp
+ "stp x29, x30, [x0, #0x10]\n\t" // save fp, lr
+ "mov sp, x0\n\t" // switch to new stack
+ "add x29, x0, #0x10\n\t" // switch to new frame
+ ".cfi_def_cfa w29, 16\n\t"
+ ".cfi_offset w30, -8\n\t" // lr
+ ".cfi_offset w29, -16\n\t" // fp
+
+ "mov x0, x2\n\t" // Ctx is the only argument
+ "blr x1\n\t" // call Fn
+
+ "ldp x29, x30, [sp, #0x10]\n\t" // restore fp, lr
+ "ldp xzr, x16, [sp, #0x00]\n\t" // load old sp
+ "mov sp, x16\n\t"
+ "ret"
+ );
+}
+#endif
+
+#ifdef LLVM_HAS_SPLIT_STACKS
+void callback(void *Ctx) {
+ (*reinterpret_cast<function_ref<void()> *>(Ctx))();
+}
+#endif
+} // namespace
+
+#ifdef LLVM_HAS_SPLIT_STACKS
+void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) {
+ if (StackSize == 0)
+ StackSize = getDefaultStackSize();
+
+ void *Stack = malloc(StackSize);
+ void *BottomOfStack = (char *)Stack + StackSize;
+
+ runOnNewStackImpl(BottomOfStack, callback, &Fn);
+
+ free(Stack);
+}
+#else
+void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) {
+ llvm::thread Thread(
+ StackSize == 0 ? std::nullopt : std::optional<unsigned>(StackSize), Fn);
+ Thread.join();
+}
+#endif
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index 6c4e7cb689b20..e5bf820fb4d1c 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -70,6 +70,7 @@ add_llvm_unittest(SupportTests
PerThreadBumpPtrAllocatorTest.cpp
ProcessTest.cpp
ProgramTest.cpp
+ ProgramStackTest.cpp
RecyclerTest.cpp
RegexTest.cpp
ReverseIterationTest.cpp
diff --git a/llvm/unittests/Support/ProgramStackTest.cpp b/llvm/unittests/Support/ProgramStackTest.cpp
new file mode 100644
index 0000000000000..1b4a071739139
--- /dev/null
+++ b/llvm/unittests/Support/ProgramStackTest.cpp
@@ -0,0 +1,29 @@
+//===- unittest/Support/ProgramStackTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ProgramStack.h"
+#include "llvm/Support/Process.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+static uintptr_t func(int &A) {
+ A = 7;
+ return getStackPointer();
+}
+
+TEST(ProgramStackTest, runOnNewStack) {
+ int A = 0;
+ uintptr_t Stack = runOnNewStack(0, function_ref<uintptr_t(int &)>(func), A);
+ EXPECT_EQ(A, 7);
+ intptr_t StackDiff = (intptr_t)llvm::getStackPointer() - (intptr_t)Stack;
+ size_t StackDistance = (size_t)std::abs(StackDiff);
+ // Page size is used as it's large enough to guarantee were not on the same
+ // stack but not too large to cause spurious failures.
+ EXPECT_GT(StackDistance, llvm::sys::Process::getPageSizeEstimate());
+}
You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions h,cpp -- llvm/include/llvm/Support/ProgramStack.h llvm/lib/Support/ProgramStack.cpp llvm/unittests/Support/ProgramStackTest.cpp clang/include/clang/Basic/Stack.h clang/lib/Basic/Stack.cpp clang/lib/Frontend/CompilerInstance.cpp llvm/include/llvm/Support/CrashRecoveryContext.h llvm/lib/Support/CrashRecoveryContext.cpp

View the diff from clang-format here.

diff --git a/clang/lib/Basic/Stack.cpp b/clang/lib/Basic/Stack.cpp
index 8cbb84943..aa3862a95 100644
--- a/clang/lib/Basic/Stack.cpp
+++ b/clang/lib/Basic/Stack.cpp
@@ -49,10 +49,12 @@ void clang::runWithSufficientStackSpaceSlow(llvm::function_ref<void()> Diag,
llvm::CrashRecoveryContext CRC;
// Preserve the BottomOfStack in case RunSafelyOnNewStack uses split stacks.
uintptr_t PrevBottom = BottomOfStack;
- CRC.RunSafelyOnNewStack([&] {
- noteBottomOfStack(true);
- Diag();
- Fn();
- }, DesiredStackSize);
+ CRC.RunSafelyOnNewStack(
+ [&] {
+ noteBottomOfStack(true);
+ Diag();
+ Fn();
+ },
+ DesiredStackSize);
BottomOfStack = PrevBottom;
}
diff --git a/llvm/include/llvm/Support/ProgramStack.h b/llvm/include/llvm/Support/ProgramStack.h
index 3ce5de1c0..478789d20 100644
--- a/llvm/include/llvm/Support/ProgramStack.h
+++ b/llvm/include/llvm/Support/ProgramStack.h
@@ -18,8 +18,8 @@
// and other tooling.
#if defined(__APPLE__) && defined(__aarch64__) && \
LLVM_HAS_CPP_ATTRIBUTE(gnu::naked) && __has_extension(gnu_asm)
-# define LLVM_HAS_SPLIT_STACKS
-# define LLVM_HAS_SPLIT_STACKS_AARCH64
+#define LLVM_HAS_SPLIT_STACKS
+#define LLVM_HAS_SPLIT_STACKS_AARCH64
#endif
namespace llvm {
diff --git a/llvm/lib/Support/ProgramStack.cpp b/llvm/lib/Support/ProgramStack.cpp
index 9e5a546b3..5c5f61e54 100644
--- a/llvm/lib/Support/ProgramStack.cpp
+++ b/llvm/lib/Support/ProgramStack.cpp
@@ -11,15 +11,15 @@
#include "llvm/Support/Compiler.h"
#ifdef LLVM_ON_UNIX
-# include <sys/resource.h> // for getrlimit
+#include <sys/resource.h> // for getrlimit
#endif
#ifdef _MSC_VER
-# include <intrin.h> // for _AddressOfReturnAddress
+#include <intrin.h> // for _AddressOfReturnAddress
#endif
#ifndef LLVM_HAS_SPLIT_STACKS
-# include "llvm/Support/thread.h"
+#include "llvm/Support/thread.h"
#endif
using namespace llvm;
@@ -58,32 +58,28 @@ namespace {
#ifdef LLVM_HAS_SPLIT_STACKS_AARCH64
[[gnu::naked]] void runOnNewStackImpl(void *Stack, void (*Fn)(void *),
void *Ctx) {
- __asm__ volatile(
- "mov x16, sp\n\t"
- "sub x0, x0, #0x20\n\t" // subtract space from stack
- "stp xzr, x16, [x0, #0x00]\n\t" // save old sp
- "stp x29, x30, [x0, #0x10]\n\t" // save fp, lr
- "mov sp, x0\n\t" // switch to new stack
- "add x29, x0, #0x10\n\t" // switch to new frame
- ".cfi_def_cfa w29, 16\n\t"
- ".cfi_offset w30, -8\n\t" // lr
- ".cfi_offset w29, -16\n\t" // fp
+ __asm__ volatile("mov x16, sp\n\t"
+ "sub x0, x0, #0x20\n\t" // subtract space from stack
+ "stp xzr, x16, [x0, #0x00]\n\t" // save old sp
+ "stp x29, x30, [x0, #0x10]\n\t" // save fp, lr
+ "mov sp, x0\n\t" // switch to new stack
+ "add x29, x0, #0x10\n\t" // switch to new frame
+ ".cfi_def_cfa w29, 16\n\t"
+ ".cfi_offset w30, -8\n\t" // lr
+ ".cfi_offset w29, -16\n\t" // fp
- "mov x0, x2\n\t" // Ctx is the only argument
- "blr x1\n\t" // call Fn
+ "mov x0, x2\n\t" // Ctx is the only argument
+ "blr x1\n\t" // call Fn
- "ldp x29, x30, [sp, #0x10]\n\t" // restore fp, lr
- "ldp xzr, x16, [sp, #0x00]\n\t" // load old sp
- "mov sp, x16\n\t"
- "ret"
- );
+ "ldp x29, x30, [sp, #0x10]\n\t" // restore fp, lr
+ "ldp xzr, x16, [sp, #0x00]\n\t" // load old sp
+ "mov sp, x16\n\t"
+ "ret");
}
#endif
#ifdef LLVM_HAS_SPLIT_STACKS
-void callback(void *Ctx) {
- (*reinterpret_cast<function_ref<void()> *>(Ctx))();
-}
+void callback(void *Ctx) { (*reinterpret_cast<function_ref<void()> *>(Ctx))(); }
#endif
} // namespace
diff --git a/llvm/unittests/Support/ProgramStackTest.cpp b/llvm/unittests/Support/ProgramStackTest.cpp
index 31dfb3b88..f0fa47685 100644
--- a/llvm/unittests/Support/ProgramStackTest.cpp
+++ b/llvm/unittests/Support/ProgramStackTest.cpp
@@ -17,9 +17,7 @@ static uintptr_t func(int &A) {
return getStackPointer();
}
-static void func2(int &A) {
- A = 5;
-}
+static void func2(int &A) { A = 5; }
TEST(ProgramStackTest, runOnNewStack) {
int A = 0;
If split stacks are negatively impacting profiling, debugging, or other compiler development tasks, I have to ask, have we considered optimizing clang stack usage?
There are multiple issues:
- Clang is a recursive descent parser. This isn't going to change, it just means we can't ignore stack usage.
- We use lots of needlessly nested "small" data structures on the stack, which means our stack frames are large
- Anecdotally I am told that LLVM is not great at stack coloring
This is not a blocking concern, but we should seriously consider doing some builds with -Wframe-larger-than or -Wstack-usage in Sema and Parser. We'd all be happier for it.
I had some comments, but overall this seems reasonable.
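As a purely illustrative example (not from this patch), a frame like the one below is what `-Wframe-larger-than=4096` would flag; the threshold and the function are arbitrary:

```cpp
// Toy example only: built with -Wframe-larger-than=4096, this function warns
// because the 16 KiB local array alone pushes the frame past the threshold.
static int sumOfSquares() {
  int Scratch[4096]; // 4096 * sizeof(int) = 16 KiB on the stack.
  for (int I = 0; I < 4096; ++I)
    Scratch[I] = I * I;
  int Sum = 0;
  for (int V : Scratch)
    Sum += V;
  return Sum;
}
```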
llvm/lib/Support/ProgramStack.cpp
Outdated
#elif defined(_MSC_VER)
  return (uintptr_t)_AddressOfReturnAddress();
#else
  char CharOnStack = 0;
This `#else` case breaks down in situations where the compiler moves all the user data out to the heap, like with Separate Data And Control Stacks (SCADS) or ASan use-after-return detection mode. I think we probably don't care, since those will be handled above.
I guess I don't have any actionable suggestions, other than to make the character variable itself volatile. It seems to me like it would be semantics-preserving for a compiler to promote CharOnStack into a global constant 0, for example, since we never write through the pointer that we store into a volatile local.
This part was moved from Stack.cpp in Clang. Making `CharOnStack` volatile seems fine though.
Yeah, I noticed, but I can't unsee the code from the review. :) Adding volatile seems like a reasonable improvement.
The comments also refer to toolchains we no longer support, but that's not very important.
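A minimal sketch of that suggested tweak for the fallback path (the function name here is hypothetical, not part of the patch):

```cpp
#include <cstdint>

// Fallback used only when no compiler builtin is available: making the local
// itself volatile keeps it materialized on the stack, so its address remains a
// usable approximation of the stack pointer.
static uintptr_t getStackPointerFallback() {
  volatile char CharOnStack = 0;
  return (uintptr_t)&CharOnStack;
}
```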
llvm/lib/Support/ProgramStack.cpp
Outdated
  getrlimit(RLIMIT_STACK, &RL);
  return RL.rlim_cur;
#else
  // 8MiB seems good.
I think we can add more commentary that this value was chosen for Clang, which uses deep recursive stacks to parse C++.
llvm/lib/Support/ProgramStack.cpp
Outdated
#include "llvm/Support/Compiler.h"

#ifdef HAVE_SYS_RESOURCE_H
# include <sys/resource.h>
Can you include one of those `// for getrlimit` comments? I know they go stale quickly, but I find them helpful. I never know what system headers have what.
llvm/cmake/config-ix.cmake
Outdated
@@ -52,6 +56,7 @@ else()
  check_include_file(mach/mach.h HAVE_MACH_MACH_H)
  check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H)
  check_include_file(pthread.h HAVE_PTHREAD_H)
  check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H)
Do we need this? We already unconditionally include sys/resource.h from llvm/Support/Unix/Program.inc. IMO it would be better to do `#ifndef _WIN32` than the config check, which slows down configuration and complicates the build system, and has to be ported over to downstream build systems like gn and Bazel.
I couldn't find good info on whether sys/resource.h is present everywhere we care about, but if we already use it on unix systems then I'm fine removing the config checks.
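A sketch of what dropping the configure check could look like, guarding on the existing `LLVM_ON_UNIX` macro instead (the helper name and the `RLIM_INFINITY` guard are illustrative, not part of the patch):

```cpp
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX

#ifdef LLVM_ON_UNIX
#include <sys/resource.h> // for getrlimit
#endif

// Hypothetical helper: default stack size without a HAVE_SYS_RESOURCE_H check.
static unsigned defaultStackSize() {
#ifdef LLVM_ON_UNIX
  rlimit RL;
  if (getrlimit(RLIMIT_STACK, &RL) == 0 && RL.rlim_cur != RLIM_INFINITY)
    return static_cast<unsigned>(RL.rlim_cur);
#endif
  return 8 << 20; // 8 MiB fallback, sized for Clang's deeply recursive parsing.
}
```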
This should have a release note.
Strong +1 to this.
I'm a bit concerned about this changing behavior only for Apple platforms; that's a difference folks doing triage really need to understand. Doesn't Linux also support this functionality, so we'd at least get the coverage on more than one platform? (Linux could be done in a follow-up, but I'm mostly worried we'll only ever implement this for Apple, at which point I question whether the extra complexity is something the community should maintain or not.)
I removed a few uses of ParsedAttributes recently, it helped #132021 (comment) - I'm sure there is a lot more room for improvement. I wish we had some way to profile whether our SmallVectors and other SSO optimizations are reasonably dimensioned. But anyway, we still need a way to grow the stack :)
The issue is we will always have this problem with implicitly built modules. It's naturally recursive as we need a totally separate compiler instance. I think it would be good to reduce stack usage, and there are likely some easy wins here, but I don't know if it's worth significantly changing the code to remove recursion and the need for this entirely. It's a pretty natural way to handle the recursive parts of C++.
I don't think it's really that big of a difference. It just changes what stack traces look like, and they will contain
Linux does support running code like this, but Linux also means hundreds of distros. There is likely a combination of unwinder and debugger that support this, but I wouldn't turn it on by default unless that set is in very common use. I'm fine with adding a CMake flag that enables this on at least Linux AArch64. It will always run correctly, just not all tools will handle the back trace. Same thing for Windows, although there I have no information on if the MS tools support it. I believe this is also easy to do on x86-64, but a lot harder on 32bit x86.
Force-pushed from bfd3672 to cc25199
The premerge tests failed on a modules crash recovery test case on other platforms, and those seem like true positives that need to be fixed.
Overall, I think the main risk with this approach is that it will break in-process, FP-based stack unwinders that validate that the target FP is within the thread's stack limits. Those seem like reasonable assumptions to me, since there's no good way to efficiently determine if an FP value is a legitimate stack address other than to compare against the bounds of the thread stack. That's what gives me caution and makes me want to let this be an Apple-only thing.
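To make that concern concrete, this is the kind of sanity check such an in-process unwinder might apply (purely illustrative, not taken from any particular unwinder):

```cpp
#include <cstdint>

// Purely illustrative: a frame-pointer-based unwinder that only trusts frame
// pointers inside the current thread's known stack bounds. A frame that lives
// on a heap-allocated stack fails this check even though it is valid.
static bool isPlausibleFramePointer(uintptr_t FP, uintptr_t StackLow,
                                    uintptr_t StackHigh) {
  return FP >= StackLow && FP < StackHigh;
}
```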
…o get more stack space Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult as how the threads are connected is difficult to recover. This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it this allocates a new stack and moves to it using assembly. Doing split stacks like this actually runs on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this only enables it there for now.
Force-pushed from cc25199 to 52b33e2
Thanks, looks good.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/16722 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/16972 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/22803 Here is the relevant piece of the build log for the reference
I'll have a fix in a sec.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/21246 Here is the relevant piece of the build log for the reference
Should be fixed in 429a84f. I'm not sure that's actually ambiguous though.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/3509 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/16239 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/10911 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/16908 Here is the relevant piece of the build log for the reference
And another fix for
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/27/builds/8674 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/14912 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/187/builds/5424 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/202/builds/700 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/76/builds/8770 Here is the relevant piece of the build log for the reference
we're seeing the following after this patch:
+1 on the Fuchsia Clang ARM64 build. Accordingly, I'm issuing a revert.
…thread to get more stack space (llvm#133173)" This change breaks the Clang build on Mac AArch64. This reverts commit d0c973a. This reverts commit 429a84f. This reverts commit 4f64c80.
I'm guessing that a stage2 build of clang would probably repro the issue, perhaps this was tested against older clangs that didn't warn on this.
The error has been there for nearly a decade. Also looking at the assembly the
Ah,
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/127/builds/3025 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/146/builds/2713 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/49/builds/1310 Here is the relevant piece of the build log for the reference
…o get more stack space (llvm#133173) Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult as how the threads are connected is difficult to recover. This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it this allocates a new stack and moves to it using assembly. Doing split stacks like this actually runs on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this only enables it there for now.
llvm#135865) …thread to get more stack space (llvm#133173)" This change breaks the Clang build on Mac AArch64. This reverts commit d0c973a. This reverts commit 429a84f. This reverts commit 4f64c80.
…thread to get more stack space (#136046) Reland #133173 Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult as how the threads are connected is difficult to recover. This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it this allocates a new stack and moves to it using assembly. Doing split stacks like this actually runs on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this only enables it there for now.
Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult as how the threads are connected is difficult to recover.

This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it this allocates a new stack and moves to it using assembly. Doing split stacks like this actually runs on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this only enables it there for now.
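A minimal sketch of how a caller could use the new API (the wrapper name is made up; the signatures match the headers added in this PR):

```cpp
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/ProgramStack.h"

// Hypothetical wrapper: run `Work` on a fresh stack and report whether it
// finished without crashing. A RequestedStackSize of 0 asks for the platform
// default (RLIMIT_STACK where available, otherwise 8 MiB).
static bool runOnFreshStack(llvm::function_ref<void()> Work) {
  llvm::CrashRecoveryContext CRC;
  return CRC.RunSafelyOnNewStack(Work, /*RequestedStackSize=*/0);
}
```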