Skip to content

Commit d0c973a

Browse files
authored
[llvm][clang] Allocate a new stack instead of spawning a new thread to get more stack space (#133173)
Clang spawns a new thread to avoid running out of stack space. This can make debugging and performance analysis more difficult as how the threads are connected is difficult to recover. This patch introduces `runOnNewStack` and applies it in Clang. On platforms that have good support for it this allocates a new stack and moves to it using assembly. Doing split stacks like this actually runs on most platforms, but many debuggers and unwinders reject the large or backwards stack offsets that occur. Apple platforms and tools are known to support this, so this only enables it there for now.
1 parent a5aa0c4 commit d0c973a

File tree

11 files changed

+248
-30
lines changed

11 files changed

+248
-30
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,10 @@ Non-comprehensive list of changes in this release
195195
- Added `__builtin_elementwise_exp10`.
196196
- For AMDGPU targets, added `__builtin_v_cvt_off_f32_i4` that maps to the `v_cvt_off_f32_i4` instruction.
197197
- Added `__builtin_elementwise_minnum` and `__builtin_elementwise_maxnum`.
198+
- Clang itself now uses split stacks instead of threads for allocating more
199+
stack space when running on Apple AArch64 based platforms. This means that
200+
stack traces of Clang from debuggers, crashes, and profilers may look
201+
different than before.
198202

199203
New Compiler Flags
200204
------------------

clang/include/clang/Basic/Stack.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ namespace clang {
2727

2828
/// Call this once on each thread, as soon after starting the thread as
2929
/// feasible, to note the approximate address of the bottom of the stack.
30-
void noteBottomOfStack();
30+
///
31+
/// \param ForceSet set to true if you know the call is near the bottom of a
32+
/// new stack. Used for split stacks.
33+
void noteBottomOfStack(bool ForceSet = false);
3134

3235
/// Determine whether the stack is nearly exhausted.
3336
bool isStackNearlyExhausted();

clang/lib/Basic/Stack.cpp

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,13 @@
1313

1414
#include "clang/Basic/Stack.h"
1515
#include "llvm/Support/CrashRecoveryContext.h"
16+
#include "llvm/Support/ProgramStack.h"
1617

17-
#ifdef _MSC_VER
18-
#include <intrin.h> // for _AddressOfReturnAddress
19-
#endif
18+
static LLVM_THREAD_LOCAL uintptr_t BottomOfStack = 0;
2019

21-
static LLVM_THREAD_LOCAL void *BottomOfStack = nullptr;
22-
23-
static void *getStackPointer() {
24-
#if __GNUC__ || __has_builtin(__builtin_frame_address)
25-
return __builtin_frame_address(0);
26-
#elif defined(_MSC_VER)
27-
return _AddressOfReturnAddress();
28-
#else
29-
char CharOnStack = 0;
30-
// The volatile store here is intended to escape the local variable, to
31-
// prevent the compiler from optimizing CharOnStack into anything other
32-
// than a char on the stack.
33-
//
34-
// Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19.
35-
char *volatile Ptr = &CharOnStack;
36-
return Ptr;
37-
#endif
38-
}
39-
40-
void clang::noteBottomOfStack() {
41-
if (!BottomOfStack)
42-
BottomOfStack = getStackPointer();
20+
void clang::noteBottomOfStack(bool ForceSet) {
21+
if (!BottomOfStack || ForceSet)
22+
BottomOfStack = llvm::getStackPointer();
4323
}
4424

4525
bool clang::isStackNearlyExhausted() {
@@ -51,7 +31,8 @@ bool clang::isStackNearlyExhausted() {
5131
if (!BottomOfStack)
5232
return false;
5333

54-
intptr_t StackDiff = (intptr_t)getStackPointer() - (intptr_t)BottomOfStack;
34+
intptr_t StackDiff =
35+
(intptr_t)llvm::getStackPointer() - (intptr_t)BottomOfStack;
5536
size_t StackUsage = (size_t)std::abs(StackDiff);
5637

5738
// If the stack pointer has a surprising value, we do not understand this
@@ -66,9 +47,12 @@ bool clang::isStackNearlyExhausted() {
6647
void clang::runWithSufficientStackSpaceSlow(llvm::function_ref<void()> Diag,
6748
llvm::function_ref<void()> Fn) {
6849
llvm::CrashRecoveryContext CRC;
69-
CRC.RunSafelyOnThread([&] {
70-
noteBottomOfStack();
50+
// Preserve the BottomOfStack in case RunSafelyOnNewStack uses split stacks.
51+
uintptr_t PrevBottom = BottomOfStack;
52+
CRC.RunSafelyOnNewStack([&] {
53+
noteBottomOfStack(true);
7154
Diag();
7255
Fn();
7356
}, DesiredStackSize);
57+
BottomOfStack = PrevBottom;
7458
}

clang/lib/Frontend/CompilerInstance.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1265,7 +1265,7 @@ bool CompilerInstance::compileModule(SourceLocation ImportLoc,
12651265

12661266
// Execute the action to actually build the module in-place. Use a separate
12671267
// stack (or a separate thread where split stacks are unavailable) so that it is large enough.
1268-
bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnThread(
1268+
bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack(
12691269
[&]() {
12701270
GenerateModuleFromModuleMapAction Action;
12711271
Instance.ExecuteAction(Action);

llvm/include/llvm/Support/CrashRecoveryContext.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ class CrashRecoveryContext {
9797
return RunSafelyOnThread([&]() { Fn(UserData); }, RequestedStackSize);
9898
}
9999

100+
/// Execute the provided callback on a new stack.
///
/// When LLVM_HAS_SPLIT_STACKS is defined, RunSafely() is invoked on the
/// callback through llvm::runOnNewStack(); otherwise the call is forwarded to
/// RunSafelyOnThread() with the same arguments.
///
/// \param RequestedStackSize passed through unchanged to whichever of the two
/// mechanisms is used; defaults to 0.
bool RunSafelyOnNewStack(function_ref<void()>,
101+
unsigned RequestedStackSize = 0);
102+
100103
/// Explicitly trigger a crash recovery in the current process, and
101104
/// return failure from RunSafely(). This function does not return.
102105
[[noreturn]] void HandleExit(int RetCode);
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//===--- ProgramStack.h -----------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_SUPPORT_PROGRAMSTACK_H
10+
#define LLVM_SUPPORT_PROGRAMSTACK_H
11+
12+
#include "llvm/ADT/STLFunctionalExtras.h"
13+
14+
// LLVM_HAS_SPLIT_STACKS is exposed in the header because CrashRecoveryContext
// needs to know if it's running on another thread or not.
//
// Currently only Apple AArch64 is known to support split stacks in the debugger
// and other tooling.
//
// The feature-test macros are probed in nested #if blocks, each guarded by
// defined(), so that this header preprocesses cleanly on its own. A compiler
// that lacks __has_cpp_attribute or __has_extension would otherwise reject the
// function-like invocation as a syntax error, even on the short-circuited side
// of an `&&`. Groups skipped by an outer #if are never parsed as expressions.
#if defined(__APPLE__) && defined(__aarch64__)
#if defined(__has_cpp_attribute) && defined(__has_extension)
#if __has_cpp_attribute(gnu::naked) && __has_extension(gnu_asm)
#define LLVM_HAS_SPLIT_STACKS
#define LLVM_HAS_SPLIT_STACKS_AARCH64
#endif
#endif
#endif
24+
25+
namespace llvm {
26+
27+
/// \returns an address close to the current value of the stack pointer.
28+
///
29+
/// The value is not guaranteed to point to anything specific. It can be used to
30+
/// estimate how much stack space has been used since the previous call.
31+
uintptr_t getStackPointer();
32+
33+
/// \returns the default stack size for this platform.
34+
///
35+
/// Based on \p RLIMIT_STACK or the equivalent.
36+
unsigned getDefaultStackSize();
37+
38+
/// Runs Fn on a new stack of at least the given size.
39+
///
40+
/// \param StackSize requested stack size. A size of 0 uses the default stack
41+
/// size of the platform.
42+
///
43+
/// The preferred implementation is split stacks on platforms that have a good
44+
/// debugging experience for them. On other platforms a new thread is used.
45+
void runOnNewStack(unsigned StackSize, function_ref<void()> Fn);
46+
47+
template <typename R, typename... Ts>
48+
R runOnNewStack(unsigned StackSize, function_ref<R(Ts...)> Fn, Ts &&...Args) {
49+
std::optional<R> Ret;
50+
runOnNewStack(StackSize, [&]() { Ret = Fn(std::forward<Ts>(Args)...); });
51+
return std::move(*Ret);
52+
}
53+
54+
template <typename... Ts>
55+
void runOnNewStack(unsigned StackSize, function_ref<void(Ts...)> Fn,
56+
Ts &&...Args) {
57+
runOnNewStack(StackSize, [&]() { Fn(std::forward<Ts>(Args)...); });
58+
}
59+
60+
} // namespace llvm
61+
62+
#endif // LLVM_SUPPORT_PROGRAMSTACK_H

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ add_llvm_component_library(LLVMSupport
295295
Path.cpp
296296
Process.cpp
297297
Program.cpp
298+
ProgramStack.cpp
298299
RWMutex.cpp
299300
Signals.cpp
300301
Threading.cpp

llvm/lib/Support/CrashRecoveryContext.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "llvm/Config/llvm-config.h"
1111
#include "llvm/Support/ErrorHandling.h"
1212
#include "llvm/Support/ExitCodes.h"
13+
#include "llvm/Support/ProgramStack.h"
1314
#include "llvm/Support/Signals.h"
1415
#include "llvm/Support/thread.h"
1516
#include <cassert>
@@ -523,3 +524,13 @@ bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
523524
CRC->setSwitchedThread();
524525
return Info.Result;
525526
}
527+
528+
/// Invokes RunSafely(Fn) through runOnNewStack() when split stacks are
/// available; otherwise defers to RunSafelyOnThread() with the same arguments.
bool CrashRecoveryContext::RunSafelyOnNewStack(function_ref<void()> Fn,
                                               unsigned RequestedStackSize) {
#ifndef LLVM_HAS_SPLIT_STACKS
  // No split-stack support on this platform: use the thread-based path.
  return RunSafelyOnThread(Fn, RequestedStackSize);
#else
  // Wrap the guarded call so its bool result is carried back from the new
  // stack by the value-returning runOnNewStack overload.
  auto Guarded = [&]() { return RunSafely(Fn); };
  return runOnNewStack(RequestedStackSize, function_ref<bool()>(Guarded));
#endif
}

llvm/lib/Support/ProgramStack.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
//===--- ProgramStack.cpp - Run code on a new program stack ---------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Support/ProgramStack.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"

#include <cstdint>
#include <cstdlib>
#include <limits>
12+
13+
#ifdef LLVM_ON_UNIX
14+
# include <sys/resource.h> // for getrlimit
15+
#endif
16+
17+
#ifdef _MSC_VER
18+
# include <intrin.h> // for _AddressOfReturnAddress
19+
#endif
20+
21+
#ifndef LLVM_HAS_SPLIT_STACKS
22+
# include "llvm/Support/thread.h"
23+
#endif
24+
25+
using namespace llvm;
26+
27+
/// Returns an address close to the current value of the stack pointer.
///
/// Uses the frame address builtin where available, the MSVC intrinsic
/// otherwise, and as a last resort the address of a local variable.
uintptr_t llvm::getStackPointer() {
#if __GNUC__ || __has_builtin(__builtin_frame_address)
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
#elif defined(_MSC_VER)
  return reinterpret_cast<uintptr_t>(_AddressOfReturnAddress());
#else
  // CharOnStack must be a plain (non-volatile) char: taking the address of a
  // `volatile char` yields `volatile char *`, which cannot initialize the
  // `char *volatile` below without dropping a qualifier.
  char CharOnStack = 0;
  // The volatile store here is intended to escape the local variable, to
  // prevent the compiler from optimizing CharOnStack into anything other
  // than a char on the stack.
  //
  // Tested on: MSVC 2015 - 2019, GCC 4.9 - 9, Clang 3.2 - 9, ICC 13 - 19.
  char *volatile Ptr = &CharOnStack;
  return reinterpret_cast<uintptr_t>(Ptr);
#endif
}
43+
44+
/// Returns the default stack size for this platform.
///
/// On Unix this is the soft RLIMIT_STACK limit when it can be queried and is
/// representable as a non-zero `unsigned`; in every other case an 8MiB
/// fallback is returned.
unsigned llvm::getDefaultStackSize() {
  // Clang recursively parses, instantiates templates, and evaluates constant
  // expressions. We've found 8MiB to be a reasonable stack size given the way
  // Clang works and the way C++ is commonly written.
  constexpr unsigned FallbackStackSize = 8u << 20;
#ifdef LLVM_ON_UNIX
  rlimit RL;
  // Without this check a failed query would leave RL uninitialized.
  if (getrlimit(RLIMIT_STACK, &RL) != 0)
    return FallbackStackSize;
  // An unlimited, zero, or too-large soft limit cannot be narrowed to the
  // return type without producing a meaningless allocation size.
  if (RL.rlim_cur == RLIM_INFINITY || RL.rlim_cur == 0 ||
      RL.rlim_cur > std::numeric_limits<unsigned>::max())
    return FallbackStackSize;
  return static_cast<unsigned>(RL.rlim_cur);
#else
  return FallbackStackSize;
#endif
}
56+
57+
namespace {
58+
#ifdef LLVM_HAS_SPLIT_STACKS_AARCH64
59+
/// Switches sp to a 32-byte frame carved just below \p Stack, calls \p Fn with
/// \p Ctx as its only argument, then restores fp, lr and the original sp and
/// returns to the caller.
///
/// The function is naked, so the asm below is the entire body. Frame layout on
/// the new stack, relative to the new sp:
///   [0x00] zero   [0x08] old sp   [0x10] saved fp (x29)   [0x18] saved lr (x30)
///
/// NOTE(review): relies on the arguments arriving in x0 (Stack), x1 (Fn) and
/// x2 (Ctx), and on \p Stack being suitably aligned for use as sp — confirm
/// against the AArch64 procedure call standard.
[[gnu::naked]] void runOnNewStackImpl(void *Stack, void (*Fn)(void *),
60+
void *Ctx) {
61+
__asm__ volatile(
62+
"mov x16, sp\n\t" // copy the current sp into x16 so it can be stored
63+
"sub x0, x0, #0x20\n\t" // subtract space from stack
64+
"stp xzr, x16, [x0, #0x00]\n\t" // save old sp
65+
"stp x29, x30, [x0, #0x10]\n\t" // save fp, lr
66+
"mov sp, x0\n\t" // switch to new stack
67+
"add x29, x0, #0x10\n\t" // switch to new frame
68+
".cfi_def_cfa w29, 16\n\t" // unwind info: CFA is the new fp + 16
69+
".cfi_offset w30, -8\n\t" // lr
70+
".cfi_offset w29, -16\n\t" // fp
71+
72+
"mov x0, x2\n\t" // Ctx is the only argument
73+
"blr x1\n\t" // call Fn
74+
75+
"ldp x29, x30, [sp, #0x10]\n\t" // restore fp, lr
76+
"ldp xzr, x16, [sp, #0x00]\n\t" // load old sp
77+
"mov sp, x16\n\t" // switch back to the original stack
78+
"ret"
79+
);
80+
}
81+
#endif
82+
83+
#ifdef LLVM_HAS_SPLIT_STACKS
84+
/// Trampoline handed to runOnNewStackImpl: \p Ctx points at the
/// function_ref<void()> to invoke.
void callback(void *Ctx) {
  auto *Callee = reinterpret_cast<function_ref<void()> *>(Ctx);
  (*Callee)();
}
87+
#endif
88+
} // namespace
89+
90+
#ifdef LLVM_HAS_SPLIT_STACKS
91+
void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) {
92+
if (StackSize == 0)
93+
StackSize = getDefaultStackSize();
94+
95+
// We use malloc here instead of mmap because:
96+
// - it's simpler,
97+
// - many malloc implementations will reuse the allocation in cases where
98+
// we're bouncing accross the edge of a stack boundry, and
99+
// - many malloc implemenations will already provide guard pages for
100+
// allocations this large.
101+
void *Stack = malloc(StackSize);
102+
void *BottomOfStack = (char *)Stack + StackSize;
103+
104+
runOnNewStackImpl(BottomOfStack, callback, &Fn);
105+
106+
free(Stack);
107+
}
108+
#else
109+
void llvm::runOnNewStack(unsigned StackSize, function_ref<void()> Fn) {
110+
llvm::thread Thread(
111+
StackSize == 0 ? std::nullopt : std::optional<unsigned>(StackSize), Fn);
112+
Thread.join();
113+
}
114+
#endif

llvm/unittests/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ add_llvm_unittest(SupportTests
7070
PerThreadBumpPtrAllocatorTest.cpp
7171
ProcessTest.cpp
7272
ProgramTest.cpp
73+
ProgramStackTest.cpp
7374
RecyclerTest.cpp
7475
RegexTest.cpp
7576
ReverseIterationTest.cpp
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//===- unittest/Support/ProgramStackTest.cpp ------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Support/ProgramStack.h"
10+
#include "llvm/Support/Process.h"
11+
#include "gtest/gtest.h"
12+
13+
using namespace llvm;
14+
15+
static uintptr_t func(int &A) {
16+
A = 7;
17+
return getStackPointer();
18+
}
19+
20+
/// Test helper with no return value: stores 5 through \p A.
static void func2(int &A) { A = 5; }
23+
24+
TEST(ProgramStackTest, runOnNewStack) {
  int A = 0;

  // Value-returning overload: func writes through the reference and reports
  // the stack pointer it saw.
  uintptr_t NewStackPointer =
      runOnNewStack(0, function_ref<uintptr_t(int &)>(func), A);
  EXPECT_EQ(A, 7);

  intptr_t Delta = (intptr_t)llvm::getStackPointer() - (intptr_t)NewStackPointer;
  size_t StackDistance = (size_t)std::abs(Delta);
  // Page size is used as it's large enough to guarantee we're not on the same
  // stack but not so large that it causes spurious failures.
  EXPECT_GT(StackDistance, llvm::sys::Process::getPageSizeEstimate());

  // Void overload: only the side effect on A is observable.
  runOnNewStack(0, function_ref<void(int &)>(func2), A);
  EXPECT_EQ(A, 5);
}

0 commit comments

Comments
 (0)