mirror of https://github.com/facebook/rocksdb.git
Print stack traces more reliably with concurrency (#12086)
Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
This commit is contained in:
parent
a140b519b1
commit
f6fd4b9dbd
|
@ -25,6 +25,7 @@ void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) {
|
||||||
|
|
||||||
#include <cxxabi.h>
|
#include <cxxabi.h>
|
||||||
#include <execinfo.h>
|
#include <execinfo.h>
|
||||||
|
#include <pthread.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
@ -48,6 +49,9 @@ void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) {
|
||||||
#endif // GLIBC version
|
#endif // GLIBC version
|
||||||
#endif // OS_LINUX
|
#endif // OS_LINUX
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
#include "port/lang.h"
|
#include "port/lang.h"
|
||||||
|
|
||||||
namespace ROCKSDB_NAMESPACE {
|
namespace ROCKSDB_NAMESPACE {
|
||||||
|
@ -311,10 +315,49 @@ void* SaveStack(int* num_frames, int first_frames_to_skip) {
|
||||||
return callstack;
|
return callstack;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::atomic<uint64_t> g_thread_handling_stack_trace{0};
|
||||||
|
static int g_recursion_count = 0;
|
||||||
|
static std::atomic<bool> g_at_exit_called{false};
|
||||||
|
|
||||||
static void StackTraceHandler(int sig) {
|
static void StackTraceHandler(int sig) {
|
||||||
// reset to default handler
|
|
||||||
signal(sig, SIG_DFL);
|
|
||||||
fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig));
|
fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig));
|
||||||
|
// Crude recursive mutex with no signal-unsafe system calls, to avoid
|
||||||
|
// re-entrance from multiple threads and avoid core dumping while trying
|
||||||
|
// to print the stack trace.
|
||||||
|
uint64_t tid = 0;
|
||||||
|
{
|
||||||
|
const auto ptid = pthread_self();
|
||||||
|
// pthread_t is an opaque type
|
||||||
|
memcpy(&tid, &ptid, std::min(sizeof(tid), sizeof(ptid)));
|
||||||
|
// Essentially ensure non-zero
|
||||||
|
++tid;
|
||||||
|
}
|
||||||
|
for (;;) {
|
||||||
|
uint64_t expected = 0;
|
||||||
|
if (g_thread_handling_stack_trace.compare_exchange_strong(expected, tid)) {
|
||||||
|
// Acquired mutex
|
||||||
|
g_recursion_count = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (expected == tid) {
|
||||||
|
++g_recursion_count;
|
||||||
|
fprintf(stderr, "Recursive call to stack trace handler (%d)\n",
|
||||||
|
g_recursion_count);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Sleep before trying again
|
||||||
|
usleep(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g_recursion_count > 2) {
|
||||||
|
// Give up after too many recursions
|
||||||
|
fprintf(stderr, "Too many recursive calls to stack trace handler (%d)\n",
|
||||||
|
g_recursion_count);
|
||||||
|
} else {
|
||||||
|
if (g_at_exit_called.load(std::memory_order_acquire)) {
|
||||||
|
fprintf(stderr, "In a race with process already exiting...\n");
|
||||||
|
}
|
||||||
|
|
||||||
// skip the top three signal handler related frames
|
// skip the top three signal handler related frames
|
||||||
PrintStack(3);
|
PrintStack(3);
|
||||||
|
|
||||||
|
@ -330,9 +373,27 @@ static void StackTraceHandler(int sig) {
|
||||||
"==> trace printed by the signal handler is likely obscured\n"
|
"==> trace printed by the signal handler is likely obscured\n"
|
||||||
"==> by TSAN output.)\n");
|
"==> by TSAN output.)\n");
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// reset to default handler
|
||||||
|
signal(sig, SIG_DFL);
|
||||||
// re-signal to default handler (so we still get core dump if needed...)
|
// re-signal to default handler (so we still get core dump if needed...)
|
||||||
raise(sig);
|
raise(sig);
|
||||||
|
|
||||||
|
// release the mutex, in case this is somehow recoverable
|
||||||
|
if (g_recursion_count > 0) {
|
||||||
|
--g_recursion_count;
|
||||||
|
} else {
|
||||||
|
g_thread_handling_stack_trace.store(0, std::memory_order_release);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AtExit() {
|
||||||
|
// wait for stack trace handler to finish, if needed
|
||||||
|
while (g_thread_handling_stack_trace.load(std::memory_order_acquire)) {
|
||||||
|
usleep(1000);
|
||||||
|
}
|
||||||
|
g_at_exit_called.store(true, std::memory_order_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
void InstallStackTraceHandler() {
|
void InstallStackTraceHandler() {
|
||||||
|
@ -342,6 +403,7 @@ void InstallStackTraceHandler() {
|
||||||
signal(SIGSEGV, StackTraceHandler);
|
signal(SIGSEGV, StackTraceHandler);
|
||||||
signal(SIGBUS, StackTraceHandler);
|
signal(SIGBUS, StackTraceHandler);
|
||||||
signal(SIGABRT, StackTraceHandler);
|
signal(SIGABRT, StackTraceHandler);
|
||||||
|
atexit(AtExit);
|
||||||
// Allow ouside debugger to attach, even with Yama security restrictions.
|
// Allow ouside debugger to attach, even with Yama security restrictions.
|
||||||
// This is needed even outside of PrintStack() so that external mechanisms
|
// This is needed even outside of PrintStack() so that external mechanisms
|
||||||
// can dump stacks if they suspect that a test has hung.
|
// can dump stacks if they suspect that a test has hung.
|
||||||
|
|
Loading…
Reference in New Issue