rocksdb/port/stack_trace.cc

419 lines
13 KiB
C++
Raw Normal View History

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include "port/stack_trace.h"
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
#if !(defined(ROCKSDB_BACKTRACE) || defined(OS_MACOSX)) || defined(CYGWIN) || \
defined(OS_SOLARIS) || defined(OS_WIN)
// noop
namespace ROCKSDB_NAMESPACE {
namespace port {
void InstallStackTraceHandler() {}
void PrintStack(int /*first_frames_to_skip*/) {}
void PrintAndFreeStack(void* /*callstack*/, int /*num_frames*/) {}
void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) {
return nullptr;
}
} // namespace port
} // namespace ROCKSDB_NAMESPACE
#else
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
#include <cxxabi.h>
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
#include <execinfo.h>
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
#include <pthread.h>
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
#include <unistd.h>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#ifdef OS_OPENBSD
#include <sys/wait.h>
#include <sys/sysctl.h>
#endif // OS_OPENBSD
#ifdef OS_FREEBSD
#include <sys/sysctl.h>
#include <sys/wait.h>
#endif // OS_FREEBSD
#ifdef OS_LINUX
#include <sys/prctl.h>
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
#include <sys/types.h>
#include <sys/wait.h>
#if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 30)
#include <sys/syscall.h>
#define gettid() syscall(SYS_gettid)
#endif // GLIBC version
#endif // OS_LINUX
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
#include <algorithm>
#include <atomic>
#include "port/lang.h"
namespace ROCKSDB_NAMESPACE::port {
namespace {
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_GNU_KFREEBSD)
const char* GetExecutableName() {
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
static char name[1024];
#if defined(OS_FREEBSD)
int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
size_t namesz = sizeof(name);
auto ret = sysctl(mib, 4, name, &namesz, nullptr, 0);
if (-1 == ret) {
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
return nullptr;
} else {
return name;
}
#elif defined(OS_OPENBSD)
int mib[4] = {CTL_KERN, KERN_PROC_ARGS, getpid(), KERN_PROC_ARGV};
size_t namesz = sizeof(name);
char* bin[namesz];
auto ret = sysctl(mib, 4, bin, &namesz, nullptr, 0);
if (-1 == ret) {
return nullptr;
} else {
return bin[0];
}
#else
char link[1024];
snprintf(link, sizeof(link), "/proc/%d/exe", getpid());
auto read = readlink(link, name, sizeof(name) - 1);
if (-1 == read) {
return nullptr;
} else {
name[read] = 0;
return name;
}
#endif
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
}
void PrintStackTraceLine(const char* symbol, void* frame) {
static const char* executable = GetExecutableName();
if (symbol) {
fprintf(stderr, "%s ", symbol);
}
if (executable) {
// out source to addr2line, for the address translation
const int kLineMax = 256;
char cmd[kLineMax];
snprintf(cmd, kLineMax, "addr2line %p -e %s -f -C 2>&1", frame, executable);
auto f = popen(cmd, "r");
if (f) {
char line[kLineMax];
while (fgets(line, sizeof(line), f)) {
line[strlen(line) - 1] = 0; // remove newline
fprintf(stderr, "%s\t", line);
}
pclose(f);
}
} else {
fprintf(stderr, " %p", frame);
}
fprintf(stderr, "\n");
}
#elif defined(OS_MACOSX)
void PrintStackTraceLine(const char* symbol, void* frame) {
static int pid = getpid();
// out source to atos, for the address translation
const int kLineMax = 256;
char cmd[kLineMax];
2015-02-05 00:24:02 +00:00
snprintf(cmd, kLineMax, "xcrun atos %p -p %d 2>&1", frame, pid);
auto f = popen(cmd, "r");
if (f) {
char line[kLineMax];
while (fgets(line, sizeof(line), f)) {
line[strlen(line) - 1] = 0; // remove newline
fprintf(stderr, "%s\t", line);
}
pclose(f);
} else if (symbol) {
fprintf(stderr, "%s ", symbol);
}
fprintf(stderr, "\n");
}
#endif
const char* GetLldbScriptSelectThread(long long tid) {
// NOTE: called from a signal handler, so no heap allocation
static char script[80];
snprintf(script, sizeof(script),
"script -l python -- lldb.process.SetSelectedThreadByID(%lld)", tid);
return script;
}
} // namespace
void PrintStack(void* frames[], int num_frames) {
auto symbols = backtrace_symbols(frames, num_frames);
for (int i = 0; i < num_frames; ++i) {
fprintf(stderr, "#%-2d ", i);
PrintStackTraceLine((symbols != nullptr) ? symbols[i] : nullptr, frames[i]);
}
free(symbols);
}
void PrintStack(int first_frames_to_skip) {
// Default to getting stack traces with GDB, at least on Linux where we
// know how to attach to a particular thread.
//
// * Address space layout randomization (ASLR) interferes with getting good
// stack information from backtrace+addr2line. This is more likely to show
// up with LIB_MODE=shared builds (when kernel.randomize_va_space >= 1)
// but can also show up with LIB_MODE=static builds ((when
// kernel.randomize_va_space == 2).
// * It doesn't appear easy to detect when ASLR is in use.
// * With DEBUG_LEVEL < 2, backtrace() can skip frames that are not skipped
// in GDB.
//
// LLDB also available as an option
bool lldb_stack_trace = getenv("ROCKSDB_LLDB_STACK") != nullptr;
#if defined(OS_LINUX)
// Default true, override with ROCKSDB_BACKTRACE_STACK=1
bool gdb_stack_trace =
!lldb_stack_trace && getenv("ROCKSDB_BACKTRACE_STACK") == nullptr;
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
#else
// Default false, override with ROCKSDB_GDB_STACK=1
bool gdb_stack_trace = getenv("ROCKSDB_GDB_STACK") != nullptr;
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
#endif
// Also support invoking interactive debugger on stack trace, with this
// envvar set to non-empty
char* debug_env = getenv("ROCKSDB_DEBUG");
bool debug = debug_env != nullptr && strlen(debug_env) > 0;
Update tiered storage tests (ahead of next change) (#11917) Summary: After https://github.com/facebook/rocksdb/issues/11905, I am preparing a DBImpl change to ensure all sufficiently recent sequence numbers since Open are covered by SeqnoToTimeMapping. **Intended follow-up** However, there are a number of test changes I want to make prior to that to make it clear that I am not regressing the tests and production behavior at the same time. * Start mock time in the tests well beyond epoch (time 0) so that we aren't normally reaching into pre-history for current time minus the preserve/preclude duration. * Majorly clean up BasicSeqnoToTimeMapping to avoid confusing hard-coded bounds on GetProximalTimeBeforeSeqno() results. * There is an unresolved/unexplained issue marked with FIXME that should be investigated when GetProximalTimeBeforeSeqno() is put into production. * MultiCFs test was strangely generating 5 L0 files, four of which would be compacted into an L1, and then letting TTL compaction compact 1@L0+1@L1. Changing the starting time of the tests seemed to mess up the TTL compaction. But I suspect the TTL compaction was unintentional, so I've cut it down to just 4 L0 files, which compacts predictably. * Unrelated: allow ROCKSDB_NO_STACK=1 to skip printing a stack trace on assertion failures. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11917 Test Plan: no changes to production code Reviewed By: jowlyzhang Differential Revision: D49841436 Pulled By: pdillinger fbshipit-source-id: 753348ace9c548e82bcb77fcc8b2ffb7a6beeb0a
2023-10-02 23:19:05 +00:00
if (!debug && getenv("ROCKSDB_NO_STACK") != nullptr) {
// Skip stack trace
return;
}
if (lldb_stack_trace || gdb_stack_trace || debug) {
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
// Allow ouside debugger to attach, even with Yama security restrictions
#ifdef PR_SET_PTRACER_ANY
(void)prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
#endif
// Try to invoke GDB, either for stack trace or debugging.
long long attach_pid = getpid();
// NOTE: we're in a signal handler, so no heap allocation
char attach_pid_str[20];
snprintf(attach_pid_str, sizeof(attach_pid_str), "%lld", attach_pid);
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
// `gdb -p PID` seems to always attach to main thread, but `gdb -p TID`
// seems to be able to attach to a particular thread in a process, which
// makes sense as the main thread TID == PID of the process.
// But I haven't found that gdb capability documented anywhere, so leave
// a back door to attach to main thread.
long long gdb_attach_id = attach_pid;
// Save current thread id before fork
long long attach_tid = 0;
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
#ifdef OS_LINUX
attach_tid = gettid();
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
if (getenv("ROCKSDB_DEBUG_USE_PID") == nullptr) {
gdb_attach_id = attach_tid;
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
}
#endif
char gdb_attach_id_str[20];
snprintf(gdb_attach_id_str, sizeof(gdb_attach_id_str), "%lld",
gdb_attach_id);
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
pid_t child_pid = fork();
if (child_pid == 0) {
// child process
if (debug) {
if (strcmp(debug_env, "lldb") == 0) {
fprintf(stderr, "Invoking LLDB for debugging (ROCKSDB_DEBUG=%s)...\n",
debug_env);
execlp(/*cmd in PATH*/ "lldb", /*arg0*/ "lldb", "-p", attach_pid_str,
/*"-Q",*/ "-o", GetLldbScriptSelectThread(attach_tid),
(char*)nullptr);
return;
} else {
fprintf(stderr, "Invoking GDB for debugging (ROCKSDB_DEBUG=%s)...\n",
debug_env);
execlp(/*cmd in PATH*/ "gdb", /*arg0*/ "gdb", "-p", gdb_attach_id_str,
(char*)nullptr);
return;
}
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
} else {
// Redirect child stdout to original stderr
dup2(2, 1);
// No child stdin (don't use pager)
close(0);
if (lldb_stack_trace) {
fprintf(stderr, "Invoking LLDB for stack trace...\n");
Fix stack trace trimming with LLDB (#12101) Summary: I must have chosen trimming before frame 8 based on assertion failures, but that trims too many frame for a general segfault. So this changes to start printing at frame 4, as in this example where I've seeded a null deref: ``` Received signal 11 (Segmentation fault) Invoking LLDB for stack trace... Process 873208 stopped * thread #1, name = 'db_stress', stop reason = signal SIGSTOP frame #0: 0x00007fb1fe8f1033 libc.so.6`__GI___wait4(pid=873478, stat_loc=0x00007fb1fb114030, options=0, usage=0x0000000000000000) at wait4.c:30:10 thread #2, name = 'rocksdb:low', stop reason = signal SIGSTOP frame #0: 0x00007fb1fe8972a1 libc.so.6`__GI___futex_abstimed_wait_cancelable64 at futex-internal.c:57:12 Executable module set to "/data/users/peterd/rocksdb/db_stress". Architecture set to: x86_64-unknown-linux-gnu. True frame #4: 0x00007fb1fe844540 libc.so.6`__restore_rt at libc_sigaction.c:13 frame #5: 0x0000000000608514 db_stress`rocksdb::StressTest::InitDb(rocksdb::SharedState*) at db_stress_test_base.cc:345:18 frame #6: 0x0000000000585d62 db_stress`rocksdb::RunStressTestImpl(rocksdb::SharedState*) at db_stress_driver.cc:84:17 frame #7: 0x000000000058dd69 db_stress`rocksdb::RunStressTest(shared=0x00006120000001c0) at db_stress_driver.cc:266:34 frame #8: 0x0000000000453b34 db_stress`rocksdb::db_stress_tool(int, char**) at db_stress_tool.cc:370:20 ... ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12101 Test Plan: manual (see above) Reviewed By: ajkr Differential Revision: D51593217 Pulled By: pdillinger fbshipit-source-id: 4a71eb8e516edbc32e682f9537bc77d073a7b4ed
2023-11-27 19:49:52 +00:00
// Skip top ~4 frames here in PrintStack
auto bt_in_lldb =
Fix stack trace trimming with LLDB (#12101) Summary: I must have chosen trimming before frame 8 based on assertion failures, but that trims too many frame for a general segfault. So this changes to start printing at frame 4, as in this example where I've seeded a null deref: ``` Received signal 11 (Segmentation fault) Invoking LLDB for stack trace... Process 873208 stopped * thread #1, name = 'db_stress', stop reason = signal SIGSTOP frame #0: 0x00007fb1fe8f1033 libc.so.6`__GI___wait4(pid=873478, stat_loc=0x00007fb1fb114030, options=0, usage=0x0000000000000000) at wait4.c:30:10 thread #2, name = 'rocksdb:low', stop reason = signal SIGSTOP frame #0: 0x00007fb1fe8972a1 libc.so.6`__GI___futex_abstimed_wait_cancelable64 at futex-internal.c:57:12 Executable module set to "/data/users/peterd/rocksdb/db_stress". Architecture set to: x86_64-unknown-linux-gnu. True frame #4: 0x00007fb1fe844540 libc.so.6`__restore_rt at libc_sigaction.c:13 frame #5: 0x0000000000608514 db_stress`rocksdb::StressTest::InitDb(rocksdb::SharedState*) at db_stress_test_base.cc:345:18 frame #6: 0x0000000000585d62 db_stress`rocksdb::RunStressTestImpl(rocksdb::SharedState*) at db_stress_driver.cc:84:17 frame #7: 0x000000000058dd69 db_stress`rocksdb::RunStressTest(shared=0x00006120000001c0) at db_stress_driver.cc:266:34 frame #8: 0x0000000000453b34 db_stress`rocksdb::db_stress_tool(int, char**) at db_stress_tool.cc:370:20 ... ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12101 Test Plan: manual (see above) Reviewed By: ajkr Differential Revision: D51593217 Pulled By: pdillinger fbshipit-source-id: 4a71eb8e516edbc32e682f9537bc77d073a7b4ed
2023-11-27 19:49:52 +00:00
"script -l python -- for f in lldb.thread.frames[4:]: print(f)";
execlp(/*cmd in PATH*/ "lldb", /*arg0*/ "lldb", "-p", attach_pid_str,
"-b", "-Q", "-o", GetLldbScriptSelectThread(attach_tid), "-o",
bt_in_lldb, (char*)nullptr);
} else {
// gdb_stack_trace
fprintf(stderr, "Invoking GDB for stack trace...\n");
// Skip top ~4 frames here in PrintStack
// See https://stackoverflow.com/q/40991943/454544
auto bt_in_gdb =
"frame apply level 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 "
"21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 "
"42 43 44 -q frame";
// -n : Loading config files can apparently cause failures with the
// other options here.
// -batch : non-interactive; suppress banners as much as possible
execlp(/*cmd in PATH*/ "gdb", /*arg0*/ "gdb", "-n", "-batch", "-p",
gdb_attach_id_str, "-ex", bt_in_gdb, (char*)nullptr);
}
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
return;
}
} else {
// parent process; wait for child
int wstatus;
waitpid(child_pid, &wstatus, 0);
if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == EXIT_SUCCESS) {
// Good
return;
}
}
fprintf(stderr, "GDB failed; falling back on backtrace+addr2line...\n");
}
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
const int kMaxFrames = 100;
void* frames[kMaxFrames];
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
int num_frames = (int) backtrace(frames, kMaxFrames);
PrintStack(&frames[first_frames_to_skip], num_frames - first_frames_to_skip);
}
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
void PrintAndFreeStack(void* callstack, int num_frames) {
PrintStack(static_cast<void**>(callstack), num_frames);
free(callstack);
}
void* SaveStack(int* num_frames, int first_frames_to_skip) {
const int kMaxFrames = 100;
void* frames[kMaxFrames];
int count = (int) backtrace(frames, kMaxFrames);
*num_frames = count - first_frames_to_skip;
void* callstack = malloc(sizeof(void*) * *num_frames);
memcpy(callstack, &frames[first_frames_to_skip], sizeof(void*) * *num_frames);
return callstack;
}
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
static std::atomic<uint64_t> g_thread_handling_stack_trace{0};
static int g_recursion_count = 0;
static std::atomic<bool> g_at_exit_called{false};
static void StackTraceHandler(int sig) {
fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig));
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
// Crude recursive mutex with no signal-unsafe system calls, to avoid
// re-entrance from multiple threads and avoid core dumping while trying
// to print the stack trace.
uint64_t tid = 0;
{
const auto ptid = pthread_self();
// pthread_t is an opaque type
memcpy(&tid, &ptid, std::min(sizeof(tid), sizeof(ptid)));
// Essentially ensure non-zero
++tid;
}
for (;;) {
uint64_t expected = 0;
if (g_thread_handling_stack_trace.compare_exchange_strong(expected, tid)) {
// Acquired mutex
g_recursion_count = 0;
break;
}
if (expected == tid) {
++g_recursion_count;
fprintf(stderr, "Recursive call to stack trace handler (%d)\n",
g_recursion_count);
break;
}
// Sleep before trying again
usleep(1000);
}
if (g_recursion_count > 2) {
// Give up after too many recursions
fprintf(stderr, "Too many recursive calls to stack trace handler (%d)\n",
g_recursion_count);
} else {
if (g_at_exit_called.load(std::memory_order_acquire)) {
fprintf(stderr, "In a race with process already exiting...\n");
}
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
// skip the top three signal handler related frames
PrintStack(3);
// Efforts to fix or suppress TSAN warnings "signal-unsafe call inside of
// a signal" have failed, so just warn the user about them.
#ifdef __SANITIZE_THREAD__
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
fprintf(stderr,
"==> NOTE: any above warnings about \"signal-unsafe call\" are\n"
"==> ignorable, as they are expected when generating a stack\n"
"==> trace because of a signal under TSAN. Consider why the\n"
"==> signal was generated to begin with, and the stack trace\n"
"==> in the TSAN warning can be useful for that. (The stack\n"
"==> trace printed by the signal handler is likely obscured\n"
"==> by TSAN output.)\n");
#endif
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
}
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
// reset to default handler
signal(sig, SIG_DFL);
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
// re-signal to default handler (so we still get core dump if needed...)
raise(sig);
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
// release the mutex, in case this is somehow recoverable
if (g_recursion_count > 0) {
--g_recursion_count;
} else {
g_thread_handling_stack_trace.store(0, std::memory_order_release);
}
}
static void AtExit() {
// wait for stack trace handler to finish, if needed
while (g_thread_handling_stack_trace.load(std::memory_order_acquire)) {
usleep(1000);
}
g_at_exit_called.store(true, std::memory_order_release);
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
}
void InstallStackTraceHandler() {
// just use the plain old signal as it's simple and sufficient
// for this use case
signal(SIGILL, StackTraceHandler);
signal(SIGSEGV, StackTraceHandler);
signal(SIGBUS, StackTraceHandler);
signal(SIGABRT, StackTraceHandler);
Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c
2023-11-22 19:55:10 +00:00
atexit(AtExit);
Support stack traces with gdb (and debugger invocation) (#11150) Summary: LIB_MODE=shared is much more efficient for building all the unit tests but comes with the downside of ugly stack traces, generally missing name demangling and source line info. Searching the internet suggests the reliable way to get stack traces with dynamic loading is with gdb. This change automatically tries to use gdb to get a stack trace if built with LIB_MODE=shared, and only on Linux because that's where we have the capability to attach to the proper thread. (We could revise the exact conditions in the future.) If there's a failure invoking gdb, it falls back on the old method. Obscure details of making the output reasonable / pretty are in the source code comments. Based on this, it was easy to make it so that running a test command with ROCKSDB_DEBUG=1 would invoke gdb whenever the stack trace handler was invoked, so I included that. Intended follow-up: make LIB_MODE=shared the new default `make` build config Pull Request resolved: https://github.com/facebook/rocksdb/pull/11150 Test Plan: manual, mostly by injecting an "assert(false)" into a unit test and trying different build modes etc. Although gdb is slower to start showing stack trace output, it seems overall faster in many if not most cases, presumably because it doesn't reload the symbol table for each stack entry. At least with parallel test runs, having many tests dumping stacks with the old method can take so long it appears to hang the test run. Reviewed By: cbi42 Differential Revision: D42894064 Pulled By: pdillinger fbshipit-source-id: 608143309d8c69c40049c9a4abcde4f22e87b4d8
2023-02-03 21:21:03 +00:00
// Allow ouside debugger to attach, even with Yama security restrictions.
// This is needed even outside of PrintStack() so that external mechanisms
// can dump stacks if they suspect that a test has hung.
#ifdef PR_SET_PTRACER_ANY
(void)prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
#endif
[RocksDB] Add stacktrace signal handler Summary: This diff provides the ability to print out a stacktrace when the process receives certain signals. Currently, we enable this for the following signals (program error related): SIGILL SIGSEGV SIGBUS SIGABRT Application simply #include "util/stack_trace.h" and call leveldb::InstallStackTraceHandler() during initialization, if signal handler is needed. It's not done automatically when openning db, because it's the application(process)'s responsibility to install signal handler and some applications might already have their own (like fbcode). Sample output: Received signal 11 (Segmentation fault) #0 0x408ff0 ./signal_test() [0x408ff0] /home/haobo/rocksdb/util/signal_test.cc:4 #1 0x40827d ./signal_test() [0x40827d] /home/haobo/rocksdb/util/signal_test.cc:24 #2 0x7f8bb183172e /usr/local/fbcode/gcc-4.7.1-glibc-2.14.1/lib/libc.so.6(__libc_start_main+0x10e) [0x7f8bb183172e] ??:0 #3 0x408ebc ./signal_test() [0x408ebc] /home/engshare/third-party/src/glibc/glibc-2.14.1/glibc-2.14.1/csu/../sysdeps/x86_64/elf/start.S:113 Segmentation fault (core dumped) For each frame, we print the raw pointer, the symbol provided by backtrace_symbols (still not good enough), and the source file/line. Note that address translation is done by directly shell out to addr2line. ??:0 means addr2line fails to do the translation. Hacky, but I think it's good for now. Test Plan: signal_test.cc Reviewers: dhruba, MarkCallaghan Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D10173
2013-04-11 17:54:35 +00:00
}
} // namespace ROCKSDB_NAMESPACE::port
#endif