54cb9c77d9
Summary: The following are risks associated with pointer-to-pointer reinterpret_cast:

* It can produce the "wrong result" (crash or memory corruption). IIRC, in theory this can happen for any up-cast or down-cast on a non-standard-layout type, though in practice it would only happen in multiple inheritance cases (where the base class pointer might be "inside" the derived object). We don't use multiple inheritance much, but we do use it.
* It can mask useful compiler errors upon code change, including conversions between unrelated pointer types that you expect to be related, and unintended conversions between pointer and scalar types.

I can only think of some obscure cases where static_cast could be troublesome when it compiles as a replacement:

* Going through `void*` could plausibly cause unnecessary or broken pointer arithmetic. Suppose we have `struct Derived : public Base1, public Base2`. If we convert `Derived*` -> `void*` -> `Base2*` -> `Derived*` through reinterpret_casts, this could plausibly work (though technically UB) as long as the `Base2*` is never dereferenced. Changing to static_cast could introduce breaking pointer arithmetic.
* Unnecessary (but safe) pointer arithmetic could arise in a case like `Derived*` -> `Base2*` -> `Derived*` where previously the Base2 pointer was never dereferenced. This could potentially affect performance.

With some light scripting, I tried replacing pointer-to-pointer reinterpret_casts with static_cast and kept the cases that still compile. Most occurrences of reinterpret_cast have successfully been changed (except in java/ and third-party/): 294 changed, 257 remain.

A couple of related interventions are included here:

* Previously, Cache::Handle was not actually derived from in the implementations; it was just a `void*` stand-in used with reinterpret_cast. Now there is an inheritance relationship to allow static_cast. In theory, this could introduce pointer arithmetic (as described above), but that is unlikely without multiple inheritance AND a non-empty Cache::Handle.
* Removed some unnecessary casts to void*, as this conversion is allowed to be implicit (for better or worse).

Most of the remaining reinterpret_casts convert to/from raw bytes of objects. We could consider better idioms for these patterns in follow-up work.

I wish there were a way to implement a template variant of static_cast that would only compile if no pointer arithmetic is generated but, best I can tell, this is not possible. AFAIK the best you could do is a dynamic check that the void* conversion after the static_cast is unchanged.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12308

Test Plan: existing tests, CI

Reviewed By: ltamasi

Differential Revision: D53204947

Pulled By: pdillinger

fbshipit-source-id: 9de23e618263b0d5b9820f4e15966876888a16e2
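To make the multiple-inheritance hazard and the suggested dynamic check concrete, here is a minimal standalone sketch. Base1/Base2/Base2-adjustment details are hypothetical illustration types and typical-ABI behavior, not RocksDB code or a language guarantee:

#include <cassert>
#include <cstdio>

struct Base1 { int x = 1; };
struct Base2 { int y = 2; };
struct Derived : public Base1, public Base2 { int z = 3; };

int main() {
  Derived d;
  // static_cast applies the pointer adjustment the object layout requires:
  // with multiple inheritance, the Base2 subobject usually does not sit at
  // the start of Derived.
  Base2* adjusted = static_cast<Base2*>(&d);
  // reinterpret_cast applies no adjustment; it reuses the address of d,
  // which is where the Base1 subobject lives. Dereferencing it as a Base2
  // is undefined behavior and will typically read Base1's bytes.
  Base2* unadjusted = reinterpret_cast<Base2*>(&d);
  printf("static_cast:      y = %d\n", adjusted->y);    // prints 2
  printf("reinterpret_cast: y = %d\n", unadjusted->y);  // UB; likely prints 1

  // The dynamic check mentioned above: verify that a static_cast generated
  // no pointer arithmetic by comparing void* conversions before and after.
  assert(static_cast<void*>(static_cast<Base1*>(&d)) ==
         static_cast<void*>(&d));  // holds here: Base1 is the first base
  return 0;
}

The same comparison against static_cast<Base2*>(&d) would fail, which is exactly the pointer arithmetic the summary describes.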
305 lines · 10 KiB · C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "memory/jemalloc_nodump_allocator.h"
|
|
|
|
#include <string>
|
|
#include <thread>
|
|
|
|
#include "port/likely.h"
|
|
#include "port/port.h"
|
|
#include "rocksdb/convenience.h"
|
|
#include "rocksdb/utilities/customizable_util.h"
|
|
#include "rocksdb/utilities/object_registry.h"
|
|
#include "rocksdb/utilities/options_type.h"
|
|
#include "util/fastrange.h"
|
|
#include "util/random.h"
|
|
#include "util/string_util.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {

#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
std::atomic<extent_alloc_t*> JemallocNodumpAllocator::original_alloc_{nullptr};
#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR

static std::unordered_map<std::string, OptionTypeInfo> jemalloc_type_info = {
    {"limit_tcache_size",
     {offsetof(struct JemallocAllocatorOptions, limit_tcache_size),
      OptionType::kBoolean, OptionVerificationType::kNormal,
      OptionTypeFlags::kNone}},
    {"tcache_size_lower_bound",
     {offsetof(struct JemallocAllocatorOptions, tcache_size_lower_bound),
      OptionType::kSizeT, OptionVerificationType::kNormal,
      OptionTypeFlags::kNone}},
    {"tcache_size_upper_bound",
     {offsetof(struct JemallocAllocatorOptions, tcache_size_upper_bound),
      OptionType::kSizeT, OptionVerificationType::kNormal,
      OptionTypeFlags::kNone}},
    {"num_arenas",
     {offsetof(struct JemallocAllocatorOptions, num_arenas), OptionType::kSizeT,
      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
};

bool JemallocNodumpAllocator::IsSupported(std::string* why) {
#ifndef ROCKSDB_JEMALLOC
  *why = "Not compiled with ROCKSDB_JEMALLOC";
  return false;
#else
  static const std::string unsupported =
      "JemallocNodumpAllocator only available with jemalloc version >= 5 "
      "and MADV_DONTDUMP is available.";
  if (!HasJemalloc()) {
    *why = unsupported;
    return false;
  }
#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
  *why = unsupported;
  return false;
#else
  return true;
#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
#endif  // ROCKSDB_JEMALLOC
}

JemallocNodumpAllocator::JemallocNodumpAllocator(
    const JemallocAllocatorOptions& options)
    : options_(options)
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
      ,
      tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache) {
#else   // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
{
#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
  RegisterOptions(&options_, &jemalloc_type_info);
}

#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
JemallocNodumpAllocator::~JemallocNodumpAllocator() {
  // Destroy tcache before destroying arena.
  autovector<void*> tcache_list;
  tcache_.Scrape(&tcache_list, nullptr);
  for (void* tcache_index : tcache_list) {
    DestroyThreadSpecificCache(tcache_index);
  }
  for (auto arena_index : arena_indexes_) {
    // Destroy arena. Silently ignore error.
    Status s = DestroyArena(arena_index);
    assert(s.ok());
    s.PermitUncheckedError();
  }
}

size_t JemallocNodumpAllocator::UsableSize(void* p,
                                           size_t /*allocation_size*/) const {
  return malloc_usable_size(static_cast<void*>(p));
}

void* JemallocNodumpAllocator::Allocate(size_t size) {
  int tcache_flag = GetThreadSpecificCache(size);
  uint32_t arena_index = GetArenaIndex();
  return mallocx(size, MALLOCX_ARENA(arena_index) | tcache_flag);
}

void JemallocNodumpAllocator::Deallocate(void* p) {
  // Obtain tcache.
  size_t size = 0;
  if (options_.limit_tcache_size) {
    size = malloc_usable_size(p);
  }
  int tcache_flag = GetThreadSpecificCache(size);
  // No need to pass arena index to dallocx(). Jemalloc will find arena index
  // from its own metadata.
  dallocx(p, tcache_flag);
}

uint32_t JemallocNodumpAllocator::GetArenaIndex() const {
  if (arena_indexes_.size() == 1) {
    return arena_indexes_[0];
  }

  static std::atomic<uint32_t> next_seed = 0;
  // Core-local may work in place of `thread_local` as we should be able to
  // tolerate occasional stale reads in thread migration cases. However we need
  // to make Random thread-safe and prevent cacheline bouncing. Whether this is
  // worthwhile is still an open question.
  thread_local Random tl_random(next_seed.fetch_add(1));
  return arena_indexes_[FastRange32(
      tl_random.Next(), static_cast<uint32_t>(arena_indexes_.size()))];
}

Status JemallocNodumpAllocator::InitializeArenas() {
  assert(!init_);
  init_ = true;

  for (size_t i = 0; i < options_.num_arenas; i++) {
    // Create arena.
    unsigned arena_index;
    size_t arena_index_size = sizeof(arena_index);
    int ret =
        mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0);
    if (ret != 0) {
      return Status::Incomplete(
          "Failed to create jemalloc arena, error code: " +
          std::to_string(ret));
    }
    arena_indexes_.push_back(arena_index);

    // Read existing hooks.
    std::string key =
        "arena." + std::to_string(arena_indexes_[i]) + ".extent_hooks";
    extent_hooks_t* hooks;
    size_t hooks_size = sizeof(hooks);
    ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0);
    if (ret != 0) {
      return Status::Incomplete("Failed to read existing hooks, error code: " +
                                std::to_string(ret));
    }

    // Store existing alloc.
    extent_alloc_t* original_alloc = hooks->alloc;
    extent_alloc_t* expected = nullptr;
    bool success =
        JemallocNodumpAllocator::original_alloc_.compare_exchange_strong(
            expected, original_alloc);
    if (!success && original_alloc != expected) {
      // This could happen if jemalloc creates new arenas with different
      // initial values in their `alloc` function pointers. See
      // `original_alloc_` API doc for more details.
      return Status::Incomplete("Original alloc conflict.");
    }

    // Set the custom hook.
    per_arena_hooks_.emplace_back();
    per_arena_hooks_.back().reset(new extent_hooks_t(*hooks));
    per_arena_hooks_.back()->alloc = &JemallocNodumpAllocator::Alloc;
    extent_hooks_t* hooks_ptr = per_arena_hooks_.back().get();
    ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr));
    if (ret != 0) {
      return Status::Incomplete("Failed to set custom hook, error code: " +
                                std::to_string(ret));
    }
  }
  return Status::OK();
}

#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR

Status JemallocNodumpAllocator::PrepareOptions(
    const ConfigOptions& config_options) {
  std::string message;

  if (!IsSupported(&message)) {
    return Status::NotSupported(message);
  } else if (options_.limit_tcache_size &&
             options_.tcache_size_lower_bound >=
                 options_.tcache_size_upper_bound) {
    return Status::InvalidArgument(
        "tcache_size_lower_bound larger or equal to tcache_size_upper_bound.");
  } else if (options_.num_arenas < 1) {
    return Status::InvalidArgument("num_arenas must be a positive integer");
  } else if (IsMutable()) {
    Status s = MemoryAllocator::PrepareOptions(config_options);
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
    if (s.ok()) {
      s = InitializeArenas();
    }
#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
    return s;
  } else {
    // Already prepared
    return Status::OK();
  }
}

#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
int JemallocNodumpAllocator::GetThreadSpecificCache(size_t size) {
  // We always enable tcache. The only corner case is when there are a ton of
  // threads accessing with low frequency, then it could consume a lot of
  // memory (may reach # threads * ~1MB) without bringing too much benefit.
  if (options_.limit_tcache_size && (size <= options_.tcache_size_lower_bound ||
                                     size > options_.tcache_size_upper_bound)) {
    return MALLOCX_TCACHE_NONE;
  }
  unsigned* tcache_index = static_cast<unsigned*>(tcache_.Get());
  if (UNLIKELY(tcache_index == nullptr)) {
    // Instantiate tcache.
    tcache_index = new unsigned(0);
    size_t tcache_index_size = sizeof(unsigned);
    int ret =
        mallctl("tcache.create", tcache_index, &tcache_index_size, nullptr, 0);
    if (ret != 0) {
      // No good way to expose the error. Silently disable tcache.
      delete tcache_index;
      return MALLOCX_TCACHE_NONE;
    }
    tcache_.Reset(static_cast<void*>(tcache_index));
  }
  return MALLOCX_TCACHE(*tcache_index);
}

void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr,
                                     size_t size, size_t alignment, bool* zero,
                                     bool* commit, unsigned arena_ind) {
  extent_alloc_t* original_alloc =
      original_alloc_.load(std::memory_order_relaxed);
  assert(original_alloc != nullptr);
  void* result = original_alloc(extent, new_addr, size, alignment, zero, commit,
                                arena_ind);
  if (result != nullptr) {
    int ret = madvise(result, size, MADV_DONTDUMP);
    if (ret != 0) {
      fprintf(
          stderr,
          "JemallocNodumpAllocator failed to set MADV_DONTDUMP, error code: %d",
          ret);
      assert(false);
    }
  }
  return result;
}

Status JemallocNodumpAllocator::DestroyArena(uint32_t arena_index) {
  assert(arena_index != 0);
  std::string key = "arena." + std::to_string(arena_index) + ".destroy";
  int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0);
  if (ret != 0) {
    return Status::Incomplete("Failed to destroy jemalloc arena, error code: " +
                              std::to_string(ret));
  }
  return Status::OK();
}

void JemallocNodumpAllocator::DestroyThreadSpecificCache(void* ptr) {
  assert(ptr != nullptr);
  unsigned* tcache_index = static_cast<unsigned*>(ptr);
  size_t tcache_index_size = sizeof(unsigned);
  int ret __attribute__((__unused__)) =
      mallctl("tcache.destroy", nullptr, 0, tcache_index, tcache_index_size);
  // Silently ignore error.
  assert(ret == 0);
  delete tcache_index;
}

#endif  // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR

Status NewJemallocNodumpAllocator(
    const JemallocAllocatorOptions& options,
    std::shared_ptr<MemoryAllocator>* memory_allocator) {
  if (memory_allocator == nullptr) {
    return Status::InvalidArgument("memory_allocator must be non-null.");
  }
#ifndef ROCKSDB_JEMALLOC
  (void)options;
  return Status::NotSupported("Not compiled with JEMALLOC");
#else
  std::unique_ptr<MemoryAllocator> allocator(
      new JemallocNodumpAllocator(options));
  Status s = allocator->PrepareOptions(ConfigOptions());
  if (s.ok()) {
    memory_allocator->reset(allocator.release());
  }
  return s;
#endif
}

}  // namespace ROCKSDB_NAMESPACE
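For reference, a minimal usage sketch of the factory above. The option values and the block-cache wiring are illustrative, and it assumes RocksDB was built with jemalloc:

#include "rocksdb/cache.h"
#include "rocksdb/memory_allocator.h"

using namespace ROCKSDB_NAMESPACE;

std::shared_ptr<Cache> MakeNodumpBlockCache() {
  JemallocAllocatorOptions jopts;
  jopts.limit_tcache_size = true;  // only tcache allocations in the given range
  jopts.tcache_size_lower_bound = 1024;       // bytes, illustrative
  jopts.tcache_size_upper_bound = 16 * 1024;  // bytes, illustrative
  jopts.num_arenas = 1;

  std::shared_ptr<MemoryAllocator> allocator;
  Status s = NewJemallocNodumpAllocator(jopts, &allocator);

  LRUCacheOptions copts;
  copts.capacity = 1 << 30;  // 1 GiB, illustrative
  if (s.ok()) {
    // Cache blocks are then served from arenas whose extents are marked
    // MADV_DONTDUMP, keeping them out of core dumps.
    copts.memory_allocator = allocator;
  }
  return NewLRUCache(copts);
}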