mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-27 11:43:49 +00:00
925d8252e5
Summary:
RocksDB's jemalloc no-dump allocator (`NewJemallocNodumpAllocator()`) was using a single manual arena. This arena's lock contention could be very high when thread caching is disabled for RocksDB blocks (e.g., when using `MALLOC_CONF='tcache_max:4096'` and `rocksdb_block_size=16384`).
This PR changes the jemalloc no-dump allocator to use a configurable number of manual arenas. That number is required to be a power of two so we can avoid division. The allocator shards allocation requests randomly across those manual arenas.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11400
Test Plan:
- mysqld setup
- Branch: fb-mysql-8.0.28 (653eba2e56)
- Build: `mysqlbuild.sh --clean --release`
- Set env var `MALLOC_CONF='tcache_max:$tcache_max'`
- Added CLI args `--rocksdb_cache_dump=false --rocksdb_block_cache_size=4294967296 --rocksdb_block_size=16384`
- Ran under /usr/bin/time
- Large database scenario
- Setup command: `mysqlslap -h 127.0.0.1 -P 13020 --auto-generate-sql=1 --auto-generate-sql-load-type=write --auto-generate-sql-guid-primary=1 --number-char-cols=8 --auto-generate-sql-execute-number=262144 --concurrency=32 --no-drop`
- Benchmark command: `mysqlslap -h 127.0.0.1 -P 13020 --query='select count(*) from mysqlslap.t1;' --number-of-queries=320 --concurrency=32`
- Results:
| tcache_max | num_arenas | Peak RSS MB (% change) | Query latency seconds (% change) |
|---|---|---|---|
| 4096 | **(baseline)** | 4541 | 37.1 |
| 4096 | 1 | 4535 (-0.1%) | 36.7 (-1%) |
| 4096 | 8 | 4687 (+3%) | 10.2 (-73%) |
| 16384 | **(baseline)** | 4514 | 8.4 |
| 16384 | 1 | 4526 (+0.3%) | 8.5 (+1%) |
| 16384 | 8 | 4580 (+1%) | 8.5 (+1%) |
Reviewed By: pdillinger
Differential Revision: D45220794
Pulled By: ajkr
fbshipit-source-id: 9a50c9872bdef5d299e52b115a65ee8a5557d58d
100 lines
3.5 KiB
C++
100 lines
3.5 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <vector>
|
|
|
|
#include "port/jemalloc_helper.h"
|
|
#include "port/port.h"
|
|
#include "rocksdb/memory_allocator.h"
|
|
#include "util/thread_local.h"
|
|
#include "utilities/memory_allocators.h"
|
|
|
|
// Feature detection for the jemalloc no-dump allocator.
//
// ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR is defined only when all of the following
// hold:
//   - ROCKSDB_JEMALLOC and ROCKSDB_PLATFORM_POSIX are both defined,
//   - JEMALLOC_VERSION_MAJOR is at least 5, and
//   - MADV_DONTDUMP is defined (checked after including <sys/mman.h>).
// The rest of this header uses the macro to decide which members to declare.
#if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX)

#include <sys/mman.h>

#if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP)
#define ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
#endif  // (JEMALLOC_VERSION_MAJOR >= 5) && MADV_DONTDUMP
#endif  // ROCKSDB_JEMALLOC && ROCKSDB_PLATFORM_POSIX
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// Allocation requests are randomly sharded across
|
|
// `JemallocAllocatorOptions::num_arenas` arenas to reduce contention on per-
|
|
// arena mutexes.
|
|
class JemallocNodumpAllocator : public BaseMemoryAllocator {
|
|
public:
|
|
explicit JemallocNodumpAllocator(JemallocAllocatorOptions& options);
|
|
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
~JemallocNodumpAllocator();
|
|
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
|
|
static const char* kClassName() { return "JemallocNodumpAllocator"; }
|
|
const char* Name() const override { return kClassName(); }
|
|
static bool IsSupported() {
|
|
std::string unused;
|
|
return IsSupported(&unused);
|
|
}
|
|
static bool IsSupported(std::string* why);
|
|
bool IsMutable() const { return !init_; }
|
|
|
|
Status PrepareOptions(const ConfigOptions& config_options) override;
|
|
|
|
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
void* Allocate(size_t size) override;
|
|
void Deallocate(void* p) override;
|
|
size_t UsableSize(void* p, size_t allocation_size) const override;
|
|
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
|
|
private:
|
|
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
Status InitializeArenas();
|
|
|
|
uint32_t GetArenaIndex() const;
|
|
|
|
// Custom alloc hook to replace jemalloc default alloc.
|
|
static void* Alloc(extent_hooks_t* extent, void* new_addr, size_t size,
|
|
size_t alignment, bool* zero, bool* commit,
|
|
unsigned arena_ind);
|
|
|
|
// Destroy arena on destruction of the allocator, or on failure.
|
|
static Status DestroyArena(uint32_t arena_index);
|
|
|
|
// Destroy tcache on destruction of the allocator, or thread exit.
|
|
static void DestroyThreadSpecificCache(void* ptr);
|
|
|
|
// Get or create tcache. Return flag suitable to use with `mallocx`:
|
|
// either MALLOCX_TCACHE_NONE or MALLOCX_TCACHE(tc).
|
|
int GetThreadSpecificCache(size_t size);
|
|
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
JemallocAllocatorOptions options_;
|
|
|
|
#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
// A function pointer to jemalloc default alloc. Use atomic to make sure
|
|
// NewJemallocNodumpAllocator is thread-safe.
|
|
//
|
|
// Hack: original_alloc_ needs to be static for Alloc() to access it.
|
|
// alloc needs to be static to pass to jemalloc as function pointer. We can
|
|
// use a single process-wide value as long as we assume that any newly created
|
|
// arena has the same original value in its `alloc` function pointer.
|
|
static std::atomic<extent_alloc_t*> original_alloc_;
|
|
|
|
// Custom hooks has to outlive corresponding arena.
|
|
std::vector<std::unique_ptr<extent_hooks_t>> per_arena_hooks_;
|
|
|
|
// Hold thread-local tcache index.
|
|
ThreadLocalPtr tcache_;
|
|
|
|
std::vector<uint32_t> arena_indexes_;
|
|
#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
|
|
|
bool init_ = false;
|
|
};
|
|
} // namespace ROCKSDB_NAMESPACE
|