From bb87164db308589fdd3a46471bbbb3871962244b Mon Sep 17 00:00:00 2001
From: Peter Dillinger
Date: Tue, 3 May 2022 12:32:02 -0700
Subject: [PATCH] Fork and simplify LRUCache for developing enhancements (#9917)

Summary:
To support a project to prototype and evaluate algorithmic enhancements and
alternatives to LRUCache, here I have separated out LRUCache into internal-only
"FastLRUCache" and cut it down to essentials, so that details like secondary
cache handling and priorities do not interfere with prototyping. These can be
re-integrated later as needed, along with refactoring to minimize code
duplication (which would slow down prototyping for now).

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9917

Test Plan: unit tests updated to ensure basic functionality has (likely) been
preserved

Reviewed By: anand1976

Differential Revision: D35995554

Pulled By: pdillinger

fbshipit-source-id: d67b20b7ada3b5d3bfe56d897a73885894a1d9db
---
 CMakeLists.txt            |   1 +
 TARGETS                   |   2 +
 cache/cache_test.cc       |  18 +-
 cache/fast_lru_cache.cc   | 511 ++++++++++++++++++++++++++++++++++++++
 cache/fast_lru_cache.h    | 299 ++++++++++++++++++++++
 cache/lru_cache.cc        |   3 +
 cache/lru_cache.h         |   7 +
 db/db_block_cache_test.cc |   7 +-
 src.mk                    |   1 +
 9 files changed, 844 insertions(+), 5 deletions(-)
 create mode 100644 cache/fast_lru_cache.cc
 create mode 100644 cache/fast_lru_cache.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 008304cfcf..ac9f3a6abc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -596,6 +596,7 @@ set(SOURCES
   cache/cache_reservation_manager.cc
   cache/clock_cache.cc
   cache/compressed_secondary_cache.cc
+  cache/fast_lru_cache.cc
   cache/lru_cache.cc
   cache/sharded_cache.cc
   db/arena_wrapped_db_iter.cc
diff --git a/TARGETS b/TARGETS
index 5b72ea483b..0bc5ad0f53 100644
--- a/TARGETS
+++ b/TARGETS
@@ -15,6 +15,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
         "cache/cache_reservation_manager.cc",
         "cache/clock_cache.cc",
         "cache/compressed_secondary_cache.cc",
+        "cache/fast_lru_cache.cc",
         "cache/lru_cache.cc",
         "cache/sharded_cache.cc",
         "db/arena_wrapped_db_iter.cc",
@@ -335,6 +336,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
         "cache/cache_reservation_manager.cc",
         "cache/clock_cache.cc",
         "cache/compressed_secondary_cache.cc",
+        "cache/fast_lru_cache.cc",
        "cache/lru_cache.cc",
        "cache/sharded_cache.cc",
        "db/arena_wrapped_db_iter.cc",
diff --git a/cache/cache_test.cc b/cache/cache_test.cc
index 8562490ee3..d7efb6652e 100644
--- a/cache/cache_test.cc
+++ b/cache/cache_test.cc
@@ -14,7 +14,9 @@
 #include
 #include
 #include
+
 #include "cache/clock_cache.h"
+#include "cache/fast_lru_cache.h"
 #include "cache/lru_cache.h"
 #include "test_util/testharness.h"
 #include "util/coding.h"
@@ -39,6 +41,7 @@ static int DecodeValue(void* v) {

 const std::string kLRU = "lru";
 const std::string kClock = "clock";
+const std::string kFast = "fast";

 void dumbDeleter(const Slice& /*key*/, void* /*value*/) {}
@@ -83,6 +86,9 @@ class CacheTest : public testing::TestWithParam<std::string> {
     if (type == kClock) {
       return NewClockCache(capacity);
     }
+    if (type == kFast) {
+      return NewFastLRUCache(capacity);
+    }
     return nullptr;
   }

@@ -103,6 +109,10 @@ class CacheTest : public testing::TestWithParam<std::string> {
       return NewClockCache(capacity, num_shard_bits, strict_capacity_limit,
                            charge_policy);
     }
+    if (type == kFast) {
+      return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
+                             charge_policy);
+    }
     return nullptr;
   }

@@ -838,11 +848,13 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
 std::shared_ptr<Cache> (*new_clock_cache_func)(
     size_t, int,
bool, CacheMetadataChargePolicy) = NewClockCache; INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, - testing::Values(kLRU, kClock)); + testing::Values(kLRU, kClock, kFast)); #else -INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU)); +INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, + testing::Values(kLRU, kFast)); #endif // SUPPORT_CLOCK_CACHE -INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU)); +INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, + testing::Values(kLRU, kFast)); } // namespace ROCKSDB_NAMESPACE diff --git a/cache/fast_lru_cache.cc b/cache/fast_lru_cache.cc new file mode 100644 index 0000000000..10ae7367fe --- /dev/null +++ b/cache/fast_lru_cache.cc @@ -0,0 +1,511 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "cache/fast_lru_cache.h" + +#include +#include +#include + +#include "monitoring/perf_context_imp.h" +#include "monitoring/statistics.h" +#include "port/lang.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +namespace fast_lru_cache { + +LRUHandleTable::LRUHandleTable(int max_upper_hash_bits) + : length_bits_(/* historical starting size*/ 4), + list_(new LRUHandle* [size_t{1} << length_bits_] {}), + elems_(0), + max_length_bits_(max_upper_hash_bits) {} + +LRUHandleTable::~LRUHandleTable() { + ApplyToEntriesRange( + [](LRUHandle* h) { + if (!h->HasRefs()) { + h->Free(); + } + }, + 0, uint32_t{1} << length_bits_); +} + +LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) { + return *FindPointer(key, hash); +} + +LRUHandle* LRUHandleTable::Insert(LRUHandle* h) { + LRUHandle** ptr = FindPointer(h->key(), h->hash); + LRUHandle* old = *ptr; + h->next_hash = (old == nullptr ? nullptr : old->next_hash); + *ptr = h; + if (old == nullptr) { + ++elems_; + if ((elems_ >> length_bits_) > 0) { // elems_ >= length + // Since each cache entry is fairly large, we aim for a small + // average linked list length (<= 1). + Resize(); + } + } + return old; +} + +LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) { + LRUHandle** ptr = FindPointer(key, hash); + LRUHandle* result = *ptr; + if (result != nullptr) { + *ptr = result->next_hash; + --elems_; + } + return result; +} + +LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) { + LRUHandle** ptr = &list_[hash >> (32 - length_bits_)]; + while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) { + ptr = &(*ptr)->next_hash; + } + return ptr; +} + +void LRUHandleTable::Resize() { + if (length_bits_ >= max_length_bits_) { + // Due to reaching limit of hash information, if we made the table bigger, + // we would allocate more addresses but only the same number would be used. + return; + } + if (length_bits_ >= 31) { + // Avoid undefined behavior shifting uint32_t by 32. 
+ return; + } + + uint32_t old_length = uint32_t{1} << length_bits_; + int new_length_bits = length_bits_ + 1; + std::unique_ptr new_list { + new LRUHandle* [size_t{1} << new_length_bits] {} + }; + uint32_t count = 0; + for (uint32_t i = 0; i < old_length; i++) { + LRUHandle* h = list_[i]; + while (h != nullptr) { + LRUHandle* next = h->next_hash; + uint32_t hash = h->hash; + LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)]; + h->next_hash = *ptr; + *ptr = h; + h = next; + count++; + } + } + assert(elems_ == count); + list_ = std::move(new_list); + length_bits_ = new_length_bits; +} + +LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + int max_upper_hash_bits) + : capacity_(0), + strict_capacity_limit_(strict_capacity_limit), + table_(max_upper_hash_bits), + usage_(0), + lru_usage_(0) { + set_metadata_charge_policy(metadata_charge_policy); + // Make empty circular linked list. + lru_.next = &lru_; + lru_.prev = &lru_; + lru_low_pri_ = &lru_; + SetCapacity(capacity); +} + +void LRUCacheShard::EraseUnRefEntries() { + autovector last_reference_list; + { + MutexLock l(&mutex_); + while (lru_.next != &lru_) { + LRUHandle* old = lru_.next; + // LRU list contains only elements which can be evicted. + assert(old->InCache() && !old->HasRefs()); + LRU_Remove(old); + table_.Remove(old->key(), old->hash); + old->SetInCache(false); + size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= total_charge); + usage_ -= total_charge; + last_reference_list.push_back(old); + } + } + + // Free the entries here outside of mutex for performance reasons. + for (auto entry : last_reference_list) { + entry->Free(); + } +} + +void LRUCacheShard::ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) { + // The state is essentially going to be the starting hash, which works + // nicely even if we resize between calls because we use upper-most + // hash bits for table indexes. + MutexLock l(&mutex_); + uint32_t length_bits = table_.GetLengthBits(); + uint32_t length = uint32_t{1} << length_bits; + + assert(average_entries_per_lock > 0); + // Assuming we are called with same average_entries_per_lock repeatedly, + // this simplifies some logic (index_end will not overflow). + assert(average_entries_per_lock < length || *state == 0); + + uint32_t index_begin = *state >> (32 - length_bits); + uint32_t index_end = index_begin + average_entries_per_lock; + if (index_end >= length) { + // Going to end + index_end = length; + *state = UINT32_MAX; + } else { + *state = index_end << (32 - length_bits); + } + + table_.ApplyToEntriesRange( + [callback](LRUHandle* h) { + callback(h->key(), h->value, h->charge, h->deleter); + }, + index_begin, index_end); +} + +void LRUCacheShard::LRU_Remove(LRUHandle* e) { + assert(e->next != nullptr); + assert(e->prev != nullptr); + e->next->prev = e->prev; + e->prev->next = e->next; + e->prev = e->next = nullptr; + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + assert(lru_usage_ >= total_charge); + lru_usage_ -= total_charge; +} + +void LRUCacheShard::LRU_Insert(LRUHandle* e) { + assert(e->next == nullptr); + assert(e->prev == nullptr); + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + // Inset "e" to head of LRU list. 
+  e->next = &lru_;
+  e->prev = lru_.prev;
+  e->prev->next = e;
+  e->next->prev = e;
+  lru_usage_ += total_charge;
+}
+
+void LRUCacheShard::EvictFromLRU(size_t charge,
+                                 autovector<LRUHandle*>* deleted) {
+  while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
+    LRUHandle* old = lru_.next;
+    // LRU list contains only elements which can be evicted.
+    assert(old->InCache() && !old->HasRefs());
+    LRU_Remove(old);
+    table_.Remove(old->key(), old->hash);
+    old->SetInCache(false);
+    size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
+    assert(usage_ >= old_total_charge);
+    usage_ -= old_total_charge;
+    deleted->push_back(old);
+  }
+}
+
+void LRUCacheShard::SetCapacity(size_t capacity) {
+  autovector<LRUHandle*> last_reference_list;
+  {
+    MutexLock l(&mutex_);
+    capacity_ = capacity;
+    EvictFromLRU(0, &last_reference_list);
+  }
+
+  // Free the entries here outside of mutex for performance reasons.
+  for (auto entry : last_reference_list) {
+    entry->Free();
+  }
+}
+
+void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
+  MutexLock l(&mutex_);
+  strict_capacity_limit_ = strict_capacity_limit;
+}
+
+Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
+                                 bool free_handle_on_fail) {
+  Status s = Status::OK();
+  autovector<LRUHandle*> last_reference_list;
+  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+
+  {
+    MutexLock l(&mutex_);
+
+    // Free the space following strict LRU policy until enough space
+    // is freed or the lru list is empty.
+    EvictFromLRU(total_charge, &last_reference_list);
+
+    if ((usage_ + total_charge) > capacity_ &&
+        (strict_capacity_limit_ || handle == nullptr)) {
+      e->SetInCache(false);
+      if (handle == nullptr) {
+        // Don't insert the entry but still return ok, as if the entry were
+        // inserted into the cache and evicted immediately.
+        last_reference_list.push_back(e);
+      } else {
+        if (free_handle_on_fail) {
+          delete[] reinterpret_cast<char*>(e);
+          *handle = nullptr;
+        }
+        s = Status::Incomplete("Insert failed due to LRU cache being full.");
+      }
+    } else {
+      // Insert into the cache. Note that the cache might get larger than its
+      // capacity if not enough space was freed up.
+      LRUHandle* old = table_.Insert(e);
+      usage_ += total_charge;
+      if (old != nullptr) {
+        s = Status::OkOverwritten();
+        assert(old->InCache());
+        old->SetInCache(false);
+        if (!old->HasRefs()) {
+          // old is on LRU because it's in cache and its reference count is 0.
+          LRU_Remove(old);
+          size_t old_total_charge =
+              old->CalcTotalCharge(metadata_charge_policy_);
+          assert(usage_ >= old_total_charge);
+          usage_ -= old_total_charge;
+          last_reference_list.push_back(old);
+        }
+      }
+      if (handle == nullptr) {
+        LRU_Insert(e);
+      } else {
+        // If caller already holds a ref, no need to take one here.
+        if (!e->HasRefs()) {
+          e->Ref();
+        }
+        *handle = reinterpret_cast<Cache::Handle*>(e);
+      }
+    }
+  }
+
+  // Free the entries here outside of mutex for performance reasons.
+ for (auto entry : last_reference_list) { + entry->Free(); + } + + return s; +} + +Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) { + LRUHandle* e = nullptr; + { + MutexLock l(&mutex_); + e = table_.Lookup(key, hash); + if (e != nullptr) { + assert(e->InCache()); + if (!e->HasRefs()) { + // The entry is in LRU since it's in hash and has no external references + LRU_Remove(e); + } + e->Ref(); + } + } + return reinterpret_cast(e); +} + +bool LRUCacheShard::Ref(Cache::Handle* h) { + LRUHandle* e = reinterpret_cast(h); + MutexLock l(&mutex_); + // To create another reference - entry must be already externally referenced. + assert(e->HasRefs()); + e->Ref(); + return true; +} + +bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) { + if (handle == nullptr) { + return false; + } + LRUHandle* e = reinterpret_cast(handle); + bool last_reference = false; + { + MutexLock l(&mutex_); + last_reference = e->Unref(); + if (last_reference && e->InCache()) { + // The item is still in cache, and nobody else holds a reference to it. + if (usage_ > capacity_ || erase_if_last_ref) { + // The LRU list must be empty since the cache is full. + assert(lru_.next == &lru_ || erase_if_last_ref); + // Take this opportunity and remove the item. + table_.Remove(e->key(), e->hash); + e->SetInCache(false); + } else { + // Put the item back on the LRU list, and don't free it. + LRU_Insert(e); + last_reference = false; + } + } + // If it was the last reference, then decrement the cache usage. + if (last_reference) { + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= total_charge); + usage_ -= total_charge; + } + } + + // Free the entry here outside of mutex for performance reasons. + if (last_reference) { + e->Free(); + } + return last_reference; +} + +Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, + size_t charge, Cache::DeleterFn deleter, + Cache::Handle** handle, + Cache::Priority /*priority*/) { + // Allocate the memory here outside of the mutex. + // If the cache is full, we'll have to release it. + // It shouldn't happen very often though. + LRUHandle* e = reinterpret_cast( + new char[sizeof(LRUHandle) - 1 + key.size()]); + + e->value = value; + e->flags = 0; + e->deleter = deleter; + e->charge = charge; + e->key_length = key.size(); + e->hash = hash; + e->refs = 0; + e->next = e->prev = nullptr; + e->SetInCache(true); + memcpy(e->key_data, key.data(), key.size()); + + return InsertItem(e, handle, /* free_handle_on_fail */ true); +} + +void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { + LRUHandle* e; + bool last_reference = false; + { + MutexLock l(&mutex_); + e = table_.Remove(key, hash); + if (e != nullptr) { + assert(e->InCache()); + e->SetInCache(false); + if (!e->HasRefs()) { + // The entry is in LRU since it's in hash and has no external references + LRU_Remove(e); + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= total_charge); + usage_ -= total_charge; + last_reference = true; + } + } + } + + // Free the entry here outside of mutex for performance reasons. + // last_reference will only be true if e != nullptr. 
+ if (last_reference) { + e->Free(); + } +} + +size_t LRUCacheShard::GetUsage() const { + MutexLock l(&mutex_); + return usage_; +} + +size_t LRUCacheShard::GetPinnedUsage() const { + MutexLock l(&mutex_); + assert(usage_ >= lru_usage_); + return usage_ - lru_usage_; +} + +std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; } + +LRUCache::LRUCache(size_t capacity, int num_shard_bits, + bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy) + : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { + num_shards_ = 1 << num_shard_bits; + shards_ = reinterpret_cast( + port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_)); + size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_; + for (int i = 0; i < num_shards_; i++) { + new (&shards_[i]) + LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy, + /* max_upper_hash_bits */ 32 - num_shard_bits); + } +} + +LRUCache::~LRUCache() { + if (shards_ != nullptr) { + assert(num_shards_ > 0); + for (int i = 0; i < num_shards_; i++) { + shards_[i].~LRUCacheShard(); + } + port::cacheline_aligned_free(shards_); + } +} + +CacheShard* LRUCache::GetShard(uint32_t shard) { + return reinterpret_cast(&shards_[shard]); +} + +const CacheShard* LRUCache::GetShard(uint32_t shard) const { + return reinterpret_cast(&shards_[shard]); +} + +void* LRUCache::Value(Handle* handle) { + return reinterpret_cast(handle)->value; +} + +size_t LRUCache::GetCharge(Handle* handle) const { + return reinterpret_cast(handle)->charge; +} + +Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const { + auto h = reinterpret_cast(handle); + return h->deleter; +} + +uint32_t LRUCache::GetHash(Handle* handle) const { + return reinterpret_cast(handle)->hash; +} + +void LRUCache::DisownData() { + // Leak data only if that won't generate an ASAN/valgrind warning. + if (!kMustFreeHeapAllocations) { + shards_ = nullptr; + num_shards_ = 0; + } +} + +} // namespace fast_lru_cache + +std::shared_ptr NewFastLRUCache( + size_t capacity, int num_shard_bits, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy) { + if (num_shard_bits >= 20) { + return nullptr; // The cache cannot be sharded into too many fine pieces. + } + if (num_shard_bits < 0) { + num_shard_bits = GetDefaultCacheShardBits(capacity); + } + return std::make_shared( + capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/cache/fast_lru_cache.h b/cache/fast_lru_cache.h new file mode 100644 index 0000000000..a672afaf72 --- /dev/null +++ b/cache/fast_lru_cache.h @@ -0,0 +1,299 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include +#include + +#include "cache/sharded_cache.h" +#include "port/lang.h" +#include "port/malloc.h" +#include "port/port.h" +#include "rocksdb/secondary_cache.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { +namespace fast_lru_cache { + +// An experimental (under development!) 
alternative to LRUCache + +struct LRUHandle { + void* value; + Cache::DeleterFn deleter; + LRUHandle* next_hash; + LRUHandle* next; + LRUHandle* prev; + size_t charge; // TODO(opt): Only allow uint32_t? + size_t key_length; + // The hash of key(). Used for fast sharding and comparisons. + uint32_t hash; + // The number of external refs to this entry. The cache itself is not counted. + uint32_t refs; + + enum Flags : uint8_t { + // Whether this entry is referenced by the hash table. + IN_CACHE = (1 << 0), + }; + uint8_t flags; + + // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!) + char key_data[1]; + + Slice key() const { return Slice(key_data, key_length); } + + // Increase the reference count by 1. + void Ref() { refs++; } + + // Just reduce the reference count by 1. Return true if it was last reference. + bool Unref() { + assert(refs > 0); + refs--; + return refs == 0; + } + + // Return true if there are external refs, false otherwise. + bool HasRefs() const { return refs > 0; } + + bool InCache() const { return flags & IN_CACHE; } + + void SetInCache(bool in_cache) { + if (in_cache) { + flags |= IN_CACHE; + } else { + flags &= ~IN_CACHE; + } + } + + void Free() { + assert(refs == 0); + if (deleter) { + (*deleter)(key(), value); + } + delete[] reinterpret_cast(this); + } + + // Calculate the memory usage by metadata. + inline size_t CalcTotalCharge( + CacheMetadataChargePolicy metadata_charge_policy) { + size_t meta_charge = 0; + if (metadata_charge_policy == kFullChargeCacheMetadata) { +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + meta_charge += malloc_usable_size(static_cast(this)); +#else + // This is the size that is used when a new handle is created. + meta_charge += sizeof(LRUHandle) - 1 + key_length; +#endif + } + return charge + meta_charge; + } +}; + +// We provide our own simple hash table since it removes a whole bunch +// of porting hacks and is also faster than some of the built-in hash +// table implementations in some of the compiler/runtime combinations +// we have tested. E.g., readrandom speeds up by ~5% over the g++ +// 4.4.3's builtin hashtable. +class LRUHandleTable { + public: + // If the table uses more hash bits than `max_upper_hash_bits`, + // it will eat into the bits used for sharding, which are constant + // for a given LRUHandleTable. + explicit LRUHandleTable(int max_upper_hash_bits); + ~LRUHandleTable(); + + LRUHandle* Lookup(const Slice& key, uint32_t hash); + LRUHandle* Insert(LRUHandle* h); + LRUHandle* Remove(const Slice& key, uint32_t hash); + + template + void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) { + for (uint32_t i = index_begin; i < index_end; i++) { + LRUHandle* h = list_[i]; + while (h != nullptr) { + auto n = h->next_hash; + assert(h->InCache()); + func(h); + h = n; + } + } + } + + int GetLengthBits() const { return length_bits_; } + + private: + // Return a pointer to slot that points to a cache entry that + // matches key/hash. If there is no such cache entry, return a + // pointer to the trailing slot in the corresponding linked list. + LRUHandle** FindPointer(const Slice& key, uint32_t hash); + + void Resize(); + + // Number of hash bits (upper because lower bits used for sharding) + // used for table index. Length == 1 << length_bits_ + int length_bits_; + + // The table consists of an array of buckets where each bucket is + // a linked list of cache entries that hash into the bucket. + std::unique_ptr list_; + + // Number of elements currently in the table. 
+ uint32_t elems_; + + // Set from max_upper_hash_bits (see constructor). + const int max_length_bits_; +}; + +// A single shard of sharded cache. +class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { + public: + LRUCacheShard(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + int max_upper_hash_bits); + ~LRUCacheShard() override = default; + + // Separate from constructor so caller can easily make an array of LRUCache + // if current usage is more than new capacity, the function will attempt to + // free the needed space. + void SetCapacity(size_t capacity) override; + + // Set the flag to reject insertion if cache if full. + void SetStrictCapacityLimit(bool strict_capacity_limit) override; + + // Like Cache methods, but with an extra "hash" parameter. + Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, + Cache::DeleterFn deleter, Cache::Handle** handle, + Cache::Priority priority) override; + + Status Insert(const Slice& key, uint32_t hash, void* value, + const Cache::CacheItemHelper* helper, size_t charge, + Cache::Handle** handle, Cache::Priority priority) override { + return Insert(key, hash, value, charge, helper->del_cb, handle, priority); + } + + Cache::Handle* Lookup(const Slice& key, uint32_t hash, + const Cache::CacheItemHelper* /*helper*/, + const Cache::CreateCallback& /*create_cb*/, + Cache::Priority /*priority*/, bool /*wait*/, + Statistics* /*stats*/) override { + return Lookup(key, hash); + } + Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; + + bool Release(Cache::Handle* handle, bool /*useful*/, + bool erase_if_last_ref) override { + return Release(handle, erase_if_last_ref); + } + bool IsReady(Cache::Handle* /*handle*/) override { return true; } + void Wait(Cache::Handle* /*handle*/) override {} + + bool Ref(Cache::Handle* handle) override; + bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override; + void Erase(const Slice& key, uint32_t hash) override; + + size_t GetUsage() const override; + size_t GetPinnedUsage() const override; + + void ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) override; + + void EraseUnRefEntries() override; + + std::string GetPrintableOptions() const override; + + private: + friend class LRUCache; + // Insert an item into the hash table and, if handle is null, insert into + // the LRU list. Older items are evicted as necessary. If the cache is full + // and free_handle_on_fail is true, the item is deleted and handle is set to + // nullptr. + Status InsertItem(LRUHandle* item, Cache::Handle** handle, + bool free_handle_on_fail); + + void LRU_Remove(LRUHandle* e); + void LRU_Insert(LRUHandle* e); + + // Free some space following strict LRU policy until enough space + // to hold (usage_ + charge) is freed or the lru list is empty + // This function is not thread safe - it needs to be executed while + // holding the mutex_. + void EvictFromLRU(size_t charge, autovector* deleted); + + // Initialized before use. + size_t capacity_; + + // Whether to reject insertion if cache reaches its full capacity. + bool strict_capacity_limit_; + + // Dummy head of LRU list. + // lru.prev is newest entry, lru.next is oldest entry. + // LRU contains items which can be evicted, ie reference only by cache + LRUHandle lru_; + + // Pointer to head of low-pri pool in LRU list. 
+ LRUHandle* lru_low_pri_; + + // ------------^^^^^^^^^^^^^----------- + // Not frequently modified data members + // ------------------------------------ + // + // We separate data members that are updated frequently from the ones that + // are not frequently updated so that they don't share the same cache line + // which will lead into false cache sharing + // + // ------------------------------------ + // Frequently modified data members + // ------------vvvvvvvvvvvvv----------- + LRUHandleTable table_; + + // Memory size for entries residing in the cache. + size_t usage_; + + // Memory size for entries residing only in the LRU list. + size_t lru_usage_; + + // mutex_ protects the following state. + // We don't count mutex_ as the cache's internal state so semantically we + // don't mind mutex_ invoking the non-const actions. + mutable port::Mutex mutex_; +}; + +class LRUCache +#ifdef NDEBUG + final +#endif + : public ShardedCache { + public: + LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy = + kDontChargeCacheMetadata); + ~LRUCache() override; + const char* Name() const override { return "LRUCache"; } + CacheShard* GetShard(uint32_t shard) override; + const CacheShard* GetShard(uint32_t shard) const override; + void* Value(Handle* handle) override; + size_t GetCharge(Handle* handle) const override; + uint32_t GetHash(Handle* handle) const override; + DeleterFn GetDeleter(Handle* handle) const override; + void DisownData() override; + + private: + LRUCacheShard* shards_ = nullptr; + int num_shards_ = 0; +}; +} // namespace fast_lru_cache + +std::shared_ptr NewFastLRUCache( + size_t capacity, int num_shard_bits = -1, + bool strict_capacity_limit = false, + CacheMetadataChargePolicy metadata_charge_policy = + kDefaultCacheMetadataChargePolicy); + +} // namespace ROCKSDB_NAMESPACE diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 5765ffb50a..a28c2b5154 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -19,6 +19,7 @@ #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { +namespace lru_cache { LRUHandleTable::LRUHandleTable(int max_upper_hash_bits) : length_bits_(/* historical starting size*/ 4), @@ -759,6 +760,8 @@ void LRUCache::WaitAll(std::vector& handles) { } } +} // namespace lru_cache + std::shared_ptr NewLRUCache( size_t capacity, int num_shard_bits, bool strict_capacity_limit, double high_pri_pool_ratio, diff --git a/cache/lru_cache.h b/cache/lru_cache.h index 0dd83d8908..2da78eb67e 100644 --- a/cache/lru_cache.h +++ b/cache/lru_cache.h @@ -19,6 +19,7 @@ #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { +namespace lru_cache { // LRU cache implementation. This class is not thread-safe. 
@@ -479,4 +480,10 @@ class LRUCache
   std::shared_ptr<SecondaryCache> secondary_cache_;
 };
 
+}  // namespace lru_cache
+
+using LRUCache = lru_cache::LRUCache;
+using LRUHandle = lru_cache::LRUHandle;
+using LRUCacheShard = lru_cache::LRUCacheShard;
+
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc
index b6afb8003d..518105af2a 100644
--- a/db/db_block_cache_test.cc
+++ b/db/db_block_cache_test.cc
@@ -13,6 +13,7 @@
 #include "cache/cache_entry_roles.h"
 #include "cache/cache_key.h"
+#include "cache/fast_lru_cache.h"
 #include "cache/lru_cache.h"
 #include "db/column_family.h"
 #include "db/db_impl/db_impl.h"
@@ -934,7 +935,8 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) {
   int iterations_tested = 0;
   for (std::shared_ptr<Cache> base_cache :
        {NewLRUCache(capacity, num_shard_bits),
-        NewClockCache(capacity, num_shard_bits)}) {
+        NewClockCache(capacity, num_shard_bits),
+        NewFastLRUCache(capacity, num_shard_bits)}) {
     if (!base_cache) {
       // Skip clock cache when not supported
       continue;
@@ -1288,7 +1290,8 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
   int iterations_tested = 0;
   for (bool partition : {false, true}) {
     for (std::shared_ptr<Cache> cache :
-         {NewLRUCache(capacity), NewClockCache(capacity)}) {
+         {NewLRUCache(capacity), NewClockCache(capacity),
+          NewFastLRUCache(capacity)}) {
       if (!cache) {
         // Skip clock cache when not supported
         continue;
diff --git a/src.mk b/src.mk
index 6e39f00e63..747d18d2b6 100644
--- a/src.mk
+++ b/src.mk
@@ -5,6 +5,7 @@ LIB_SOURCES = \
   cache/cache_key.cc \
   cache/cache_reservation_manager.cc \
   cache/clock_cache.cc \
+  cache/fast_lru_cache.cc \
   cache/lru_cache.cc \
   cache/compressed_secondary_cache.cc \
   cache/sharded_cache.cc \
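
Usage sketch (not part of the patch): the snippet below shows how the new internal-only
NewFastLRUCache() entry point can be exercised through the generic rocksdb::Cache
interface, mirroring what the updated cache_test.cc does. The key, value, charge, and
deleter below are illustrative assumptions, and the code only builds inside the RocksDB
source tree because "cache/fast_lru_cache.h" is an internal header.

    // Minimal, illustrative exercise of NewFastLRUCache() via the Cache API.
    #include <cassert>
    #include <memory>
    #include <string>

    #include "cache/fast_lru_cache.h"  // internal header added by this patch
    #include "rocksdb/cache.h"
    #include "rocksdb/slice.h"

    using namespace ROCKSDB_NAMESPACE;

    int main() {
      // 1 MB capacity, default sharding, no strict capacity limit.
      std::shared_ptr<Cache> cache =
          NewFastLRUCache(1024 * 1024, /*num_shard_bits=*/-1,
                          /*strict_capacity_limit=*/false,
                          kDontChargeCacheMetadata);

      // Insert a heap-allocated value with a matching deleter.
      std::string* value = new std::string("hello");
      Status s = cache->Insert(
          "key1", value, /*charge=*/value->size(),
          [](const Slice& /*key*/, void* v) {
            delete static_cast<std::string*>(v);
          });
      assert(s.ok());

      // Lookup pins the entry until the handle is released.
      Cache::Handle* h = cache->Lookup("key1");
      assert(h != nullptr);
      assert(*static_cast<std::string*>(cache->Value(h)) == "hello");
      cache->Release(h);

      // Erase removes the entry once all external references are gone.
      cache->Erase("key1");
      return 0;
    }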