mirror of https://github.com/facebook/rocksdb.git
547 lines
19 KiB
C++
547 lines
19 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
#pragma once
|
|
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
#include "cache/sharded_cache.h"
|
|
#include "port/lang.h"
|
|
#include "port/malloc.h"
|
|
#include "port/port.h"
|
|
#include "rocksdb/secondary_cache.h"
|
|
#include "util/autovector.h"
|
|
#include "util/distributed_mutex.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
namespace lru_cache {
|
|
|
|
// LRU cache implementation. This class is not thread-safe.
|
|
|
|
// An entry is a variable length heap-allocated structure.
|
|
// Entries are referenced by cache and/or by any external entity.
|
|
// The cache keeps all its entries in a hash table. Some elements
|
|
// are also stored on LRU list.
|
|
//
|
|
// LRUHandle can be in these states:
|
|
// 1. Referenced externally AND in hash table.
|
|
// In that case the entry is *not* in the LRU list
|
|
// (refs >= 1 && in_cache == true)
|
|
// 2. Not referenced externally AND in hash table.
|
|
// In that case the entry is in the LRU list and can be freed.
|
|
// (refs == 0 && in_cache == true)
|
|
// 3. Referenced externally AND not in hash table.
|
|
// In that case the entry is not in the LRU list and not in hash table.
|
|
// The entry must be freed if refs becomes 0 in this state.
|
|
// (refs >= 1 && in_cache == false)
|
|
// If you call LRUCacheShard::Release enough times on an entry in state 1, it
|
|
// will go into state 2. To move from state 1 to state 3, either call
|
|
// LRUCacheShard::Erase or LRUCacheShard::Insert with the same key (but
|
|
// possibly different value). To move from state 2 to state 1, use
|
|
// LRUCacheShard::Lookup.
|
|
// While refs > 0, public properties like value and deleter must not change.
|
|
|
|
struct LRUHandle {
|
|
void* value;
|
|
union Info {
|
|
Info() {}
|
|
~Info() {}
|
|
Cache::DeleterFn deleter;
|
|
const Cache::CacheItemHelper* helper;
|
|
} info_;
|
|
// An entry is not added to the LRUHandleTable until the secondary cache
|
|
// lookup is complete, so its safe to have this union.
|
|
union {
|
|
LRUHandle* next_hash;
|
|
SecondaryCacheResultHandle* sec_handle;
|
|
};
|
|
LRUHandle* next;
|
|
LRUHandle* prev;
|
|
size_t total_charge; // TODO(opt): Only allow uint32_t?
|
|
size_t key_length;
|
|
// The hash of key(). Used for fast sharding and comparisons.
|
|
uint32_t hash;
|
|
// The number of external refs to this entry. The cache itself is not counted.
|
|
uint32_t refs;
|
|
|
|
// Mutable flags - access controlled by mutex
|
|
// The m_ and M_ prefixes (and im_ and IM_ later) are to hopefully avoid
|
|
// checking an M_ flag on im_flags or an IM_ flag on m_flags.
|
|
uint8_t m_flags;
|
|
enum MFlags : uint8_t {
|
|
// Whether this entry is referenced by the hash table.
|
|
M_IN_CACHE = (1 << 0),
|
|
// Whether this entry has had any lookups (hits).
|
|
M_HAS_HIT = (1 << 1),
|
|
// Whether this entry is in high-pri pool.
|
|
M_IN_HIGH_PRI_POOL = (1 << 2),
|
|
// Whether this entry is in low-pri pool.
|
|
M_IN_LOW_PRI_POOL = (1 << 3),
|
|
};
|
|
|
|
// "Immutable" flags - only set in single-threaded context and then
|
|
// can be accessed without mutex
|
|
uint8_t im_flags;
|
|
enum ImFlags : uint8_t {
|
|
// Whether this entry is high priority entry.
|
|
IM_IS_HIGH_PRI = (1 << 0),
|
|
// Whether this entry is low priority entry.
|
|
IM_IS_LOW_PRI = (1 << 1),
|
|
// Can this be inserted into the secondary cache.
|
|
IM_IS_SECONDARY_CACHE_COMPATIBLE = (1 << 2),
|
|
// Is the handle still being read from a lower tier.
|
|
IM_IS_PENDING = (1 << 3),
|
|
// Whether this handle is still in a lower tier
|
|
IM_IS_IN_SECONDARY_CACHE = (1 << 4),
|
|
// Marks result handles that should not be inserted into cache
|
|
IM_IS_STANDALONE = (1 << 5),
|
|
};
|
|
|
|
// Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
|
|
char key_data[1];
|
|
|
|
Slice key() const { return Slice(key_data, key_length); }
|
|
|
|
// For HandleImpl concept
|
|
uint32_t GetHash() const { return hash; }
|
|
|
|
// Increase the reference count by 1.
|
|
void Ref() { refs++; }
|
|
|
|
// Just reduce the reference count by 1. Return true if it was last reference.
|
|
bool Unref() {
|
|
assert(refs > 0);
|
|
refs--;
|
|
return refs == 0;
|
|
}
|
|
|
|
// Return true if there are external refs, false otherwise.
|
|
bool HasRefs() const { return refs > 0; }
|
|
|
|
bool InCache() const { return m_flags & M_IN_CACHE; }
|
|
bool IsHighPri() const { return im_flags & IM_IS_HIGH_PRI; }
|
|
bool InHighPriPool() const { return m_flags & M_IN_HIGH_PRI_POOL; }
|
|
bool IsLowPri() const { return im_flags & IM_IS_LOW_PRI; }
|
|
bool InLowPriPool() const { return m_flags & M_IN_LOW_PRI_POOL; }
|
|
bool HasHit() const { return m_flags & M_HAS_HIT; }
|
|
bool IsSecondaryCacheCompatible() const {
|
|
return im_flags & IM_IS_SECONDARY_CACHE_COMPATIBLE;
|
|
}
|
|
bool IsPending() const { return im_flags & IM_IS_PENDING; }
|
|
bool IsInSecondaryCache() const {
|
|
return im_flags & IM_IS_IN_SECONDARY_CACHE;
|
|
}
|
|
bool IsStandalone() const { return im_flags & IM_IS_STANDALONE; }
|
|
|
|
void SetInCache(bool in_cache) {
|
|
if (in_cache) {
|
|
m_flags |= M_IN_CACHE;
|
|
} else {
|
|
m_flags &= ~M_IN_CACHE;
|
|
}
|
|
}
|
|
|
|
void SetPriority(Cache::Priority priority) {
|
|
if (priority == Cache::Priority::HIGH) {
|
|
im_flags |= IM_IS_HIGH_PRI;
|
|
im_flags &= ~IM_IS_LOW_PRI;
|
|
} else if (priority == Cache::Priority::LOW) {
|
|
im_flags &= ~IM_IS_HIGH_PRI;
|
|
im_flags |= IM_IS_LOW_PRI;
|
|
} else {
|
|
im_flags &= ~IM_IS_HIGH_PRI;
|
|
im_flags &= ~IM_IS_LOW_PRI;
|
|
}
|
|
}
|
|
|
|
void SetInHighPriPool(bool in_high_pri_pool) {
|
|
if (in_high_pri_pool) {
|
|
m_flags |= M_IN_HIGH_PRI_POOL;
|
|
} else {
|
|
m_flags &= ~M_IN_HIGH_PRI_POOL;
|
|
}
|
|
}
|
|
|
|
void SetInLowPriPool(bool in_low_pri_pool) {
|
|
if (in_low_pri_pool) {
|
|
m_flags |= M_IN_LOW_PRI_POOL;
|
|
} else {
|
|
m_flags &= ~M_IN_LOW_PRI_POOL;
|
|
}
|
|
}
|
|
|
|
void SetHit() { m_flags |= M_HAS_HIT; }
|
|
|
|
void SetSecondaryCacheCompatible(bool compat) {
|
|
if (compat) {
|
|
im_flags |= IM_IS_SECONDARY_CACHE_COMPATIBLE;
|
|
} else {
|
|
im_flags &= ~IM_IS_SECONDARY_CACHE_COMPATIBLE;
|
|
}
|
|
}
|
|
|
|
void SetIsPending(bool pending) {
|
|
if (pending) {
|
|
im_flags |= IM_IS_PENDING;
|
|
} else {
|
|
im_flags &= ~IM_IS_PENDING;
|
|
}
|
|
}
|
|
|
|
void SetIsInSecondaryCache(bool is_in_secondary_cache) {
|
|
if (is_in_secondary_cache) {
|
|
im_flags |= IM_IS_IN_SECONDARY_CACHE;
|
|
} else {
|
|
im_flags &= ~IM_IS_IN_SECONDARY_CACHE;
|
|
}
|
|
}
|
|
|
|
void SetIsStandalone(bool is_standalone) {
|
|
if (is_standalone) {
|
|
im_flags |= IM_IS_STANDALONE;
|
|
} else {
|
|
im_flags &= ~IM_IS_STANDALONE;
|
|
}
|
|
}
|
|
|
|
void Free() {
|
|
assert(refs == 0);
|
|
|
|
if (!IsSecondaryCacheCompatible() && info_.deleter) {
|
|
(*info_.deleter)(key(), value);
|
|
} else if (IsSecondaryCacheCompatible()) {
|
|
if (IsPending()) {
|
|
assert(sec_handle != nullptr);
|
|
SecondaryCacheResultHandle* tmp_sec_handle = sec_handle;
|
|
tmp_sec_handle->Wait();
|
|
value = tmp_sec_handle->Value();
|
|
delete tmp_sec_handle;
|
|
}
|
|
if (value) {
|
|
(*info_.helper->del_cb)(key(), value);
|
|
}
|
|
}
|
|
|
|
free(this);
|
|
}
|
|
|
|
inline size_t CalcuMetaCharge(
|
|
CacheMetadataChargePolicy metadata_charge_policy) const {
|
|
if (metadata_charge_policy != kFullChargeCacheMetadata) {
|
|
return 0;
|
|
} else {
|
|
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
|
|
return malloc_usable_size(
|
|
const_cast<void*>(static_cast<const void*>(this)));
|
|
#else
|
|
// This is the size that is used when a new handle is created.
|
|
return sizeof(LRUHandle) - 1 + key_length;
|
|
#endif
|
|
}
|
|
}
|
|
|
|
// Calculate the memory usage by metadata.
|
|
inline void CalcTotalCharge(
|
|
size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
|
|
total_charge = charge + CalcuMetaCharge(metadata_charge_policy);
|
|
}
|
|
|
|
inline size_t GetCharge(
|
|
CacheMetadataChargePolicy metadata_charge_policy) const {
|
|
size_t meta_charge = CalcuMetaCharge(metadata_charge_policy);
|
|
assert(total_charge >= meta_charge);
|
|
return total_charge - meta_charge;
|
|
}
|
|
};
|
|
|
|
// We provide our own simple hash table since it removes a whole bunch
|
|
// of porting hacks and is also faster than some of the built-in hash
|
|
// table implementations in some of the compiler/runtime combinations
|
|
// we have tested. E.g., readrandom speeds up by ~5% over the g++
|
|
// 4.4.3's builtin hashtable.
|
|
class LRUHandleTable {
|
|
public:
|
|
explicit LRUHandleTable(int max_upper_hash_bits);
|
|
~LRUHandleTable();
|
|
|
|
LRUHandle* Lookup(const Slice& key, uint32_t hash);
|
|
LRUHandle* Insert(LRUHandle* h);
|
|
LRUHandle* Remove(const Slice& key, uint32_t hash);
|
|
|
|
template <typename T>
|
|
void ApplyToEntriesRange(T func, size_t index_begin, size_t index_end) {
|
|
for (size_t i = index_begin; i < index_end; i++) {
|
|
LRUHandle* h = list_[i];
|
|
while (h != nullptr) {
|
|
auto n = h->next_hash;
|
|
assert(h->InCache());
|
|
func(h);
|
|
h = n;
|
|
}
|
|
}
|
|
}
|
|
|
|
int GetLengthBits() const { return length_bits_; }
|
|
|
|
size_t GetOccupancyCount() const { return elems_; }
|
|
|
|
private:
|
|
// Return a pointer to slot that points to a cache entry that
|
|
// matches key/hash. If there is no such cache entry, return a
|
|
// pointer to the trailing slot in the corresponding linked list.
|
|
LRUHandle** FindPointer(const Slice& key, uint32_t hash);
|
|
|
|
void Resize();
|
|
|
|
// Number of hash bits (upper because lower bits used for sharding)
|
|
// used for table index. Length == 1 << length_bits_
|
|
int length_bits_;
|
|
|
|
// The table consists of an array of buckets where each bucket is
|
|
// a linked list of cache entries that hash into the bucket.
|
|
std::unique_ptr<LRUHandle*[]> list_;
|
|
|
|
// Number of elements currently in the table.
|
|
uint32_t elems_;
|
|
|
|
// Set from max_upper_hash_bits (see constructor).
|
|
const int max_length_bits_;
|
|
};
|
|
|
|
// A single shard of sharded cache.
|
|
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShardBase {
|
|
public:
|
|
LRUCacheShard(size_t capacity, bool strict_capacity_limit,
|
|
double high_pri_pool_ratio, double low_pri_pool_ratio,
|
|
bool use_adaptive_mutex,
|
|
CacheMetadataChargePolicy metadata_charge_policy,
|
|
int max_upper_hash_bits, SecondaryCache* secondary_cache);
|
|
|
|
public: // Type definitions expected as parameter to ShardedCache
|
|
using HandleImpl = LRUHandle;
|
|
using HashVal = uint32_t;
|
|
using HashCref = uint32_t;
|
|
|
|
public: // Function definitions expected as parameter to ShardedCache
|
|
static inline HashVal ComputeHash(const Slice& key) {
|
|
return Lower32of64(GetSliceNPHash64(key));
|
|
}
|
|
|
|
// Separate from constructor so caller can easily make an array of LRUCache
|
|
// if current usage is more than new capacity, the function will attempt to
|
|
// free the needed space.
|
|
void SetCapacity(size_t capacity);
|
|
|
|
// Set the flag to reject insertion if cache if full.
|
|
void SetStrictCapacityLimit(bool strict_capacity_limit);
|
|
|
|
// Set percentage of capacity reserved for high-pri cache entries.
|
|
void SetHighPriorityPoolRatio(double high_pri_pool_ratio);
|
|
|
|
// Set percentage of capacity reserved for low-pri cache entries.
|
|
void SetLowPriorityPoolRatio(double low_pri_pool_ratio);
|
|
|
|
// Like Cache methods, but with an extra "hash" parameter.
|
|
inline Status Insert(const Slice& key, uint32_t hash, void* value,
|
|
size_t charge, Cache::DeleterFn deleter,
|
|
LRUHandle** handle, Cache::Priority priority) {
|
|
return Insert(key, hash, value, charge, deleter, nullptr, handle, priority);
|
|
}
|
|
inline Status Insert(const Slice& key, uint32_t hash, void* value,
|
|
const Cache::CacheItemHelper* helper, size_t charge,
|
|
LRUHandle** handle, Cache::Priority priority) {
|
|
assert(helper);
|
|
return Insert(key, hash, value, charge, nullptr, helper, handle, priority);
|
|
}
|
|
// If helper_cb is null, the values of the following arguments don't matter.
|
|
LRUHandle* Lookup(const Slice& key, uint32_t hash,
|
|
const Cache::CacheItemHelper* helper,
|
|
const Cache::CreateCallback& create_cb,
|
|
Cache::Priority priority, bool wait, Statistics* stats);
|
|
inline LRUHandle* Lookup(const Slice& key, uint32_t hash) {
|
|
return Lookup(key, hash, nullptr, nullptr, Cache::Priority::LOW, true,
|
|
nullptr);
|
|
}
|
|
bool Release(LRUHandle* handle, bool useful, bool erase_if_last_ref);
|
|
bool IsReady(LRUHandle* /*handle*/);
|
|
void Wait(LRUHandle* /*handle*/) {}
|
|
bool Ref(LRUHandle* handle);
|
|
void Erase(const Slice& key, uint32_t hash);
|
|
|
|
// Although in some platforms the update of size_t is atomic, to make sure
|
|
// GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
|
|
// protect them with mutex_.
|
|
|
|
size_t GetUsage() const;
|
|
size_t GetPinnedUsage() const;
|
|
size_t GetOccupancyCount() const;
|
|
size_t GetTableAddressCount() const;
|
|
|
|
void ApplyToSomeEntries(
|
|
const std::function<void(const Slice& key, void* value, size_t charge,
|
|
DeleterFn deleter)>& callback,
|
|
size_t average_entries_per_lock, size_t* state);
|
|
|
|
void EraseUnRefEntries();
|
|
|
|
public: // other function definitions
|
|
void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri,
|
|
LRUHandle** lru_bottom_pri);
|
|
|
|
// Retrieves number of elements in LRU, for unit test purpose only.
|
|
// Not threadsafe.
|
|
size_t TEST_GetLRUSize();
|
|
|
|
// Retrieves high pri pool ratio
|
|
double GetHighPriPoolRatio();
|
|
|
|
// Retrieves low pri pool ratio
|
|
double GetLowPriPoolRatio();
|
|
|
|
void AppendPrintableOptions(std::string& /*str*/) const;
|
|
|
|
private:
|
|
friend class LRUCache;
|
|
// Insert an item into the hash table and, if handle is null, insert into
|
|
// the LRU list. Older items are evicted as necessary. If the cache is full
|
|
// and free_handle_on_fail is true, the item is deleted and handle is set to
|
|
// nullptr.
|
|
Status InsertItem(LRUHandle* item, LRUHandle** handle,
|
|
bool free_handle_on_fail);
|
|
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
|
|
DeleterFn deleter, const Cache::CacheItemHelper* helper,
|
|
LRUHandle** handle, Cache::Priority priority);
|
|
// Promote an item looked up from the secondary cache to the LRU cache.
|
|
// The item may be still in the secondary cache.
|
|
// It is only inserted into the hash table and not the LRU list, and only
|
|
// if the cache is not at full capacity, as is the case during Insert. The
|
|
// caller should hold a reference on the LRUHandle. When the caller releases
|
|
// the last reference, the item is added to the LRU list.
|
|
// The item is promoted to the high pri or low pri pool as specified by the
|
|
// caller in Lookup.
|
|
void Promote(LRUHandle* e);
|
|
void LRU_Remove(LRUHandle* e);
|
|
void LRU_Insert(LRUHandle* e);
|
|
|
|
// Overflow the last entry in high-pri pool to low-pri pool until size of
|
|
// high-pri pool is no larger than the size specify by high_pri_pool_pct.
|
|
void MaintainPoolSize();
|
|
|
|
// Free some space following strict LRU policy until enough space
|
|
// to hold (usage_ + charge) is freed or the lru list is empty
|
|
// This function is not thread safe - it needs to be executed while
|
|
// holding the mutex_.
|
|
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
|
|
|
|
// Try to insert the evicted handles into the secondary cache.
|
|
void TryInsertIntoSecondaryCache(autovector<LRUHandle*> evicted_handles);
|
|
|
|
// Initialized before use.
|
|
size_t capacity_;
|
|
|
|
// Memory size for entries in high-pri pool.
|
|
size_t high_pri_pool_usage_;
|
|
|
|
// Memory size for entries in low-pri pool.
|
|
size_t low_pri_pool_usage_;
|
|
|
|
// Whether to reject insertion if cache reaches its full capacity.
|
|
bool strict_capacity_limit_;
|
|
|
|
// Ratio of capacity reserved for high priority cache entries.
|
|
double high_pri_pool_ratio_;
|
|
|
|
// High-pri pool size, equals to capacity * high_pri_pool_ratio.
|
|
// Remember the value to avoid recomputing each time.
|
|
double high_pri_pool_capacity_;
|
|
|
|
// Ratio of capacity reserved for low priority cache entries.
|
|
double low_pri_pool_ratio_;
|
|
|
|
// Low-pri pool size, equals to capacity * low_pri_pool_ratio.
|
|
// Remember the value to avoid recomputing each time.
|
|
double low_pri_pool_capacity_;
|
|
|
|
// Dummy head of LRU list.
|
|
// lru.prev is newest entry, lru.next is oldest entry.
|
|
// LRU contains items which can be evicted, ie reference only by cache
|
|
LRUHandle lru_;
|
|
|
|
// Pointer to head of low-pri pool in LRU list.
|
|
LRUHandle* lru_low_pri_;
|
|
|
|
// Pointer to head of bottom-pri pool in LRU list.
|
|
LRUHandle* lru_bottom_pri_;
|
|
|
|
// ------------^^^^^^^^^^^^^-----------
|
|
// Not frequently modified data members
|
|
// ------------------------------------
|
|
//
|
|
// We separate data members that are updated frequently from the ones that
|
|
// are not frequently updated so that they don't share the same cache line
|
|
// which will lead into false cache sharing
|
|
//
|
|
// ------------------------------------
|
|
// Frequently modified data members
|
|
// ------------vvvvvvvvvvvvv-----------
|
|
LRUHandleTable table_;
|
|
|
|
// Memory size for entries residing in the cache.
|
|
size_t usage_;
|
|
|
|
// Memory size for entries residing only in the LRU list.
|
|
size_t lru_usage_;
|
|
|
|
// mutex_ protects the following state.
|
|
// We don't count mutex_ as the cache's internal state so semantically we
|
|
// don't mind mutex_ invoking the non-const actions.
|
|
mutable DMutex mutex_;
|
|
|
|
// Owned by LRUCache
|
|
SecondaryCache* secondary_cache_;
|
|
};
|
|
|
|
class LRUCache
|
|
#ifdef NDEBUG
|
|
final
|
|
#endif
|
|
: public ShardedCache<LRUCacheShard> {
|
|
public:
|
|
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
|
|
double high_pri_pool_ratio, double low_pri_pool_ratio,
|
|
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
|
|
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
|
|
CacheMetadataChargePolicy metadata_charge_policy =
|
|
kDontChargeCacheMetadata,
|
|
std::shared_ptr<SecondaryCache> secondary_cache = nullptr);
|
|
const char* Name() const override { return "LRUCache"; }
|
|
void* Value(Handle* handle) override;
|
|
size_t GetCharge(Handle* handle) const override;
|
|
DeleterFn GetDeleter(Handle* handle) const override;
|
|
void WaitAll(std::vector<Handle*>& handles) override;
|
|
|
|
// Retrieves number of elements in LRU, for unit test purpose only.
|
|
size_t TEST_GetLRUSize();
|
|
// Retrieves high pri pool ratio.
|
|
double GetHighPriPoolRatio();
|
|
|
|
void AppendPrintableOptions(std::string& str) const override;
|
|
|
|
private:
|
|
std::shared_ptr<SecondaryCache> secondary_cache_;
|
|
};
|
|
|
|
} // namespace lru_cache
|
|
|
|
using LRUCache = lru_cache::LRUCache;
|
|
using LRUHandle = lru_cache::LRUHandle;
|
|
using LRUCacheShard = lru_cache::LRUCacheShard;
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|