rocksdb/cache/sharded_cache.h

//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

#include <atomic>
#include <string>

#include "port/port.h"
#include "rocksdb/cache.h"
#include "util/hash.h"

namespace rocksdb {

// Single cache shard interface.
class CacheShard {
 public:
  CacheShard() = default;
  virtual ~CacheShard() = default;

  virtual Status Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value),
                        Cache::Handle** handle, Cache::Priority priority) = 0;
  virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0;
  virtual bool Ref(Cache::Handle* handle) = 0;
  virtual bool Release(Cache::Handle* handle, bool force_erase = false) = 0;
  virtual void Erase(const Slice& key, uint32_t hash) = 0;
  virtual void SetCapacity(size_t capacity) = 0;
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
  virtual size_t GetUsage() const = 0;
  virtual size_t GetPinnedUsage() const = 0;
  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                      bool thread_safe) = 0;
  virtual void EraseUnRefEntries() = 0;
  virtual std::string GetPrintableOptions() const { return ""; }
};

// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits
// shards will be created, with capacity split evenly to each of the shards.
// Keys are sharded by the highest num_shard_bits bits of hash value.
class ShardedCache : public Cache {
 public:
  ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
               std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);
  virtual ~ShardedCache() = default;
  virtual const char* Name() const override = 0;
  virtual CacheShard* GetShard(int shard) = 0;
  virtual const CacheShard* GetShard(int shard) const = 0;
  virtual void* Value(Handle* handle) override = 0;
  virtual size_t GetCharge(Handle* handle) const = 0;
  virtual uint32_t GetHash(Handle* handle) const = 0;
  virtual void DisownData() override = 0;

  virtual void SetCapacity(size_t capacity) override;
  virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;

  virtual Status Insert(const Slice& key, void* value, size_t charge,
                        void (*deleter)(const Slice& key, void* value),
                        Handle** handle, Priority priority) override;
  virtual Handle* Lookup(const Slice& key, Statistics* stats) override;
  virtual bool Ref(Handle* handle) override;
  virtual bool Release(Handle* handle, bool force_erase = false) override;
  virtual void Erase(const Slice& key) override;
  virtual uint64_t NewId() override;
  virtual size_t GetCapacity() const override;
  virtual bool HasStrictCapacityLimit() const override;
  virtual size_t GetUsage() const override;
  virtual size_t GetUsage(Handle* handle) const override;
  virtual size_t GetPinnedUsage() const override;
  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                      bool thread_safe) override;
  virtual void EraseUnRefEntries() override;
  virtual std::string GetPrintableOptions() const override;

  int GetNumShardBits() const { return num_shard_bits_; }

 private:
  static inline uint32_t HashSlice(const Slice& s) {
    return static_cast<uint32_t>(GetSliceNPHash64(s));
  }

  uint32_t Shard(uint32_t hash) {
    // Note, hash >> 32 yields hash in gcc, not the zero we expect!
    return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;
  }

  int num_shard_bits_;
  mutable port::Mutex capacity_mutex_;
  size_t capacity_;
  bool strict_capacity_limit_;
  std::atomic<uint64_t> last_id_;
};

extern int GetDefaultCacheShardBits(size_t capacity);

}  // namespace rocksdb
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.`
Change RocksDB License Summary: Closes https://github.com/facebook/rocksdb/pull/2589 Differential Revision: D5431502 Pulled By: siying fbshipit-source-id: 8ebf8c87883daa9daa54b2303d11ce01ab1f6f75 2017-07-15 23:03:42 +00:00			`// This source code is licensed under both the GPLv2 (found in the`
			`// COPYING file in the root directory) and Apache 2.0 License`
			`// (found in the LICENSE.Apache file in the root directory).`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`//`
			`// Copyright (c) 2011 The LevelDB Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style license that can be`
			`// found in the LICENSE file. See the AUTHORS file for names of contributors.`

			`#pragma once`

			`#include <atomic>`
Print cache options to info log Summary: Improve cache options logging to info log. Also print the value of cache_index_and_filter_blocks_with_high_priority. Closes https://github.com/facebook/rocksdb/pull/1709 Differential Revision: D4358776 Pulled By: yiwu-arbug fbshipit-source-id: 8f030a0 2016-12-22 22:44:01 +00:00			`#include <string>`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00
			`#include "port/port.h"`
			`#include "rocksdb/cache.h"`
			`#include "util/hash.h"`

			`namespace rocksdb {`

			`// Single cache shard interface.`
			`class CacheShard {`
			`public:`
			`CacheShard() = default;`
			`virtual ~CacheShard() = default;`

			`virtual Status Insert(const Slice& key, uint32_t hash, void* value,`
			`size_t charge,`
			`void (deleter)(const Slice& key, void value),`
LRU cache mid-point insertion Summary: Add mid-point insertion functionality to LRU cache. Caller of `Cache::Insert()` can set an additional parameter to make a cache entry have higher priority. The LRU cache will reserve at most `capacity * high_pri_pool_pct` bytes for high-pri cache entries. If `high_pri_pool_pct` is zero, the cache degenerates to normal LRU cache. Context: If we are to put index and filter blocks into RocksDB block cache, index/filter block can be swap out too early. We want to add an option to RocksDB to reserve some capacity in block cache just for index/filter blocks, to mitigate the issue. In later diffs I'll update block based table reader to use the interface to cache index/filter blocks at high priority, and expose the option to `DBOptions` and make it dynamic changeable. Test Plan: unit test. Reviewers: IslamAbdelRahman, sdong, lightmark Reviewed By: lightmark Subscribers: andrewkr, dhruba, march, leveldb Differential Revision: https://reviews.facebook.net/D61977 2016-08-19 23:43:31 +00:00			`Cache::Handle** handle, Cache::Priority priority) = 0;`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0;`
Allow incrementing refcount on cache handles Summary: Previously the only way to increment a handle's refcount was to invoke Lookup(), which (1) did hash table lookup to get cache handle, (2) incremented that handle's refcount. For a future DeleteRange optimization, I added a function, Ref(), for when the caller already has a cache handle and only needs to do (2). Closes https://github.com/facebook/rocksdb/pull/1761 Differential Revision: D4397114 Pulled By: ajkr fbshipit-source-id: 9addbe5 2017-01-11 00:48:23 +00:00			`virtual bool Ref(Cache::Handle* handle) = 0;`
Add erase option to release cache Summary: This is useful when we put the entries in the block cache for accounting purposes and do not expect it to be used after it is released. If the cache does not erase the item in such cases not only the performance of cache is negatively affected but the item's destructor not being called at the time of release might violate the assumptions about the lifetime of the object. The new change adds a force_erase option to the Release method and returns a boolean to indicate whehter the item is successfully deleted. Closes https://github.com/facebook/rocksdb/pull/2180 Differential Revision: D4916032 Pulled By: maysamyabandeh fbshipit-source-id: 94409a346069923cac9de8e57adc313b4ed46f28 2017-04-24 18:21:47 +00:00			`virtual bool Release(Cache::Handle* handle, bool force_erase = false) = 0;`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`virtual void Erase(const Slice& key, uint32_t hash) = 0;`
			`virtual void SetCapacity(size_t capacity) = 0;`
			`virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;`
			`virtual size_t GetUsage() const = 0;`
			`virtual size_t GetPinnedUsage() const = 0;`
			`virtual void ApplyToAllCacheEntries(void (callback)(void, size_t),`
			`bool thread_safe) = 0;`
			`virtual void EraseUnRefEntries() = 0;`
Print cache options to info log Summary: Improve cache options logging to info log. Also print the value of cache_index_and_filter_blocks_with_high_priority. Closes https://github.com/facebook/rocksdb/pull/1709 Differential Revision: D4358776 Pulled By: yiwu-arbug fbshipit-source-id: 8f030a0 2016-12-22 22:44:01 +00:00			`virtual std::string GetPrintableOptions() const { return ""; }`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`};`

			`// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits`
			`// shards will be created, with capacity split evenly to each of the shards.`
			`// Keys are sharded by the highest num_shard_bits bits of hash value.`
			`class ShardedCache : public Cache {`
			`public:`
Revert "Move MemoryAllocator option from Cache to BlockBasedTableOpti… (#4697) Summary: …ons (#4676)" This reverts commit b32d087dbb3b9d9f2c9597caa650d0ca9d2e2d7f. `MemoryAllocator` needs to be with `Cache`, since cache entry can outlive DB and block based table. The cache needs to hold reference to memory allocator when deleting cache entry. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4697 Differential Revision: D13133490 Pulled By: yiwu-arbug fbshipit-source-id: 8ef7e8a51263bfd929f892fd062665ff4ce9ce5a 2018-11-21 19:28:02 +00:00			`ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,`
			`std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`virtual ~ShardedCache() = default;`
add Name() to Cache Summary: preparation for detecting Cache type. If SimCache, we then may trigger some command like "setSimCapacity()" with setOptions() Test Plan: make all check Reviewers: yiwu, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D61953 2016-08-12 21:16:57 +00:00			`virtual const char* Name() const override = 0;`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`virtual CacheShard* GetShard(int shard) = 0;`
			`virtual const CacheShard* GetShard(int shard) const = 0;`
			`virtual void* Value(Handle* handle) override = 0;`
			`virtual size_t GetCharge(Handle* handle) const = 0;`
			`virtual uint32_t GetHash(Handle* handle) const = 0;`
			`virtual void DisownData() override = 0;`

			`virtual void SetCapacity(size_t capacity) override;`
			`virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;`

			`virtual Status Insert(const Slice& key, void* value, size_t charge,`
			`void (deleter)(const Slice& key, void value),`
LRU cache mid-point insertion Summary: Add mid-point insertion functionality to LRU cache. Caller of `Cache::Insert()` can set an additional parameter to make a cache entry have higher priority. The LRU cache will reserve at most `capacity * high_pri_pool_pct` bytes for high-pri cache entries. If `high_pri_pool_pct` is zero, the cache degenerates to normal LRU cache. Context: If we are to put index and filter blocks into RocksDB block cache, index/filter block can be swap out too early. We want to add an option to RocksDB to reserve some capacity in block cache just for index/filter blocks, to mitigate the issue. In later diffs I'll update block based table reader to use the interface to cache index/filter blocks at high priority, and expose the option to `DBOptions` and make it dynamic changeable. Test Plan: unit test. Reviewers: IslamAbdelRahman, sdong, lightmark Reviewed By: lightmark Subscribers: andrewkr, dhruba, march, leveldb Differential Revision: https://reviews.facebook.net/D61977 2016-08-19 23:43:31 +00:00			`Handle** handle, Priority priority) override;`
add stats to Cache::LookUp() Summary: basically for SimCache stats. I find most times it is hard to pass Statistics* to SimCache constructor. Test Plan: make all check Reviewers: andrewkr, sdong, yiwu Reviewed By: yiwu Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D62193 2016-09-01 20:50:39 +00:00			`virtual Handle* Lookup(const Slice& key, Statistics* stats) override;`
Allow incrementing refcount on cache handles Summary: Previously the only way to increment a handle's refcount was to invoke Lookup(), which (1) did hash table lookup to get cache handle, (2) incremented that handle's refcount. For a future DeleteRange optimization, I added a function, Ref(), for when the caller already has a cache handle and only needs to do (2). Closes https://github.com/facebook/rocksdb/pull/1761 Differential Revision: D4397114 Pulled By: ajkr fbshipit-source-id: 9addbe5 2017-01-11 00:48:23 +00:00			`virtual bool Ref(Handle* handle) override;`
Add erase option to release cache Summary: This is useful when we put the entries in the block cache for accounting purposes and do not expect it to be used after it is released. If the cache does not erase the item in such cases not only the performance of cache is negatively affected but the item's destructor not being called at the time of release might violate the assumptions about the lifetime of the object. The new change adds a force_erase option to the Release method and returns a boolean to indicate whehter the item is successfully deleted. Closes https://github.com/facebook/rocksdb/pull/2180 Differential Revision: D4916032 Pulled By: maysamyabandeh fbshipit-source-id: 94409a346069923cac9de8e57adc313b4ed46f28 2017-04-24 18:21:47 +00:00			`virtual bool Release(Handle* handle, bool force_erase = false) override;`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`virtual void Erase(const Slice& key) override;`
			`virtual uint64_t NewId() override;`
			`virtual size_t GetCapacity() const override;`
			`virtual bool HasStrictCapacityLimit() const override;`
			`virtual size_t GetUsage() const override;`
			`virtual size_t GetUsage(Handle* handle) const override;`
			`virtual size_t GetPinnedUsage() const override;`
			`virtual void ApplyToAllCacheEntries(void (callback)(void, size_t),`
			`bool thread_safe) override;`
			`virtual void EraseUnRefEntries() override;`
Print cache options to info log Summary: Improve cache options logging to info log. Also print the value of cache_index_and_filter_blocks_with_high_priority. Closes https://github.com/facebook/rocksdb/pull/1709 Differential Revision: D4358776 Pulled By: yiwu-arbug fbshipit-source-id: 8f030a0 2016-12-22 22:44:01 +00:00			`virtual std::string GetPrintableOptions() const override;`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00
NewLRUCache() to pick number of shard bits based on capacity if not given Summary: If the users use the NewLRUCache() without passing in the number of shard bits, instead of using hard-coded 6, we'll determine it based on capacity. Closes https://github.com/facebook/rocksdb/pull/1584 Differential Revision: D4242517 Pulled By: siying fbshipit-source-id: 86b0f18 2017-01-27 14:35:41 +00:00			`int GetNumShardBits() const { return num_shard_bits_; }`

Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`private:`
			`static inline uint32_t HashSlice(const Slice& s) {`
Consolidate hash function used for non-persistent data in a new function (#5155) Summary: Create new function NPHash64() and GetSliceNPHash64(), which are currently implemented using murmurhash. Replace the current direct call of murmurhash() to use the new functions if the hash results are not used in on-disk format. This will make it easier to try out or switch to alternative functions in the uses where data format compatibility doesn't need to be considered. This part shouldn't have any performance impact. Also, the sharded cache hash function is changed to the new format, because it falls into this categoery. It doesn't show visible performance impact in db_bench results. CPU showed by perf is increased from about 0.2% to 0.4% in an extreme benchmark setting (4KB blocks, no-compression, everything cached in block cache). We've known that the current hash function used, our own Hash() has serious hash quality problem. It can generate a lots of conflicts with similar input. In this use case, it means extra lock contention for reads from the same file. This slight CPU regression is worthy to me to counter the potential bad performance with hot keys. And hopefully this will get further improved in the future with a better hash function. cache_test's condition is relaxed a little bit to. The new hash is slightly more skewed in this use case, but I manually checked the data and see the hash results are still in a reasonable range. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5155 Differential Revision: D14834821 Pulled By: siying fbshipit-source-id: ec9a2c0a2f8ae4b54d08b13a5c2e9cc97aa80cb5 2019-04-08 20:24:29 +00:00			`return static_cast<uint32_t>(GetSliceNPHash64(s));`
Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`}`

			`uint32_t Shard(uint32_t hash) {`
			`// Note, hash >> 32 yields hash in gcc, not the zero we expect!`
			`return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;`
			`}`

			`int num_shard_bits_;`
			`mutable port::Mutex capacity_mutex_;`
			`size_t capacity_;`
			`bool strict_capacity_limit_;`
			`std::atomic<uint64_t> last_id_;`
			`};`

NewLRUCache() to pick number of shard bits based on capacity if not given Summary: If the users use the NewLRUCache() without passing in the number of shard bits, instead of using hard-coded 6, we'll determine it based on capacity. Closes https://github.com/facebook/rocksdb/pull/1584 Differential Revision: D4242517 Pulled By: siying fbshipit-source-id: 86b0f18 2017-01-27 14:35:41 +00:00			`extern int GetDefaultCacheShardBits(size_t capacity);`

Refactor cache.cc Summary: Refactor cache.cc so that I can plugin clock cache (D55581). Mainly move `ShardedCache` to separate file, move `LRUHandle` back to cache.cc and rename it lru_cache.cc. Test Plan: make check -j64 Reviewers: lightmark, sdong Reviewed By: sdong Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D59655 2016-07-15 17:41:36 +00:00			`} // namespace rocksdb`