Change default block cache from 8MB to 32MB (#11350)

Summary: ... which increases default number of shards from 16 to 64. Although the default block cache size is only recommended for applications where RocksDB is not performance-critical, under stress conditions, block cache mutex contention could become a performance bottleneck. This change of default should alleviate that. Note that reducing the size of cache shards (recommended minimum 512KB) could cause thrashing, e.g. on filter blocks, so capacity needs to increase to safely increase number of shards. The 8MB default dates back to 2011 or earlier (f779e7a5), when the most simultaneous threads you could get from a single CPU socket was 20 (e.g. Intel Xeon E7-8870). Now more than 100 are available. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11350 Test Plan: unit tests updated Reviewed By: cbi42 Differential Revision: D44674873 Pulled By: pdillinger fbshipit-source-id: 91ed3070789b42679283c7e6dc97c41a6a97bdf4
2023-04-04 15:33:24 -07:00 · 2023-04-04 15:33:24 -07:00 · 3c17930ede
parent e5a560ec98
commit 3c17930ede
5 changed files with 8 additions and 10 deletions
--- a/HISTORY.md
+++ b/HISTORY.md
@ -5,6 +5,7 @@
 * Add `multi_get_for_update` to C API.

 ### Behavior changes
+* Changed the default block cache from an 8MB LRUCache to a 32MB LRUCache, which increases the default number of cache shards from 16 to 64. This change is intended to minimize cache mutex contention under stress conditions. See https://github.com/facebook/rocksdb/wiki/Block-Cache for more information.
 * For level compaction with `level_compaction_dynamic_level_bytes=true`, RocksDB now trivially moves levels down to fill LSM starting from bottommost level during DB open. See more in comments for option `level_compaction_dynamic_level_bytes`.

 ### New Features
--- a/include/rocksdb/table.h
+++ b/include/rocksdb/table.h
@ -259,7 +259,7 @@ struct BlockBasedTableOptions {
  bool no_block_cache = false;

  // If non-NULL use the specified cache for blocks.
-  // If NULL, rocksdb will automatically create and use an 8MB internal cache.
+  // If NULL, rocksdb will automatically create and use a 32MB internal cache.
  std::shared_ptr<Cache> block_cache = nullptr;

  // If non-NULL use the specified cache for pages read from device
--- a/options/options_test.cc
+++ b/options/options_test.cc
@ -2161,7 +2161,7 @@ TEST_F(OptionsTest, ConvertOptionsTest) {
  const auto table_opt = table_factory->GetOptions<BlockBasedTableOptions>();
  ASSERT_NE(table_opt, nullptr);

-  ASSERT_EQ(table_opt->block_cache->GetCapacity(), 8UL << 20);
+  ASSERT_EQ(table_opt->block_cache->GetCapacity(), 32UL << 20);
  ASSERT_EQ(table_opt->block_size, leveldb_opt.block_size);
  ASSERT_EQ(table_opt->block_restart_interval,
            leveldb_opt.block_restart_interval);
--- a/table/block_based/block_based_table_factory.cc
+++ b/table/block_based/block_based_table_factory.cc
@ -443,11 +443,8 @@ void BlockBasedTableFactory::InitializeOptions() {
    table_options_.block_cache.reset();
  } else if (table_options_.block_cache == nullptr) {
    LRUCacheOptions co;
-    co.capacity = 8 << 20;
-    // It makes little sense to pay overhead for mid-point insertion while the
-    // block size is only 8MB.
-    co.high_pri_pool_ratio = 0.0;
-    co.low_pri_pool_ratio = 0.0;
+    // 32MB, the recommended minimum size for 64 shards, to reduce contention
+    co.capacity = 32 << 20;
    table_options_.block_cache = NewLRUCache(co);
  }
  if (table_options_.block_size_deviation < 0 ||
--- a/tools/db_bench_tool.cc
+++ b/tools/db_bench_tool.cc
@ -548,7 +548,7 @@ DEFINE_bool(universal_allow_trivial_move, false,
 DEFINE_bool(universal_incremental, false,
            "Enable incremental compactions in universal compaction.");

-DEFINE_int64(cache_size, 8 << 20,  // 8MB
+DEFINE_int64(cache_size, 32 << 20,  // 32MB
             "Number of bytes to use as a cache of uncompressed data");

 DEFINE_int32(cache_numshardbits, -1,
@ -569,7 +569,7 @@ DEFINE_string(cache_type, "lru_cache", "Type of block cache.");
 DEFINE_bool(use_compressed_secondary_cache, false,
            "Use the CompressedSecondaryCache as the secondary cache.");

-DEFINE_int64(compressed_secondary_cache_size, 8 << 20,  // 8MB
+DEFINE_int64(compressed_secondary_cache_size, 32 << 20,  // 32MB
             "Number of bytes to use as a cache of data");

 DEFINE_int32(compressed_secondary_cache_numshardbits, 6,
@ -4590,7 +4590,7 @@ class Benchmark {
      if (FLAGS_cache_size > 0) {
        // This violates this function's rules on when to set options. But we
        // have to do it because the case of unconfigured block cache in OPTIONS
-        // file is indistinguishable (it is sanitized to 8MB by this point, not
+        // file is indistinguishable (it is sanitized to 32MB by this point, not
        // nullptr), and our regression tests assume this will be the shared
        // block cache, even with OPTIONS file provided.
        table_options->block_cache = cache_;