diff --git a/db/c.cc b/db/c.cc index d745d5aee6..d1d0aafe1e 100644 --- a/db/c.cc +++ b/db/c.cc @@ -3048,6 +3048,12 @@ int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) { return opt->rep.blob_file_starting_level; } +void rocksdb_options_set_blob_cache(rocksdb_options_t* opt, + rocksdb_cache_t* blob_cache) { + // A NULL blob_cache clears the option, i.e. no blob cache is used. + opt->rep.blob_cache = blob_cache ? blob_cache->rep : nullptr; +} + void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) { opt->rep.num_levels = n; } diff --git a/db/db_options_test.cc b/db/db_options_test.cc index 46aa252112..229ad904ec 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -220,6 +220,7 @@ TEST_F(DBOptionsTest, SetMutableTableOptions) { ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily(); Options c_opts = dbfull()->GetOptions(cfh); + const auto* c_bbto = c_opts.table_factory->GetOptions(); ASSERT_NE(c_bbto, nullptr); diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index e3f4ccee72..9cdd947bed 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -10,6 +10,7 @@ #include +#include "rocksdb/cache.h" #include "rocksdb/compression_type.h" #include "rocksdb/memtablerep.h" #include "rocksdb/universal_compaction.h" @@ -227,7 +228,7 @@ enum class Temperature : uint8_t { }; // The control option of how the cache tiers will be used. Currently rocksdb -// support block cahe (volatile tier), secondary cache (non-volatile tier). +// support block cache (volatile tier), secondary cache (non-volatile tier). // In the future, we may add more caching layers. enum class CacheTier : uint8_t { kVolatileTier = 0, @@ -953,6 +954,13 @@ struct AdvancedColumnFamilyOptions { // Dynamically changeable through the SetOptions() API int blob_file_starting_level = 0; + // This feature is WORK IN PROGRESS + // If non-NULL use the specified cache for blobs. + // If NULL, rocksdb will not use a blob cache. 
+ // + // Default: nullptr (disabled) + std::shared_ptr blob_cache = nullptr; + // Create ColumnFamilyOptions with default values for all fields AdvancedColumnFamilyOptions(); // Create ColumnFamilyOptions from Options diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 5563335512..93737c4bdb 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1264,6 +1264,9 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_starting_level( extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level( rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_cache( + rocksdb_options_t* opt, rocksdb_cache_t* blob_cache); + /* returns a pointer to a malloc()-ed, null terminated string */ extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string( rocksdb_options_t* opt); diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 7bc19976b7..19f4a402ae 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -762,7 +762,7 @@ struct FSReadRequest { // returns fewer bytes if end of file is hit (or `status` is not OK). size_t len; - // A buffer that MultiRead() can optionally place data in. It can + // A buffer that MultiRead() can optionally place data in. It can // ignore this and allocate its own buffer. // The lifecycle of scratch will be until IO is completed. // diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index cc175dccc6..19bc3cb190 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1412,7 +1412,6 @@ struct Options : public DBOptions, public ColumnFamilyOptions { Options* DisableExtraChecks(); }; -// // An application can issue a read request (via Get/Iterators) and specify // if that read should process data that ALREADY resides on a specified cache // level. 
For example, if an application specifies kBlockCacheTier then the diff --git a/options/cf_options.cc b/options/cf_options.cc index 2da55a6fe6..1e98652657 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -732,6 +732,16 @@ static std::unordered_map OptionTypeInfo::AsCustomSharedPtr( offsetof(struct ImmutableCFOptions, sst_partitioner_factory), OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)}, + {"blob_cache", + {offsetof(struct ImmutableCFOptions, blob_cache), OptionType::kUnknown, + OptionVerificationType::kNormal, + (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize), + // Parses the input value as a Cache + [](const ConfigOptions& opts, const std::string&, + const std::string& value, void* addr) { + auto* cache = static_cast*>(addr); + return Cache::CreateFromString(opts, value, cache); + }}}, }; const std::string OptionsHelper::kCFOptionsName = "ColumnFamilyOptions"; @@ -870,7 +880,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options) cf_options.memtable_insert_with_hint_prefix_extractor), cf_paths(cf_options.cf_paths), compaction_thread_limiter(cf_options.compaction_thread_limiter), - sst_partitioner_factory(cf_options.sst_partitioner_factory) {} + sst_partitioner_factory(cf_options.sst_partitioner_factory), + blob_cache(cf_options.blob_cache) {} ImmutableOptions::ImmutableOptions() : ImmutableOptions(Options()) {} diff --git a/options/cf_options.h b/options/cf_options.h index c6bfe8f78e..bfdc2e102a 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -78,6 +78,8 @@ struct ImmutableCFOptions { std::shared_ptr compaction_thread_limiter; std::shared_ptr sst_partitioner_factory; + + std::shared_ptr blob_cache; }; struct ImmutableOptions : public ImmutableDBOptions, public ImmutableCFOptions { diff --git a/options/options.cc b/options/options.cc index 8424549b75..bba166be49 100644 --- a/options/options.cc +++ b/options/options.cc @@ -101,7 +101,8 @@ 
AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options) blob_garbage_collection_force_threshold( options.blob_garbage_collection_force_threshold), blob_compaction_readahead_size(options.blob_compaction_readahead_size), - blob_file_starting_level(options.blob_file_starting_level) { + blob_file_starting_level(options.blob_file_starting_level), + blob_cache(options.blob_cache) { assert(memtable_factory.get() != nullptr); if (max_bytes_for_level_multiplier_additional.size() < static_cast(num_levels)) { @@ -417,6 +418,12 @@ void ColumnFamilyOptions::Dump(Logger* log) const { blob_compaction_readahead_size); ROCKS_LOG_HEADER(log, " Options.blob_file_starting_level: %d", blob_file_starting_level); + if (blob_cache) { + ROCKS_LOG_HEADER(log, " Options.blob_cache: %s", + blob_cache->Name()); + ROCKS_LOG_HEADER(log, " blob_cache options: %s", + blob_cache->GetPrintableOptions().c_str()); + } } // ColumnFamilyOptions::Dump void Options::Dump(Logger* log) const { diff --git a/options/options_helper.cc b/options/options_helper.cc index 65eb708c16..6af73c840d 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -303,6 +303,7 @@ void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions, cf_opts->cf_paths = ioptions.cf_paths; cf_opts->compaction_thread_limiter = ioptions.compaction_thread_limiter; cf_opts->sst_partitioner_factory = ioptions.sst_partitioner_factory; + cf_opts->blob_cache = ioptions.blob_cache; // TODO(yhchiang): find some way to handle the following derived options // * max_file_size diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 944248da25..42a6fd577a 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -377,7 +377,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { // test is not updated accordingly. 
// After adding an option, we need to make sure it is settable by // GetColumnFamilyOptionsFromString() and add the option to the input -// string passed to GetColumnFamilyOptionsFromString()in this test. +// string passed to GetColumnFamilyOptionsFromString() in this test. // If it is a complicated type, you also need to add the field to // kColumnFamilyOptionsExcluded, and maybe add customized verification // for it. @@ -400,6 +400,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { {offsetof(struct ColumnFamilyOptions, table_properties_collector_factories), sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)}, + {offsetof(struct ColumnFamilyOptions, blob_cache), + sizeof(std::shared_ptr)}, {offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)}, {offsetof(struct ColumnFamilyOptions, merge_operator), sizeof(std::shared_ptr)}, @@ -523,9 +525,12 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "blob_file_starting_level=1;" "bottommost_temperature=kWarm;" "compaction_options_fifo={max_table_files_size=3;allow_" - "compaction=false;age_for_warm=1;};", + "compaction=false;age_for_warm=1;};" + "blob_cache=1M;", new_options)); + ASSERT_NE(new_options->blob_cache.get(), nullptr); + ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions), kColumnFamilyOptionsExcluded)); diff --git a/options/options_test.cc b/options/options_test.cc index 1992e39a54..7c688f290d 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -601,6 +601,22 @@ TEST_F(OptionsTest, GetColumnFamilyOptionsFromStringTest) { ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr); ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory"); ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory")); + + // blob cache + ASSERT_OK(GetColumnFamilyOptionsFromString( + config_options, base_cf_opt, + "blob_cache={capacity=1M;num_shard_bits=4;" + 
"strict_capacity_limit=true;high_pri_pool_ratio=0.5;};", + &new_cf_opt)); + ASSERT_NE(new_cf_opt.blob_cache, nullptr); + ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL); + ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) + ->GetNumShardBits(), + 4); + ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true); + ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) + ->GetHighPriPoolRatio(), + 0.5); } TEST_F(OptionsTest, CompressionOptionsFromString) { @@ -2767,6 +2783,22 @@ TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) { &new_cf_opt)); ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr); ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory")); + + // blob cache + ASSERT_OK(GetColumnFamilyOptionsFromString( + base_cf_opt, + "blob_cache={capacity=1M;num_shard_bits=4;" + "strict_capacity_limit=true;high_pri_pool_ratio=0.5;};", + &new_cf_opt)); + ASSERT_NE(new_cf_opt.blob_cache, nullptr); + ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL); + ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) + ->GetNumShardBits(), + 4); + ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true); + ASSERT_EQ(static_cast(new_cf_opt.blob_cache.get()) + ->GetHighPriPoolRatio(), + 0.5); } TEST_F(OptionsTest, SliceTransformCreateFromString) { diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index e119d1bc71..31b75cf5ef 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -563,7 +563,7 @@ void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties, // assert(!db_id.empty()); // Minimum block size is 5 bytes; therefore we can trim off two lower bits - // from offets. See GetCacheKey. + // from offsets. See GetCacheKey. *out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num, /*max_offset*/ file_size >> 2); }