Add blob cache option in the column family options (#10155)

Summary:
There is currently no caching mechanism for blobs, which is not ideal, especially when the database resides on remote storage (where we cannot rely on the OS page cache). As part of this task, we would like to make it possible for the application to configure a blob cache.
This PR is a part of https://github.com/facebook/rocksdb/issues/10156

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10155

Reviewed By: ltamasi

Differential Revision: D37150819

Pulled By: gangliao

fbshipit-source-id: b807c7916ea5d411588128f8e22a49f171388fe2
This commit is contained in:
Gang Liao 2022-06-14 14:19:26 -07:00 committed by Facebook GitHub Bot
parent 1d2950b8dd
commit cba398df8a
13 changed files with 82 additions and 8 deletions

View file

@ -3048,6 +3048,11 @@ int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
return opt->rep.blob_file_starting_level;
}
// Sets the `blob_cache` option on the options object wrapped by `opt`.
//
// opt        - options handle to modify.
// blob_cache - cache handle whose `rep` is assigned to the option. Passing
//              NULL clears the option; a null `blob_cache` option means no
//              blob cache is used.
void rocksdb_options_set_blob_cache(rocksdb_options_t* opt,
                                    rocksdb_cache_t* blob_cache) {
  if (blob_cache == nullptr) {
    // A NULL handle must not be dereferenced; treat it as "no blob cache".
    opt->rep.blob_cache = nullptr;
    return;
  }
  opt->rep.blob_cache = blob_cache->rep;
}
// Stores `n` in the `num_levels` field of the options object wrapped by `opt`.
void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
  auto& wrapped_options = opt->rep;
  wrapped_options.num_levels = n;
}

View file

@ -220,6 +220,7 @@ TEST_F(DBOptionsTest, SetMutableTableOptions) {
ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily();
Options c_opts = dbfull()->GetOptions(cfh);
const auto* c_bbto =
c_opts.table_factory->GetOptions<BlockBasedTableOptions>();
ASSERT_NE(c_bbto, nullptr);

View file

@ -10,6 +10,7 @@
#include <memory>
#include "rocksdb/cache.h"
#include "rocksdb/compression_type.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/universal_compaction.h"
@ -227,7 +228,7 @@ enum class Temperature : uint8_t {
};
// The control option of how the cache tiers will be used. Currently rocksdb
// support block cahe (volatile tier), secondary cache (non-volatile tier).
// support block cache (volatile tier), secondary cache (non-volatile tier).
// In the future, we may add more caching layers.
enum class CacheTier : uint8_t {
kVolatileTier = 0,
@ -953,6 +954,13 @@ struct AdvancedColumnFamilyOptions {
// Dynamically changeable through the SetOptions() API
int blob_file_starting_level = 0;
// This feature is WORK IN PROGRESS
// If non-NULL, use the specified cache for blobs.
// If NULL, rocksdb will not use a blob cache.
//
// Default: nullptr (disabled)
std::shared_ptr<Cache> blob_cache = nullptr;
// Create ColumnFamilyOptions with default values for all fields
AdvancedColumnFamilyOptions();
// Create ColumnFamilyOptions from Options

View file

@ -1264,6 +1264,9 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_starting_level(
extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level(
rocksdb_options_t* opt);
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_cache(
rocksdb_options_t* opt, rocksdb_cache_t* blob_cache);
/* returns a pointer to a malloc()-ed, null terminated string */
extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string(
rocksdb_options_t* opt);

View file

@ -762,7 +762,7 @@ struct FSReadRequest {
// returns fewer bytes if end of file is hit (or `status` is not OK).
size_t len;
// A buffer that MultiRead() can optionally place data in. It can
// A buffer that MultiRead() can optionally place data in. It can
// ignore this and allocate its own buffer.
// The lifecycle of scratch will be until IO is completed.
//

View file

@ -1412,7 +1412,6 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
Options* DisableExtraChecks();
};
//
// An application can issue a read request (via Get/Iterators) and specify
// if that read should process data that ALREADY resides on a specified cache
// level. For example, if an application specifies kBlockCacheTier then the

View file

@ -732,6 +732,16 @@ static std::unordered_map<std::string, OptionTypeInfo>
OptionTypeInfo::AsCustomSharedPtr<SstPartitionerFactory>(
offsetof(struct ImmutableCFOptions, sst_partitioner_factory),
OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)},
{"blob_cache",
{offsetof(struct ImmutableCFOptions, blob_cache), OptionType::kUnknown,
OptionVerificationType::kNormal,
(OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize),
// Parses the input value as a Cache
[](const ConfigOptions& opts, const std::string&,
const std::string& value, void* addr) {
auto* cache = static_cast<std::shared_ptr<Cache>*>(addr);
return Cache::CreateFromString(opts, value, cache);
}}},
};
const std::string OptionsHelper::kCFOptionsName = "ColumnFamilyOptions";
@ -870,7 +880,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options)
cf_options.memtable_insert_with_hint_prefix_extractor),
cf_paths(cf_options.cf_paths),
compaction_thread_limiter(cf_options.compaction_thread_limiter),
sst_partitioner_factory(cf_options.sst_partitioner_factory) {}
sst_partitioner_factory(cf_options.sst_partitioner_factory),
blob_cache(cf_options.blob_cache) {}
ImmutableOptions::ImmutableOptions() : ImmutableOptions(Options()) {}

View file

@ -78,6 +78,8 @@ struct ImmutableCFOptions {
std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter;
std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory;
std::shared_ptr<Cache> blob_cache;
};
struct ImmutableOptions : public ImmutableDBOptions, public ImmutableCFOptions {

View file

@ -101,7 +101,8 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
blob_garbage_collection_force_threshold(
options.blob_garbage_collection_force_threshold),
blob_compaction_readahead_size(options.blob_compaction_readahead_size),
blob_file_starting_level(options.blob_file_starting_level) {
blob_file_starting_level(options.blob_file_starting_level),
blob_cache(options.blob_cache) {
assert(memtable_factory.get() != nullptr);
if (max_bytes_for_level_multiplier_additional.size() <
static_cast<unsigned int>(num_levels)) {
@ -417,6 +418,12 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
blob_compaction_readahead_size);
ROCKS_LOG_HEADER(log, " Options.blob_file_starting_level: %d",
blob_file_starting_level);
if (blob_cache) {
ROCKS_LOG_HEADER(log, " Options.blob_cache: %s",
blob_cache->Name());
ROCKS_LOG_HEADER(log, " blob_cache options: %s",
blob_cache->GetPrintableOptions().c_str());
}
} // ColumnFamilyOptions::Dump
void Options::Dump(Logger* log) const {

View file

@ -303,6 +303,7 @@ void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions,
cf_opts->cf_paths = ioptions.cf_paths;
cf_opts->compaction_thread_limiter = ioptions.compaction_thread_limiter;
cf_opts->sst_partitioner_factory = ioptions.sst_partitioner_factory;
cf_opts->blob_cache = ioptions.blob_cache;
// TODO(yhchiang): find some way to handle the following derived options
// * max_file_size

View file

@ -377,7 +377,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
// test is not updated accordingly.
// After adding an option, we need to make sure it is settable by
// GetColumnFamilyOptionsFromString() and add the option to the input
// string passed to GetColumnFamilyOptionsFromString()in this test.
// string passed to GetColumnFamilyOptionsFromString() in this test.
// If it is a complicated type, you also need to add the field to
// kColumnFamilyOptionsExcluded, and maybe add customized verification
// for it.
@ -400,6 +400,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
{offsetof(struct ColumnFamilyOptions,
table_properties_collector_factories),
sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
{offsetof(struct ColumnFamilyOptions, blob_cache),
sizeof(std::shared_ptr<Cache>)},
{offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)},
{offsetof(struct ColumnFamilyOptions, merge_operator),
sizeof(std::shared_ptr<MergeOperator>)},
@ -523,9 +525,12 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
"blob_file_starting_level=1;"
"bottommost_temperature=kWarm;"
"compaction_options_fifo={max_table_files_size=3;allow_"
"compaction=false;age_for_warm=1;};",
"compaction=false;age_for_warm=1;};"
"blob_cache=1M;",
new_options));
ASSERT_NE(new_options->blob_cache.get(), nullptr);
ASSERT_EQ(unset_bytes_base,
NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions),
kColumnFamilyOptionsExcluded));

View file

@ -601,6 +601,22 @@ TEST_F(OptionsTest, GetColumnFamilyOptionsFromStringTest) {
ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory");
ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));
// blob cache
ASSERT_OK(GetColumnFamilyOptionsFromString(
config_options, base_cf_opt,
"blob_cache={capacity=1M;num_shard_bits=4;"
"strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
&new_cf_opt));
ASSERT_NE(new_cf_opt.blob_cache, nullptr);
ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
->GetNumShardBits(),
4);
ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
->GetHighPriPoolRatio(),
0.5);
}
TEST_F(OptionsTest, CompressionOptionsFromString) {
@ -2767,6 +2783,22 @@ TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) {
&new_cf_opt));
ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));
// blob cache
ASSERT_OK(GetColumnFamilyOptionsFromString(
base_cf_opt,
"blob_cache={capacity=1M;num_shard_bits=4;"
"strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
&new_cf_opt));
ASSERT_NE(new_cf_opt.blob_cache, nullptr);
ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
->GetNumShardBits(),
4);
ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
->GetHighPriPoolRatio(),
0.5);
}
TEST_F(OptionsTest, SliceTransformCreateFromString) {

View file

@ -563,7 +563,7 @@ void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties,
// assert(!db_id.empty());
// Minimum block size is 5 bytes; therefore we can trim off two lower bits
// from offets. See GetCacheKey.
// from offsets. See GetCacheKey.
*out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num,
/*max_offset*/ file_size >> 2);
}