mirror of https://github.com/facebook/rocksdb.git
Revert "Adding pin_l0_filter_and_index_blocks_in_cache feature."
This reverts commit 522de4f59e
.
It has bug of index block cleaning up.
This commit is contained in:
parent
5f8741a69d
commit
b1fafcaca6
|
@ -63,7 +63,7 @@ Status BuildTable(
|
||||||
const CompressionType compression,
|
const CompressionType compression,
|
||||||
const CompressionOptions& compression_opts, bool paranoid_file_checks,
|
const CompressionOptions& compression_opts, bool paranoid_file_checks,
|
||||||
InternalStats* internal_stats, const Env::IOPriority io_priority,
|
InternalStats* internal_stats, const Env::IOPriority io_priority,
|
||||||
TableProperties* table_properties, int level) {
|
TableProperties* table_properties) {
|
||||||
// Reports the IOStats for flush for every following bytes.
|
// Reports the IOStats for flush for every following bytes.
|
||||||
const size_t kReportFlushIOStatsEvery = 1048576;
|
const size_t kReportFlushIOStatsEvery = 1048576;
|
||||||
Status s;
|
Status s;
|
||||||
|
@ -149,8 +149,7 @@ Status BuildTable(
|
||||||
ReadOptions(), env_options, internal_comparator, meta->fd, nullptr,
|
ReadOptions(), env_options, internal_comparator, meta->fd, nullptr,
|
||||||
(internal_stats == nullptr) ? nullptr
|
(internal_stats == nullptr) ? nullptr
|
||||||
: internal_stats->GetFileReadHist(0),
|
: internal_stats->GetFileReadHist(0),
|
||||||
false /* for_compaction */, nullptr /* arena */,
|
false));
|
||||||
false /* skip_filter */, level));
|
|
||||||
s = it->status();
|
s = it->status();
|
||||||
if (s.ok() && paranoid_file_checks) {
|
if (s.ok() && paranoid_file_checks) {
|
||||||
for (it->SeekToFirst(); it->Valid(); it->Next()) {
|
for (it->SeekToFirst(); it->Valid(); it->Next()) {
|
||||||
|
|
|
@ -61,6 +61,6 @@ extern Status BuildTable(
|
||||||
const CompressionOptions& compression_opts, bool paranoid_file_checks,
|
const CompressionOptions& compression_opts, bool paranoid_file_checks,
|
||||||
InternalStats* internal_stats,
|
InternalStats* internal_stats,
|
||||||
const Env::IOPriority io_priority = Env::IO_HIGH,
|
const Env::IOPriority io_priority = Env::IO_HIGH,
|
||||||
TableProperties* table_properties = nullptr, int level = -1);
|
TableProperties* table_properties = nullptr);
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
5
db/c.cc
5
db/c.cc
|
@ -1288,11 +1288,6 @@ void rocksdb_block_based_options_set_cache_index_and_filter_blocks(
|
||||||
options->rep.cache_index_and_filter_blocks = v;
|
options->rep.cache_index_and_filter_blocks = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
void rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache(
|
|
||||||
rocksdb_block_based_table_options_t* options, unsigned char v) {
|
|
||||||
options->rep.pin_l0_filter_and_index_blocks_in_cache = v;
|
|
||||||
}
|
|
||||||
|
|
||||||
void rocksdb_block_based_options_set_skip_table_builder_flush(
|
void rocksdb_block_based_options_set_skip_table_builder_flush(
|
||||||
rocksdb_block_based_table_options_t* options, unsigned char v) {
|
rocksdb_block_based_table_options_t* options, unsigned char v) {
|
||||||
options->rep.skip_table_builder_flush = v;
|
options->rep.skip_table_builder_flush = v;
|
||||||
|
|
|
@ -465,8 +465,6 @@ class ColumnFamilySet {
|
||||||
// Don't call while iterating over ColumnFamilySet
|
// Don't call while iterating over ColumnFamilySet
|
||||||
void FreeDeadColumnFamilies();
|
void FreeDeadColumnFamilies();
|
||||||
|
|
||||||
Cache* get_table_cache() { return table_cache_; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class ColumnFamilyData;
|
friend class ColumnFamilyData;
|
||||||
// helper function that gets called from cfd destructor
|
// helper function that gets called from cfd destructor
|
||||||
|
|
|
@ -424,97 +424,6 @@ TEST_F(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
|
||||||
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(DBTest, IndexAndFilterBlocksOfNewTableAddedToCacheWithPinning) {
|
|
||||||
Options options = CurrentOptions();
|
|
||||||
options.create_if_missing = true;
|
|
||||||
options.statistics = rocksdb::CreateDBStatistics();
|
|
||||||
BlockBasedTableOptions table_options;
|
|
||||||
table_options.cache_index_and_filter_blocks = true;
|
|
||||||
table_options.pin_l0_filter_and_index_blocks_in_cache = true;
|
|
||||||
table_options.filter_policy.reset(NewBloomFilterPolicy(20));
|
|
||||||
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
|
||||||
CreateAndReopenWithCF({"pikachu"}, options);
|
|
||||||
|
|
||||||
ASSERT_OK(Put(1, "key", "val"));
|
|
||||||
// Create a new table.
|
|
||||||
ASSERT_OK(Flush(1));
|
|
||||||
|
|
||||||
// index/filter blocks added to block cache right after table creation.
|
|
||||||
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
|
||||||
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
|
|
||||||
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
|
|
||||||
|
|
||||||
// only index/filter were added
|
|
||||||
ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
|
||||||
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
|
|
||||||
|
|
||||||
std::string value;
|
|
||||||
// Miss and hit count should remain the same, they're all pinned.
|
|
||||||
db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
|
|
||||||
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
|
||||||
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
|
|
||||||
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
|
|
||||||
|
|
||||||
// Miss and hit count should remain the same, they're all pinned.
|
|
||||||
value = Get(1, "key");
|
|
||||||
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
|
||||||
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
|
|
||||||
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(DBTest, MultiLevelIndexAndFilterBlocksCachedWithPinning) {
|
|
||||||
Options options = CurrentOptions();
|
|
||||||
options.create_if_missing = true;
|
|
||||||
options.statistics = rocksdb::CreateDBStatistics();
|
|
||||||
BlockBasedTableOptions table_options;
|
|
||||||
table_options.cache_index_and_filter_blocks = true;
|
|
||||||
table_options.pin_l0_filter_and_index_blocks_in_cache = true;
|
|
||||||
table_options.filter_policy.reset(NewBloomFilterPolicy(20));
|
|
||||||
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
|
||||||
CreateAndReopenWithCF({"pikachu"}, options);
|
|
||||||
|
|
||||||
Put(1, "a", "begin");
|
|
||||||
Put(1, "z", "end");
|
|
||||||
ASSERT_OK(Flush(1));
|
|
||||||
// move this table to L1
|
|
||||||
dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]);
|
|
||||||
|
|
||||||
// reset block cache
|
|
||||||
table_options.block_cache = NewLRUCache(64 * 1024);
|
|
||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
||||||
TryReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
||||||
// create new table at L0
|
|
||||||
Put(1, "a2", "begin2");
|
|
||||||
Put(1, "z2", "end2");
|
|
||||||
ASSERT_OK(Flush(1));
|
|
||||||
|
|
||||||
// get base cache values
|
|
||||||
uint64_t fm = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
|
|
||||||
uint64_t fh = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
|
|
||||||
uint64_t im = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
|
|
||||||
uint64_t ih = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);
|
|
||||||
|
|
||||||
std::string value;
|
|
||||||
// this should be read from L0
|
|
||||||
// so cache values don't change
|
|
||||||
value = Get(1, "a2");
|
|
||||||
ASSERT_EQ(fm, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(fh, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
|
||||||
ASSERT_EQ(im, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
|
|
||||||
ASSERT_EQ(ih, TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
|
|
||||||
|
|
||||||
// this should be read from L1
|
|
||||||
// the file is opened, prefetching results in a cache filter miss
|
|
||||||
// the block is loaded and added to the cache,
|
|
||||||
// then the get results in a cache hit for L1
|
|
||||||
value = Get(1, "a");
|
|
||||||
ASSERT_EQ(fm + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(fh + 1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(DBTest, ParanoidFileChecks) {
|
TEST_F(DBTest, ParanoidFileChecks) {
|
||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
options.create_if_missing = true;
|
options.create_if_missing = true;
|
||||||
|
|
|
@ -234,14 +234,14 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
|
||||||
|
|
||||||
TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression",
|
TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression",
|
||||||
&output_compression_);
|
&output_compression_);
|
||||||
s = BuildTable(
|
s = BuildTable(dbname_, db_options_.env, *cfd_->ioptions(), env_options_,
|
||||||
dbname_, db_options_.env, *cfd_->ioptions(), env_options_,
|
cfd_->table_cache(), iter.get(), meta,
|
||||||
cfd_->table_cache(), iter.get(), meta, cfd_->internal_comparator(),
|
cfd_->internal_comparator(),
|
||||||
cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(),
|
cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(),
|
||||||
existing_snapshots_, earliest_write_conflict_snapshot_,
|
existing_snapshots_, earliest_write_conflict_snapshot_,
|
||||||
output_compression_, cfd_->ioptions()->compression_opts,
|
output_compression_, cfd_->ioptions()->compression_opts,
|
||||||
mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(),
|
mutable_cf_options_.paranoid_file_checks,
|
||||||
Env::IO_HIGH, &table_properties_, 0 /* level */);
|
cfd_->internal_stats(), Env::IO_HIGH, &table_properties_);
|
||||||
info.table_properties = table_properties_;
|
info.table_properties = table_properties_;
|
||||||
LogFlush(db_options_.info_log);
|
LogFlush(db_options_.info_log);
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,7 +88,7 @@ Status TableCache::GetTableReader(
|
||||||
const EnvOptions& env_options,
|
const EnvOptions& env_options,
|
||||||
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
|
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
|
||||||
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
|
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
|
||||||
unique_ptr<TableReader>* table_reader, bool skip_filters, int level) {
|
unique_ptr<TableReader>* table_reader, bool skip_filters) {
|
||||||
std::string fname =
|
std::string fname =
|
||||||
TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId());
|
TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId());
|
||||||
unique_ptr<RandomAccessFile> file;
|
unique_ptr<RandomAccessFile> file;
|
||||||
|
@ -109,7 +109,7 @@ Status TableCache::GetTableReader(
|
||||||
file_read_hist));
|
file_read_hist));
|
||||||
s = ioptions_.table_factory->NewTableReader(
|
s = ioptions_.table_factory->NewTableReader(
|
||||||
TableReaderOptions(ioptions_, env_options, internal_comparator,
|
TableReaderOptions(ioptions_, env_options, internal_comparator,
|
||||||
skip_filters, level),
|
skip_filters),
|
||||||
std::move(file_reader), fd.GetFileSize(), table_reader);
|
std::move(file_reader), fd.GetFileSize(), table_reader);
|
||||||
TEST_SYNC_POINT("TableCache::GetTableReader:0");
|
TEST_SYNC_POINT("TableCache::GetTableReader:0");
|
||||||
}
|
}
|
||||||
|
@ -120,8 +120,7 @@ Status TableCache::FindTable(const EnvOptions& env_options,
|
||||||
const InternalKeyComparator& internal_comparator,
|
const InternalKeyComparator& internal_comparator,
|
||||||
const FileDescriptor& fd, Cache::Handle** handle,
|
const FileDescriptor& fd, Cache::Handle** handle,
|
||||||
const bool no_io, bool record_read_stats,
|
const bool no_io, bool record_read_stats,
|
||||||
HistogramImpl* file_read_hist, bool skip_filters,
|
HistogramImpl* file_read_hist, bool skip_filters) {
|
||||||
int level) {
|
|
||||||
PERF_TIMER_GUARD(find_table_nanos);
|
PERF_TIMER_GUARD(find_table_nanos);
|
||||||
Status s;
|
Status s;
|
||||||
uint64_t number = fd.GetNumber();
|
uint64_t number = fd.GetNumber();
|
||||||
|
@ -137,7 +136,7 @@ Status TableCache::FindTable(const EnvOptions& env_options,
|
||||||
unique_ptr<TableReader> table_reader;
|
unique_ptr<TableReader> table_reader;
|
||||||
s = GetTableReader(env_options, internal_comparator, fd,
|
s = GetTableReader(env_options, internal_comparator, fd,
|
||||||
false /* sequential mode */, record_read_stats,
|
false /* sequential mode */, record_read_stats,
|
||||||
file_read_hist, &table_reader, skip_filters, level);
|
file_read_hist, &table_reader, skip_filters);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
assert(table_reader == nullptr);
|
assert(table_reader == nullptr);
|
||||||
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
|
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
|
||||||
|
@ -159,7 +158,7 @@ InternalIterator* TableCache::NewIterator(
|
||||||
const ReadOptions& options, const EnvOptions& env_options,
|
const ReadOptions& options, const EnvOptions& env_options,
|
||||||
const InternalKeyComparator& icomparator, const FileDescriptor& fd,
|
const InternalKeyComparator& icomparator, const FileDescriptor& fd,
|
||||||
TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
|
TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
|
||||||
bool for_compaction, Arena* arena, bool skip_filters, int level) {
|
bool for_compaction, Arena* arena, bool skip_filters) {
|
||||||
PERF_TIMER_GUARD(new_table_iterator_nanos);
|
PERF_TIMER_GUARD(new_table_iterator_nanos);
|
||||||
|
|
||||||
if (table_reader_ptr != nullptr) {
|
if (table_reader_ptr != nullptr) {
|
||||||
|
@ -174,8 +173,7 @@ InternalIterator* TableCache::NewIterator(
|
||||||
unique_ptr<TableReader> table_reader_unique_ptr;
|
unique_ptr<TableReader> table_reader_unique_ptr;
|
||||||
Status s = GetTableReader(
|
Status s = GetTableReader(
|
||||||
env_options, icomparator, fd, /* sequential mode */ true,
|
env_options, icomparator, fd, /* sequential mode */ true,
|
||||||
/* record stats */ false, nullptr, &table_reader_unique_ptr,
|
/* record stats */ false, nullptr, &table_reader_unique_ptr);
|
||||||
false /* skip_filters */, level);
|
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return NewErrorInternalIterator(s, arena);
|
return NewErrorInternalIterator(s, arena);
|
||||||
}
|
}
|
||||||
|
@ -186,7 +184,7 @@ InternalIterator* TableCache::NewIterator(
|
||||||
Status s = FindTable(env_options, icomparator, fd, &handle,
|
Status s = FindTable(env_options, icomparator, fd, &handle,
|
||||||
options.read_tier == kBlockCacheTier /* no_io */,
|
options.read_tier == kBlockCacheTier /* no_io */,
|
||||||
!for_compaction /* record read_stats */,
|
!for_compaction /* record read_stats */,
|
||||||
file_read_hist, skip_filters, level);
|
file_read_hist, skip_filters);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
return NewErrorInternalIterator(s, arena);
|
return NewErrorInternalIterator(s, arena);
|
||||||
}
|
}
|
||||||
|
@ -218,7 +216,7 @@ Status TableCache::Get(const ReadOptions& options,
|
||||||
const InternalKeyComparator& internal_comparator,
|
const InternalKeyComparator& internal_comparator,
|
||||||
const FileDescriptor& fd, const Slice& k,
|
const FileDescriptor& fd, const Slice& k,
|
||||||
GetContext* get_context, HistogramImpl* file_read_hist,
|
GetContext* get_context, HistogramImpl* file_read_hist,
|
||||||
bool skip_filters, int level) {
|
bool skip_filters) {
|
||||||
TableReader* t = fd.table_reader;
|
TableReader* t = fd.table_reader;
|
||||||
Status s;
|
Status s;
|
||||||
Cache::Handle* handle = nullptr;
|
Cache::Handle* handle = nullptr;
|
||||||
|
@ -267,8 +265,7 @@ Status TableCache::Get(const ReadOptions& options,
|
||||||
if (!t) {
|
if (!t) {
|
||||||
s = FindTable(env_options_, internal_comparator, fd, &handle,
|
s = FindTable(env_options_, internal_comparator, fd, &handle,
|
||||||
options.read_tier == kBlockCacheTier /* no_io */,
|
options.read_tier == kBlockCacheTier /* no_io */,
|
||||||
true /* record_read_stats */, file_read_hist, skip_filters,
|
true /* record_read_stats */, file_read_hist, skip_filters);
|
||||||
level);
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
t = GetTableReaderFromHandle(handle);
|
t = GetTableReaderFromHandle(handle);
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,37 +45,34 @@ class TableCache {
|
||||||
// the cache and should not be deleted, and is valid for as long as the
|
// the cache and should not be deleted, and is valid for as long as the
|
||||||
// returned iterator is live.
|
// returned iterator is live.
|
||||||
// @param skip_filters Disables loading/accessing the filter block
|
// @param skip_filters Disables loading/accessing the filter block
|
||||||
// @param level The level this table is at, -1 for "not set / don't know"
|
|
||||||
InternalIterator* NewIterator(
|
InternalIterator* NewIterator(
|
||||||
const ReadOptions& options, const EnvOptions& toptions,
|
const ReadOptions& options, const EnvOptions& toptions,
|
||||||
const InternalKeyComparator& internal_comparator,
|
const InternalKeyComparator& internal_comparator,
|
||||||
const FileDescriptor& file_fd, TableReader** table_reader_ptr = nullptr,
|
const FileDescriptor& file_fd, TableReader** table_reader_ptr = nullptr,
|
||||||
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
|
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
|
||||||
Arena* arena = nullptr, bool skip_filters = false, int level = -1);
|
Arena* arena = nullptr, bool skip_filters = false);
|
||||||
|
|
||||||
// If a seek to internal key "k" in specified file finds an entry,
|
// If a seek to internal key "k" in specified file finds an entry,
|
||||||
// call (*handle_result)(arg, found_key, found_value) repeatedly until
|
// call (*handle_result)(arg, found_key, found_value) repeatedly until
|
||||||
// it returns false.
|
// it returns false.
|
||||||
// @param skip_filters Disables loading/accessing the filter block
|
// @param skip_filters Disables loading/accessing the filter block
|
||||||
// @param level The level this table is at, -1 for "not set / don't know"
|
|
||||||
Status Get(const ReadOptions& options,
|
Status Get(const ReadOptions& options,
|
||||||
const InternalKeyComparator& internal_comparator,
|
const InternalKeyComparator& internal_comparator,
|
||||||
const FileDescriptor& file_fd, const Slice& k,
|
const FileDescriptor& file_fd, const Slice& k,
|
||||||
GetContext* get_context, HistogramImpl* file_read_hist = nullptr,
|
GetContext* get_context, HistogramImpl* file_read_hist = nullptr,
|
||||||
bool skip_filters = false, int level = -1);
|
bool skip_filters = false);
|
||||||
|
|
||||||
// Evict any entry for the specified file number
|
// Evict any entry for the specified file number
|
||||||
static void Evict(Cache* cache, uint64_t file_number);
|
static void Evict(Cache* cache, uint64_t file_number);
|
||||||
|
|
||||||
// Find table reader
|
// Find table reader
|
||||||
// @param skip_filters Disables loading/accessing the filter block
|
// @param skip_filters Disables loading/accessing the filter block
|
||||||
// @param level == -1 means not specified
|
|
||||||
Status FindTable(const EnvOptions& toptions,
|
Status FindTable(const EnvOptions& toptions,
|
||||||
const InternalKeyComparator& internal_comparator,
|
const InternalKeyComparator& internal_comparator,
|
||||||
const FileDescriptor& file_fd, Cache::Handle**,
|
const FileDescriptor& file_fd, Cache::Handle**,
|
||||||
const bool no_io = false, bool record_read_stats = true,
|
const bool no_io = false, bool record_read_stats = true,
|
||||||
HistogramImpl* file_read_hist = nullptr,
|
HistogramImpl* file_read_hist = nullptr,
|
||||||
bool skip_filters = false, int level = -1);
|
bool skip_filters = false);
|
||||||
|
|
||||||
// Get TableReader from a cache handle.
|
// Get TableReader from a cache handle.
|
||||||
TableReader* GetTableReaderFromHandle(Cache::Handle* handle);
|
TableReader* GetTableReaderFromHandle(Cache::Handle* handle);
|
||||||
|
@ -109,7 +106,7 @@ class TableCache {
|
||||||
const FileDescriptor& fd, bool sequential_mode,
|
const FileDescriptor& fd, bool sequential_mode,
|
||||||
bool record_read_stats, HistogramImpl* file_read_hist,
|
bool record_read_stats, HistogramImpl* file_read_hist,
|
||||||
unique_ptr<TableReader>* table_reader,
|
unique_ptr<TableReader>* table_reader,
|
||||||
bool skip_filters = false, int level = -1);
|
bool skip_filters = false);
|
||||||
|
|
||||||
const ImmutableCFOptions& ioptions_;
|
const ImmutableCFOptions& ioptions_;
|
||||||
const EnvOptions& env_options_;
|
const EnvOptions& env_options_;
|
||||||
|
|
|
@ -91,7 +91,6 @@ class FilePicker {
|
||||||
const InternalKeyComparator* internal_comparator)
|
const InternalKeyComparator* internal_comparator)
|
||||||
: num_levels_(num_levels),
|
: num_levels_(num_levels),
|
||||||
curr_level_(static_cast<unsigned int>(-1)),
|
curr_level_(static_cast<unsigned int>(-1)),
|
||||||
returned_file_level_(static_cast<unsigned int>(-1)),
|
|
||||||
hit_file_level_(static_cast<unsigned int>(-1)),
|
hit_file_level_(static_cast<unsigned int>(-1)),
|
||||||
search_left_bound_(0),
|
search_left_bound_(0),
|
||||||
search_right_bound_(FileIndexer::kLevelMaxIndex),
|
search_right_bound_(FileIndexer::kLevelMaxIndex),
|
||||||
|
@ -118,8 +117,6 @@ class FilePicker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int GetCurrentLevel() { return returned_file_level_; }
|
|
||||||
|
|
||||||
FdWithKeyRange* GetNextFile() {
|
FdWithKeyRange* GetNextFile() {
|
||||||
while (!search_ended_) { // Loops over different levels.
|
while (!search_ended_) { // Loops over different levels.
|
||||||
while (curr_index_in_curr_level_ < curr_file_level_->num_files) {
|
while (curr_index_in_curr_level_ < curr_file_level_->num_files) {
|
||||||
|
@ -192,7 +189,6 @@ class FilePicker {
|
||||||
}
|
}
|
||||||
prev_file_ = f;
|
prev_file_ = f;
|
||||||
#endif
|
#endif
|
||||||
returned_file_level_ = curr_level_;
|
|
||||||
if (curr_level_ > 0 && cmp_largest < 0) {
|
if (curr_level_ > 0 && cmp_largest < 0) {
|
||||||
// No more files to search in this level.
|
// No more files to search in this level.
|
||||||
search_ended_ = !PrepareNextLevel();
|
search_ended_ = !PrepareNextLevel();
|
||||||
|
@ -219,7 +215,6 @@ class FilePicker {
|
||||||
private:
|
private:
|
||||||
unsigned int num_levels_;
|
unsigned int num_levels_;
|
||||||
unsigned int curr_level_;
|
unsigned int curr_level_;
|
||||||
unsigned int returned_file_level_;
|
|
||||||
unsigned int hit_file_level_;
|
unsigned int hit_file_level_;
|
||||||
int32_t search_left_bound_;
|
int32_t search_left_bound_;
|
||||||
int32_t search_right_bound_;
|
int32_t search_right_bound_;
|
||||||
|
@ -490,7 +485,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState {
|
||||||
const EnvOptions& env_options,
|
const EnvOptions& env_options,
|
||||||
const InternalKeyComparator& icomparator,
|
const InternalKeyComparator& icomparator,
|
||||||
HistogramImpl* file_read_hist, bool for_compaction,
|
HistogramImpl* file_read_hist, bool for_compaction,
|
||||||
bool prefix_enabled, bool skip_filters, int level)
|
bool prefix_enabled, bool skip_filters)
|
||||||
: TwoLevelIteratorState(prefix_enabled),
|
: TwoLevelIteratorState(prefix_enabled),
|
||||||
table_cache_(table_cache),
|
table_cache_(table_cache),
|
||||||
read_options_(read_options),
|
read_options_(read_options),
|
||||||
|
@ -498,8 +493,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState {
|
||||||
icomparator_(icomparator),
|
icomparator_(icomparator),
|
||||||
file_read_hist_(file_read_hist),
|
file_read_hist_(file_read_hist),
|
||||||
for_compaction_(for_compaction),
|
for_compaction_(for_compaction),
|
||||||
skip_filters_(skip_filters),
|
skip_filters_(skip_filters) {}
|
||||||
level_(level) {}
|
|
||||||
|
|
||||||
InternalIterator* NewSecondaryIterator(const Slice& meta_handle) override {
|
InternalIterator* NewSecondaryIterator(const Slice& meta_handle) override {
|
||||||
if (meta_handle.size() != sizeof(FileDescriptor)) {
|
if (meta_handle.size() != sizeof(FileDescriptor)) {
|
||||||
|
@ -511,7 +505,7 @@ class LevelFileIteratorState : public TwoLevelIteratorState {
|
||||||
return table_cache_->NewIterator(
|
return table_cache_->NewIterator(
|
||||||
read_options_, env_options_, icomparator_, *fd,
|
read_options_, env_options_, icomparator_, *fd,
|
||||||
nullptr /* don't need reference to table*/, file_read_hist_,
|
nullptr /* don't need reference to table*/, file_read_hist_,
|
||||||
for_compaction_, nullptr /* arena */, skip_filters_, level_);
|
for_compaction_, nullptr /* arena */, skip_filters_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -527,7 +521,6 @@ class LevelFileIteratorState : public TwoLevelIteratorState {
|
||||||
HistogramImpl* file_read_hist_;
|
HistogramImpl* file_read_hist_;
|
||||||
bool for_compaction_;
|
bool for_compaction_;
|
||||||
bool skip_filters_;
|
bool skip_filters_;
|
||||||
int level_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// A wrapper of version builder which references the current version in
|
// A wrapper of version builder which references the current version in
|
||||||
|
@ -795,8 +788,7 @@ void Version::AddIterators(const ReadOptions& read_options,
|
||||||
const auto& file = storage_info_.LevelFilesBrief(0).files[i];
|
const auto& file = storage_info_.LevelFilesBrief(0).files[i];
|
||||||
merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator(
|
merge_iter_builder->AddIterator(cfd_->table_cache()->NewIterator(
|
||||||
read_options, soptions, cfd_->internal_comparator(), file.fd, nullptr,
|
read_options, soptions, cfd_->internal_comparator(), file.fd, nullptr,
|
||||||
cfd_->internal_stats()->GetFileReadHist(0), false, arena,
|
cfd_->internal_stats()->GetFileReadHist(0), false, arena));
|
||||||
false /* skip_filters */, 0 /* level */));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// For levels > 0, we can use a concatenating iterator that sequentially
|
// For levels > 0, we can use a concatenating iterator that sequentially
|
||||||
|
@ -811,7 +803,7 @@ void Version::AddIterators(const ReadOptions& read_options,
|
||||||
cfd_->internal_stats()->GetFileReadHist(level),
|
cfd_->internal_stats()->GetFileReadHist(level),
|
||||||
false /* for_compaction */,
|
false /* for_compaction */,
|
||||||
cfd_->ioptions()->prefix_extractor != nullptr,
|
cfd_->ioptions()->prefix_extractor != nullptr,
|
||||||
IsFilterSkipped(level), level);
|
IsFilterSkipped(level));
|
||||||
mem = arena->AllocateAligned(sizeof(LevelFileNumIterator));
|
mem = arena->AllocateAligned(sizeof(LevelFileNumIterator));
|
||||||
auto* first_level_iter = new (mem) LevelFileNumIterator(
|
auto* first_level_iter = new (mem) LevelFileNumIterator(
|
||||||
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level));
|
cfd_->internal_comparator(), &storage_info_.LevelFilesBrief(level));
|
||||||
|
@ -916,8 +908,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
||||||
read_options, *internal_comparator(), f->fd, ikey, &get_context,
|
read_options, *internal_comparator(), f->fd, ikey, &get_context,
|
||||||
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
||||||
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
|
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
|
||||||
fp.IsHitFileLastInLevel()),
|
fp.IsHitFileLastInLevel()));
|
||||||
fp.GetCurrentLevel());
|
|
||||||
// TODO: examine the behavior for corrupted key
|
// TODO: examine the behavior for corrupted key
|
||||||
if (!status->ok()) {
|
if (!status->ok()) {
|
||||||
return;
|
return;
|
||||||
|
@ -2063,16 +2054,9 @@ VersionSet::VersionSet(const std::string& dbname, const DBOptions* db_options,
|
||||||
env_options_(storage_options),
|
env_options_(storage_options),
|
||||||
env_options_compactions_(env_options_) {}
|
env_options_compactions_(env_options_) {}
|
||||||
|
|
||||||
void CloseTables(void* ptr, size_t) {
|
|
||||||
TableReader* table_reader = reinterpret_cast<TableReader*>(ptr);
|
|
||||||
table_reader->Close();
|
|
||||||
}
|
|
||||||
|
|
||||||
VersionSet::~VersionSet() {
|
VersionSet::~VersionSet() {
|
||||||
// we need to delete column_family_set_ because its destructor depends on
|
// we need to delete column_family_set_ because its destructor depends on
|
||||||
// VersionSet
|
// VersionSet
|
||||||
column_family_set_->get_table_cache()->ApplyToAllCacheEntries(&CloseTables,
|
|
||||||
false);
|
|
||||||
column_family_set_.reset();
|
column_family_set_.reset();
|
||||||
for (auto file : obsolete_files_) {
|
for (auto file : obsolete_files_) {
|
||||||
delete file;
|
delete file;
|
||||||
|
@ -3283,8 +3267,7 @@ InternalIterator* VersionSet::MakeInputIterator(Compaction* c) {
|
||||||
read_options, env_options_compactions_,
|
read_options, env_options_compactions_,
|
||||||
cfd->internal_comparator(), flevel->files[i].fd, nullptr,
|
cfd->internal_comparator(), flevel->files[i].fd, nullptr,
|
||||||
nullptr, /* no per level latency histogram*/
|
nullptr, /* no per level latency histogram*/
|
||||||
true /* for_compaction */, nullptr /* arena */,
|
true /* for compaction */);
|
||||||
false /* skip_filters */, (int)which /* level */);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Create concatenating iterator for the files from this level
|
// Create concatenating iterator for the files from this level
|
||||||
|
@ -3294,7 +3277,7 @@ InternalIterator* VersionSet::MakeInputIterator(Compaction* c) {
|
||||||
cfd->internal_comparator(),
|
cfd->internal_comparator(),
|
||||||
nullptr /* no per level latency histogram */,
|
nullptr /* no per level latency histogram */,
|
||||||
true /* for_compaction */, false /* prefix enabled */,
|
true /* for_compaction */, false /* prefix enabled */,
|
||||||
false /* skip_filters */, (int)which /* level */),
|
false /* skip_filters */),
|
||||||
new LevelFileNumIterator(cfd->internal_comparator(),
|
new LevelFileNumIterator(cfd->internal_comparator(),
|
||||||
c->input_levels(which)));
|
c->input_levels(which)));
|
||||||
}
|
}
|
||||||
|
|
|
@ -138,7 +138,6 @@
|
||||||
block_size=8192
|
block_size=8192
|
||||||
block_restart_interval=16
|
block_restart_interval=16
|
||||||
cache_index_and_filter_blocks=false
|
cache_index_and_filter_blocks=false
|
||||||
pin_l0_filter_and_index_blocks_in_cache=false
|
|
||||||
index_type=kBinarySearch
|
index_type=kBinarySearch
|
||||||
hash_index_allow_collision=true
|
hash_index_allow_collision=true
|
||||||
flush_block_policy_factory=FlushBlockBySizePolicyFactory
|
flush_block_policy_factory=FlushBlockBySizePolicyFactory
|
||||||
|
|
|
@ -451,9 +451,6 @@ extern ROCKSDB_LIBRARY_API void
|
||||||
rocksdb_block_based_options_set_cache_index_and_filter_blocks(
|
rocksdb_block_based_options_set_cache_index_and_filter_blocks(
|
||||||
rocksdb_block_based_table_options_t*, unsigned char);
|
rocksdb_block_based_table_options_t*, unsigned char);
|
||||||
extern ROCKSDB_LIBRARY_API void
|
extern ROCKSDB_LIBRARY_API void
|
||||||
rocksdb_block_based_options_set_pin_l0_filter_and_index_blocks_in_cache(
|
|
||||||
rocksdb_block_based_table_options_t*, unsigned char);
|
|
||||||
extern ROCKSDB_LIBRARY_API void
|
|
||||||
rocksdb_block_based_options_set_skip_table_builder_flush(
|
rocksdb_block_based_options_set_skip_table_builder_flush(
|
||||||
rocksdb_block_based_table_options_t* options, unsigned char);
|
rocksdb_block_based_table_options_t* options, unsigned char);
|
||||||
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_block_based_table_factory(
|
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_block_based_table_factory(
|
||||||
|
|
|
@ -64,12 +64,6 @@ struct BlockBasedTableOptions {
|
||||||
// block during table initialization.
|
// block during table initialization.
|
||||||
bool cache_index_and_filter_blocks = false;
|
bool cache_index_and_filter_blocks = false;
|
||||||
|
|
||||||
// if cache_index_and_filter_blocks is true and the below is true, then
|
|
||||||
// filter and index blocks are stored in the cache, but a reference is
|
|
||||||
// held in the "table reader" object so the blocks are pinned and only
|
|
||||||
// evicted from cache when the table reader is freed.
|
|
||||||
bool pin_l0_filter_and_index_blocks_in_cache = false;
|
|
||||||
|
|
||||||
// The index type that will be used for this table.
|
// The index type that will be used for this table.
|
||||||
enum IndexType : char {
|
enum IndexType : char {
|
||||||
// A space efficient index block that is optimized for
|
// A space efficient index block that is optimized for
|
||||||
|
|
|
@ -38,14 +38,13 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle(
|
||||||
/*
|
/*
|
||||||
* Class: org_rocksdb_BlockBasedTableConfig
|
* Class: org_rocksdb_BlockBasedTableConfig
|
||||||
* Method: newTableFactoryHandle
|
* Method: newTableFactoryHandle
|
||||||
* Signature: (ZJIJIIZIZZZJIBBI)J
|
* Signature: (ZJIJIIZIZZJIBBI)J
|
||||||
*/
|
*/
|
||||||
jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle(
|
jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle(
|
||||||
JNIEnv* env, jobject jobj, jboolean no_block_cache, jlong block_cache_size,
|
JNIEnv* env, jobject jobj, jboolean no_block_cache, jlong block_cache_size,
|
||||||
jint block_cache_num_shardbits, jlong block_size, jint block_size_deviation,
|
jint block_cache_num_shardbits, jlong block_size, jint block_size_deviation,
|
||||||
jint block_restart_interval, jboolean whole_key_filtering,
|
jint block_restart_interval, jboolean whole_key_filtering,
|
||||||
jlong jfilterPolicy, jboolean cache_index_and_filter_blocks,
|
jlong jfilterPolicy, jboolean cache_index_and_filter_blocks,
|
||||||
jboolean pin_l0_filter_and_index_blocks_in_cache,
|
|
||||||
jboolean hash_index_allow_collision, jlong block_cache_compressed_size,
|
jboolean hash_index_allow_collision, jlong block_cache_compressed_size,
|
||||||
jint block_cache_compressd_num_shard_bits, jbyte jchecksum_type,
|
jint block_cache_compressd_num_shard_bits, jbyte jchecksum_type,
|
||||||
jbyte jindex_type, jint jformat_version) {
|
jbyte jindex_type, jint jformat_version) {
|
||||||
|
@ -71,8 +70,6 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle(
|
||||||
options.filter_policy = *pFilterPolicy;
|
options.filter_policy = *pFilterPolicy;
|
||||||
}
|
}
|
||||||
options.cache_index_and_filter_blocks = cache_index_and_filter_blocks;
|
options.cache_index_and_filter_blocks = cache_index_and_filter_blocks;
|
||||||
options.pin_l0_filter_and_index_blocks_in_cache =
|
|
||||||
pin_l0_filter_and_index_blocks_in_cache;
|
|
||||||
options.hash_index_allow_collision = hash_index_allow_collision;
|
options.hash_index_allow_collision = hash_index_allow_collision;
|
||||||
if (block_cache_compressed_size > 0) {
|
if (block_cache_compressed_size > 0) {
|
||||||
if (block_cache_compressd_num_shard_bits > 0) {
|
if (block_cache_compressd_num_shard_bits > 0) {
|
||||||
|
|
|
@ -64,7 +64,7 @@ Status BlockBasedTableFactory::NewTableReader(
|
||||||
table_reader_options.ioptions, table_reader_options.env_options,
|
table_reader_options.ioptions, table_reader_options.env_options,
|
||||||
table_options_, table_reader_options.internal_comparator, std::move(file),
|
table_options_, table_reader_options.internal_comparator, std::move(file),
|
||||||
file_size, table_reader, prefetch_enabled,
|
file_size, table_reader, prefetch_enabled,
|
||||||
table_reader_options.skip_filters, table_reader_options.level);
|
table_reader_options.skip_filters);
|
||||||
}
|
}
|
||||||
|
|
||||||
TableBuilder* BlockBasedTableFactory::NewTableBuilder(
|
TableBuilder* BlockBasedTableFactory::NewTableBuilder(
|
||||||
|
@ -94,12 +94,6 @@ Status BlockBasedTableFactory::SanitizeOptions(
|
||||||
return Status::InvalidArgument("Enable cache_index_and_filter_blocks, "
|
return Status::InvalidArgument("Enable cache_index_and_filter_blocks, "
|
||||||
", but block cache is disabled");
|
", but block cache is disabled");
|
||||||
}
|
}
|
||||||
if (table_options_.pin_l0_filter_and_index_blocks_in_cache &&
|
|
||||||
table_options_.no_block_cache) {
|
|
||||||
return Status::InvalidArgument(
|
|
||||||
"Enable pin_l0_filter_and_index_blocks_in_cache, "
|
|
||||||
", but block cache is disabled");
|
|
||||||
}
|
|
||||||
if (!BlockBasedTableSupportedVersion(table_options_.format_version)) {
|
if (!BlockBasedTableSupportedVersion(table_options_.format_version)) {
|
||||||
return Status::InvalidArgument(
|
return Status::InvalidArgument(
|
||||||
"Unsupported BlockBasedTable format_version. Please check "
|
"Unsupported BlockBasedTable format_version. Please check "
|
||||||
|
@ -121,10 +115,6 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const {
|
||||||
snprintf(buffer, kBufferSize, " cache_index_and_filter_blocks: %d\n",
|
snprintf(buffer, kBufferSize, " cache_index_and_filter_blocks: %d\n",
|
||||||
table_options_.cache_index_and_filter_blocks);
|
table_options_.cache_index_and_filter_blocks);
|
||||||
ret.append(buffer);
|
ret.append(buffer);
|
||||||
snprintf(buffer, kBufferSize,
|
|
||||||
" pin_l0_filter_and_index_blocks_in_cache: %d\n",
|
|
||||||
table_options_.pin_l0_filter_and_index_blocks_in_cache);
|
|
||||||
ret.append(buffer);
|
|
||||||
snprintf(buffer, kBufferSize, " index_type: %d\n",
|
snprintf(buffer, kBufferSize, " index_type: %d\n",
|
||||||
table_options_.index_type);
|
table_options_.index_type);
|
||||||
ret.append(buffer);
|
ret.append(buffer);
|
||||||
|
|
|
@ -340,28 +340,6 @@ class HashIndexReader : public IndexReader {
|
||||||
BlockContents prefixes_contents_;
|
BlockContents prefixes_contents_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// CachableEntry represents the entries that *may* be fetched from block cache.
|
|
||||||
// field `value` is the item we want to get.
|
|
||||||
// field `cache_handle` is the cache handle to the block cache. If the value
|
|
||||||
// was not read from cache, `cache_handle` will be nullptr.
|
|
||||||
template <class TValue>
|
|
||||||
struct BlockBasedTable::CachableEntry {
|
|
||||||
CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
|
|
||||||
: value(_value), cache_handle(_cache_handle) {}
|
|
||||||
CachableEntry() : CachableEntry(nullptr, nullptr) {}
|
|
||||||
void Release(Cache* cache) {
|
|
||||||
if (cache_handle) {
|
|
||||||
cache->Release(cache_handle);
|
|
||||||
value = nullptr;
|
|
||||||
cache_handle = nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bool IsSet() const { return cache_handle != nullptr; }
|
|
||||||
|
|
||||||
TValue* value = nullptr;
|
|
||||||
// if the entry is from the cache, cache_handle will be populated.
|
|
||||||
Cache::Handle* cache_handle = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct BlockBasedTable::Rep {
|
struct BlockBasedTable::Rep {
|
||||||
Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options,
|
Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options,
|
||||||
|
@ -416,21 +394,34 @@ struct BlockBasedTable::Rep {
|
||||||
// and compatible with existing code, we introduce a wrapper that allows
|
// and compatible with existing code, we introduce a wrapper that allows
|
||||||
// block to extract prefix without knowing if a key is internal or not.
|
// block to extract prefix without knowing if a key is internal or not.
|
||||||
unique_ptr<SliceTransform> internal_prefix_transform;
|
unique_ptr<SliceTransform> internal_prefix_transform;
|
||||||
|
|
||||||
// only used in level 0 files:
|
|
||||||
// when pin_l0_filter_and_index_blocks_in_cache is true, we do use the
|
|
||||||
// LRU cache, but we always keep the filter & idndex block's handle checked
|
|
||||||
// out here (=we don't call Release()), plus the parsed out objects
|
|
||||||
// the LRU cache will never push flush them out, hence they're pinned
|
|
||||||
CachableEntry<FilterBlockReader> filter_entry;
|
|
||||||
CachableEntry<IndexReader> index_entry;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
BlockBasedTable::~BlockBasedTable() {
|
BlockBasedTable::~BlockBasedTable() {
|
||||||
Close();
|
|
||||||
delete rep_;
|
delete rep_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CachableEntry represents the entries that *may* be fetched from block cache.
|
||||||
|
// field `value` is the item we want to get.
|
||||||
|
// field `cache_handle` is the cache handle to the block cache. If the value
|
||||||
|
// was not read from cache, `cache_handle` will be nullptr.
|
||||||
|
template <class TValue>
|
||||||
|
struct BlockBasedTable::CachableEntry {
|
||||||
|
CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
|
||||||
|
: value(_value), cache_handle(_cache_handle) {}
|
||||||
|
CachableEntry() : CachableEntry(nullptr, nullptr) {}
|
||||||
|
void Release(Cache* cache) {
|
||||||
|
if (cache_handle) {
|
||||||
|
cache->Release(cache_handle);
|
||||||
|
value = nullptr;
|
||||||
|
cache_handle = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TValue* value = nullptr;
|
||||||
|
// if the entry is from the cache, cache_handle will be populated.
|
||||||
|
Cache::Handle* cache_handle = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
// Helper function to setup the cache key's prefix for the Table.
|
// Helper function to setup the cache key's prefix for the Table.
|
||||||
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) {
|
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) {
|
||||||
assert(kMaxCacheKeyPrefixSize >= 10);
|
assert(kMaxCacheKeyPrefixSize >= 10);
|
||||||
|
@ -507,7 +498,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
||||||
uint64_t file_size,
|
uint64_t file_size,
|
||||||
unique_ptr<TableReader>* table_reader,
|
unique_ptr<TableReader>* table_reader,
|
||||||
const bool prefetch_index_and_filter,
|
const bool prefetch_index_and_filter,
|
||||||
const bool skip_filters, const int level) {
|
const bool skip_filters) {
|
||||||
table_reader->reset();
|
table_reader->reset();
|
||||||
|
|
||||||
Footer footer;
|
Footer footer;
|
||||||
|
@ -603,33 +594,14 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
||||||
assert(table_options.block_cache != nullptr);
|
assert(table_options.block_cache != nullptr);
|
||||||
// Hack: Call NewIndexIterator() to implicitly add index to the
|
// Hack: Call NewIndexIterator() to implicitly add index to the
|
||||||
// block_cache
|
// block_cache
|
||||||
|
|
||||||
// if pin_l0_filter_and_index_blocks_in_cache is true and this is
|
|
||||||
// a level0 file, then we will pass in this pointer to rep->index
|
|
||||||
// to NewIndexIterator(), which will save the index block in there
|
|
||||||
// else it's a nullptr and nothing special happens
|
|
||||||
CachableEntry<IndexReader>* index_entry = nullptr;
|
|
||||||
if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache &&
|
|
||||||
level == 0) {
|
|
||||||
index_entry = &rep->index_entry;
|
|
||||||
}
|
|
||||||
unique_ptr<InternalIterator> iter(
|
unique_ptr<InternalIterator> iter(
|
||||||
new_table->NewIndexIterator(ReadOptions(), nullptr, index_entry));
|
new_table->NewIndexIterator(ReadOptions()));
|
||||||
s = iter->status();
|
s = iter->status();
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
// Hack: Call GetFilter() to implicitly add filter to the block_cache
|
// Hack: Call GetFilter() to implicitly add filter to the block_cache
|
||||||
auto filter_entry = new_table->GetFilter();
|
auto filter_entry = new_table->GetFilter();
|
||||||
// if pin_l0_filter_and_index_blocks_in_cache is true, and this is
|
filter_entry.Release(table_options.block_cache.get());
|
||||||
// a level0 file, then save it in rep_->filter_entry; it will be
|
|
||||||
// released in the destructor only, hence it will be pinned in the
|
|
||||||
// cache until this reader is alive
|
|
||||||
if (rep->table_options.pin_l0_filter_and_index_blocks_in_cache &&
|
|
||||||
level == 0) {
|
|
||||||
rep->filter_entry = filter_entry;
|
|
||||||
} else {
|
|
||||||
filter_entry.Release(table_options.block_cache.get());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// If we don't use block cache for index/filter blocks access, we'll
|
// If we don't use block cache for index/filter blocks access, we'll
|
||||||
|
@ -914,11 +886,6 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||||
return {rep_->filter.get(), nullptr /* cache handle */};
|
return {rep_->filter.get(), nullptr /* cache handle */};
|
||||||
}
|
}
|
||||||
|
|
||||||
// we have a pinned filter block
|
|
||||||
if (rep_->filter_entry.IsSet()) {
|
|
||||||
return rep_->filter_entry;
|
|
||||||
}
|
|
||||||
|
|
||||||
PERF_TIMER_GUARD(read_filter_block_nanos);
|
PERF_TIMER_GUARD(read_filter_block_nanos);
|
||||||
|
|
||||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||||
|
@ -968,19 +935,12 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||||
}
|
}
|
||||||
|
|
||||||
InternalIterator* BlockBasedTable::NewIndexIterator(
|
InternalIterator* BlockBasedTable::NewIndexIterator(
|
||||||
const ReadOptions& read_options, BlockIter* input_iter,
|
const ReadOptions& read_options, BlockIter* input_iter) {
|
||||||
CachableEntry<IndexReader>* index_entry) {
|
|
||||||
// index reader has already been pre-populated.
|
// index reader has already been pre-populated.
|
||||||
if (rep_->index_reader) {
|
if (rep_->index_reader) {
|
||||||
return rep_->index_reader->NewIterator(
|
return rep_->index_reader->NewIterator(
|
||||||
input_iter, read_options.total_order_seek);
|
input_iter, read_options.total_order_seek);
|
||||||
}
|
}
|
||||||
// we have a pinned index block
|
|
||||||
if (rep_->index_entry.IsSet()) {
|
|
||||||
return rep_->index_entry.value->NewIterator(input_iter,
|
|
||||||
read_options.total_order_seek);
|
|
||||||
}
|
|
||||||
|
|
||||||
PERF_TIMER_GUARD(read_index_block_nanos);
|
PERF_TIMER_GUARD(read_index_block_nanos);
|
||||||
|
|
||||||
bool no_io = read_options.read_tier == kBlockCacheTier;
|
bool no_io = read_options.read_tier == kBlockCacheTier;
|
||||||
|
@ -1036,15 +996,7 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
|
||||||
assert(cache_handle);
|
assert(cache_handle);
|
||||||
auto* iter = index_reader->NewIterator(
|
auto* iter = index_reader->NewIterator(
|
||||||
input_iter, read_options.total_order_seek);
|
input_iter, read_options.total_order_seek);
|
||||||
|
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle);
|
||||||
// the caller would like to take ownership of the index block
|
|
||||||
// don't call RegisterCleanup() in this case, the caller will take care of it
|
|
||||||
if (index_entry != nullptr) {
|
|
||||||
*index_entry = {index_reader, cache_handle};
|
|
||||||
} else {
|
|
||||||
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
return iter;
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1272,13 +1224,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
|
||||||
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
|
RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if rep_->filter_entry is not set, we should call Release(); otherwise
|
filter_entry.Release(rep_->table_options.block_cache.get());
|
||||||
// don't call, in this case we have a local copy in rep_->filter_entry,
|
|
||||||
// it's pinned to the cache and will be released in the destructor
|
|
||||||
if (!rep_->filter_entry.IsSet()) {
|
|
||||||
filter_entry.Release(rep_->table_options.block_cache.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
return may_match;
|
return may_match;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1378,12 +1324,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if rep_->filter_entry is not set, we should call Release(); otherwise
|
filter_entry.Release(rep_->table_options.block_cache.get());
|
||||||
// don't call, in this case we have a local copy in rep_->filter_entry,
|
|
||||||
// it's pinned to the cache and will be released in the destructor
|
|
||||||
if (!rep_->filter_entry.IsSet()) {
|
|
||||||
filter_entry.Release(rep_->table_options.block_cache.get());
|
|
||||||
}
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1671,11 +1612,6 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlockBasedTable::Close() {
|
|
||||||
rep_->filter_entry.Release(rep_->table_options.block_cache.get());
|
|
||||||
rep_->index_entry.Release(rep_->table_options.block_cache.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) {
|
Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) {
|
||||||
out_file->Append(
|
out_file->Append(
|
||||||
"Index Details:\n"
|
"Index Details:\n"
|
||||||
|
|
|
@ -76,7 +76,7 @@ class BlockBasedTable : public TableReader {
|
||||||
unique_ptr<RandomAccessFileReader>&& file,
|
unique_ptr<RandomAccessFileReader>&& file,
|
||||||
uint64_t file_size, unique_ptr<TableReader>* table_reader,
|
uint64_t file_size, unique_ptr<TableReader>* table_reader,
|
||||||
bool prefetch_index_and_filter = true,
|
bool prefetch_index_and_filter = true,
|
||||||
bool skip_filters = false, int level = -1);
|
bool skip_filters = false);
|
||||||
|
|
||||||
bool PrefixMayMatch(const Slice& internal_key);
|
bool PrefixMayMatch(const Slice& internal_key);
|
||||||
|
|
||||||
|
@ -119,8 +119,6 @@ class BlockBasedTable : public TableReader {
|
||||||
// convert SST file to a human readable form
|
// convert SST file to a human readable form
|
||||||
Status DumpTable(WritableFile* out_file) override;
|
Status DumpTable(WritableFile* out_file) override;
|
||||||
|
|
||||||
void Close() override;
|
|
||||||
|
|
||||||
~BlockBasedTable();
|
~BlockBasedTable();
|
||||||
|
|
||||||
bool TEST_filter_block_preloaded() const;
|
bool TEST_filter_block_preloaded() const;
|
||||||
|
@ -157,9 +155,8 @@ class BlockBasedTable : public TableReader {
|
||||||
// 2. index is not present in block cache.
|
// 2. index is not present in block cache.
|
||||||
// 3. We disallowed any io to be performed, that is, read_options ==
|
// 3. We disallowed any io to be performed, that is, read_options ==
|
||||||
// kBlockCacheTier
|
// kBlockCacheTier
|
||||||
InternalIterator* NewIndexIterator(
|
InternalIterator* NewIndexIterator(const ReadOptions& read_options,
|
||||||
const ReadOptions& read_options, BlockIter* input_iter = nullptr,
|
BlockIter* input_iter = nullptr);
|
||||||
CachableEntry<IndexReader>* index_entry = nullptr);
|
|
||||||
|
|
||||||
// Read block cache from block caches (if set): block_cache and
|
// Read block cache from block caches (if set): block_cache and
|
||||||
// block_cache_compressed.
|
// block_cache_compressed.
|
||||||
|
|
|
@ -29,20 +29,17 @@ struct TableReaderOptions {
|
||||||
TableReaderOptions(const ImmutableCFOptions& _ioptions,
|
TableReaderOptions(const ImmutableCFOptions& _ioptions,
|
||||||
const EnvOptions& _env_options,
|
const EnvOptions& _env_options,
|
||||||
const InternalKeyComparator& _internal_comparator,
|
const InternalKeyComparator& _internal_comparator,
|
||||||
bool _skip_filters = false, int _level = -1)
|
bool _skip_filters = false)
|
||||||
: ioptions(_ioptions),
|
: ioptions(_ioptions),
|
||||||
env_options(_env_options),
|
env_options(_env_options),
|
||||||
internal_comparator(_internal_comparator),
|
internal_comparator(_internal_comparator),
|
||||||
skip_filters(_skip_filters),
|
skip_filters(_skip_filters) {}
|
||||||
level(_level) {}
|
|
||||||
|
|
||||||
const ImmutableCFOptions& ioptions;
|
const ImmutableCFOptions& ioptions;
|
||||||
const EnvOptions& env_options;
|
const EnvOptions& env_options;
|
||||||
const InternalKeyComparator& internal_comparator;
|
const InternalKeyComparator& internal_comparator;
|
||||||
// This is only used for BlockBasedTable (reader)
|
// This is only used for BlockBasedTable (reader)
|
||||||
bool skip_filters;
|
bool skip_filters;
|
||||||
// what level this table/file is on, -1 for "not set, don't know"
|
|
||||||
int level;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TableBuilderOptions {
|
struct TableBuilderOptions {
|
||||||
|
|
|
@ -91,8 +91,6 @@ class TableReader {
|
||||||
virtual Status DumpTable(WritableFile* out_file) {
|
virtual Status DumpTable(WritableFile* out_file) {
|
||||||
return Status::NotSupported("DumpTable() not supported");
|
return Status::NotSupported("DumpTable() not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void Close() {}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
|
@ -1715,7 +1715,7 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) {
|
||||||
ImmutableCFOptions ioptions3(options);
|
ImmutableCFOptions ioptions3(options);
|
||||||
// Generate table without filter policy
|
// Generate table without filter policy
|
||||||
c3.Finish(options, ioptions3, table_options,
|
c3.Finish(options, ioptions3, table_options,
|
||||||
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
|
GetPlainInternalComparator(options.comparator), &keys, &kvmap);
|
||||||
// Open table with filter policy
|
// Open table with filter policy
|
||||||
table_options.filter_policy.reset(NewBloomFilterPolicy(1));
|
table_options.filter_policy.reset(NewBloomFilterPolicy(1));
|
||||||
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
||||||
|
|
|
@ -74,7 +74,6 @@ const_params="
|
||||||
--level_compaction_dynamic_level_bytes=true \
|
--level_compaction_dynamic_level_bytes=true \
|
||||||
--bytes_per_sync=$((8 * M)) \
|
--bytes_per_sync=$((8 * M)) \
|
||||||
--cache_index_and_filter_blocks=0 \
|
--cache_index_and_filter_blocks=0 \
|
||||||
--pin_l0_filter_and_index_blocks_in_cache=1 \
|
|
||||||
--benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \
|
--benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \
|
||||||
\
|
\
|
||||||
--hard_rate_limit=3 \
|
--hard_rate_limit=3 \
|
||||||
|
|
|
@ -340,9 +340,6 @@ DEFINE_int64(cache_size, -1, "Number of bytes to use as a cache of uncompressed"
|
||||||
DEFINE_bool(cache_index_and_filter_blocks, false,
|
DEFINE_bool(cache_index_and_filter_blocks, false,
|
||||||
"Cache index/filter blocks in block cache.");
|
"Cache index/filter blocks in block cache.");
|
||||||
|
|
||||||
DEFINE_bool(pin_l0_filter_and_index_blocks_in_cache, false,
|
|
||||||
"Pin index/filter blocks of L0 files in block cache.");
|
|
||||||
|
|
||||||
DEFINE_int32(block_size,
|
DEFINE_int32(block_size,
|
||||||
static_cast<int32_t>(rocksdb::BlockBasedTableOptions().block_size),
|
static_cast<int32_t>(rocksdb::BlockBasedTableOptions().block_size),
|
||||||
"Number of bytes in a block.");
|
"Number of bytes in a block.");
|
||||||
|
@ -2514,8 +2511,6 @@ class Benchmark {
|
||||||
}
|
}
|
||||||
block_based_options.cache_index_and_filter_blocks =
|
block_based_options.cache_index_and_filter_blocks =
|
||||||
FLAGS_cache_index_and_filter_blocks;
|
FLAGS_cache_index_and_filter_blocks;
|
||||||
block_based_options.pin_l0_filter_and_index_blocks_in_cache =
|
|
||||||
FLAGS_pin_l0_filter_and_index_blocks_in_cache;
|
|
||||||
block_based_options.block_cache = cache_;
|
block_based_options.block_cache = cache_;
|
||||||
block_based_options.block_cache_compressed = compressed_cache_;
|
block_based_options.block_cache_compressed = compressed_cache_;
|
||||||
block_based_options.block_size = FLAGS_block_size;
|
block_based_options.block_size = FLAGS_block_size;
|
||||||
|
|
|
@ -491,10 +491,6 @@ static std::unordered_map<std::string,
|
||||||
{"cache_index_and_filter_blocks",
|
{"cache_index_and_filter_blocks",
|
||||||
{offsetof(struct BlockBasedTableOptions, cache_index_and_filter_blocks),
|
{offsetof(struct BlockBasedTableOptions, cache_index_and_filter_blocks),
|
||||||
OptionType::kBoolean, OptionVerificationType::kNormal}},
|
OptionType::kBoolean, OptionVerificationType::kNormal}},
|
||||||
{"pin_l0_filter_and_index_blocks_in_cache",
|
|
||||||
{offsetof(struct BlockBasedTableOptions,
|
|
||||||
pin_l0_filter_and_index_blocks_in_cache),
|
|
||||||
OptionType::kBoolean, OptionVerificationType::kNormal}},
|
|
||||||
{"index_type",
|
{"index_type",
|
||||||
{offsetof(struct BlockBasedTableOptions, index_type),
|
{offsetof(struct BlockBasedTableOptions, index_type),
|
||||||
OptionType::kBlockBasedTableIndexType, OptionVerificationType::kNormal}},
|
OptionType::kBlockBasedTableIndexType, OptionVerificationType::kNormal}},
|
||||||
|
|
|
@ -1582,9 +1582,7 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) {
|
||||||
// Need to update the option string if a new option is added.
|
// Need to update the option string if a new option is added.
|
||||||
ASSERT_OK(GetBlockBasedTableOptionsFromString(
|
ASSERT_OK(GetBlockBasedTableOptionsFromString(
|
||||||
*bbto,
|
*bbto,
|
||||||
"cache_index_and_filter_blocks=1;"
|
"cache_index_and_filter_blocks=1;index_type=kHashSearch;"
|
||||||
"pin_l0_filter_and_index_blocks_in_cache=1;"
|
|
||||||
"index_type=kHashSearch;"
|
|
||||||
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
|
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
|
||||||
"block_cache=1M;block_cache_compressed=1k;block_size=1024;"
|
"block_cache=1M;block_cache_compressed=1k;block_size=1024;"
|
||||||
"block_size_deviation=8;block_restart_interval=4; "
|
"block_size_deviation=8;block_restart_interval=4; "
|
||||||
|
|
|
@ -193,7 +193,6 @@ const SliceTransform* RandomSliceTransform(Random* rnd, int pre_defined) {
|
||||||
BlockBasedTableOptions RandomBlockBasedTableOptions(Random* rnd) {
|
BlockBasedTableOptions RandomBlockBasedTableOptions(Random* rnd) {
|
||||||
BlockBasedTableOptions opt;
|
BlockBasedTableOptions opt;
|
||||||
opt.cache_index_and_filter_blocks = rnd->Uniform(2);
|
opt.cache_index_and_filter_blocks = rnd->Uniform(2);
|
||||||
opt.pin_l0_filter_and_index_blocks_in_cache = rnd->Uniform(2);
|
|
||||||
opt.index_type = rnd->Uniform(2) ? BlockBasedTableOptions::kBinarySearch
|
opt.index_type = rnd->Uniform(2) ? BlockBasedTableOptions::kBinarySearch
|
||||||
: BlockBasedTableOptions::kHashSearch;
|
: BlockBasedTableOptions::kHashSearch;
|
||||||
opt.hash_index_allow_collision = rnd->Uniform(2);
|
opt.hash_index_allow_collision = rnd->Uniform(2);
|
||||||
|
|
Loading…
Reference in New Issue