mirror of https://github.com/facebook/rocksdb.git
Index Reader should not be reused after DB restart
Summary: In block based table reader, wow we put index reader to block cache, which can be retrieved after DB restart. However, index reader may reference internal comparator, which can be destroyed after DB restarts, causing problems. Fix it by making cache key identical per table reader. Test Plan: Add a new test which failed with out the commit but now pass. Reviewers: IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: maro, yhchiang, kradhakrishnan, leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55287
This commit is contained in:
parent
0267655dad
commit
b2ae5950ba
|
@ -58,6 +58,25 @@ TEST_F(DBTest2, IteratorPropertyVersionNumber) {
|
|||
delete iter2;
|
||||
delete iter3;
|
||||
}
|
||||
|
||||
TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) {
|
||||
Options options = CurrentOptions();
|
||||
options.create_if_missing = true;
|
||||
options.statistics = rocksdb::CreateDBStatistics();
|
||||
BlockBasedTableOptions table_options;
|
||||
table_options.cache_index_and_filter_blocks = true;
|
||||
table_options.filter_policy.reset(NewBloomFilterPolicy(20));
|
||||
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
||||
CreateAndReopenWithCF({"pikachu"}, options);
|
||||
|
||||
Put(1, "a", "begin");
|
||||
Put(1, "z", "end");
|
||||
ASSERT_OK(Flush(1));
|
||||
TryReopenWithColumnFamilies({"default", "pikachu"}, options);
|
||||
|
||||
std::string value;
|
||||
value = Get(1, "a");
|
||||
}
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
|
|
@ -98,17 +98,23 @@ void ReleaseCachedEntry(void* arg, void* h) {
|
|||
cache->Release(handle);
|
||||
}
|
||||
|
||||
Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size,
|
||||
const BlockHandle& handle, char* cache_key) {
|
||||
Slice GetCacheKeyFromOffset(const char* cache_key_prefix,
|
||||
size_t cache_key_prefix_size, uint64_t offset,
|
||||
char* cache_key) {
|
||||
assert(cache_key != nullptr);
|
||||
assert(cache_key_prefix_size != 0);
|
||||
assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize);
|
||||
memcpy(cache_key, cache_key_prefix, cache_key_prefix_size);
|
||||
char* end =
|
||||
EncodeVarint64(cache_key + cache_key_prefix_size, handle.offset());
|
||||
char* end = EncodeVarint64(cache_key + cache_key_prefix_size, offset);
|
||||
return Slice(cache_key, static_cast<size_t>(end - cache_key));
|
||||
}
|
||||
|
||||
Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size,
|
||||
const BlockHandle& handle, char* cache_key) {
|
||||
return GetCacheKeyFromOffset(cache_key_prefix, cache_key_prefix_size,
|
||||
handle.offset(), cache_key);
|
||||
}
|
||||
|
||||
Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key,
|
||||
Tickers block_cache_miss_ticker,
|
||||
Tickers block_cache_hit_ticker,
|
||||
|
@ -359,6 +365,8 @@ struct BlockBasedTable::Rep {
|
|||
size_t cache_key_prefix_size = 0;
|
||||
char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize];
|
||||
size_t compressed_cache_key_prefix_size = 0;
|
||||
uint64_t dummy_index_reader_offset =
|
||||
0; // ID that is unique for the block cache.
|
||||
|
||||
// Footer contains the fixed table information
|
||||
Footer footer;
|
||||
|
@ -415,13 +423,16 @@ struct BlockBasedTable::CachableEntry {
|
|||
};
|
||||
|
||||
// Helper function to setup the cache key's prefix for the Table.
|
||||
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) {
|
||||
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) {
|
||||
assert(kMaxCacheKeyPrefixSize >= 10);
|
||||
rep->cache_key_prefix_size = 0;
|
||||
rep->compressed_cache_key_prefix_size = 0;
|
||||
if (rep->table_options.block_cache != nullptr) {
|
||||
GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file->file(),
|
||||
&rep->cache_key_prefix[0], &rep->cache_key_prefix_size);
|
||||
// Create dummy offset of index reader which is beyond the file size.
|
||||
rep->dummy_index_reader_offset =
|
||||
file_size + rep->table_options.block_cache->NewId();
|
||||
}
|
||||
if (rep->table_options.block_cache_compressed != nullptr) {
|
||||
GenerateCachePrefix(rep->table_options.block_cache_compressed.get(),
|
||||
|
@ -510,7 +521,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
|||
rep->footer = footer;
|
||||
rep->index_type = table_options.index_type;
|
||||
rep->hash_index_allow_collision = table_options.hash_index_allow_collision;
|
||||
SetupCacheKeyPrefix(rep);
|
||||
SetupCacheKeyPrefix(rep, file_size);
|
||||
unique_ptr<BlockBasedTable> new_table(new BlockBasedTable(rep));
|
||||
|
||||
// Read meta index
|
||||
|
@ -935,8 +946,9 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
|
|||
bool no_io = read_options.read_tier == kBlockCacheTier;
|
||||
Cache* block_cache = rep_->table_options.block_cache.get();
|
||||
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
|
||||
rep_->footer.index_handle(), cache_key);
|
||||
auto key =
|
||||
GetCacheKeyFromOffset(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
|
||||
rep_->dummy_index_reader_offset, cache_key);
|
||||
Statistics* statistics = rep_->ioptions.statistics;
|
||||
auto cache_handle =
|
||||
GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS,
|
||||
|
|
|
@ -207,7 +207,7 @@ class BlockBasedTable : public TableReader {
|
|||
// Create the filter from the filter block.
|
||||
static FilterBlockReader* ReadFilter(Rep* rep, size_t* filter_size = nullptr);
|
||||
|
||||
static void SetupCacheKeyPrefix(Rep* rep);
|
||||
static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size);
|
||||
|
||||
explicit BlockBasedTable(Rep* rep)
|
||||
: rep_(rep), compaction_optimized_(false) {}
|
||||
|
|
Loading…
Reference in New Issue