Index Reader should not be reused after DB restart

Summary:
In block based table reader, wow we put index reader to block cache, which can be retrieved after DB restart. However, index reader may reference internal comparator, which can be destroyed after DB restarts, causing problems.
Fix it by making cache key identical per table reader.

Test Plan: Add a new test which failed with out the commit but now pass.

Reviewers: IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: maro, yhchiang, kradhakrishnan, leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D55287
This commit is contained in:
sdong 2016-03-10 15:16:11 -08:00
parent 0267655dad
commit b2ae5950ba
3 changed files with 40 additions and 9 deletions

View File

@ -58,6 +58,25 @@ TEST_F(DBTest2, IteratorPropertyVersionNumber) {
delete iter2; delete iter2;
delete iter3; delete iter3;
} }
TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) {
Options options = CurrentOptions();
options.create_if_missing = true;
options.statistics = rocksdb::CreateDBStatistics();
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.filter_policy.reset(NewBloomFilterPolicy(20));
options.table_factory.reset(new BlockBasedTableFactory(table_options));
CreateAndReopenWithCF({"pikachu"}, options);
Put(1, "a", "begin");
Put(1, "z", "end");
ASSERT_OK(Flush(1));
TryReopenWithColumnFamilies({"default", "pikachu"}, options);
std::string value;
value = Get(1, "a");
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

View File

@ -98,17 +98,23 @@ void ReleaseCachedEntry(void* arg, void* h) {
cache->Release(handle); cache->Release(handle);
} }
Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size, Slice GetCacheKeyFromOffset(const char* cache_key_prefix,
const BlockHandle& handle, char* cache_key) { size_t cache_key_prefix_size, uint64_t offset,
char* cache_key) {
assert(cache_key != nullptr); assert(cache_key != nullptr);
assert(cache_key_prefix_size != 0); assert(cache_key_prefix_size != 0);
assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize); assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize);
memcpy(cache_key, cache_key_prefix, cache_key_prefix_size); memcpy(cache_key, cache_key_prefix, cache_key_prefix_size);
char* end = char* end = EncodeVarint64(cache_key + cache_key_prefix_size, offset);
EncodeVarint64(cache_key + cache_key_prefix_size, handle.offset());
return Slice(cache_key, static_cast<size_t>(end - cache_key)); return Slice(cache_key, static_cast<size_t>(end - cache_key));
} }
Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size,
const BlockHandle& handle, char* cache_key) {
return GetCacheKeyFromOffset(cache_key_prefix, cache_key_prefix_size,
handle.offset(), cache_key);
}
Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key, Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key,
Tickers block_cache_miss_ticker, Tickers block_cache_miss_ticker,
Tickers block_cache_hit_ticker, Tickers block_cache_hit_ticker,
@ -359,6 +365,8 @@ struct BlockBasedTable::Rep {
size_t cache_key_prefix_size = 0; size_t cache_key_prefix_size = 0;
char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize]; char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize];
size_t compressed_cache_key_prefix_size = 0; size_t compressed_cache_key_prefix_size = 0;
uint64_t dummy_index_reader_offset =
0; // ID that is unique for the block cache.
// Footer contains the fixed table information // Footer contains the fixed table information
Footer footer; Footer footer;
@ -415,13 +423,16 @@ struct BlockBasedTable::CachableEntry {
}; };
// Helper function to setup the cache key's prefix for the Table. // Helper function to setup the cache key's prefix for the Table.
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) { void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) {
assert(kMaxCacheKeyPrefixSize >= 10); assert(kMaxCacheKeyPrefixSize >= 10);
rep->cache_key_prefix_size = 0; rep->cache_key_prefix_size = 0;
rep->compressed_cache_key_prefix_size = 0; rep->compressed_cache_key_prefix_size = 0;
if (rep->table_options.block_cache != nullptr) { if (rep->table_options.block_cache != nullptr) {
GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file->file(), GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file->file(),
&rep->cache_key_prefix[0], &rep->cache_key_prefix_size); &rep->cache_key_prefix[0], &rep->cache_key_prefix_size);
// Create dummy offset of index reader which is beyond the file size.
rep->dummy_index_reader_offset =
file_size + rep->table_options.block_cache->NewId();
} }
if (rep->table_options.block_cache_compressed != nullptr) { if (rep->table_options.block_cache_compressed != nullptr) {
GenerateCachePrefix(rep->table_options.block_cache_compressed.get(), GenerateCachePrefix(rep->table_options.block_cache_compressed.get(),
@ -510,7 +521,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
rep->footer = footer; rep->footer = footer;
rep->index_type = table_options.index_type; rep->index_type = table_options.index_type;
rep->hash_index_allow_collision = table_options.hash_index_allow_collision; rep->hash_index_allow_collision = table_options.hash_index_allow_collision;
SetupCacheKeyPrefix(rep); SetupCacheKeyPrefix(rep, file_size);
unique_ptr<BlockBasedTable> new_table(new BlockBasedTable(rep)); unique_ptr<BlockBasedTable> new_table(new BlockBasedTable(rep));
// Read meta index // Read meta index
@ -935,8 +946,9 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
bool no_io = read_options.read_tier == kBlockCacheTier; bool no_io = read_options.read_tier == kBlockCacheTier;
Cache* block_cache = rep_->table_options.block_cache.get(); Cache* block_cache = rep_->table_options.block_cache.get();
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, auto key =
rep_->footer.index_handle(), cache_key); GetCacheKeyFromOffset(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
rep_->dummy_index_reader_offset, cache_key);
Statistics* statistics = rep_->ioptions.statistics; Statistics* statistics = rep_->ioptions.statistics;
auto cache_handle = auto cache_handle =
GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS, GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS,

View File

@ -207,7 +207,7 @@ class BlockBasedTable : public TableReader {
// Create the filter from the filter block. // Create the filter from the filter block.
static FilterBlockReader* ReadFilter(Rep* rep, size_t* filter_size = nullptr); static FilterBlockReader* ReadFilter(Rep* rep, size_t* filter_size = nullptr);
static void SetupCacheKeyPrefix(Rep* rep); static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size);
explicit BlockBasedTable(Rep* rep) explicit BlockBasedTable(Rep* rep)
: rep_(rep), compaction_optimized_(false) {} : rep_(rep), compaction_optimized_(false) {}