From b2ae5950ba5240483ea83ad89f132269638b7373 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 10 Mar 2016 15:16:11 -0800 Subject: [PATCH] Index Reader should not be reused after DB restart Summary: In block based table reader, wow we put index reader to block cache, which can be retrieved after DB restart. However, index reader may reference internal comparator, which can be destroyed after DB restarts, causing problems. Fix it by making cache key identical per table reader. Test Plan: Add a new test which failed with out the commit but now pass. Reviewers: IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: maro, yhchiang, kradhakrishnan, leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D55287 --- db/db_test2.cc | 19 +++++++++++++++++++ table/block_based_table_reader.cc | 28 ++++++++++++++++++++-------- table/block_based_table_reader.h | 2 +- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/db/db_test2.cc b/db/db_test2.cc index 1764131ac9..3d9820b653 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -58,6 +58,25 @@ TEST_F(DBTest2, IteratorPropertyVersionNumber) { delete iter2; delete iter3; } + +TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + CreateAndReopenWithCF({"pikachu"}, options); + + Put(1, "a", "begin"); + Put(1, "z", "end"); + ASSERT_OK(Flush(1)); + TryReopenWithColumnFamilies({"default", "pikachu"}, options); + + std::string value; + value = Get(1, "a"); +} } // namespace rocksdb int main(int argc, char** argv) { diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index cbaf90a900..e48eea6947 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -98,17 +98,23 @@ void ReleaseCachedEntry(void* arg, void* h) { cache->Release(handle); } -Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size, - const BlockHandle& handle, char* cache_key) { +Slice GetCacheKeyFromOffset(const char* cache_key_prefix, + size_t cache_key_prefix_size, uint64_t offset, + char* cache_key) { assert(cache_key != nullptr); assert(cache_key_prefix_size != 0); assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize); memcpy(cache_key, cache_key_prefix, cache_key_prefix_size); - char* end = - EncodeVarint64(cache_key + cache_key_prefix_size, handle.offset()); + char* end = EncodeVarint64(cache_key + cache_key_prefix_size, offset); return Slice(cache_key, static_cast(end - cache_key)); } +Slice GetCacheKey(const char* cache_key_prefix, size_t cache_key_prefix_size, + const BlockHandle& handle, char* cache_key) { + return GetCacheKeyFromOffset(cache_key_prefix, cache_key_prefix_size, + handle.offset(), cache_key); +} + Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key, Tickers block_cache_miss_ticker, Tickers block_cache_hit_ticker, @@ -359,6 +365,8 @@ struct BlockBasedTable::Rep { size_t cache_key_prefix_size = 0; char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize]; size_t compressed_cache_key_prefix_size = 0; + uint64_t dummy_index_reader_offset = + 0; // ID that is unique for the block cache. // Footer contains the fixed table information Footer footer; @@ -415,13 +423,16 @@ struct BlockBasedTable::CachableEntry { }; // Helper function to setup the cache key's prefix for the Table. -void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) { +void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) { assert(kMaxCacheKeyPrefixSize >= 10); rep->cache_key_prefix_size = 0; rep->compressed_cache_key_prefix_size = 0; if (rep->table_options.block_cache != nullptr) { GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file->file(), &rep->cache_key_prefix[0], &rep->cache_key_prefix_size); + // Create dummy offset of index reader which is beyond the file size. + rep->dummy_index_reader_offset = + file_size + rep->table_options.block_cache->NewId(); } if (rep->table_options.block_cache_compressed != nullptr) { GenerateCachePrefix(rep->table_options.block_cache_compressed.get(), @@ -510,7 +521,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, rep->footer = footer; rep->index_type = table_options.index_type; rep->hash_index_allow_collision = table_options.hash_index_allow_collision; - SetupCacheKeyPrefix(rep); + SetupCacheKeyPrefix(rep, file_size); unique_ptr new_table(new BlockBasedTable(rep)); // Read meta index @@ -935,8 +946,9 @@ InternalIterator* BlockBasedTable::NewIndexIterator( bool no_io = read_options.read_tier == kBlockCacheTier; Cache* block_cache = rep_->table_options.block_cache.get(); char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, - rep_->footer.index_handle(), cache_key); + auto key = + GetCacheKeyFromOffset(rep_->cache_key_prefix, rep_->cache_key_prefix_size, + rep_->dummy_index_reader_offset, cache_key); Statistics* statistics = rep_->ioptions.statistics; auto cache_handle = GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS, diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index a35b8ae41f..600ca18a37 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -207,7 +207,7 @@ class BlockBasedTable : public TableReader { // Create the filter from the filter block. static FilterBlockReader* ReadFilter(Rep* rep, size_t* filter_size = nullptr); - static void SetupCacheKeyPrefix(Rep* rep); + static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size); explicit BlockBasedTable(Rep* rep) : rep_(rep), compaction_optimized_(false) {}