From bee2f48a6607f641701e8971f7df3a711feaf64a Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Thu, 6 Jun 2019 11:28:54 -0700 Subject: [PATCH] Refactor the handling of cache related counters and statistics (#5408) Summary: The patch cleans up the handling of cache hit/miss/insertion related performance counters, get context counters, and statistics by eliminating some code duplication and factoring out the affected logic into separate methods. In addition, it makes the semantics of cache hit metrics more consistent by changing the code so that accessing a partition of partitioned indexes/filters through a pinned reference no longer counts as a cache hit. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5408 Differential Revision: D15610883 Pulled By: ltamasi fbshipit-source-id: ee749c18965077aca971d8f8bee8b24ed8fa76f1 --- HISTORY.md | 1 + table/block_based/block_based_table_reader.cc | 466 ++++++++++-------- table/block_based/block_based_table_reader.h | 58 ++- table/block_based/block_type.h | 24 + table/block_based/partitioned_filter_block.cc | 5 - 5 files changed, 311 insertions(+), 243 deletions(-) create mode 100644 table/block_based/block_type.h diff --git a/HISTORY.md b/HISTORY.md index 028ddcf825..c88b436e40 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,7 @@ * Due to a refactoring, block cache eviction statistics for indexes are temporarily broken. We plan to reintroduce them in a later phase. * options.keep_log_file_num will be enforced strictly all the time. File names of all log files will be tracked, which may take significantly amount of memory if options.keep_log_file_num is large and either of options.max_log_file_size or options.log_file_time_to_roll is set. * Add initial support for Get/Put with user timestamps. Users can specify timestamps via ReadOptions and WriteOptions when calling DB::Get and DB::Put. +* Accessing a partition of a partitioned filter or index through a pinned reference is no longer considered a cache hit. ### New Features * Add an option `snap_refresh_nanos` (default to 0.1s) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature. diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 37bbc3b52b..0d7e3cf53a 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -230,10 +230,10 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( const Rep* const rep = table->get_rep(); assert(rep != nullptr); - constexpr bool is_index = true; const Status s = table->RetrieveBlock( prefetch_buffer, read_options, rep->footer.index_handle(), - UncompressionDict::GetEmptyDict(), index_block, is_index, get_context); + UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex, + get_context); return s; } @@ -244,9 +244,7 @@ Status BlockBasedTable::IndexReaderCommon::GetOrReadIndexBlock( assert(index_block != nullptr); if (!index_block_.IsEmpty()) { - *index_block = - CachableEntry(index_block_.GetValue(), nullptr /* cache */, - nullptr /* cache_handle */, false /* own_value */); + index_block->SetUnownedValue(index_block_.GetValue()); return Status::OK(); } @@ -321,7 +319,6 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { } else { ReadOptions ro; ro.fill_cache = read_options.fill_cache; - constexpr bool is_index = true; // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. 
it = new BlockBasedTableIterator( @@ -330,7 +327,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { internal_comparator(), internal_comparator()->user_comparator(), nullptr, kNullStats, true, index_key_includes_seq(), index_value_is_full()), - false, true, /* prefix_extractor */ nullptr, is_index, + false, true, /* prefix_extractor */ nullptr, BlockType::kIndex, index_key_includes_seq(), index_value_is_full()); } @@ -399,12 +396,11 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon { for (; biter.Valid(); biter.Next()) { handle = biter.value(); CachableEntry block; - const bool is_index = true; // TODO: Support counter batch update for partitioned index and // filter blocks s = table()->MaybeReadBlockAndLoadToCache( prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(), - &block, is_index, nullptr /* get_context */); + &block, BlockType::kIndex, nullptr /* get_context */); assert(s.ok() || block.GetValue() == nullptr); if (s.ok() && block.GetValue() != nullptr) { @@ -662,44 +658,188 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon { std::unique_ptr prefix_index_; }; -Cache::Handle* BlockBasedTable::GetEntryFromCache( - Cache* block_cache, const Slice& key, Tickers block_cache_miss_ticker, - Tickers block_cache_hit_ticker, uint64_t* block_cache_miss_stats, - uint64_t* block_cache_hit_stats, Statistics* statistics, - GetContext* get_context) const { - auto cache_handle = block_cache->Lookup(key, statistics); - if (cache_handle != nullptr) { - PERF_COUNTER_ADD(block_cache_hit_count, 1); - PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, - static_cast(rep_->level)); - if (get_context != nullptr) { - // overall cache hit - get_context->get_context_stats_.num_cache_hit++; - // total bytes read from cache - get_context->get_context_stats_.num_cache_bytes_read += - block_cache->GetUsage(cache_handle); - // block-type specific cache hit - (*block_cache_hit_stats)++; - } else { - // overall cache hit - RecordTick(statistics, BLOCK_CACHE_HIT); - // total bytes read from cache - RecordTick(statistics, BLOCK_CACHE_BYTES_READ, - block_cache->GetUsage(cache_handle)); - RecordTick(statistics, block_cache_hit_ticker); - } +void BlockBasedTable::UpdateCacheHitMetrics(BlockType block_type, + GetContext* get_context, + size_t usage) const { + Statistics* const statistics = rep_->ioptions.statistics; + + PERF_COUNTER_ADD(block_cache_hit_count, 1); + PERF_COUNTER_BY_LEVEL_ADD(block_cache_hit_count, 1, + static_cast(rep_->level)); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_hit; + get_context->get_context_stats_.num_cache_bytes_read += usage; } else { - PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 1, - static_cast(rep_->level)); - if (get_context != nullptr) { - // overall cache miss - get_context->get_context_stats_.num_cache_miss++; - // block-type specific cache miss - (*block_cache_miss_stats)++; - } else { - RecordTick(statistics, BLOCK_CACHE_MISS); - RecordTick(statistics, block_cache_miss_ticker); - } + RecordTick(statistics, BLOCK_CACHE_HIT); + RecordTick(statistics, BLOCK_CACHE_BYTES_READ, usage); + } + + switch (block_type) { + case BlockType::kFilter: + PERF_COUNTER_ADD(block_cache_filter_hit_count, 1); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_filter_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_FILTER_HIT); + } + break; + + case BlockType::kCompressionDictionary: + // TODO: introduce perf counter for compression dictionary hit count + if 
(get_context) { + ++get_context->get_context_stats_.num_cache_compression_dict_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_HIT); + } + break; + + case BlockType::kIndex: + PERF_COUNTER_ADD(block_cache_index_hit_count, 1); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_index_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_INDEX_HIT); + } + break; + + default: + // TODO: introduce dedicated tickers/statistics/counters + // for range tombstones + if (get_context) { + ++get_context->get_context_stats_.num_cache_data_hit; + } else { + RecordTick(statistics, BLOCK_CACHE_DATA_HIT); + } + break; + } +} + +void BlockBasedTable::UpdateCacheMissMetrics(BlockType block_type, + GetContext* get_context) const { + Statistics* const statistics = rep_->ioptions.statistics; + + // TODO: introduce aggregate (not per-level) block cache miss count + PERF_COUNTER_BY_LEVEL_ADD(block_cache_miss_count, 1, + static_cast(rep_->level)); + + if (get_context) { + ++get_context->get_context_stats_.num_cache_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_MISS); + } + + // TODO: introduce perf counters for misses per block type + switch (block_type) { + case BlockType::kFilter: + if (get_context) { + ++get_context->get_context_stats_.num_cache_filter_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_FILTER_MISS); + } + break; + + case BlockType::kCompressionDictionary: + if (get_context) { + ++get_context->get_context_stats_.num_cache_compression_dict_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_MISS); + } + break; + + case BlockType::kIndex: + if (get_context) { + ++get_context->get_context_stats_.num_cache_index_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_INDEX_MISS); + } + break; + + default: + // TODO: introduce dedicated tickers/statistics/counters + // for range tombstones + if (get_context) { + ++get_context->get_context_stats_.num_cache_data_miss; + } else { + RecordTick(statistics, BLOCK_CACHE_DATA_MISS); + } + break; + } +} + +void BlockBasedTable::UpdateCacheInsertionMetrics(BlockType block_type, + GetContext* get_context, + size_t usage) const { + Statistics* const statistics = rep_->ioptions.statistics; + + // TODO: introduce perf counters for block cache insertions + if (get_context) { + ++get_context->get_context_stats_.num_cache_add; + get_context->get_context_stats_.num_cache_bytes_write += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_ADD); + RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usage); + } + + switch (block_type) { + case BlockType::kFilter: + if (get_context) { + ++get_context->get_context_stats_.num_cache_filter_add; + get_context->get_context_stats_.num_cache_filter_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_FILTER_ADD); + RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT, usage); + } + break; + + case BlockType::kCompressionDictionary: + if (get_context) { + ++get_context->get_context_stats_.num_cache_compression_dict_add; + get_context->get_context_stats_ + .num_cache_compression_dict_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD); + RecordTick(statistics, BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, + usage); + } + break; + + case BlockType::kIndex: + if (get_context) { + ++get_context->get_context_stats_.num_cache_index_add; + get_context->get_context_stats_.num_cache_index_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_INDEX_ADD); + RecordTick(statistics, 
BLOCK_CACHE_INDEX_BYTES_INSERT, usage); + } + break; + + default: + // TODO: introduce dedicated tickers/statistics/counters + // for range tombstones + if (get_context) { + ++get_context->get_context_stats_.num_cache_data_add; + get_context->get_context_stats_.num_cache_data_bytes_insert += usage; + } else { + RecordTick(statistics, BLOCK_CACHE_DATA_ADD); + RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, usage); + } + break; + } +} + +Cache::Handle* BlockBasedTable::GetEntryFromCache( + Cache* block_cache, const Slice& key, BlockType block_type, + GetContext* get_context) const { + auto cache_handle = block_cache->Lookup(key, rep_->ioptions.statistics); + + if (cache_handle != nullptr) { + UpdateCacheHitMetrics(block_type, get_context, + block_cache->GetUsage(cache_handle)); + } else { + UpdateCacheMissMetrics(block_type, get_context); } return cache_handle; @@ -1170,7 +1310,7 @@ Status BlockBasedTable::ReadRangeDelBlock( ReadOptions read_options; std::unique_ptr iter(NewDataBlockIterator( read_options, range_del_handle, nullptr /* input_iter */, - false /* is_index */, true /* key_includes_seq */, + BlockType::kRangeDeletion, true /* key_includes_seq */, true /* index_key_is_full */, nullptr /* get_context */, Status(), prefetch_buffer)); assert(iter != nullptr); @@ -1433,38 +1573,24 @@ Status BlockBasedTable::GetDataBlockFromCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, const ReadOptions& read_options, CachableEntry* block, - const UncompressionDict& uncompression_dict, bool is_index, + const UncompressionDict& uncompression_dict, BlockType block_type, GetContext* get_context) const { const size_t read_amp_bytes_per_bit = - !is_index ? rep_->table_options.read_amp_bytes_per_bit : 0; + block_type == BlockType::kData + ? rep_->table_options.read_amp_bytes_per_bit + : 0; assert(block); assert(block->IsEmpty()); Status s; BlockContents* compressed_block = nullptr; Cache::Handle* block_cache_compressed_handle = nullptr; - Statistics* statistics = rep_->ioptions.statistics; // Lookup uncompressed cache first if (block_cache != nullptr) { - auto cache_handle = GetEntryFromCache( - block_cache, block_cache_key, - is_index ? BLOCK_CACHE_INDEX_MISS : BLOCK_CACHE_DATA_MISS, - is_index ? BLOCK_CACHE_INDEX_HIT : BLOCK_CACHE_DATA_HIT, - get_context - ? (is_index ? &get_context->get_context_stats_.num_cache_index_miss - : &get_context->get_context_stats_.num_cache_data_miss) - : nullptr, - get_context - ? (is_index ? 
&get_context->get_context_stats_.num_cache_index_hit - : &get_context->get_context_stats_.num_cache_data_hit) - : nullptr, - statistics, get_context); + auto cache_handle = GetEntryFromCache(block_cache, block_cache_key, + block_type, get_context); if (cache_handle != nullptr) { - if (is_index) { - PERF_COUNTER_ADD(block_cache_index_hit_count, 1); - } - block->SetCachedValue( reinterpret_cast(block_cache->Value(cache_handle)), block_cache, cache_handle); @@ -1482,6 +1608,9 @@ Status BlockBasedTable::GetDataBlockFromCache( assert(!compressed_block_cache_key.empty()); block_cache_compressed_handle = block_cache_compressed->Lookup(compressed_block_cache_key); + + Statistics* statistics = rep_->ioptions.statistics; + // if we found in the compressed cache, then uncompress and insert into // uncompressed cache if (block_cache_compressed_handle == nullptr) { @@ -1508,7 +1637,7 @@ Status BlockBasedTable::GetDataBlockFromCache( // Insert uncompressed block into block cache if (s.ok()) { std::unique_ptr block_holder( - new Block(std::move(contents), rep_->get_global_seqno(is_index), + new Block(std::move(contents), rep_->get_global_seqno(block_type), read_amp_bytes_per_bit, statistics)); // uncompressed block if (block_cache != nullptr && block_holder->own_bytes() && @@ -1526,32 +1655,7 @@ Status BlockBasedTable::GetDataBlockFromCache( block->SetCachedValue(block_holder.release(), block_cache, cache_handle); - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_add++; - get_context->get_context_stats_.num_cache_bytes_write += charge; - } else { - RecordTick(statistics, BLOCK_CACHE_ADD); - RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, charge); - } - if (is_index) { - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_index_add++; - get_context->get_context_stats_.num_cache_index_bytes_insert += - charge; - } else { - RecordTick(statistics, BLOCK_CACHE_INDEX_ADD); - RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, charge); - } - } else { - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_data_add++; - get_context->get_context_stats_.num_cache_data_bytes_insert += - charge; - } else { - RecordTick(statistics, BLOCK_CACHE_DATA_ADD); - RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, charge); - } - } + UpdateCacheInsertionMetrics(block_type, get_context, charge); } else { RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); } @@ -1571,15 +1675,19 @@ Status BlockBasedTable::PutDataBlockToCache( CachableEntry* cached_block, BlockContents* raw_block_contents, CompressionType raw_block_comp_type, const UncompressionDict& uncompression_dict, SequenceNumber seq_no, - MemoryAllocator* memory_allocator, bool is_index, + MemoryAllocator* memory_allocator, BlockType block_type, GetContext* get_context) const { const ImmutableCFOptions& ioptions = rep_->ioptions; const uint32_t format_version = rep_->table_options.format_version; const size_t read_amp_bytes_per_bit = - !is_index ? rep_->table_options.read_amp_bytes_per_bit : 0; + block_type == BlockType::kData + ? rep_->table_options.read_amp_bytes_per_bit + : 0; const Cache::Priority priority = - is_index && rep_->table_options - .cache_index_and_filter_blocks_with_high_priority + rep_->table_options.cache_index_and_filter_blocks_with_high_priority && + (block_type == BlockType::kFilter || + block_type == BlockType::kCompressionDictionary || + block_type == BlockType::kIndex) ? 
Cache::Priority::HIGH : Cache::Priority::LOW; assert(cached_block); @@ -1652,33 +1760,7 @@ Status BlockBasedTable::PutDataBlockToCache( cached_block->SetCachedValue(block_holder.release(), block_cache, cache_handle); - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_add++; - get_context->get_context_stats_.num_cache_bytes_write += charge; - } else { - RecordTick(statistics, BLOCK_CACHE_ADD); - RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, charge); - } - if (is_index) { - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_index_add++; - get_context->get_context_stats_.num_cache_index_bytes_insert += - charge; - } else { - RecordTick(statistics, BLOCK_CACHE_INDEX_ADD); - RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, charge); - } - } else { - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_data_add++; - get_context->get_context_stats_.num_cache_data_bytes_insert += charge; - } else { - RecordTick(statistics, BLOCK_CACHE_DATA_ADD); - RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, charge); - } - } - assert(reinterpret_cast(block_cache->Value( - cached_block->GetCacheHandle())) == cached_block->GetValue()); + UpdateCacheInsertionMetrics(block_type, get_context, charge); } else { RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); } @@ -1798,18 +1880,11 @@ CachableEntry BlockBasedTable::GetFilter( auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, filter_blk_handle, cache_key); - Statistics* statistics = rep_->ioptions.statistics; - Cache::Handle* cache_handle = GetEntryFromCache( - block_cache, key, BLOCK_CACHE_FILTER_MISS, BLOCK_CACHE_FILTER_HIT, - get_context ? &get_context->get_context_stats_.num_cache_filter_miss - : nullptr, - get_context ? &get_context->get_context_stats_.num_cache_filter_hit - : nullptr, - statistics, get_context); + Cache::Handle* cache_handle = + GetEntryFromCache(block_cache, key, BlockType::kFilter, get_context); FilterBlockReader* filter = nullptr; if (cache_handle != nullptr) { - PERF_COUNTER_ADD(block_cache_filter_hit_count, 1); filter = reinterpret_cast(block_cache->Value(cache_handle)); } else if (no_io) { @@ -1827,20 +1902,9 @@ CachableEntry BlockBasedTable::GetFilter( : Cache::Priority::LOW); if (s.ok()) { PERF_COUNTER_ADD(filter_block_read_count, 1); - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_add++; - get_context->get_context_stats_.num_cache_bytes_write += usage; - get_context->get_context_stats_.num_cache_filter_add++; - get_context->get_context_stats_.num_cache_filter_bytes_insert += - usage; - } else { - RecordTick(statistics, BLOCK_CACHE_ADD); - RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usage); - RecordTick(statistics, BLOCK_CACHE_FILTER_ADD); - RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT, usage); - } + UpdateCacheInsertionMetrics(BlockType::kFilter, get_context, usage); } else { - RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); + RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD_FAILURES); delete filter; return CachableEntry(); } @@ -1867,16 +1931,9 @@ CachableEntry BlockBasedTable::GetUncompressionDict( auto cache_key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size, rep_->compression_dict_handle, cache_key_buf); - auto cache_handle = GetEntryFromCache( - rep_->table_options.block_cache.get(), cache_key, - BLOCK_CACHE_COMPRESSION_DICT_MISS, BLOCK_CACHE_COMPRESSION_DICT_HIT, - get_context - ? 
&get_context->get_context_stats_.num_cache_compression_dict_miss - : nullptr, - get_context - ? &get_context->get_context_stats_.num_cache_compression_dict_hit - : nullptr, - rep_->ioptions.statistics, get_context); + auto cache_handle = + GetEntryFromCache(rep_->table_options.block_cache.get(), cache_key, + BlockType::kCompressionDictionary, get_context); UncompressionDict* dict = nullptr; if (cache_handle != nullptr) { dict = reinterpret_cast( @@ -1887,43 +1944,31 @@ CachableEntry BlockBasedTable::GetUncompressionDict( std::unique_ptr compression_dict_block; Status s = ReadCompressionDictBlock(prefetch_buffer, &compression_dict_block); - size_t usage = 0; if (s.ok()) { assert(compression_dict_block != nullptr); // TODO(ajkr): find a way to avoid the `compression_dict_block` data copy - dict = new UncompressionDict(compression_dict_block->data.ToString(), - rep_->blocks_definitely_zstd_compressed, - rep_->ioptions.statistics); - usage = dict->ApproximateMemoryUsage(); + std::unique_ptr uncompression_dict( + new UncompressionDict(compression_dict_block->data.ToString(), + rep_->blocks_definitely_zstd_compressed, + rep_->ioptions.statistics)); + const size_t usage = uncompression_dict->ApproximateMemoryUsage(); s = rep_->table_options.block_cache->Insert( - cache_key, dict, usage, &DeleteCachedUncompressionDictEntry, - &cache_handle, + cache_key, uncompression_dict.get(), usage, + &DeleteCachedUncompressionDictEntry, &cache_handle, rep_->table_options.cache_index_and_filter_blocks_with_high_priority ? Cache::Priority::HIGH : Cache::Priority::LOW); - } - if (s.ok()) { - PERF_COUNTER_ADD(compression_dict_block_read_count, 1); - if (get_context != nullptr) { - get_context->get_context_stats_.num_cache_add++; - get_context->get_context_stats_.num_cache_bytes_write += usage; - get_context->get_context_stats_.num_cache_compression_dict_add++; - get_context->get_context_stats_ - .num_cache_compression_dict_bytes_insert += usage; + + if (s.ok()) { + PERF_COUNTER_ADD(compression_dict_block_read_count, 1); + UpdateCacheInsertionMetrics(BlockType::kCompressionDictionary, + get_context, usage); + dict = uncompression_dict.release(); } else { - RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD); - RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_BYTES_WRITE, usage); - RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_COMPRESSION_DICT_ADD); - RecordTick(rep_->ioptions.statistics, - BLOCK_CACHE_COMPRESSION_DICT_BYTES_INSERT, usage); + RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD_FAILURES); + assert(dict == nullptr); + assert(cache_handle == nullptr); } - } else { - // There should be no way to get here if block cache insertion succeeded. - // Though it is still possible something failed earlier. - RecordTick(rep_->ioptions.statistics, BLOCK_CACHE_ADD_FAILURES); - delete dict; - dict = nullptr; - assert(cache_handle == nullptr); } } return {dict, cache_handle ? 
rep_->table_options.block_cache.get() : nullptr, @@ -1951,7 +1996,7 @@ InternalIteratorBase* BlockBasedTable::NewIndexIterator( template TBlockIter* BlockBasedTable::NewDataBlockIterator( const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter, - bool is_index, bool key_includes_seq, bool index_key_is_full, + BlockType block_type, bool key_includes_seq, bool index_key_is_full, GetContext* get_context, Status s, FilePrefetchBuffer* prefetch_buffer) const { PERF_TIMER_GUARD(new_table_block_iter_nanos); @@ -1972,7 +2017,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( CachableEntry block; s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block, - is_index, get_context); + block_type, get_context); if (!s.ok()) { assert(block.IsEmpty()); @@ -2037,7 +2082,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( Status BlockBasedTable::MaybeReadBlockAndLoadToCache( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, - CachableEntry* block_entry, bool is_index, + CachableEntry* block_entry, BlockType block_type, GetContext* get_context) const { assert(block_entry != nullptr); const bool no_io = (ro.read_tier == kBlockCacheTier); @@ -2070,7 +2115,7 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( } s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed, - ro, block_entry, uncompression_dict, is_index, + ro, block_entry, uncompression_dict, block_type, get_context); // Can't find the block from the cache. If I/O is allowed, read from the @@ -2095,14 +2140,14 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( } if (s.ok()) { - SequenceNumber seq_no = rep_->get_global_seqno(is_index); + SequenceNumber seq_no = rep_->get_global_seqno(block_type); // If filling cache is allowed and a cache is configured, try to put the // block to the cache. s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed, block_entry, &raw_block_contents, raw_block_comp_type, uncompression_dict, seq_no, GetMemoryAllocator(rep_->table_options), - is_index, get_context); + block_type, get_context); } } } @@ -2113,16 +2158,19 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache( Status BlockBasedTable::RetrieveBlock( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, - CachableEntry* block_entry, bool is_index, + CachableEntry* block_entry, BlockType block_type, GetContext* get_context) const { assert(block_entry); assert(block_entry->IsEmpty()); Status s; - if (!is_index || rep_->table_options.cache_index_and_filter_blocks) { + if (rep_->table_options.cache_index_and_filter_blocks || + (block_type != BlockType::kFilter && + block_type != BlockType::kCompressionDictionary && + block_type != BlockType::kIndex)) { s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle, - uncompression_dict, block_entry, is_index, - get_context); + uncompression_dict, block_entry, + block_type, get_context); if (!s.ok()) { return s; @@ -2150,8 +2198,10 @@ Status BlockBasedTable::RetrieveBlock( rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &block, rep_->ioptions, rep_->blocks_maybe_compressed, rep_->blocks_maybe_compressed, uncompression_dict, - rep_->persistent_cache_options, rep_->get_global_seqno(is_index), - !is_index ? rep_->table_options.read_amp_bytes_per_bit : 0, + rep_->persistent_cache_options, rep_->get_global_seqno(block_type), + block_type == BlockType::kData + ? 
rep_->table_options.read_amp_bytes_per_bit + : 0, GetMemoryAllocator(rep_->table_options)); } @@ -2178,18 +2228,13 @@ InternalIteratorBase* BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( const BlockHandle& handle) { // Return a block iterator on the index partition - auto rep = table_->get_rep(); auto block = block_map_->find(handle.offset()); // This is a possible scenario since block cache might not have had space // for the partition if (block != block_map_->end()) { - PERF_COUNTER_ADD(block_cache_hit_count, 1); - RecordTick(rep->ioptions.statistics, BLOCK_CACHE_INDEX_HIT); - RecordTick(rep->ioptions.statistics, BLOCK_CACHE_HIT); - Cache* block_cache = rep->table_options.block_cache.get(); - assert(block_cache); - RecordTick(rep->ioptions.statistics, BLOCK_CACHE_BYTES_READ, - block_cache->GetUsage(block->second.GetCacheHandle())); + auto rep = table_->get_rep(); + assert(rep); + Statistics* kNullStats = nullptr; // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. @@ -2531,7 +2576,7 @@ void BlockBasedTableIterator::InitDataBlock() { Status s; table_->NewDataBlockIterator( - read_options_, data_block_handle, &block_iter_, is_index_, + read_options_, data_block_handle, &block_iter_, block_type_, key_includes_seq_, index_key_is_full_, /* get_context */ nullptr, s, prefetch_buffer_.get()); block_iter_points_to_real_block_ = true; @@ -2623,7 +2668,6 @@ InternalIterator* BlockBasedTable::NewIterator( Arena* arena, bool skip_filters, bool for_compaction) { bool need_upper_bound_check = PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor); - const bool kIsNotIndex = false; if (arena == nullptr) { return new BlockBasedTableIterator( this, read_options, rep_->internal_comparator, @@ -2633,7 +2677,7 @@ InternalIterator* BlockBasedTable::NewIterator( rep_->index_type == BlockBasedTableOptions::kHashSearch), !skip_filters && !read_options.total_order_seek && prefix_extractor != nullptr, - need_upper_bound_check, prefix_extractor, kIsNotIndex, + need_upper_bound_check, prefix_extractor, BlockType::kData, true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction); } else { auto* mem = @@ -2643,7 +2687,7 @@ InternalIterator* BlockBasedTable::NewIterator( NewIndexIterator(read_options, need_upper_bound_check), !skip_filters && !read_options.total_order_seek && prefix_extractor != nullptr, - need_upper_bound_check, prefix_extractor, kIsNotIndex, + need_upper_bound_check, prefix_extractor, BlockType::kData, true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction); } } @@ -2780,7 +2824,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, } else { DataBlockIter biter; NewDataBlockIterator( - read_options, iiter->value(), &biter, false, + read_options, iiter->value(), &biter, BlockType::kData, true /* key_includes_seq */, true /* index_key_is_full */, get_context); @@ -2893,7 +2937,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) { DataBlockIter biter; NewDataBlockIterator( - read_options, iiter->value(), &biter, false, + read_options, iiter->value(), &biter, BlockType::kData, true /* key_includes_seq */, get_context); if (read_options.read_tier == kBlockCacheTier && diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index e53248fbcb..d8319a3e71 100644 --- a/table/block_based/block_based_table_reader.h +++ 
b/table/block_based/block_based_table_reader.h @@ -25,6 +25,7 @@ #include "rocksdb/table.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_factory.h" +#include "table/block_based/block_type.h" #include "table/block_based/cachable_entry.h" #include "table/block_based/filter_block.h" #include "table/format.h" @@ -220,8 +221,8 @@ class BlockBasedTable : public TableReader { // input_iter: if it is not null, update this one and return it as Iterator template TBlockIter* NewDataBlockIterator( - const ReadOptions& ro, const BlockHandle& block_hanlde, - TBlockIter* input_iter = nullptr, bool is_index = false, + const ReadOptions& ro, const BlockHandle& block_handle, + TBlockIter* input_iter = nullptr, BlockType block_type = BlockType::kData, bool key_includes_seq = true, bool index_key_is_full = true, GetContext* get_context = nullptr, Status s = Status(), FilePrefetchBuffer* prefetch_buffer = nullptr) const; @@ -238,12 +239,14 @@ class BlockBasedTable : public TableReader { friend class MockedBlockBasedTable; static std::atomic next_cache_key_id_; + void UpdateCacheHitMetrics(BlockType block_type, GetContext* get_context, + size_t usage) const; + void UpdateCacheMissMetrics(BlockType block_type, + GetContext* get_context) const; + void UpdateCacheInsertionMetrics(BlockType block_type, + GetContext* get_context, size_t usage) const; Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key, - Tickers block_cache_miss_ticker, - Tickers block_cache_hit_ticker, - uint64_t* block_cache_miss_stats, - uint64_t* block_cache_hit_stats, - Statistics* statistics, + BlockType block_type, GetContext* get_context) const; // If block cache enabled (compressed or uncompressed), looks for the block @@ -258,7 +261,7 @@ class BlockBasedTable : public TableReader { Status MaybeReadBlockAndLoadToCache( FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, - CachableEntry* block_entry, bool is_index = false, + CachableEntry* block_entry, BlockType block_type, GetContext* get_context = nullptr) const; // Similar to the above, with one crucial difference: it will retrieve the @@ -267,7 +270,7 @@ class BlockBasedTable : public TableReader { Status RetrieveBlock(FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, const BlockHandle& handle, const UncompressionDict& uncompression_dict, - CachableEntry* block_entry, bool is_index, + CachableEntry* block_entry, BlockType block_type, GetContext* get_context) const; // For the following two functions: @@ -311,7 +314,7 @@ class BlockBasedTable : public TableReader { const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, const ReadOptions& read_options, CachableEntry* block, - const UncompressionDict& uncompression_dict, bool is_index = false, + const UncompressionDict& uncompression_dict, BlockType block_type, GetContext* get_context = nullptr) const; // Put a raw block (maybe compressed) to the corresponding block caches. @@ -324,16 +327,14 @@ class BlockBasedTable : public TableReader { // PutDataBlockToCache(). After the call, the object will be invalid. // @param uncompression_dict Data for presetting the compression library's // dictionary. 
- Status PutDataBlockToCache(const Slice& block_cache_key, - const Slice& compressed_block_cache_key, - Cache* block_cache, Cache* block_cache_compressed, - CachableEntry* cached_block, - BlockContents* raw_block_contents, - CompressionType raw_block_comp_type, - const UncompressionDict& uncompression_dict, - SequenceNumber seq_no, - MemoryAllocator* memory_allocator, bool is_index, - GetContext* get_context) const; + Status PutDataBlockToCache( + const Slice& block_cache_key, const Slice& compressed_block_cache_key, + Cache* block_cache, Cache* block_cache_compressed, + CachableEntry* cached_block, BlockContents* raw_block_contents, + CompressionType raw_block_comp_type, + const UncompressionDict& uncompression_dict, SequenceNumber seq_no, + MemoryAllocator* memory_allocator, BlockType block_type, + GetContext* get_context) const; // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found // after a call to Seek(key), until handle_result returns false. @@ -546,8 +547,12 @@ struct BlockBasedTable::Rep { bool closed = false; const bool immortal_table; - SequenceNumber get_global_seqno(bool is_index) const { - return is_index ? kDisableGlobalSequenceNumber : global_seqno; + SequenceNumber get_global_seqno(BlockType block_type) const { + return (block_type == BlockType::kFilter || + block_type == BlockType::kIndex || + block_type == BlockType::kCompressionDictionary) + ? kDisableGlobalSequenceNumber + : global_seqno; } }; @@ -560,8 +565,8 @@ class BlockBasedTableIterator : public InternalIteratorBase { const InternalKeyComparator& icomp, InternalIteratorBase* index_iter, bool check_filter, bool need_upper_bound_check, - const SliceTransform* prefix_extractor, bool is_index, - bool key_includes_seq = true, + const SliceTransform* prefix_extractor, + BlockType block_type, bool key_includes_seq = true, bool index_key_is_full = true, bool for_compaction = false) : InternalIteratorBase(false), @@ -575,7 +580,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { check_filter_(check_filter), need_upper_bound_check_(need_upper_bound_check), prefix_extractor_(prefix_extractor), - is_index_(is_index), + block_type_(block_type), key_includes_seq_(key_includes_seq), index_key_is_full_(index_key_is_full), for_compaction_(for_compaction) {} @@ -690,8 +695,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { // TODO(Zhongyi): pick a better name bool need_upper_bound_check_; const SliceTransform* prefix_extractor_; - // If the blocks over which we iterate are index blocks - bool is_index_; + BlockType block_type_; // If the keys in the blocks over which we iterate include 8 byte sequence bool key_includes_seq_; bool index_key_is_full_; diff --git a/table/block_based/block_type.h b/table/block_based/block_type.h new file mode 100644 index 0000000000..9b9c53946c --- /dev/null +++ b/table/block_based/block_type.h @@ -0,0 +1,24 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +namespace rocksdb { + +// Represents the types of blocks used in the block based table format. +// See https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format +// for details. 
+ +enum class BlockType : uint8_t { + kData, + kFilter, + kProperties, + kCompressionDictionary, + kRangeDeletion, + kMetaIndex, + kIndex, +}; + +} // namespace rocksdb diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 315e63306f..7874ce1874 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -243,11 +243,6 @@ PartitionedFilterBlockReader::GetFilterPartition( // This is a possible scenario since block cache might not have had space // for the partition if (iter != filter_map_.end()) { - PERF_COUNTER_ADD(block_cache_hit_count, 1); - RecordTick(statistics(), BLOCK_CACHE_FILTER_HIT); - RecordTick(statistics(), BLOCK_CACHE_HIT); - RecordTick(statistics(), BLOCK_CACHE_BYTES_READ, - block_cache->GetUsage(iter->second.GetCacheHandle())); return {iter->second.GetValue(), nullptr /* cache */, nullptr /* cache_handle */, false /* own_value */}; }