From 6b7fcc0d5f8c91f891f243906e6431969cfa8d11 Mon Sep 17 00:00:00 2001 From: Eli Pozniansky Date: Tue, 23 Jul 2019 15:30:59 -0700 Subject: [PATCH] Improve CPU Efficiency of ApproximateSize (part 1) (#5613) Summary: 1. Avoid creating the iterator in order to call BlockBasedTable::ApproximateOffsetOf(). Instead, directly call into it. 2. Optimize BlockBasedTable::ApproximateOffsetOf() keeps the index block iterator in stack. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5613 Differential Revision: D16442660 Pulled By: elipoz fbshipit-source-id: 9320be3e918c139b10e758cbbb684706d172e516 --- db/table_cache.cc | 28 ++++++++++++++++++- db/table_cache.h | 6 ++++ db/version_set.cc | 17 ++++------- db/version_set.h | 4 +-- table/block_based/block_based_table_reader.cc | 12 ++++++-- table/block_based/block_based_table_reader.h | 7 +++-- 6 files changed, 54 insertions(+), 20 deletions(-) diff --git a/db/table_cache.cc b/db/table_cache.cc index 2290b5939c..48415beff3 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -194,7 +194,7 @@ InternalIterator* TableCache::NewIterator( if (table_reader == nullptr) { s = FindTable(env_options, icomparator, fd, &handle, prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */, - !for_compaction /* record read_stats */, file_read_hist, + !for_compaction /* record_read_stats */, file_read_hist, skip_filters, level); if (s.ok()) { table_reader = GetTableReaderFromHandle(handle); @@ -505,4 +505,30 @@ void TableCache::Evict(Cache* cache, uint64_t file_number) { cache->Erase(GetSliceForFileNumber(&file_number)); } +uint64_t TableCache::ApproximateOffsetOf( + const Slice& key, const FileDescriptor& fd, TableReaderCaller caller, + const InternalKeyComparator& internal_comparator, + const SliceTransform* prefix_extractor) { + uint64_t result = 0; + TableReader* table_reader = fd.table_reader; + Cache::Handle* table_handle = nullptr; + if (table_reader == nullptr) { + const bool for_compaction = (caller == TableReaderCaller::kCompaction); + Status s = FindTable(env_options_, internal_comparator, fd, &table_handle, + prefix_extractor, false /* no_io */, + !for_compaction /* record_read_stats */); + if (s.ok()) { + table_reader = GetTableReaderFromHandle(table_handle); + } + } + + if (table_reader != nullptr) { + result = table_reader->ApproximateOffsetOf(key, caller); + } + if (table_handle != nullptr) { + ReleaseHandle(table_handle); + } + + return result; +} } // namespace rocksdb diff --git a/db/table_cache.h b/db/table_cache.h index f9fd481522..89a0b1b5c6 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -153,6 +153,12 @@ class TableCache { const FileDescriptor& fd, const SliceTransform* prefix_extractor = nullptr); + // Returns approximated offset of a key in a file represented by fd. + uint64_t ApproximateOffsetOf( + const Slice& key, const FileDescriptor& fd, TableReaderCaller caller, + const InternalKeyComparator& internal_comparator, + const SliceTransform* prefix_extractor = nullptr); + // Release the handle from a cache void ReleaseHandle(Cache::Handle* handle); diff --git a/db/version_set.cc b/db/version_set.cc index 559a4190f1..281065d050 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4974,19 +4974,12 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, } else { // "key" falls in the range for this table. Add the // approximate offset of "key" within the table. - TableReader* table_reader_ptr; - InternalIterator* iter = v->cfd_->table_cache()->NewIterator( - ReadOptions(), v->env_options_, v->cfd_->internal_comparator(), - *f.file_metadata, nullptr /* range_del_agg */, - v->GetMutableCFOptions().prefix_extractor.get(), &table_reader_ptr, - /*file_read_hist=*/nullptr, caller, - /*arena=*/nullptr, /*skip_filters=*/false, /*level=*/-1, - /*smallest_compaction_key=*/nullptr, - /*largest_compaction_key=*/nullptr); - if (table_reader_ptr != nullptr) { - result = table_reader_ptr->ApproximateOffsetOf(key, caller); + TableCache* table_cache = v->cfd_->table_cache(); + if (table_cache != nullptr) { + result = table_cache->ApproximateOffsetOf( + key, f.file_metadata->fd, caller, v->cfd()->internal_comparator(), + v->GetMutableCFOptions().prefix_extractor.get()); } - delete iter; } return result; } diff --git a/db/version_set.h b/db/version_set.h index 6b7c42881c..ee94f5966d 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -655,7 +655,7 @@ class Version { uint64_t GetSstFilesSize(); - MutableCFOptions GetMutableCFOptions() { return mutable_cf_options_; } + const MutableCFOptions& GetMutableCFOptions() { return mutable_cf_options_; } private: Env* env_; @@ -981,7 +981,7 @@ class VersionSet { void AddLiveFiles(std::vector* live_list); // Return the approximate size of data to be scanned for range [start, end) - // in levels [start_level, end_level). If end_level == 0 it will search + // in levels [start_level, end_level). If end_level == -1 it will search // through all non-empty levels uint64_t ApproximateSize(Version* v, const Slice& start, const Slice& end, int start_level, int end_level, diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index fde11c0d36..000bc295fc 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -4018,10 +4018,11 @@ Status BlockBasedTable::CreateIndexReader( uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key, TableReaderCaller caller) { BlockCacheLookupContext context(caller); - std::unique_ptr> index_iter( + IndexBlockIter iiter_on_stack; + auto index_iter = NewIndexIterator(ReadOptions(), /*need_upper_bound_check=*/false, - /*input_iter=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/&context)); + /*input_iter=*/&iiter_on_stack, /*get_context=*/nullptr, + /*lookup_context=*/&context); index_iter->Seek(key); uint64_t result; @@ -4041,6 +4042,11 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key, result = rep_->footer.metaindex_handle().offset(); } } + + if (index_iter != &iiter_on_stack) { + delete index_iter; + } + return result; } diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 189cd5d2e3..3a16e2995f 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -318,8 +318,11 @@ class BlockBasedTable : public TableReader { BlockCacheLookupContext* lookup_context) const; // Get the iterator from the index reader. - // If input_iter is not set, return new Iterator - // If input_iter is set, update it and return it as Iterator + // + // If input_iter is not set, return a new Iterator. + // If input_iter is set, try to update it and return it as Iterator. + // However note that in some cases the returned iterator may be different + // from input_iter. In such case the returned iterator should be freed. // // Note: ErrorIterator with Status::Incomplete shall be returned if all the // following conditions are met: