From e6be168aa553ef95d87f9dfa991fd1730b37fab2 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 10 Jun 2020 13:56:25 -0700 Subject: [PATCH] save a key comparison in block seeks (#6646) Summary: This saves up to two key comparisons in block seeks. The first key comparison saved is a redundant key comparison against the restart key where the linear scan starts. This comparison is saved in all cases except when the found key is in the first restart interval. The second key comparison saved is a redundant key comparison against the restart key where the linear scan ends. This is only saved in cases where all keys in the restart interval are less than the target (probability roughly `1/restart_interval`). Pull Request resolved: https://github.com/facebook/rocksdb/pull/6646 Test Plan: ran a benchmark with mostly default settings and counted key comparisons before: `user_key_comparison_count = 19399529` after: `user_key_comparison_count = 18431498` setup command: ``` $ TEST_TMPDIR=/dev/shm/dbbench ./db_bench -benchmarks=fillrandom,compact -write_buffer_size=1048576 -target_file_size_base=1048576 -max_bytes_for_level_base=4194304 -max_background_jobs=12 -level_compaction_dynamic_level_bytes=true -num=10000000 ``` benchmark command: ``` $ TEST_TMPDIR=/dev/shm/dbbench/ ./db_bench -use_existing_db=true -benchmarks=readrandom -disable_auto_compactions=true -num=10000000 -compression_type=none -reads=1000000 -perf_level=3 ``` Reviewed By: pdillinger Differential Revision: D20849707 Pulled By: ajkr fbshipit-source-id: 1f01c5cd99ea771fd27974046e37b194f1cdcfac --- HISTORY.md | 3 + table/block_based/block.cc | 119 ++++++++++++++++++++++++++++--------- table/block_based/block.h | 9 ++- 3 files changed, 102 insertions(+), 29 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 7d485a2766..301c186186 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -30,6 +30,9 @@ * Generate file checksum in SstFileWriter if Options.file_checksum_gen_factory is set. The checksum and checksum function name are stored in ExternalSstFileInfo after the sst file write is finished. * Add a value_size_soft_limit in read options which limits the cumulative value size of keys read in batches in MultiGet. Once the cumulative value size of found keys exceeds read_options.value_size_soft_limit, all the remaining keys are returned with status Abort without further finding their values. By default the value_size_soft_limit is std::numeric_limits::max(). +### Performance Improvements +* Eliminate redundant key comparisons during random access in block-based tables. + ## 6.10 (5/2/2020) ### Bug Fixes * Fix wrong result being read from ingested file. May happen when a key in the file happen to be prefix of another key also in the file. The issue can further cause more data corruption. The issue exists with rocksdb >= 5.0.0 since DB::IngestExternalFile() was introduced. diff --git a/table/block_based/block.cc b/table/block_based/block.cc index 064cd132c8..8bf7e11d49 100644 --- a/table/block_based/block.cc +++ b/table/block_based/block.cc @@ -128,17 +128,14 @@ struct DecodeKeyV4 { }; void DataBlockIter::Next() { - assert(Valid()); ParseNextDataKey(); } void DataBlockIter::NextOrReport() { - assert(Valid()); ParseNextDataKey(); } void IndexBlockIter::Next() { - assert(Valid()); ParseNextIndexKey(); } @@ -248,18 +245,14 @@ void DataBlockIter::Seek(const Slice& target) { return; } uint32_t index = 0; + bool skip_linear_scan = false; bool ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, - comparator_); + &skip_linear_scan, comparator_); if (!ok) { return; } - SeekToRestartPoint(index); - - // Linear search (within restart block) for first key >= target - while (ParseNextDataKey() && - comparator_->Compare(applied_key_.UpdateAndGetKey(), seek_key) < 0) { - } + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); } // Optimized Seek for point lookup for an internal key `target` @@ -393,6 +386,7 @@ void IndexBlockIter::Seek(const Slice& target) { } status_ = Status::OK(); uint32_t index = 0; + bool skip_linear_scan = false; bool ok = false; if (prefix_index_) { bool prefix_may_exist = true; @@ -404,23 +398,21 @@ void IndexBlockIter::Seek(const Slice& target) { current_ = restarts_; status_ = Status::NotFound(); } + // restart interval must be one when hash search is enabled so the binary + // search simply lands at the right place. + skip_linear_scan = true; } else if (value_delta_encoded_) { ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, - comparator_); + &skip_linear_scan, comparator_); } else { ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, - comparator_); + &skip_linear_scan, comparator_); } if (!ok) { return; } - SeekToRestartPoint(index); - - // Linear search (within restart block) for first key >= target - while (ParseNextIndexKey() && - comparator_->Compare(applied_key_.UpdateAndGetKey(), seek_key) < 0) { - } + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); } void DataBlockIter::SeekForPrev(const Slice& target) { @@ -430,18 +422,15 @@ void DataBlockIter::SeekForPrev(const Slice& target) { return; } uint32_t index = 0; + bool skip_linear_scan = false; bool ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, - comparator_); + &skip_linear_scan, comparator_); if (!ok) { return; } - SeekToRestartPoint(index); + FindKeyAfterBinarySeek(seek_key, index, skip_linear_scan, comparator_); - // Linear search (within restart block) for first key >= seek_key - while (ParseNextDataKey() && - comparator_->Compare(applied_key_.UpdateAndGetKey(), seek_key) < 0) { - } if (!Valid()) { SeekToLast(); } else { @@ -659,17 +648,70 @@ void IndexBlockIter::DecodeCurrentValue(uint32_t shared) { } } -// Binary search in restart array to find the first restart point that -// is either the last restart point with a key less than target, -// which means the key of next restart point is larger than target, or -// the first restart point with a key = target +template +void BlockIter::FindKeyAfterBinarySeek(const Slice& target, + uint32_t index, + bool skip_linear_scan, + const Comparator* comp) { + // SeekToRestartPoint() only does the lookup in the restart block. We need + // to follow it up with Next() to position the iterator at the restart key. + SeekToRestartPoint(index); + Next(); + + if (!skip_linear_scan) { + // Linear search (within restart block) for first key >= target + uint32_t max_offset; + if (index + 1 < num_restarts_) { + // We are in a non-last restart interval. Since `BinarySeek()` guarantees + // the next restart key is strictly greater than `target`, we can + // terminate upon reaching it without any additional key comparison. + max_offset = GetRestartPoint(index + 1); + } else { + // We are in the last restart interval. The while-loop will terminate by + // `Valid()` returning false upon advancing past the block's last key. + max_offset = port::kMaxUint32; + } + while (true) { + Next(); + if (!Valid()) { + break; + } + if (current_ == max_offset) { + assert(comp->Compare(applied_key_.UpdateAndGetKey(), target) > 0); + break; + } else if (comp->Compare(applied_key_.UpdateAndGetKey(), target) >= 0) { + break; + } + } + } +} + +// Binary searches in restart array to find the starting restart point for the +// linear scan, and stores it in `*index`. Assumes restart array does not +// contain duplicate keys. It is guaranteed that the restart key at `*index + 1` +// is strictly greater than `target` or does not exist (this can be used to +// elide a comparison when linear scan reaches all the way to the next restart +// key). Furthermore, `*skip_linear_scan` is set to indicate whether the +// `*index`th restart key is the final result so that key does not need to be +// compared again later. template template bool BlockIter::BinarySeek(const Slice& target, uint32_t left, uint32_t right, uint32_t* index, + bool* skip_linear_scan, const Comparator* comp) { assert(left <= right); + if (restarts_ == 0) { + // SST files dedicated to range tombstones are written with index blocks + // that have no keys while also having `num_restarts_ == 1`. This would + // cause a problem for `BinarySeek()` as it'd try to access the first key + // which does not exist. We identify such blocks by the offset at which + // their restarts are stored, and return false to prevent any attempted + // key accesses. + return false; + } + *skip_linear_scan = false; while (left < right) { uint32_t mid = (left + right + 1) / 2; uint32_t region_offset = GetRestartPoint(mid); @@ -692,11 +734,32 @@ bool BlockIter::BinarySeek(const Slice& target, uint32_t left, // after "mid" are uninteresting. right = mid - 1; } else { + *skip_linear_scan = true; left = right = mid; } } + assert(left == right); *index = left; + if (*index == 0) { + // Special case as we land at zero as long as restart key at index 1 is > + // "target". We need to compare the restart key at index 0 so we can set + // `*skip_linear_scan` when the 0th restart key is >= "target". + // + // GetRestartPoint() is always zero for restart key zero; skip the restart + // block access. + uint32_t shared, non_shared; + const char* key_ptr = + DecodeKeyFunc()(data_, data_ + restarts_, &shared, &non_shared); + if (key_ptr == nullptr || (shared != 0)) { + CorruptionError(); + return false; + } + Slice first_key(key_ptr, non_shared); + raw_key_.SetKey(first_key, false /* copy */); + int cmp = comp->Compare(applied_key_.UpdateAndGetKey(), target); + *skip_linear_scan = cmp >= 0; + } return true; } diff --git a/table/block_based/block.h b/table/block_based/block.h index a24005f25a..a7378949a6 100644 --- a/table/block_based/block.h +++ b/table/block_based/block.h @@ -343,6 +343,8 @@ class BlockIter : public InternalIteratorBase { Cache::Handle* cache_handle() { return cache_handle_; } + virtual void Next() override = 0; + protected: // Note: The type could be changed to InternalKeyComparator but we see a weird // performance drop by that. @@ -403,9 +405,14 @@ class BlockIter : public InternalIteratorBase { void CorruptionError(); + protected: template inline bool BinarySeek(const Slice& target, uint32_t left, uint32_t right, - uint32_t* index, const Comparator* comp); + uint32_t* index, bool* is_index_key_result, + const Comparator* comp); + + void FindKeyAfterBinarySeek(const Slice& target, uint32_t index, + bool is_index_key_result, const Comparator* comp); }; class DataBlockIter final : public BlockIter {