Avoid per-key upper bound check in BlockBasedTableIterator (#5101)

Summary:
`BlockBasedTableIterator` avoid reading next block on `Next()` if it detects the iterator will be out of bound, by checking against index key. The optimization was added in #2239, and by the time it only check the bound per block. It seems later change make it a per-key check, which introduce unnecessary key comparisons.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5101

Differential Revision: D14678707

Pulled By: siying

fbshipit-source-id: 2372446116753c7892ea4cec7b4b49ef87ba463e
This commit is contained in:
Yi Wu 2019-03-29 13:07:44 -07:00 committed by Facebook Github Bot
parent 09957ded1d
commit f29dc1b906
4 changed files with 56 additions and 25 deletions

View file

@ -1026,11 +1026,8 @@ TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
int upper_bound_hits = 0;
Options options = CurrentOptions();
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"BlockBasedTable::BlockEntryIteratorState::KeyReachedUpperBound",
[&upper_bound_hits](void* arg) {
assert(arg != nullptr);
upper_bound_hits += (*static_cast<bool*>(arg) ? 1 : 0);
});
"BlockBasedTableIterator:out_of_bound",
[&upper_bound_hits](void*) { upper_bound_hits++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
options.env = env_;
options.create_if_missing = true;

View file

@ -2346,6 +2346,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Seek(const Slice& target) {
block_iter_.Seek(target);
FindKeyForward();
CheckOutOfBound();
assert(
!block_iter_.Valid() ||
(key_includes_seq_ && icomp_.Compare(target, block_iter_.key()) <= 0) ||
@ -2409,6 +2410,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::SeekToFirst() {
InitDataBlock();
block_iter_.SeekToFirst();
FindKeyForward();
CheckOutOfBound();
}
template <class TBlockIter, typename TValue>
@ -2491,18 +2493,24 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
template <class TBlockIter, typename TValue>
void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyForward() {
assert(!is_out_of_bound_);
// TODO the while loop inherits from two-level-iterator. We don't know
// whether a block can be empty so it can be replaced by an "if".
while (!block_iter_.Valid()) {
if (!block_iter_.status().ok()) {
return;
}
if (read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_) {
is_out_of_bound_ =
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
ExtractUserKey(index_iter_->key())) <= 0);
}
ResetDataIter();
// We used to check the current index key for upperbound.
// It will only save a data reading for a small percentage of use cases,
// so for code simplicity, we removed it. We can add it back if there is a
// significnat performance regression.
if (is_out_of_bound_) {
// The next block is out of bound. No need to read it.
TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr);
return;
}
index_iter_->Next();
if (index_iter_->Valid()) {
@ -2512,25 +2520,10 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyForward() {
return;
}
}
// Check upper bound on the current key
bool reached_upper_bound =
(read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_ && block_iter_.Valid() &&
user_comparator_.Compare(ExtractUserKey(block_iter_.key()),
*read_options_.iterate_upper_bound) >= 0);
TEST_SYNC_POINT_CALLBACK(
"BlockBasedTable::BlockEntryIteratorState::KeyReachedUpperBound",
&reached_upper_bound);
if (reached_upper_bound) {
is_out_of_bound_ = true;
return;
}
}
template <class TBlockIter, typename TValue>
void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyBackward() {
assert(!is_out_of_bound_);
while (!block_iter_.Valid()) {
if (!block_iter_.status().ok()) {
return;
@ -2551,6 +2544,16 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyBackward() {
// code simplicity.
}
template <class TBlockIter, typename TValue>
void BlockBasedTableIterator<TBlockIter, TValue>::CheckOutOfBound() {
if (read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_ && block_iter_.Valid()) {
is_out_of_bound_ =
user_comparator_.Compare(*read_options_.iterate_upper_bound,
ExtractUserKey(block_iter_.key())) <= 0;
}
}
InternalIterator* BlockBasedTable::NewIterator(
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
Arena* arena, bool skip_filters, bool for_compaction) {

View file

@ -623,6 +623,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
}
}
// Whether iterator invalidated for being out of bound.
bool IsOutOfBound() override { return is_out_of_bound_; }
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
@ -673,6 +674,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
void InitDataBlock();
void FindKeyForward();
void FindKeyBackward();
void CheckOutOfBound();
private:
BlockBasedTable* table_;

View file

@ -3842,6 +3842,35 @@ TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
}
}
// BlockBasedTableIterator should invalidate itself and return
// OutOfBound()=true immediately after Seek(), to allow LevelIterator
// filter out corresponding level.
TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
c.Add("foo", "v1");
std::vector<std::string> keys;
stl_wrappers::KVMap kvmap;
Options options;
const ImmutableCFOptions ioptions(options);
const MutableCFOptions moptions(options);
c.Finish(options, ioptions, moptions, BlockBasedTableOptions(),
GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
auto* reader = c.GetTableReader();
ReadOptions read_opt;
std::string upper_bound = "bar";
Slice upper_bound_slice(upper_bound);
read_opt.iterate_upper_bound = &upper_bound_slice;
std::unique_ptr<InternalIterator> iter;
iter.reset(reader->NewIterator(read_opt, nullptr /*prefix_extractor*/));
iter->SeekToFirst();
ASSERT_FALSE(iter->Valid());
ASSERT_TRUE(iter->IsOutOfBound());
iter.reset(reader->NewIterator(read_opt, nullptr /*prefix_extractor*/));
iter->Seek("foo");
ASSERT_FALSE(iter->Valid());
ASSERT_TRUE(iter->IsOutOfBound());
}
} // namespace rocksdb
int main(int argc, char** argv) {