rocksdb/table/iterator_wrapper.h
leipeng 68ce5d84f6 Add new Iterator API Refresh(const snapshot*) (#10594)
Summary:
This PR resolves https://github.com/facebook/rocksdb/issues/10487 & https://github.com/facebook/rocksdb/issues/10536, user code needs to call Refresh() periodically.

The main code change is to support range deletions. A range tombstone iterator uses a sequence number as upper bound to decide which range tombstones are effective. During Iterator refresh, this sequence number upper bound needs to be updated for all range tombstone iterators under DBIter and LevelIterator. LevelIterator may create new table iterators and range tombstone iterator during scanning, so it needs to be aware of iterator refresh. The code path that propagates this change is `db_iter_->set_sequence(read_seq)  -> MergingIterator::SetRangeDelReadSeqno() -> TruncatedRangeDelIterator::SetRangeDelReadSeqno() and LevelIterator::SetRangeDelReadSeqno()`.

This change also fixes an issue where range tombstone iterators created by LevelIterator may access ReadOptions::snapshot, even though we do not explicitly require users to keep a snapshot alive after creating an Iterator.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10594

Test Plan:
* New unit tests.
* Add Iterator::Refresh(snapshot) to stress test. Note that this change only adds tests for refreshing to the same snapshot since this is the main target use case.

TODO in a following PR:
* Stress test Iterator::Refresh() to different snapshots or no snapshot.

Reviewed By: ajkr

Differential Revision: D48456896

Pulled By: cbi42

fbshipit-source-id: 2e642c04e91235cc9542ef4cd37b3c20823bd779
2023-09-15 10:44:43 -07:00

196 lines
4.8 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <set>
#include "table/internal_iterator.h"
#include "test_util/sync_point.h"
namespace ROCKSDB_NAMESPACE {
// A internal wrapper class with an interface similar to Iterator that caches
// the valid() and key() results for an underlying iterator.
// This can help avoid virtual function calls and also gives better
// cache locality.
template <class TValue = Slice>
class IteratorWrapperBase {
public:
IteratorWrapperBase() : iter_(nullptr), valid_(false) {}
explicit IteratorWrapperBase(InternalIteratorBase<TValue>* _iter)
: iter_(nullptr) {
Set(_iter);
}
~IteratorWrapperBase() {}
InternalIteratorBase<TValue>* iter() const { return iter_; }
void SetRangeDelReadSeqno(SequenceNumber read_seqno) {
if (iter_) {
iter_->SetRangeDelReadSeqno(read_seqno);
}
}
// Set the underlying Iterator to _iter and return
// previous underlying Iterator.
InternalIteratorBase<TValue>* Set(InternalIteratorBase<TValue>* _iter) {
InternalIteratorBase<TValue>* old_iter = iter_;
iter_ = _iter;
if (iter_ == nullptr) {
valid_ = false;
} else {
Update();
}
return old_iter;
}
void DeleteIter(bool is_arena_mode) {
if (iter_) {
if (!is_arena_mode) {
delete iter_;
} else {
iter_->~InternalIteratorBase<TValue>();
}
}
}
// Iterator interface methods
bool Valid() const { return valid_; }
Slice key() const {
assert(Valid());
return result_.key;
}
TValue value() const {
assert(Valid());
return iter_->value();
}
// Methods below require iter() != nullptr
Status status() const {
assert(iter_);
return iter_->status();
}
bool PrepareValue() {
assert(Valid());
if (result_.value_prepared) {
return true;
}
if (iter_->PrepareValue()) {
result_.value_prepared = true;
return true;
}
assert(!iter_->Valid());
valid_ = false;
return false;
}
void Next() {
assert(iter_);
valid_ = iter_->NextAndGetResult(&result_);
assert(!valid_ || iter_->status().ok());
}
bool NextAndGetResult(IterateResult* result) {
assert(iter_);
valid_ = iter_->NextAndGetResult(&result_);
*result = result_;
assert(!valid_ || iter_->status().ok());
return valid_;
}
void Prev() {
assert(iter_);
iter_->Prev();
Update();
}
void Seek(const Slice& k) {
assert(iter_);
iter_->Seek(k);
Update();
}
void SeekForPrev(const Slice& k) {
assert(iter_);
iter_->SeekForPrev(k);
Update();
}
void SeekToFirst() {
assert(iter_);
iter_->SeekToFirst();
Update();
}
void SeekToLast() {
assert(iter_);
iter_->SeekToLast();
Update();
}
bool MayBeOutOfLowerBound() {
assert(Valid());
return iter_->MayBeOutOfLowerBound();
}
IterBoundCheck UpperBoundCheckResult() {
assert(Valid());
return result_.bound_check_result;
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) {
assert(iter_);
iter_->SetPinnedItersMgr(pinned_iters_mgr);
}
bool IsKeyPinned() const {
assert(Valid());
return iter_->IsKeyPinned();
}
bool IsValuePinned() const {
assert(Valid());
return iter_->IsValuePinned();
}
bool IsValuePrepared() const { return result_.value_prepared; }
Slice user_key() const {
assert(Valid());
return iter_->user_key();
}
void UpdateReadaheadState(InternalIteratorBase<TValue>* old_iter) {
if (old_iter && iter_) {
ReadaheadFileInfo readahead_file_info;
old_iter->GetReadaheadState(&readahead_file_info);
iter_->SetReadaheadState(&readahead_file_info);
}
}
bool IsDeleteRangeSentinelKey() const {
return iter_->IsDeleteRangeSentinelKey();
}
private:
void Update() {
valid_ = iter_->Valid();
if (valid_) {
assert(iter_->status().ok());
result_.key = iter_->key();
result_.bound_check_result = IterBoundCheck::kUnknown;
result_.value_prepared = false;
}
}
InternalIteratorBase<TValue>* iter_;
IterateResult result_;
bool valid_;
};
using IteratorWrapper = IteratorWrapperBase<Slice>;
class Arena;
// Return an empty iterator (yields nothing) allocated from arena.
template <class TValue = Slice>
extern InternalIteratorBase<TValue>* NewEmptyInternalIterator(Arena* arena);
} // namespace ROCKSDB_NAMESPACE