Option to fail a request as incomplete when skipping too many internal keys

Summary:
Operations like Seek/Next/Prev sometimes take too long to complete when there are many internal keys to be skipped. This change adds an option, max_skippable_internal_keys, which sets a threshold for the maximum number of internal keys that can be skipped. It addresses the cases where it is much better to fail a request (as incomplete) than to wait a considerable time for the request to complete.

This feature -- failing an iterator request as incomplete -- is disabled by default, i.e. when max_skippable_internal_keys = 0. It is enabled only when max_skippable_internal_keys > 0.

This feature is based on the discussion mentioned in the PR https://github.com/facebook/rocksdb/pull/1084.
Closes https://github.com/facebook/rocksdb/pull/2000

Differential Revision: D4753223

Pulled By: sagar0

fbshipit-source-id: 1c973f7
This commit is contained in:
Sagar Vemuri 2017-03-30 11:57:05 -07:00 committed by Facebook Github Bot
parent 58179ec4a6
commit c6d04f2ecf
8 changed files with 462 additions and 24 deletions

View file

@ -2,6 +2,7 @@
## Unreleased ## Unreleased
### Public API Change ### Public API Change
* Support dynamically change `stats_dump_period_sec` option via SetDBOptions(). * Support dynamically change `stats_dump_period_sec` option via SetDBOptions().
* Added ReadOptions::max_skippable_internal_keys to set a threshold to fail a request as incomplete when too many keys are being skipped when using iterators.
### New Features ### New Features
* Memtable flush can be avoided during checkpoint creation if total log file size is smaller than a threshold specified by the user. * Memtable flush can be avoided during checkpoint creation if total log file size is smaller than a threshold specified by the user.

View file

@ -4442,7 +4442,9 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
kMaxSequenceNumber, kMaxSequenceNumber,
sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number, read_options.iterate_upper_bound, sv->version_number, read_options.iterate_upper_bound,
read_options.prefix_same_as_start, read_options.pin_data); read_options.prefix_same_as_start, read_options.pin_data,
read_options.total_order_seek,
read_options.max_skippable_internal_keys);
#endif #endif
} else { } else {
SequenceNumber latest_snapshot = versions_->LastSequence(); SequenceNumber latest_snapshot = versions_->LastSequence();
@ -4501,7 +4503,8 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options,
sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number, read_options.iterate_upper_bound, sv->version_number, read_options.iterate_upper_bound,
read_options.prefix_same_as_start, read_options.pin_data, read_options.prefix_same_as_start, read_options.pin_data,
read_options.total_order_seek); read_options.total_order_seek,
read_options.max_skippable_internal_keys);
InternalIterator* internal_iter = InternalIterator* internal_iter =
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(), NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(),
@ -4553,7 +4556,9 @@ Status DBImpl::NewIterators(
env_, *cfd->ioptions(), cfd->user_comparator(), iter, env_, *cfd->ioptions(), cfd->user_comparator(), iter,
kMaxSequenceNumber, kMaxSequenceNumber,
sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number, nullptr, false, read_options.pin_data)); sv->version_number, nullptr, false, read_options.pin_data,
read_options.total_order_seek,
read_options.max_skippable_internal_keys));
} }
#endif #endif
} else { } else {
@ -4573,7 +4578,9 @@ Status DBImpl::NewIterators(
ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(), snapshot, env_, *cfd->ioptions(), cfd->user_comparator(), snapshot,
sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number, nullptr, false, read_options.pin_data); sv->version_number, nullptr, false, read_options.pin_data,
read_options.total_order_seek,
read_options.max_skippable_internal_keys);
InternalIterator* internal_iter = InternalIterator* internal_iter =
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(), NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(),
db_iter->GetRangeDelAggregator()); db_iter->GetRangeDelAggregator());
@ -5160,7 +5167,6 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
env_->SleepForMicroseconds(kDelayInterval); env_->SleepForMicroseconds(kDelayInterval);
} }
mutex_.Lock(); mutex_.Lock();
} }
while (bg_error_.ok() && write_controller_.IsStopped()) { while (bg_error_.ok() && write_controller_.IsStopped()) {

View file

@ -65,7 +65,9 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options,
->number_ ->number_
: latest_snapshot), : latest_snapshot),
super_version->mutable_cf_options.max_sequential_skip_in_iterations, super_version->mutable_cf_options.max_sequential_skip_in_iterations,
super_version->version_number); super_version->version_number, read_options.iterate_upper_bound,
read_options.prefix_same_as_start, read_options.pin_data,
read_options.total_order_seek, read_options.max_skippable_internal_keys);
auto internal_iter = auto internal_iter =
NewInternalIterator(read_options, cfd, super_version, db_iter->GetArena(), NewInternalIterator(read_options, cfd, super_version, db_iter->GetArena(),
db_iter->GetRangeDelAggregator()); db_iter->GetRangeDelAggregator());
@ -94,7 +96,10 @@ Status DBImplReadOnly::NewIterators(
->number_ ->number_
: latest_snapshot), : latest_snapshot),
sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number); sv->version_number, read_options.iterate_upper_bound,
read_options.prefix_same_as_start, read_options.pin_data,
read_options.total_order_seek,
read_options.max_skippable_internal_keys);
auto* internal_iter = auto* internal_iter =
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(), NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(),
db_iter->GetRangeDelAggregator()); db_iter->GetRangeDelAggregator());

View file

@ -106,7 +106,8 @@ class DBIter: public Iterator {
uint64_t max_sequential_skip_in_iterations, uint64_t version_number, uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound = nullptr, const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false, bool prefix_same_as_start = false, bool pin_data = false,
bool total_order_seek = false) bool total_order_seek = false,
uint64_t max_skippable_internal_keys = 0)
: arena_mode_(arena_mode), : arena_mode_(arena_mode),
env_(env), env_(env),
logger_(ioptions.info_log), logger_(ioptions.info_log),
@ -128,6 +129,7 @@ class DBIter: public Iterator {
RecordTick(statistics_, NO_ITERATORS); RecordTick(statistics_, NO_ITERATORS);
prefix_extractor_ = ioptions.prefix_extractor; prefix_extractor_ = ioptions.prefix_extractor;
max_skip_ = max_sequential_skip_in_iterations; max_skip_ = max_sequential_skip_in_iterations;
max_skippable_internal_keys_ = max_skippable_internal_keys;
if (pin_thru_lifetime_) { if (pin_thru_lifetime_) {
pinned_iters_mgr_.StartPinning(); pinned_iters_mgr_.StartPinning();
} }
@ -224,6 +226,7 @@ class DBIter: public Iterator {
void FindNextUserEntryInternal(bool skipping, bool prefix_check); void FindNextUserEntryInternal(bool skipping, bool prefix_check);
bool ParseKey(ParsedInternalKey* key); bool ParseKey(ParsedInternalKey* key);
void MergeValuesNewToOld(); void MergeValuesNewToOld();
bool TooManyInternalKeysSkipped(bool increment = true);
// Temporarily pin the blocks that we encounter until ReleaseTempPinnedData() // Temporarily pin the blocks that we encounter until ReleaseTempPinnedData()
// is called // is called
@ -249,6 +252,10 @@ class DBIter: public Iterator {
} }
} }
// Zeroes the count of internal keys skipped so far. Each user-facing
// positioning call (Seek, SeekForPrev, SeekToFirst, SeekToLast, Next, Prev)
// invokes this before it starts moving the internal iterator.
inline void ResetInternalKeysSkippedCounter() { num_internal_keys_skipped_ = 0; }
const SliceTransform* prefix_extractor_; const SliceTransform* prefix_extractor_;
bool arena_mode_; bool arena_mode_;
Env* const env_; Env* const env_;
@ -268,6 +275,8 @@ class DBIter: public Iterator {
// for prefix seek mode to support prev() // for prefix seek mode to support prev()
Statistics* statistics_; Statistics* statistics_;
uint64_t max_skip_; uint64_t max_skip_;
uint64_t max_skippable_internal_keys_;
uint64_t num_internal_keys_skipped_;
uint64_t version_number_; uint64_t version_number_;
const Slice* iterate_upper_bound_; const Slice* iterate_upper_bound_;
IterKey prefix_start_buf_; IterKey prefix_start_buf_;
@ -304,6 +313,7 @@ void DBIter::Next() {
// Release temporarily pinned blocks from last operation // Release temporarily pinned blocks from last operation
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
if (direction_ == kReverse) { if (direction_ == kReverse) {
ReverseToForward(); ReverseToForward();
} else if (iter_->Valid() && !current_entry_is_merged_) { } else if (iter_->Valid() && !current_entry_is_merged_) {
@ -390,6 +400,10 @@ void DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
break; break;
} }
if (TooManyInternalKeysSkipped()) {
return;
}
if (ikey.sequence <= sequence_) { if (ikey.sequence <= sequence_) {
if (skipping && if (skipping &&
user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) <= 0) { user_comparator_->Compare(ikey.user_key, saved_key_.GetKey()) <= 0) {
@ -580,6 +594,7 @@ void DBIter::MergeValuesNewToOld() {
void DBIter::Prev() { void DBIter::Prev() {
assert(valid_); assert(valid_);
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
if (direction_ == kForward) { if (direction_ == kForward) {
ReverseToBackward(); ReverseToBackward();
} }
@ -658,6 +673,7 @@ void DBIter::PrevInternal() {
while (iter_->Valid()) { while (iter_->Valid()) {
saved_key_.SetKey(ExtractUserKey(iter_->key()), saved_key_.SetKey(ExtractUserKey(iter_->key()),
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */); !iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
if (FindValueForCurrentKey()) { if (FindValueForCurrentKey()) {
valid_ = true; valid_ = true;
if (!iter_->Valid()) { if (!iter_->Valid()) {
@ -674,6 +690,11 @@ void DBIter::PrevInternal() {
} }
return; return;
} }
if (TooManyInternalKeysSkipped(false)) {
return;
}
if (!iter_->Valid()) { if (!iter_->Valid()) {
break; break;
} }
@ -709,6 +730,10 @@ bool DBIter::FindValueForCurrentKey() {
size_t num_skipped = 0; size_t num_skipped = 0;
while (iter_->Valid() && ikey.sequence <= sequence_ && while (iter_->Valid() && ikey.sequence <= sequence_ &&
user_comparator_->Equal(ikey.user_key, saved_key_.GetKey())) { user_comparator_->Equal(ikey.user_key, saved_key_.GetKey())) {
if (TooManyInternalKeysSkipped()) {
return false;
}
// We iterate too much: let's use Seek() to avoid too much key comparisons // We iterate too much: let's use Seek() to avoid too much key comparisons
if (num_skipped >= max_skip_) { if (num_skipped >= max_skip_) {
return FindValueForCurrentKeyUsingSeek(); return FindValueForCurrentKeyUsingSeek();
@ -908,6 +933,10 @@ void DBIter::FindPrevUserKey() {
while (iter_->Valid() && ((cmp = user_comparator_->Compare( while (iter_->Valid() && ((cmp = user_comparator_->Compare(
ikey.user_key, saved_key_.GetKey())) == 0 || ikey.user_key, saved_key_.GetKey())) == 0 ||
(cmp > 0 && ikey.sequence > sequence_))) { (cmp > 0 && ikey.sequence > sequence_))) {
if (TooManyInternalKeysSkipped()) {
return;
}
if (cmp == 0) { if (cmp == 0) {
if (num_skipped >= max_skip_) { if (num_skipped >= max_skip_) {
num_skipped = 0; num_skipped = 0;
@ -930,6 +959,18 @@ void DBIter::FindPrevUserKey() {
} }
} }
// Checks the skipped-internal-keys counter against the configured threshold.
//
// Returns true, after marking the iterator invalid and setting status_ to
// Status::Incomplete, when the limit is enabled (max_skippable_internal_keys_
// is non-zero) and the counter has already gone past it. Otherwise returns
// false and, if `increment` is set, counts one more skipped internal key.
bool DBIter::TooManyInternalKeysSkipped(bool increment) {
  const bool limit_enabled = max_skippable_internal_keys_ > 0;
  const bool within_limit =
      num_internal_keys_skipped_ <= max_skippable_internal_keys_;

  if (!limit_enabled || within_limit) {
    // Still allowed to skip; account for this key only when asked to.
    if (increment) {
      ++num_internal_keys_skipped_;
    }
    return false;
  }

  valid_ = false;
  status_ = Status::Incomplete("Too many internal keys skipped.");
  return true;
}
// Skip all unparseable keys // Skip all unparseable keys
void DBIter::FindParseableKey(ParsedInternalKey* ikey, Direction direction) { void DBIter::FindParseableKey(ParsedInternalKey* ikey, Direction direction) {
while (iter_->Valid() && !ParseKey(ikey)) { while (iter_->Valid() && !ParseKey(ikey)) {
@ -944,6 +985,7 @@ void DBIter::FindParseableKey(ParsedInternalKey* ikey, Direction direction) {
void DBIter::Seek(const Slice& target) { void DBIter::Seek(const Slice& target) {
StopWatch sw(env_, statistics_, DB_SEEK); StopWatch sw(env_, statistics_, DB_SEEK);
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
saved_key_.Clear(); saved_key_.Clear();
saved_key_.SetInternalKey(target, sequence_); saved_key_.SetInternalKey(target, sequence_);
@ -985,6 +1027,7 @@ void DBIter::Seek(const Slice& target) {
void DBIter::SeekForPrev(const Slice& target) { void DBIter::SeekForPrev(const Slice& target) {
StopWatch sw(env_, statistics_, DB_SEEK); StopWatch sw(env_, statistics_, DB_SEEK);
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
saved_key_.Clear(); saved_key_.Clear();
// now saved_key is used to store internal key. // now saved_key is used to store internal key.
saved_key_.SetInternalKey(target, 0 /* sequence_number */, saved_key_.SetInternalKey(target, 0 /* sequence_number */,
@ -1030,6 +1073,7 @@ void DBIter::SeekToFirst() {
} }
direction_ = kForward; direction_ = kForward;
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
ClearSavedValue(); ClearSavedValue();
{ {
@ -1066,6 +1110,7 @@ void DBIter::SeekToLast() {
} }
direction_ = kReverse; direction_ = kReverse;
ReleaseTempPinnedData(); ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
ClearSavedValue(); ClearSavedValue();
{ {
@ -1105,11 +1150,13 @@ Iterator* NewDBIterator(
const Comparator* user_key_comparator, InternalIterator* internal_iter, const Comparator* user_key_comparator, InternalIterator* internal_iter,
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
uint64_t version_number, const Slice* iterate_upper_bound, uint64_t version_number, const Slice* iterate_upper_bound,
bool prefix_same_as_start, bool pin_data, bool total_order_seek) { bool prefix_same_as_start, bool pin_data, bool total_order_seek,
DBIter* db_iter = new DBIter( uint64_t max_skippable_internal_keys) {
env, ioptions, user_key_comparator, internal_iter, sequence, false, DBIter* db_iter =
max_sequential_skip_in_iterations, version_number, iterate_upper_bound, new DBIter(env, ioptions, user_key_comparator, internal_iter, sequence,
prefix_same_as_start, pin_data, total_order_seek); false, max_sequential_skip_in_iterations, version_number,
iterate_upper_bound, prefix_same_as_start, pin_data,
total_order_seek, max_skippable_internal_keys);
return db_iter; return db_iter;
} }
@ -1153,14 +1200,15 @@ ArenaWrappedDBIter* NewArenaWrappedDbIterator(
const Comparator* user_key_comparator, const SequenceNumber& sequence, const Comparator* user_key_comparator, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number, uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound, bool prefix_same_as_start, bool pin_data, const Slice* iterate_upper_bound, bool prefix_same_as_start, bool pin_data,
bool total_order_seek) { bool total_order_seek, uint64_t max_skippable_internal_keys) {
ArenaWrappedDBIter* iter = new ArenaWrappedDBIter(); ArenaWrappedDBIter* iter = new ArenaWrappedDBIter();
Arena* arena = iter->GetArena(); Arena* arena = iter->GetArena();
auto mem = arena->AllocateAligned(sizeof(DBIter)); auto mem = arena->AllocateAligned(sizeof(DBIter));
DBIter* db_iter = new (mem) DBIter( DBIter* db_iter =
env, ioptions, user_key_comparator, nullptr, sequence, true, new (mem) DBIter(env, ioptions, user_key_comparator, nullptr, sequence,
max_sequential_skip_in_iterations, version_number, iterate_upper_bound, true, max_sequential_skip_in_iterations, version_number,
prefix_same_as_start, pin_data, total_order_seek); iterate_upper_bound, prefix_same_as_start, pin_data,
total_order_seek, max_skippable_internal_keys);
iter->SetDBIter(db_iter); iter->SetDBIter(db_iter);

View file

@ -33,7 +33,7 @@ extern Iterator* NewDBIterator(
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
uint64_t version_number, const Slice* iterate_upper_bound = nullptr, uint64_t version_number, const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false, bool prefix_same_as_start = false, bool pin_data = false,
bool total_order_seek = false); bool total_order_seek = false, uint64_t max_skippable_internal_keys = 0);
// A wrapper iterator which wraps DB Iterator and the arena, with which the DB // A wrapper iterator which wraps DB Iterator and the arena, with which the DB
// iterator is supposed be allocated. This class is used as an entry point of // iterator is supposed be allocated. This class is used as an entry point of
@ -82,6 +82,6 @@ extern ArenaWrappedDBIter* NewArenaWrappedDbIterator(
uint64_t max_sequential_skip_in_iterations, uint64_t version_number, uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
const Slice* iterate_upper_bound = nullptr, const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false, bool prefix_same_as_start = false, bool pin_data = false,
bool total_order_seek = false); bool total_order_seek = false, uint64_t max_skippable_internal_keys = 0);
} // namespace rocksdb } // namespace rocksdb

View file

@ -875,6 +875,378 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip) {
} }
} }
// Exercises the max_skippable_internal_keys argument of NewDBIterator.
//
// Each scoped section builds a fresh TestIterator holding a small sequence of
// puts and deletions, wraps it in a DB iterator and then walks it forwards
// (SeekToFirst/Next) and backwards (SeekToLast/Prev), checking either the
// key/value that is reached or that the iterator became invalid with an
// Incomplete status.
TEST_F(DBIteratorTest, DBIteratorSkipInternalKeys) {
  Options options;
  ReadOptions ro;

  // Basic test case ... Make sure explicitly passing the default value works.
  // Skipping internal keys is disabled by default, when the value is 0.
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddPut("a", "val_a");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("b");
    internal_iter->AddPut("c", "val_c");
    internal_iter->AddPut("c", "val_c");
    internal_iter->AddDeletion("c");
    internal_iter->AddPut("d", "val_d");
    internal_iter->Finish();

    ro.max_skippable_internal_keys = 0;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
        10, options.max_sequential_skip_in_iterations, 0, nullptr, false, false,
        false, ro.max_skippable_internal_keys));

    db_iter->SeekToFirst();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Next();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "d");
    ASSERT_EQ(db_iter->value().ToString(), "val_d");

    db_iter->Next();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().ok());

    db_iter->SeekToLast();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "d");
    ASSERT_EQ(db_iter->value().ToString(), "val_d");

    db_iter->Prev();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Prev();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().ok());
  }

  // Test to make sure that the request will *not* fail as incomplete if
  // num_internal_keys_skipped is *equal* to max_skippable_internal_keys
  // threshold. (It will fail as incomplete only when the threshold is
  // exceeded.)
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddPut("a", "val_a");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("b");
    internal_iter->AddPut("c", "val_c");
    internal_iter->Finish();

    ro.max_skippable_internal_keys = 2;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
        10, options.max_sequential_skip_in_iterations, 0, nullptr, false, false,
        false, ro.max_skippable_internal_keys));

    db_iter->SeekToFirst();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Next();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "c");
    ASSERT_EQ(db_iter->value().ToString(), "val_c");

    db_iter->Next();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().ok());

    db_iter->SeekToLast();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "c");
    ASSERT_EQ(db_iter->value().ToString(), "val_c");

    db_iter->Prev();
    // Check validity before reading key()/value(), as every other step does.
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Prev();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().ok());
  }

  // Fail the request as incomplete when num_internal_keys_skipped >
  // max_skippable_internal_keys
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddPut("a", "val_a");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("b");
    internal_iter->AddPut("c", "val_c");
    internal_iter->Finish();

    ro.max_skippable_internal_keys = 2;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
        10, options.max_sequential_skip_in_iterations, 0, nullptr, false, false,
        false, ro.max_skippable_internal_keys));

    db_iter->SeekToFirst();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Next();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());

    db_iter->SeekToLast();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "c");
    ASSERT_EQ(db_iter->value().ToString(), "val_c");

    db_iter->Prev();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());
  }

  // Test that the num_internal_keys_skipped counter resets after a successful
  // read.
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddPut("a", "val_a");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("b");
    internal_iter->AddPut("c", "val_c");
    internal_iter->AddDeletion("d");
    internal_iter->AddDeletion("d");
    internal_iter->AddDeletion("d");
    internal_iter->AddPut("e", "val_e");
    internal_iter->Finish();

    ro.max_skippable_internal_keys = 2;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
        10, options.max_sequential_skip_in_iterations, 0, nullptr, false, false,
        false, ro.max_skippable_internal_keys));

    db_iter->SeekToFirst();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Next();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "c");
    ASSERT_EQ(db_iter->value().ToString(), "val_c");

    db_iter->Next();  // num_internal_keys_skipped counter resets here.
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());
  }

  // Test that the num_internal_keys_skipped counter resets after a successful
  // read.
  // Reverse direction
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddPut("a", "val_a");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("b");
    internal_iter->AddPut("c", "val_c");
    internal_iter->AddDeletion("d");
    internal_iter->AddDeletion("d");
    internal_iter->AddPut("e", "val_e");
    internal_iter->Finish();

    ro.max_skippable_internal_keys = 2;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
        10, options.max_sequential_skip_in_iterations, 0, nullptr, false, false,
        false, ro.max_skippable_internal_keys));

    db_iter->SeekToLast();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "e");
    ASSERT_EQ(db_iter->value().ToString(), "val_e");

    db_iter->Prev();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "c");
    ASSERT_EQ(db_iter->value().ToString(), "val_c");

    db_iter->Prev();  // num_internal_keys_skipped counter resets here.
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());
  }

  // Test that skipping separate keys is handled
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddPut("a", "val_a");
    internal_iter->AddDeletion("b");
    internal_iter->AddDeletion("c");
    internal_iter->AddDeletion("d");
    internal_iter->AddPut("e", "val_e");
    internal_iter->Finish();

    ro.max_skippable_internal_keys = 2;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
        10, options.max_sequential_skip_in_iterations, 0, nullptr, false, false,
        false, ro.max_skippable_internal_keys));

    db_iter->SeekToFirst();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Next();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());

    db_iter->SeekToLast();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "e");
    ASSERT_EQ(db_iter->value().ToString(), "val_e");

    db_iter->Prev();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());
  }

  // Test if alternating puts and deletes of the same key are handled correctly.
  {
    TestIterator* internal_iter = new TestIterator(BytewiseComparator());
    internal_iter->AddPut("a", "val_a");
    internal_iter->AddPut("b", "val_b");
    internal_iter->AddDeletion("b");
    internal_iter->AddPut("c", "val_c");
    internal_iter->AddDeletion("c");
    internal_iter->AddPut("d", "val_d");
    internal_iter->AddDeletion("d");
    internal_iter->AddPut("e", "val_e");
    internal_iter->Finish();

    ro.max_skippable_internal_keys = 2;
    std::unique_ptr<Iterator> db_iter(NewDBIterator(
        env_, ImmutableCFOptions(options), BytewiseComparator(), internal_iter,
        10, options.max_sequential_skip_in_iterations, 0, nullptr, false, false,
        false, ro.max_skippable_internal_keys));

    db_iter->SeekToFirst();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "a");
    ASSERT_EQ(db_iter->value().ToString(), "val_a");

    db_iter->Next();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());

    db_iter->SeekToLast();
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_EQ(db_iter->key().ToString(), "e");
    ASSERT_EQ(db_iter->value().ToString(), "val_e");

    db_iter->Prev();
    ASSERT_TRUE(!db_iter->Valid());
    ASSERT_TRUE(db_iter->status().IsIncomplete());
  }

  // Test for large number of skippable internal keys with *default*
  // max_sequential_skip_in_iterations.
  // NOTE(review): the expected outcome below depends on
  // max_sequential_skip_in_iterations -- presumably because DBIter reseeks
  // past a user key after that many sequential skips, so fewer internal keys
  // get counted; confirm against the DBIter implementation.
  {
    for (size_t i = 1; i <= 200; ++i) {
      TestIterator* internal_iter = new TestIterator(BytewiseComparator());
      internal_iter->AddPut("a", "val_a");
      for (size_t j = 1; j <= i; ++j) {
        internal_iter->AddPut("b", "val_b");
        internal_iter->AddDeletion("b");
      }
      internal_iter->AddPut("c", "val_c");
      internal_iter->Finish();

      ro.max_skippable_internal_keys = i;
      std::unique_ptr<Iterator> db_iter(NewDBIterator(
          env_, ImmutableCFOptions(options), BytewiseComparator(),
          internal_iter, 2 * i + 1, options.max_sequential_skip_in_iterations,
          0, nullptr, false, false, false, ro.max_skippable_internal_keys));

      db_iter->SeekToFirst();
      ASSERT_TRUE(db_iter->Valid());
      ASSERT_EQ(db_iter->key().ToString(), "a");
      ASSERT_EQ(db_iter->value().ToString(), "val_a");

      db_iter->Next();
      if ((options.max_sequential_skip_in_iterations + 1) >=
          ro.max_skippable_internal_keys) {
        ASSERT_TRUE(!db_iter->Valid());
        ASSERT_TRUE(db_iter->status().IsIncomplete());
      } else {
        ASSERT_TRUE(db_iter->Valid());
        ASSERT_EQ(db_iter->key().ToString(), "c");
        ASSERT_EQ(db_iter->value().ToString(), "val_c");
      }

      db_iter->SeekToLast();
      ASSERT_TRUE(db_iter->Valid());
      ASSERT_EQ(db_iter->key().ToString(), "c");
      ASSERT_EQ(db_iter->value().ToString(), "val_c");

      db_iter->Prev();
      if ((options.max_sequential_skip_in_iterations + 1) >=
          ro.max_skippable_internal_keys) {
        ASSERT_TRUE(!db_iter->Valid());
        ASSERT_TRUE(db_iter->status().IsIncomplete());
      } else {
        ASSERT_TRUE(db_iter->Valid());
        ASSERT_EQ(db_iter->key().ToString(), "a");
        ASSERT_EQ(db_iter->value().ToString(), "val_a");
      }
    }
  }

  // Test for large number of skippable internal keys with a *non-default*
  // max_sequential_skip_in_iterations.
  {
    for (size_t i = 1; i <= 200; ++i) {
      TestIterator* internal_iter = new TestIterator(BytewiseComparator());
      internal_iter->AddPut("a", "val_a");
      for (size_t j = 1; j <= i; ++j) {
        internal_iter->AddPut("b", "val_b");
        internal_iter->AddDeletion("b");
      }
      internal_iter->AddPut("c", "val_c");
      internal_iter->Finish();

      options.max_sequential_skip_in_iterations = 1000;
      ro.max_skippable_internal_keys = i;
      std::unique_ptr<Iterator> db_iter(NewDBIterator(
          env_, ImmutableCFOptions(options), BytewiseComparator(),
          internal_iter, 2 * i + 1, options.max_sequential_skip_in_iterations,
          0, nullptr, false, false, false, ro.max_skippable_internal_keys));

      db_iter->SeekToFirst();
      ASSERT_TRUE(db_iter->Valid());
      ASSERT_EQ(db_iter->key().ToString(), "a");
      ASSERT_EQ(db_iter->value().ToString(), "val_a");

      db_iter->Next();
      ASSERT_TRUE(!db_iter->Valid());
      ASSERT_TRUE(db_iter->status().IsIncomplete());

      db_iter->SeekToLast();
      ASSERT_TRUE(db_iter->Valid());
      ASSERT_EQ(db_iter->key().ToString(), "c");
      ASSERT_EQ(db_iter->value().ToString(), "val_c");

      db_iter->Prev();
      ASSERT_TRUE(!db_iter->Valid());
      ASSERT_TRUE(db_iter->status().IsIncomplete());
    }
  }
}
TEST_F(DBIteratorTest, DBIterator1) { TEST_F(DBIteratorTest, DBIterator1) {
Options options; Options options;
options.merge_operator = MergeOperators::CreateFromStringId("stringappend"); options.merge_operator = MergeOperators::CreateFromStringId("stringappend");

View file

@ -997,6 +997,12 @@ struct ReadOptions {
// Default: false // Default: false
bool ignore_range_deletions; bool ignore_range_deletions;
// A threshold for the number of keys that can be skipped before failing an
// iterator seek as incomplete. The default value of 0 should be used to
// never fail a request as incomplete, even on skipping too many keys.
// Default: 0
uint64_t max_skippable_internal_keys;
ReadOptions(); ReadOptions();
ReadOptions(bool cksum, bool cache); ReadOptions(bool cksum, bool cache);
}; };

View file

@ -592,8 +592,8 @@ ReadOptions::ReadOptions()
pin_data(false), pin_data(false),
background_purge_on_iterator_cleanup(false), background_purge_on_iterator_cleanup(false),
readahead_size(0), readahead_size(0),
ignore_range_deletions(false) { ignore_range_deletions(false),
} max_skippable_internal_keys(0) {}
ReadOptions::ReadOptions(bool cksum, bool cache) ReadOptions::ReadOptions(bool cksum, bool cache)
: verify_checksums(cksum), : verify_checksums(cksum),
@ -608,7 +608,7 @@ ReadOptions::ReadOptions(bool cksum, bool cache)
pin_data(false), pin_data(false),
background_purge_on_iterator_cleanup(false), background_purge_on_iterator_cleanup(false),
readahead_size(0), readahead_size(0),
ignore_range_deletions(false) { ignore_range_deletions(false),
} max_skippable_internal_keys(0) {}
} // namespace rocksdb } // namespace rocksdb