mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-29 18:33:58 +00:00
d93812c9ae
Summary: Preliminary support for iterator with user timestamp. Current implementation does not consider merge operator and reverse iterator. Auto compaction is also disabled in unit tests. Create an iterator with timestamp. ``` ... read_opts.timestamp = &ts; auto* iter = db->NewIterator(read_opts); // target is key without timestamp. for (iter->Seek(target); iter->Valid(); iter->Next()) {} for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {} delete iter; read_opts.timestamp = &ts1; // lower_bound and upper_bound are without timestamp. read_opts.iterate_lower_bound = &lower_bound; read_opts.iterate_upper_bound = &upper_bound; auto* iter1 = db->NewIterator(read_opts); // Do Seek or SeekToFirst() delete iter1; ``` Test plan (dev server) ``` $make check ``` Simple benchmarking (dev server) 1. The overhead introduced by this PR even when timestamp is disabled. key size: 16 bytes value size: 100 bytes Entries: 1000000 Data reside in main memory, and try to stress iterator. Repeated three times on master and this PR. - Seek without next ``` ./db_bench -db=/dev/shm/rocksdbtest-1000 -benchmarks=fillseq,seekrandom -enable_pipelined_write=false -disable_wal=true -format_version=3 ``` master: 159047.0 ops/sec this PR: 158922.3 ops/sec (2% drop in throughput) - Seek and next 10 times ``` ./db_bench -db=/dev/shm/rocksdbtest-1000 -benchmarks=fillseq,seekrandom -enable_pipelined_write=false -disable_wal=true -format_version=3 -seek_nexts=10 ``` master: 109539.3 ops/sec this PR: 107519.7 ops/sec (2% drop in throughput) Pull Request resolved: https://github.com/facebook/rocksdb/pull/6255 Differential Revision: D19438227 Pulled By: riversand963 fbshipit-source-id: b66b4979486f8474619f4aa6bdd88598870b0746
184 lines
6.7 KiB
C++
184 lines
6.7 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
|
|
#pragma once
|
|
|
|
#include <string>
|
|
#include "db/dbformat.h"
|
|
#include "rocksdb/comparator.h"
|
|
#include "rocksdb/iterator.h"
|
|
#include "rocksdb/status.h"
|
|
#include "table/format.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class PinnedIteratorsManager;
|
|
|
|
struct IterateResult {
|
|
Slice key;
|
|
bool may_be_out_of_upper_bound;
|
|
};
|
|
|
|
template <class TValue>
|
|
class InternalIteratorBase : public Cleanable {
|
|
public:
|
|
InternalIteratorBase() {}
|
|
|
|
// No copying allowed
|
|
InternalIteratorBase(const InternalIteratorBase&) = delete;
|
|
InternalIteratorBase& operator=(const InternalIteratorBase&) = delete;
|
|
|
|
virtual ~InternalIteratorBase() {}
|
|
|
|
// An iterator is either positioned at a key/value pair, or
|
|
// not valid. This method returns true iff the iterator is valid.
|
|
// Always returns false if !status().ok().
|
|
virtual bool Valid() const = 0;
|
|
|
|
// Position at the first key in the source. The iterator is Valid()
|
|
// after this call iff the source is not empty.
|
|
virtual void SeekToFirst() = 0;
|
|
|
|
// Position at the last key in the source. The iterator is
|
|
// Valid() after this call iff the source is not empty.
|
|
virtual void SeekToLast() = 0;
|
|
|
|
// Position at the first key in the source that at or past target
|
|
// The iterator is Valid() after this call iff the source contains
|
|
// an entry that comes at or past target.
|
|
// All Seek*() methods clear any error status() that the iterator had prior to
|
|
// the call; after the seek, status() indicates only the error (if any) that
|
|
// happened during the seek, not any past errors.
|
|
// 'target' contains user timestamp if timestamp is enabled.
|
|
virtual void Seek(const Slice& target) = 0;
|
|
|
|
// Position at the first key in the source that at or before target
|
|
// The iterator is Valid() after this call iff the source contains
|
|
// an entry that comes at or before target.
|
|
virtual void SeekForPrev(const Slice& target) = 0;
|
|
|
|
// Moves to the next entry in the source. After this call, Valid() is
|
|
// true iff the iterator was not positioned at the last entry in the source.
|
|
// REQUIRES: Valid()
|
|
virtual void Next() = 0;
|
|
|
|
// Moves to the next entry in the source, and return result. Iterator
|
|
// implementation should override this method to help methods inline better,
|
|
// or when MayBeOutOfUpperBound() is non-trivial.
|
|
// REQUIRES: Valid()
|
|
virtual bool NextAndGetResult(IterateResult* result) {
|
|
Next();
|
|
bool is_valid = Valid();
|
|
if (is_valid) {
|
|
result->key = key();
|
|
// Default may_be_out_of_upper_bound to true to avoid unnecessary virtual
|
|
// call. If an implementation has non-trivial MayBeOutOfUpperBound(),
|
|
// it should also override NextAndGetResult().
|
|
result->may_be_out_of_upper_bound = true;
|
|
assert(MayBeOutOfUpperBound());
|
|
}
|
|
return is_valid;
|
|
}
|
|
|
|
// Moves to the previous entry in the source. After this call, Valid() is
|
|
// true iff the iterator was not positioned at the first entry in source.
|
|
// REQUIRES: Valid()
|
|
virtual void Prev() = 0;
|
|
|
|
// Return the key for the current entry. The underlying storage for
|
|
// the returned slice is valid only until the next modification of
|
|
// the iterator.
|
|
// REQUIRES: Valid()
|
|
virtual Slice key() const = 0;
|
|
|
|
// Return user key for the current entry.
|
|
// REQUIRES: Valid()
|
|
virtual Slice user_key() const { return ExtractUserKey(key()); }
|
|
|
|
// Return the value for the current entry. The underlying storage for
|
|
// the returned slice is valid only until the next modification of
|
|
// the iterator.
|
|
// REQUIRES: Valid()
|
|
virtual TValue value() const = 0;
|
|
|
|
// If an error has occurred, return it. Else return an ok status.
|
|
// If non-blocking IO is requested and this operation cannot be
|
|
// satisfied without doing some IO, then this returns Status::Incomplete().
|
|
virtual Status status() const = 0;
|
|
|
|
// True if the iterator is invalidated because it reached a key that is above
|
|
// the iterator upper bound. Used by LevelIterator to decide whether it should
|
|
// stop or move on to the next file.
|
|
// Important: if iterator reached the end of the file without encountering any
|
|
// keys above the upper bound, IsOutOfBound() must return false.
|
|
virtual bool IsOutOfBound() { return false; }
|
|
|
|
// Keys return from this iterator can be smaller than iterate_lower_bound.
|
|
virtual bool MayBeOutOfLowerBound() { return true; }
|
|
|
|
// Keys return from this iterator can be larger or equal to
|
|
// iterate_upper_bound.
|
|
virtual bool MayBeOutOfUpperBound() { return true; }
|
|
|
|
// Pass the PinnedIteratorsManager to the Iterator, most Iterators don't
|
|
// communicate with PinnedIteratorsManager so default implementation is no-op
|
|
// but for Iterators that need to communicate with PinnedIteratorsManager
|
|
// they will implement this function and use the passed pointer to communicate
|
|
// with PinnedIteratorsManager.
|
|
virtual void SetPinnedItersMgr(PinnedIteratorsManager* /*pinned_iters_mgr*/) {
|
|
}
|
|
|
|
// If true, this means that the Slice returned by key() is valid as long as
|
|
// PinnedIteratorsManager::ReleasePinnedData is not called and the
|
|
// Iterator is not deleted.
|
|
//
|
|
// IsKeyPinned() is guaranteed to always return true if
|
|
// - Iterator is created with ReadOptions::pin_data = true
|
|
// - DB tables were created with BlockBasedTableOptions::use_delta_encoding
|
|
// set to false.
|
|
virtual bool IsKeyPinned() const { return false; }
|
|
|
|
// If true, this means that the Slice returned by value() is valid as long as
|
|
// PinnedIteratorsManager::ReleasePinnedData is not called and the
|
|
// Iterator is not deleted.
|
|
virtual bool IsValuePinned() const { return false; }
|
|
|
|
virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) {
|
|
return Status::NotSupported("");
|
|
}
|
|
|
|
protected:
|
|
void SeekForPrevImpl(const Slice& target, const Comparator* cmp) {
|
|
Seek(target);
|
|
if (!Valid()) {
|
|
SeekToLast();
|
|
}
|
|
while (Valid() && cmp->Compare(target, key()) < 0) {
|
|
Prev();
|
|
}
|
|
}
|
|
|
|
bool is_mutable_;
|
|
};
|
|
|
|
using InternalIterator = InternalIteratorBase<Slice>;
|
|
|
|
// Return an empty iterator (yields nothing).
|
|
template <class TValue = Slice>
|
|
extern InternalIteratorBase<TValue>* NewEmptyInternalIterator();
|
|
|
|
// Return an empty iterator with the specified status.
|
|
template <class TValue = Slice>
|
|
extern InternalIteratorBase<TValue>* NewErrorInternalIterator(
|
|
const Status& status);
|
|
|
|
// Return an empty iterator with the specified status, allocated arena.
|
|
template <class TValue = Slice>
|
|
extern InternalIteratorBase<TValue>* NewErrorInternalIterator(
|
|
const Status& status, Arena* arena);
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|