// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <algorithm>
#include <iterator>
#include <list>
#include <map>
#include <set>
#include <string>
#include <vector>

#include "db/compaction/compaction_iteration_stats.h"
#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "db/range_del_aggregator.h"
#include "db/range_tombstone_fragmenter.h"
#include "db/version_edit.h"
#include "rocksdb/comparator.h"
#include "rocksdb/types.h"
#include "table/internal_iterator.h"
#include "table/scoped_arena_iterator.h"
#include "table/table_builder.h"
#include "util/heap.h"
#include "util/kv_map.h"

namespace ROCKSDB_NAMESPACE {
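// A wrapper around a FragmentedRangeTombstoneIterator that truncates the
// tombstones it exposes to the [smallest, largest] internal key bounds of
// the file they were read from, so a tombstone never appears to cover keys
// outside that file's range.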
class TruncatedRangeDelIterator {
 public:
  TruncatedRangeDelIterator(
      std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
      const InternalKeyComparator* icmp, const InternalKey* smallest,
      const InternalKey* largest);

  bool Valid() const;

  void Next() { iter_->TopNext(); }
  void Prev() { iter_->TopPrev(); }

  void InternalNext() { iter_->Next(); }

  // Seeks to the tombstone with the highest visible sequence number that
  // covers target (a user key). If no such tombstone exists, the position
  // will be at the earliest tombstone that ends after target.
  // REQUIRES: target is a user key.
  void Seek(const Slice& target);

  // Seeks to the first range tombstone with end_key() > target.
  void SeekInternalKey(const Slice& target);

  // Seeks to the tombstone with the highest visible sequence number that
  // covers target (a user key). If no such tombstone exists, the position
  // will be at the latest tombstone that starts before target.
  void SeekForPrev(const Slice& target);

  void SeekToFirst();
  void SeekToLast();

  ParsedInternalKey start_key() const {
    return (smallest_ == nullptr ||
            icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0)
               ? iter_->parsed_start_key()
               : *smallest_;
  }

  ParsedInternalKey end_key() const {
    return (largest_ == nullptr ||
            icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0)
               ? iter_->parsed_end_key()
               : *largest_;
  }

  SequenceNumber seq() const { return iter_->seq(); }
  Slice timestamp() const {
    assert(icmp_->user_comparator()->timestamp_size());
    return iter_->timestamp();
  }
  void SetTimestampUpperBound(const Slice* ts_upper_bound) {
    iter_->SetTimestampUpperBound(ts_upper_bound);
  }

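  // Splits this iterator into one iterator per snapshot stripe, keyed by the
  // upper sequence-number bound of each stripe.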
  std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
  SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);

  SequenceNumber upper_bound() const { return iter_->upper_bound(); }

  SequenceNumber lower_bound() const { return iter_->lower_bound(); }

 private:
  std::unique_ptr<FragmentedRangeTombstoneIterator> iter_;
  const InternalKeyComparator* icmp_;
  const ParsedInternalKey* smallest_ = nullptr;
  const ParsedInternalKey* largest_ = nullptr;
  std::list<ParsedInternalKey> pinned_bounds_;

  const InternalKey* smallest_ikey_;
  const InternalKey* largest_ikey_;
};

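// Ordering helpers for the structures below. SeqMaxComparator sorts
// iterators by descending sequence number. StartKeyMinComparator inverts the
// start-key order so that a BinaryHeap using it (a max-heap with respect to
// its comparator, like std::priority_queue) keeps the iterator with the
// smallest start key on top.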
struct SeqMaxComparator {
  bool operator()(const TruncatedRangeDelIterator* a,
                  const TruncatedRangeDelIterator* b) const {
    return a->seq() > b->seq();
  }
};

struct StartKeyMinComparator {
  explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}

  bool operator()(const TruncatedRangeDelIterator* a,
                  const TruncatedRangeDelIterator* b) const {
    return icmp->Compare(a->start_key(), b->start_key()) > 0;
  }

  const InternalKeyComparator* icmp;
};

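// Answers ShouldDelete() queries for keys visited in ascending order. Each
// added iterator is "active" while its current tombstone may still cover the
// key being scanned (tracked in a heap ordered by smallest end key), and
// "inactive" while it is positioned entirely past the scan position (tracked
// in a heap ordered by smallest start key).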
class ForwardRangeDelIterator {
 public:
  explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp);

  bool ShouldDelete(const ParsedInternalKey& parsed);
  void Invalidate();

  void AddNewIter(TruncatedRangeDelIterator* iter,
                  const ParsedInternalKey& parsed) {
    iter->Seek(parsed.user_key);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  size_t UnusedIdx() const { return unused_idx_; }
  void IncUnusedIdx() { unused_idx_++; }

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  struct EndKeyMinComparator {
    explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0;
    }

    const InternalKeyComparator* icmp;
  };

  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
      return;
    }
    int cmp = icmp_->Compare(parsed, iter->start_key());
    if (cmp < 0) {
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  size_t unused_idx_;
  ActiveSeqSet active_seqnums_;
  BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_;
  BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator>
      inactive_iters_;
};

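// The mirror image of ForwardRangeDelIterator, answering ShouldDelete()
// queries for keys visited in descending order; the heap orderings on start
// and end keys are reversed accordingly.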
class ReverseRangeDelIterator {
 public:
  explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp);

  bool ShouldDelete(const ParsedInternalKey& parsed);
  void Invalidate();

  void AddNewIter(TruncatedRangeDelIterator* iter,
                  const ParsedInternalKey& parsed) {
    iter->SeekForPrev(parsed.user_key);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  size_t UnusedIdx() const { return unused_idx_; }
  void IncUnusedIdx() { unused_idx_++; }

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  struct EndKeyMaxComparator {
    explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const TruncatedRangeDelIterator* a,
                    const TruncatedRangeDelIterator* b) const {
      return icmp->Compare(a->end_key(), b->end_key()) < 0;
    }

    const InternalKeyComparator* icmp;
  };
  struct StartKeyMaxComparator {
    explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0;
    }

    const InternalKeyComparator* icmp;
  };

  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
    } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) {
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  size_t unused_idx_;
  ActiveSeqSet active_seqnums_;
  BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator>
      active_iters_;
  BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_;
};

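// Tells the aggregator how the caller is traversing keys, so that the
// tombstone iterators can be advanced incrementally instead of re-seeking on
// every ShouldDelete() call: use kForwardTraversal when keys arrive in
// ascending order and kBackwardTraversal when they arrive in descending
// order.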
enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal };
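// Base class for aggregating range tombstones from multiple memtables and
// SST files and answering whether a given internal key is covered by a
// visible range tombstone. ReadRangeDelAggregator serves reads at a fixed
// sequence number; CompactionRangeDelAggregator handles all snapshot stripes
// during compaction.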
class RangeDelAggregator {
 public:
  explicit RangeDelAggregator(const InternalKeyComparator* icmp)
      : icmp_(icmp) {}
  virtual ~RangeDelAggregator() {}

  virtual void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) = 0;

  bool ShouldDelete(const Slice& ikey, RangeDelPositioningMode mode) {
    ParsedInternalKey parsed;

    Status pik_status =
        ParseInternalKey(ikey, &parsed, false /* log_err_key */);  // TODO
    assert(pik_status.ok());
    if (!pik_status.ok()) {
      return false;
    }

    return ShouldDelete(parsed, mode);
  }
  virtual bool ShouldDelete(const ParsedInternalKey& parsed,
                            RangeDelPositioningMode mode) = 0;

  virtual void InvalidateRangeDelMapPositions() = 0;

  virtual bool IsEmpty() const = 0;

  bool AddFile(uint64_t file_number) {
    return files_seen_.insert(file_number).second;
  }

 protected:
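  // Holds the truncated tombstone iterators whose sequence numbers fall
  // within a single snapshot stripe [lower_bound, upper_bound], together
  // with the forward and reverse iterators used to query them incrementally.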
  class StripeRep {
   public:
    StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound,
              SequenceNumber lower_bound)
        : icmp_(icmp),
          forward_iter_(icmp),
          reverse_iter_(icmp),
          upper_bound_(upper_bound),
          lower_bound_(lower_bound) {}

    void AddTombstones(std::unique_ptr<TruncatedRangeDelIterator> input_iter) {
      iters_.push_back(std::move(input_iter));
    }

    bool IsEmpty() const { return iters_.empty(); }

    bool ShouldDelete(const ParsedInternalKey& parsed,
                      RangeDelPositioningMode mode);

    void Invalidate() {
      if (!IsEmpty()) {
        InvalidateForwardIter();
        InvalidateReverseIter();
      }
    }

    // If user-defined timestamp is enabled, `start` and `end` are user keys
    // with timestamp.
    bool IsRangeOverlapped(const Slice& start, const Slice& end);

   private:
    bool InStripe(SequenceNumber seq) const {
      return lower_bound_ <= seq && seq <= upper_bound_;
    }

    void InvalidateForwardIter() { forward_iter_.Invalidate(); }

    void InvalidateReverseIter() { reverse_iter_.Invalidate(); }

    const InternalKeyComparator* icmp_;
    std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_;
    ForwardRangeDelIterator forward_iter_;
    ReverseRangeDelIterator reverse_iter_;
    SequenceNumber upper_bound_;
    SequenceNumber lower_bound_;
  };

  const InternalKeyComparator* icmp_;

 private:
  std::set<uint64_t> files_seen_;
};

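// Aggregates range tombstones for reads at a fixed sequence number
// (`upper_bound`); all tombstones land in a single stripe [0, upper_bound].
//
// Minimal usage sketch (illustrative only; the variable names below are
// assumptions, not part of this API):
//
//   ReadRangeDelAggregator range_del_agg(&icmp, read_seq);
//   range_del_agg.AddTombstones(std::move(fragmented_tombstone_iter));
//   // During a forward scan, in ascending key order:
//   if (range_del_agg.ShouldDelete(
//           parsed_key, RangeDelPositioningMode::kForwardTraversal)) {
//     // `parsed_key` is covered by a visible range tombstone; skip it.
//   }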
class ReadRangeDelAggregator final : public RangeDelAggregator {
 public:
  ReadRangeDelAggregator(const InternalKeyComparator* icmp,
                         SequenceNumber upper_bound)
      : RangeDelAggregator(icmp),
        rep_(icmp, upper_bound, 0 /* lower_bound */) {}
  ~ReadRangeDelAggregator() override {}

  using RangeDelAggregator::ShouldDelete;
  void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) override;

  bool ShouldDelete(const ParsedInternalKey& parsed,
                    RangeDelPositioningMode mode) final override {
    if (rep_.IsEmpty()) {
      return false;
    }
    return ShouldDeleteImpl(parsed, mode);
  }

  bool IsRangeOverlapped(const Slice& start, const Slice& end);

  void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); }

  bool IsEmpty() const override { return rep_.IsEmpty(); }

 private:
  StripeRep rep_;

  bool ShouldDeleteImpl(const ParsedInternalKey& parsed,
                        RangeDelPositioningMode mode);
};

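// Aggregates range tombstones for compaction. Tombstones are split per
// snapshot stripe (via TruncatedRangeDelIterator::SplitBySnapshot()), since
// a key may only be dropped when a covering tombstone is visible within the
// same stripe.
//
// Minimal usage sketch (illustrative only; the variable names below are
// assumptions, not part of this API):
//
//   CompactionRangeDelAggregator range_del_agg(&icmp, existing_snapshots);
//   range_del_agg.AddTombstones(std::move(tombstone_iter),
//                               &file_smallest_key, &file_largest_key);
//   // During the compaction merge, in ascending key order:
//   if (range_del_agg.ShouldDelete(
//           parsed_key, RangeDelPositioningMode::kForwardTraversal)) {
//     // Drop `parsed_key` from the compaction output.
//   }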
class CompactionRangeDelAggregator : public RangeDelAggregator {
 public:
  CompactionRangeDelAggregator(const InternalKeyComparator* icmp,
                               const std::vector<SequenceNumber>& snapshots,
                               const std::string* full_history_ts_low = nullptr,
                               const std::string* trim_ts = nullptr)
      : RangeDelAggregator(icmp), snapshots_(&snapshots) {
    if (full_history_ts_low) {
      ts_upper_bound_ = *full_history_ts_low;
    }
    if (trim_ts) {
      trim_ts_ = *trim_ts;
      // Range tombstones newer than `trim_ts` or `full_history_ts_low` should
      // not be considered in ShouldDelete().
      if (ts_upper_bound_.empty()) {
        ts_upper_bound_ = trim_ts_;
      } else if (!trim_ts_.empty() &&
                 icmp->user_comparator()->CompareTimestamp(
                     trim_ts_, ts_upper_bound_) < 0) {
        ts_upper_bound_ = trim_ts_;
      }
    }
  }
  ~CompactionRangeDelAggregator() override {}

  void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) override;

  using RangeDelAggregator::ShouldDelete;
  bool ShouldDelete(const ParsedInternalKey& parsed,
                    RangeDelPositioningMode mode) override;

  bool IsRangeOverlapped(const Slice& start, const Slice& end);

  void InvalidateRangeDelMapPositions() override {
    for (auto& rep : reps_) {
      rep.second.Invalidate();
    }
  }

  bool IsEmpty() const override {
    for (const auto& rep : reps_) {
      if (!rep.second.IsEmpty()) {
        return false;
      }
    }
    return true;
  }

  // Creates an iterator over all the range tombstones in the aggregator, for
  // use in compaction.
  //
  // NOTE: the internal key boundaries are used for optimization purposes to
  // reduce the number of tombstones that are passed to the fragmenter; they do
  // not guarantee that the resulting iterator only contains range tombstones
  // that cover keys in the provided range. If required, these bounds must be
  // enforced during iteration.
  std::unique_ptr<FragmentedRangeTombstoneIterator> NewIterator(
      const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr);

 private:
  std::vector<std::unique_ptr<TruncatedRangeDelIterator>> parent_iters_;
  std::map<SequenceNumber, StripeRep> reps_;

  const std::vector<SequenceNumber>* snapshots_;
  // min over full_history_ts_low and trim_ts_
  Slice ts_upper_bound_{};
  Slice trim_ts_{};
};

}  // namespace ROCKSDB_NAMESPACE