mirror of
https://github.com/facebook/rocksdb.git
synced 2024-12-04 20:02:50 +00:00
229297d1b8
Summary: A second attempt after https://github.com/facebook/rocksdb/issues/10802, with bug fixes and refactoring. This PR updates compaction logic to take range tombstones into account when determining whether to cut the current compaction output file (https://github.com/facebook/rocksdb/issues/4811). Before this change, only point keys were considered, and range tombstones could cause large compactions. For example, if the current compaction output consists of a range tombstone [a, b) and 2 point keys y, z, they would be added to the same file, and may overlap with too many files in the next level and cause a large compaction in the future. This PR also includes ajkr's effort to simplify the logic to add range tombstones to compaction output files in `AddRangeDels()` ([https://github.com/facebook/rocksdb/issues/11078](https://github.com/facebook/rocksdb/pull/11078#issuecomment-1386078861)). The main change is for `CompactionIterator` to emit range tombstone start keys to be processed by `CompactionOutputs`. A new class `CompactionMergingIterator` is introduced to replace `MergingIterator` under `CompactionIterator` to enable emitting of range tombstone start keys. Further improvements after this PR include cutting compaction output at some grandparent boundary key (instead of the next output key) when cutting within a range tombstone to reduce overlap with grandparents.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11113 Test Plan: * added unit test in db_range_del_test * crash test with a small key range: `python3 tools/db_crashtest.py blackbox --simple --max_key=100 --interval=600 --write_buffer_size=262144 --target_file_size_base=256 --max_bytes_for_level_base=262144 --block_size=128 --value_size_mult=33 --subcompactions=10 --use_multiget=1 --delpercent=3 --delrangepercent=2 --verify_iterator_with_expected_state_one_in=2 --num_iterations=10` Reviewed By: ajkr Differential Revision: D42655709 Pulled By: cbi42 fbshipit-source-id: 8367e36ef5640e8f21c14a3855d4a8d6e360a34c
282 lines
5.9 KiB
C++
282 lines
5.9 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
|
|
#include <cassert>
|
|
|
|
#include "rocksdb/comparator.h"
|
|
#include "table/internal_iterator.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// An internal iterator that wraps another one and ensures that any keys
|
|
// returned are strictly within a range [start, end). If the underlying
|
|
// iterator has already performed the bounds checking, it relies on that result;
|
|
// otherwise, it performs the necessary key comparisons itself. Both bounds
|
|
// are optional.
|
|
class ClippingIterator : public InternalIterator {
|
|
public:
|
|
ClippingIterator(InternalIterator* iter, const Slice* start, const Slice* end,
|
|
const CompareInterface* cmp)
|
|
: iter_(iter), start_(start), end_(end), cmp_(cmp), valid_(false) {
|
|
assert(iter_);
|
|
assert(cmp_);
|
|
assert(!start_ || !end_ || cmp_->Compare(*start_, *end_) <= 0);
|
|
|
|
UpdateAndEnforceBounds();
|
|
}
|
|
|
|
bool Valid() const override { return valid_; }
|
|
|
|
void SeekToFirst() override {
|
|
if (start_) {
|
|
iter_->Seek(*start_);
|
|
} else {
|
|
iter_->SeekToFirst();
|
|
}
|
|
|
|
UpdateAndEnforceUpperBound();
|
|
}
|
|
|
|
void SeekToLast() override {
|
|
if (end_) {
|
|
iter_->SeekForPrev(*end_);
|
|
|
|
// Upper bound is exclusive, so we need a key which is strictly smaller
|
|
if (iter_->Valid() && cmp_->Compare(iter_->key(), *end_) == 0) {
|
|
iter_->Prev();
|
|
}
|
|
} else {
|
|
iter_->SeekToLast();
|
|
}
|
|
|
|
UpdateAndEnforceLowerBound();
|
|
}
|
|
|
|
void Seek(const Slice& target) override {
|
|
if (start_ && cmp_->Compare(target, *start_) < 0) {
|
|
iter_->Seek(*start_);
|
|
UpdateAndEnforceUpperBound();
|
|
return;
|
|
}
|
|
|
|
if (end_ && cmp_->Compare(target, *end_) >= 0) {
|
|
valid_ = false;
|
|
return;
|
|
}
|
|
|
|
iter_->Seek(target);
|
|
UpdateAndEnforceUpperBound();
|
|
}
|
|
|
|
void SeekForPrev(const Slice& target) override {
|
|
if (start_ && cmp_->Compare(target, *start_) < 0) {
|
|
valid_ = false;
|
|
return;
|
|
}
|
|
|
|
if (end_ && cmp_->Compare(target, *end_) >= 0) {
|
|
iter_->SeekForPrev(*end_);
|
|
|
|
// Upper bound is exclusive, so we need a key which is strictly smaller
|
|
if (iter_->Valid() && cmp_->Compare(iter_->key(), *end_) == 0) {
|
|
iter_->Prev();
|
|
}
|
|
|
|
UpdateAndEnforceLowerBound();
|
|
return;
|
|
}
|
|
|
|
iter_->SeekForPrev(target);
|
|
UpdateAndEnforceLowerBound();
|
|
}
|
|
|
|
void Next() override {
|
|
assert(valid_);
|
|
iter_->Next();
|
|
UpdateAndEnforceUpperBound();
|
|
}
|
|
|
|
bool NextAndGetResult(IterateResult* result) override {
|
|
assert(valid_);
|
|
assert(result);
|
|
|
|
IterateResult res;
|
|
valid_ = iter_->NextAndGetResult(&res);
|
|
|
|
if (!valid_) {
|
|
return false;
|
|
}
|
|
|
|
if (end_) {
|
|
EnforceUpperBoundImpl(res.bound_check_result);
|
|
|
|
if (!valid_) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
res.bound_check_result = IterBoundCheck::kInbound;
|
|
*result = res;
|
|
|
|
return true;
|
|
}
|
|
|
|
void Prev() override {
|
|
assert(valid_);
|
|
iter_->Prev();
|
|
UpdateAndEnforceLowerBound();
|
|
}
|
|
|
|
Slice key() const override {
|
|
assert(valid_);
|
|
return iter_->key();
|
|
}
|
|
|
|
Slice user_key() const override {
|
|
assert(valid_);
|
|
return iter_->user_key();
|
|
}
|
|
|
|
Slice value() const override {
|
|
assert(valid_);
|
|
return iter_->value();
|
|
}
|
|
|
|
Status status() const override { return iter_->status(); }
|
|
|
|
bool PrepareValue() override {
|
|
assert(valid_);
|
|
|
|
if (iter_->PrepareValue()) {
|
|
return true;
|
|
}
|
|
|
|
assert(!iter_->Valid());
|
|
valid_ = false;
|
|
return false;
|
|
}
|
|
|
|
bool MayBeOutOfLowerBound() override {
|
|
assert(valid_);
|
|
return false;
|
|
}
|
|
|
|
IterBoundCheck UpperBoundCheckResult() override {
|
|
assert(valid_);
|
|
return IterBoundCheck::kInbound;
|
|
}
|
|
|
|
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
|
|
iter_->SetPinnedItersMgr(pinned_iters_mgr);
|
|
}
|
|
|
|
bool IsKeyPinned() const override {
|
|
assert(valid_);
|
|
return iter_->IsKeyPinned();
|
|
}
|
|
|
|
bool IsValuePinned() const override {
|
|
assert(valid_);
|
|
return iter_->IsValuePinned();
|
|
}
|
|
|
|
Status GetProperty(std::string prop_name, std::string* prop) override {
|
|
return iter_->GetProperty(prop_name, prop);
|
|
}
|
|
|
|
bool IsDeleteRangeSentinelKey() const override {
|
|
assert(valid_);
|
|
return iter_->IsDeleteRangeSentinelKey();
|
|
}
|
|
|
|
private:
|
|
void UpdateValid() {
|
|
assert(!iter_->Valid() || iter_->status().ok());
|
|
|
|
valid_ = iter_->Valid();
|
|
}
|
|
|
|
void EnforceUpperBoundImpl(IterBoundCheck bound_check_result) {
|
|
if (bound_check_result == IterBoundCheck::kInbound) {
|
|
return;
|
|
}
|
|
|
|
if (bound_check_result == IterBoundCheck::kOutOfBound) {
|
|
valid_ = false;
|
|
return;
|
|
}
|
|
|
|
assert(bound_check_result == IterBoundCheck::kUnknown);
|
|
|
|
if (cmp_->Compare(key(), *end_) >= 0) {
|
|
valid_ = false;
|
|
}
|
|
}
|
|
|
|
void EnforceUpperBound() {
|
|
if (!valid_) {
|
|
return;
|
|
}
|
|
|
|
if (!end_) {
|
|
return;
|
|
}
|
|
|
|
EnforceUpperBoundImpl(iter_->UpperBoundCheckResult());
|
|
}
|
|
|
|
void EnforceLowerBound() {
|
|
if (!valid_) {
|
|
return;
|
|
}
|
|
|
|
if (!start_) {
|
|
return;
|
|
}
|
|
|
|
if (!iter_->MayBeOutOfLowerBound()) {
|
|
return;
|
|
}
|
|
|
|
if (cmp_->Compare(key(), *start_) < 0) {
|
|
valid_ = false;
|
|
}
|
|
}
|
|
|
|
void AssertBounds() {
|
|
assert(!valid_ || !start_ || cmp_->Compare(key(), *start_) >= 0);
|
|
assert(!valid_ || !end_ || cmp_->Compare(key(), *end_) < 0);
|
|
}
|
|
|
|
void UpdateAndEnforceBounds() {
|
|
UpdateValid();
|
|
EnforceUpperBound();
|
|
EnforceLowerBound();
|
|
AssertBounds();
|
|
}
|
|
|
|
void UpdateAndEnforceUpperBound() {
|
|
UpdateValid();
|
|
EnforceUpperBound();
|
|
AssertBounds();
|
|
}
|
|
|
|
void UpdateAndEnforceLowerBound() {
|
|
UpdateValid();
|
|
EnforceLowerBound();
|
|
AssertBounds();
|
|
}
|
|
|
|
InternalIterator* iter_;
|
|
const Slice* start_;
|
|
const Slice* end_;
|
|
const CompareInterface* cmp_;
|
|
bool valid_;
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|