Mirror of https://github.com/facebook/rocksdb.git
synced 2024-12-01 07:15:51 +00:00
10984e8c26
Summary: There was a subtle design/contract bug in the previous version of range filtering in experimental.h. If someone implemented a key segments extractor with "all or nothing" fixed-size segments, that could result in unsafe range filtering. For example, with two segments of width 3:

```
x = 0x|12 34 56|78 9A 00|
y = 0x|12 34 56|78 9B
z = 0x|12 34 56|78 9C 00|
```

Segment 1 of y (empty) is out of order with segment 1 of x and z.

I have re-worked the contract to make it clear what does work, and implemented a standard extractor for fixed-size segments, CappedKeySegmentsExtractor. The safe approach for filtering is to consume as much as is available for a segment in the case of a short key.

I have also added support for min-max filtering with the reverse byte-wise comparator, which is probably the 2nd most common comparator for RocksDB users (because of MySQL). It might seem that a min-max filter doesn't care about forward or reverse ordering, but it does when trying to determine whether an input range from segment value v1 to segment value v2, where v2 happens to be byte-wise less than v1, is an empty forward interval or a non-empty reverse interval. At least in the current setup, we don't have that context.

A new unit test (with some refactoring) tests CappedKeySegmentsExtractor, the reverse byte-wise comparator, and the corresponding min-max filter.

I have also (contractually / mathematically) generalized the framework to comparators other than the byte-wise comparator, and made other generalizations to make the extractor limitations more explicitly connected to the particular filters and filtering used, at least in description.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/13005

Test Plan: added unit tests as described

Reviewed By: jowlyzhang

Differential Revision: D62769784

Pulled By: pdillinger

fbshipit-source-id: 0d41f0d0273586bdad55e4aa30381ebc861f7044
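For illustration, here is a minimal sketch (not from the commit; `CappedSegmentEnds` is a hypothetical helper) of the capped extraction rule that CappedKeySegmentsExtractor follows: each segment consumes as much of the key as is available, so a short key yields a truncated segment rather than an empty one, keeping segment order consistent with key order.

```
#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

// Returns the end offset of each segment, capping at the key length
// instead of dropping segments that don't fully fit.
std::vector<uint32_t> CappedSegmentEnds(const std::string& key,
                                        const std::vector<uint32_t>& widths) {
  std::vector<uint32_t> ends;
  uint32_t ideal_end = 0;
  for (uint32_t w : widths) {
    ideal_end += w;
    ends.push_back(std::min(ideal_end, static_cast<uint32_t>(key.size())));
  }
  return ends;
}

// With widths {3, 3}: x (6 bytes) -> ends {3, 6}; y (5 bytes) -> ends
// {3, 5}, so segment 1 of y is 0x789B rather than empty; and
// 0x789A00 < 0x789B < 0x789C00 keeps segment 1 in key order.
```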
1345 lines
45 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "rocksdb/experimental.h"

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "db/db_impl/db_impl.h"
#include "db/version_util.h"
#include "logging/logging.h"
#include "util/atomic.h"

namespace ROCKSDB_NAMESPACE::experimental {

Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family,
                           const Slice* begin, const Slice* end) {
  if (db == nullptr) {
    return Status::InvalidArgument("Didn't recognize DB object");
  }

  return db->SuggestCompactRange(column_family, begin, end);
}

Status PromoteL0(DB* db, ColumnFamilyHandle* column_family, int target_level) {
  if (db == nullptr) {
    return Status::InvalidArgument("Didn't recognize DB object");
  }
  return db->PromoteL0(column_family, target_level);
}

Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end) {
  return SuggestCompactRange(db, db->DefaultColumnFamily(), begin, end);
}

Status UpdateManifestForFilesState(
    const DBOptions& db_opts, const std::string& db_name,
    const std::vector<ColumnFamilyDescriptor>& column_families,
    const UpdateManifestForFilesStateOptions& opts) {
  // TODO: plumb Env::IOActivity, Env::IOPriority
  const ReadOptions read_options;
  const WriteOptions write_options;
  OfflineManifestWriter w(db_opts, db_name);
  Status s = w.Recover(column_families);

  size_t files_updated = 0;
  size_t cfs_updated = 0;
  auto fs = db_opts.env->GetFileSystem();

  for (auto cfd : *w.Versions().GetColumnFamilySet()) {
    if (!s.ok()) {
      break;
    }
    assert(cfd);

    if (cfd->IsDropped() || !cfd->initialized()) {
      continue;
    }

    const auto* current = cfd->current();
    assert(current);

    const auto* vstorage = current->storage_info();
    assert(vstorage);

    VersionEdit edit;
    edit.SetColumnFamily(cfd->GetID());

    /* SST files */
    for (int level = 0; level < cfd->NumberLevels(); level++) {
      if (!s.ok()) {
        break;
      }
      const auto& level_files = vstorage->LevelFiles(level);

      for (const auto& lf : level_files) {
        assert(lf);

        uint64_t number = lf->fd.GetNumber();
        std::string fname =
            TableFileName(w.IOptions().db_paths, number, lf->fd.GetPathId());

        std::unique_ptr<FSSequentialFile> f;
        FileOptions fopts;
        // Use kUnknown to signal the FileSystem to search all tiers for the
        // file.
        fopts.temperature = Temperature::kUnknown;

        IOStatus file_ios =
            fs->NewSequentialFile(fname, fopts, &f, /*dbg*/ nullptr);
        if (file_ios.ok()) {
          if (opts.update_temperatures) {
            Temperature temp = f->GetTemperature();
            if (temp != Temperature::kUnknown && temp != lf->temperature) {
              // Current state inconsistent with manifest
              ++files_updated;
              edit.DeleteFile(level, number);
              edit.AddFile(
                  level, number, lf->fd.GetPathId(), lf->fd.GetFileSize(),
                  lf->smallest, lf->largest, lf->fd.smallest_seqno,
                  lf->fd.largest_seqno, lf->marked_for_compaction, temp,
                  lf->oldest_blob_file_number, lf->oldest_ancester_time,
                  lf->file_creation_time, lf->epoch_number, lf->file_checksum,
                  lf->file_checksum_func_name, lf->unique_id,
                  lf->compensated_range_deletion_size, lf->tail_size,
                  lf->user_defined_timestamps_persisted);
            }
          }
        } else {
          s = file_ios;
          break;
        }
      }
    }

    if (s.ok() && edit.NumEntries() > 0) {
      std::unique_ptr<FSDirectory> db_dir;
      s = fs->NewDirectory(db_name, IOOptions(), &db_dir, nullptr);
      if (s.ok()) {
        s = w.LogAndApply(read_options, write_options, cfd, &edit,
                          db_dir.get());
      }
      if (s.ok()) {
        ++cfs_updated;
      }
    }
  }

  if (cfs_updated > 0) {
    ROCKS_LOG_INFO(db_opts.info_log,
                   "UpdateManifestForFilesState: updated %zu files in %zu CFs",
                   files_updated, cfs_updated);
  } else if (s.ok()) {
    ROCKS_LOG_INFO(db_opts.info_log,
                   "UpdateManifestForFilesState: no updates needed");
  }
  if (!s.ok()) {
    ROCKS_LOG_ERROR(db_opts.info_log, "UpdateManifestForFilesState failed: %s",
                    s.ToString().c_str());
  }

  return s;
}

// EXPERIMENTAL new filtering features

namespace {
template <size_t N>
class SemiStaticCappedKeySegmentsExtractor : public KeySegmentsExtractor {
 public:
  SemiStaticCappedKeySegmentsExtractor(const uint32_t* byte_widths) {
    id_ = kName();
    uint32_t prev_end = 0;
    if constexpr (N > 0) {  // Suppress a compiler warning
      for (size_t i = 0; i < N; ++i) {
        prev_end = prev_end + byte_widths[i];
        ideal_ends_[i] = prev_end;
        id_ += std::to_string(byte_widths[i]) + "b";
      }
    }
  }

  static const char* kName() { return "CappedKeySegmentsExtractor"; }

  const char* Name() const override { return kName(); }

  std::string GetId() const override { return id_; }

  void Extract(const Slice& key_or_bound, KeyKind /*kind*/,
               Result* result) const override {
    // Optimistic assignment
    result->segment_ends.assign(ideal_ends_.begin(), ideal_ends_.end());
    if constexpr (N > 0) {  // Suppress a compiler warning
      uint32_t key_size = static_cast<uint32_t>(key_or_bound.size());
      if (key_size < ideal_ends_.back()) {
        // Need to fix up (should be rare)
        for (size_t i = 0; i < N; ++i) {
          result->segment_ends[i] =
              std::min(key_size, result->segment_ends[i]);
        }
      }
    }
  }

 private:
  std::array<uint32_t, N> ideal_ends_;
  std::string id_;
};

class DynamicCappedKeySegmentsExtractor : public KeySegmentsExtractor {
 public:
  DynamicCappedKeySegmentsExtractor(const std::vector<uint32_t>& byte_widths) {
    id_ = kName();
    uint32_t prev_end = 0;
    // Size the vector before the indexed assignments below
    ideal_ends_.resize(byte_widths.size());
    for (size_t i = 0; i < byte_widths.size(); ++i) {
      prev_end = prev_end + byte_widths[i];
      ideal_ends_[i] = prev_end;
      id_ += std::to_string(byte_widths[i]) + "b";
    }
    final_ideal_end_ = prev_end;
  }

  static const char* kName() { return "CappedKeySegmentsExtractor"; }

  const char* Name() const override { return kName(); }

  std::string GetId() const override { return id_; }

  void Extract(const Slice& key_or_bound, KeyKind /*kind*/,
               Result* result) const override {
    // Optimistic assignment
    result->segment_ends = ideal_ends_;
    uint32_t key_size = static_cast<uint32_t>(key_or_bound.size());
    if (key_size < final_ideal_end_) {
      // Need to fix up (should be rare)
      for (size_t i = 0; i < ideal_ends_.size(); ++i) {
        result->segment_ends[i] = std::min(key_size, result->segment_ends[i]);
      }
    }
  }

 private:
  std::vector<uint32_t> ideal_ends_;
  uint32_t final_ideal_end_;
  std::string id_;
};

void GetFilterInput(FilterInput select, const Slice& key,
                    const KeySegmentsExtractor::Result& extracted,
                    Slice* out_input, Slice* out_leadup) {
  struct FilterInputGetter {
    explicit FilterInputGetter(const Slice& _key,
                               const KeySegmentsExtractor::Result& _extracted)
        : key(_key), extracted(_extracted) {}
    const Slice& key;
    const KeySegmentsExtractor::Result& extracted;

    Slice operator()(SelectKeySegment select) {
      size_t count = extracted.segment_ends.size();
      if (count <= select.segment_index) {
        return Slice();
      }
      assert(count > 0);
      size_t start = select.segment_index > 0
                         ? extracted.segment_ends[select.segment_index - 1]
                         : 0;
      size_t end =
          extracted
              .segment_ends[std::min(size_t{select.segment_index}, count - 1)];
      return Slice(key.data() + start, end - start);
    }

    Slice operator()(SelectKeySegmentRange select) {
      assert(select.from_segment_index <= select.to_segment_index);
      size_t count = extracted.segment_ends.size();
      if (count <= select.from_segment_index) {
        return Slice();
      }
      assert(count > 0);
      size_t start = select.from_segment_index > 0
                         ? extracted.segment_ends[select.from_segment_index - 1]
                         : 0;
      size_t end = extracted.segment_ends[std::min(
          size_t{select.to_segment_index}, count - 1)];
      return Slice(key.data() + start, end - start);
    }

    Slice operator()(SelectWholeKey) { return key; }

    Slice operator()(SelectLegacyKeyPrefix) {
      // TODO
      assert(false);
      return Slice();
    }

    Slice operator()(SelectUserTimestamp) {
      // TODO
      assert(false);
      return Slice();
    }

    Slice operator()(SelectColumnName) {
      // TODO
      assert(false);
      return Slice();
    }
  };

  Slice input = std::visit(FilterInputGetter(key, extracted), select);
  *out_input = input;
  if (input.empty() || input.data() < key.data() ||
      input.data() > key.data() + key.size()) {
    *out_leadup = key;
  } else {
    *out_leadup = Slice(key.data(), input.data() - key.data());
  }
}

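// Example: for a key whose extracted segment ends are {3, 6}, FilterInput
// SelectKeySegment{1} yields *out_input = key bytes [3, 6) and
// *out_leadup = key bytes [0, 3), i.e. everything leading up to the
// selected segment.
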
const char* DeserializeFilterInput(const char* p, const char* limit,
                                   FilterInput* out) {
  if (p >= limit) {
    return nullptr;
  }
  uint8_t b = static_cast<uint8_t>(*p++);
  if (b & 0x80) {
    // Reserved for future use to read more bytes
    return nullptr;
  }

  switch (b >> 4) {
    case 0:
      // Various cases that don't have an argument
      switch (b) {
        case 0:
          *out = SelectWholeKey{};
          return p;
        case 1:
          *out = SelectLegacyKeyPrefix{};
          return p;
        case 2:
          *out = SelectUserTimestamp{};
          return p;
        case 3:
          *out = SelectColumnName{};
          return p;
        default:
          // Reserved for future use
          return nullptr;
      }
    case 1:
      // First 16 cases of SelectKeySegment
      *out = SelectKeySegment{BitwiseAnd(b, 0xf)};
      return p;
    case 2:
      // First 16 cases of SelectKeySegmentRange
      // that are not a single key segment
      // 0: 0-1
      // 1: 0-2
      // 2: 1-2
      // 3: 0-3
      // 4: 1-3
      // 5: 2-3
      // 6: 0-4
      // 7: 1-4
      // 8: 2-4
      // 9: 3-4
      // 10: 0-5
      // 11: 1-5
      // 12: 2-5
      // 13: 3-5
      // 14: 4-5
      // 15: 0-6
      // Only the low nibble encodes the case (the high nibble selected
      // this branch), so mask it off before decoding.
      b &= 0xf;
      if (b < 6) {
        if (b >= 3) {
          *out = SelectKeySegmentRange{static_cast<uint8_t>(b - 3), 3};
        } else if (b >= 1) {
          *out = SelectKeySegmentRange{static_cast<uint8_t>(b - 1), 2};
        } else {
          *out = SelectKeySegmentRange{0, 1};
        }
      } else if (b < 10) {
        *out = SelectKeySegmentRange{static_cast<uint8_t>(b - 6), 4};
      } else if (b < 15) {
        *out = SelectKeySegmentRange{static_cast<uint8_t>(b - 10), 5};
      } else {
        *out = SelectKeySegmentRange{0, 6};
      }
      return p;
    default:
      // Reserved for future use
      return nullptr;
  }
}

void SerializeFilterInput(std::string* out, const FilterInput& select) {
  struct FilterInputSerializer {
    std::string* out;
    void operator()(SelectWholeKey) { out->push_back(0); }
    void operator()(SelectLegacyKeyPrefix) { out->push_back(1); }
    void operator()(SelectUserTimestamp) { out->push_back(2); }
    void operator()(SelectColumnName) { out->push_back(3); }
    void operator()(SelectKeySegment select) {
      // TODO: expand supported cases
      assert(select.segment_index < 16);
      out->push_back(static_cast<char>((1 << 4) | select.segment_index));
    }
    void operator()(SelectKeySegmentRange select) {
      auto from = select.from_segment_index;
      auto to = select.to_segment_index;
      // TODO: expand supported cases
      assert(from < 6);
      assert(to < 6 || (to == 6 && from == 0));
      assert(from < to);
      int start = (to - 1) * to / 2;
      assert(start + from < 16);
      out->push_back(static_cast<char>((2 << 4) | (start + from)));
    }
  };
  std::visit(FilterInputSerializer{out}, select);
}

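// Worked example of the SelectKeySegmentRange encoding: for (from=1, to=3),
// start = (3 - 1) * 3 / 2 = 3 ranges precede those ending at 3 (one ending
// at segment 1, two ending at segment 2), so the low nibble is
// start + from = 4, matching entry "4: 1-3" in the table in
// DeserializeFilterInput above.
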
size_t GetFilterInputSerializedLength(const FilterInput& /*select*/) {
  // TODO: expand supported cases
  return 1;
}

uint64_t CategorySetToUint(const KeySegmentsExtractor::KeyCategorySet& s) {
  static_assert(sizeof(KeySegmentsExtractor::KeyCategorySet) ==
                sizeof(uint64_t));
  return *reinterpret_cast<const uint64_t*>(&s);
}

KeySegmentsExtractor::KeyCategorySet UintToCategorySet(uint64_t s) {
  static_assert(sizeof(KeySegmentsExtractor::KeyCategorySet) ==
                sizeof(uint64_t));
  return *reinterpret_cast<const KeySegmentsExtractor::KeyCategorySet*>(&s);
}

enum BuiltinSstQueryFilters : char {
  // Wraps a set of filters such that they use a particular
  // KeySegmentsExtractor and a set of categories covering all keys seen.
  // TODO: unit test category covering filtering
  kExtrAndCatFilterWrapper = 0x1,

  // Wraps a set of filters to limit their scope to a particular set of
  // categories. (Unlike kExtrAndCatFilterWrapper,
  // keys in other categories may have been seen so are not filtered here.)
  // TODO: unit test more subtleties
  kCategoryScopeFilterWrapper = 0x2,

  // ... (reserve some values for more wrappers)

  // A filter representing the bytewise min and max values of a numbered
  // segment or composite (range of segments). The empty value is tracked
  // and filtered independently because it might be a special case that is
  // not representative of the minimum in a spread of values.
  kBytewiseMinMaxFilter = 0x10,
  kRevBytewiseMinMaxFilter = 0x11,
};

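// For reference, the serialized filter block built by MyCollector::Finish
// (below) is laid out as:
//   [kSchemaVersion byte]
//   [varint: top-level filter count]
//   for each filter except the last: [varint: filter length]
//   [filter bytes...]
// A kExtrAndCatFilterWrapper filter contains:
//   [marker byte][varint: extractor id length][extractor id]
//   [varint: category set][varint: wrapped filter count][wrapped filters]
// A k(Rev)BytewiseMinMaxFilter contains:
//   [marker byte][flags byte][serialized FilterInput]
//   [varint: min size][min][max, running to the end of the filter]
// where min/max are in the filter's comparator order.
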
class SstQueryFilterBuilder {
 public:
  virtual ~SstQueryFilterBuilder() = default;
  virtual void Add(const Slice& key,
                   const KeySegmentsExtractor::Result& extracted,
                   const Slice* prev_key,
                   const KeySegmentsExtractor::Result* prev_extracted) = 0;
  virtual Status GetStatus() const = 0;
  virtual size_t GetEncodedLength() const = 0;
  virtual void Finish(std::string& append_to) = 0;
};

class SstQueryFilterConfigImpl : public SstQueryFilterConfig {
 public:
  explicit SstQueryFilterConfigImpl(
      const FilterInput& input,
      const KeySegmentsExtractor::KeyCategorySet& categories)
      : input_(input), categories_(categories) {}

  virtual ~SstQueryFilterConfigImpl() = default;

  virtual std::unique_ptr<SstQueryFilterBuilder> NewBuilder(
      bool sanity_checks) const = 0;

 protected:
  FilterInput input_;
  KeySegmentsExtractor::KeyCategorySet categories_;
};

class CategoryScopeFilterWrapperBuilder : public SstQueryFilterBuilder {
 public:
  explicit CategoryScopeFilterWrapperBuilder(
      KeySegmentsExtractor::KeyCategorySet categories,
      std::unique_ptr<SstQueryFilterBuilder> wrapped)
      : categories_(categories), wrapped_(std::move(wrapped)) {}

  void Add(const Slice& key, const KeySegmentsExtractor::Result& extracted,
           const Slice* prev_key,
           const KeySegmentsExtractor::Result* prev_extracted) override {
    if (!categories_.Contains(extracted.category)) {
      // Category not in scope of the constituent filters
      return;
    }
    wrapped_->Add(key, extracted, prev_key, prev_extracted);
  }

  Status GetStatus() const override { return wrapped_->GetStatus(); }

  size_t GetEncodedLength() const override {
    size_t wrapped_length = wrapped_->GetEncodedLength();
    if (wrapped_length == 0) {
      // Use empty filter
      // FIXME: needs unit test
      return 0;
    } else {
      // For now in the code, wraps only 1 filter, but schema supports multiple
      return 1 + VarintLength(CategorySetToUint(categories_)) +
             VarintLength(1) + wrapped_length;
    }
  }

  void Finish(std::string& append_to) override {
    size_t encoded_length = GetEncodedLength();
    if (encoded_length == 0) {
      // Nothing to do
      return;
    }
    size_t old_append_to_size = append_to.size();
    append_to.reserve(old_append_to_size + encoded_length);
    append_to.push_back(kCategoryScopeFilterWrapper);

    PutVarint64(&append_to, CategorySetToUint(categories_));

    // Wrapping just 1 filter for now
    PutVarint64(&append_to, 1);
    wrapped_->Finish(append_to);
  }

 private:
  KeySegmentsExtractor::KeyCategorySet categories_;
  std::unique_ptr<SstQueryFilterBuilder> wrapped_;
};

class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
 public:
  explicit BytewiseMinMaxSstQueryFilterConfig(
      const FilterInput& input,
      const KeySegmentsExtractor::KeyCategorySet& categories, bool reverse)
      : SstQueryFilterConfigImpl(input, categories), reverse_(reverse) {}

  std::unique_ptr<SstQueryFilterBuilder> NewBuilder(
      bool sanity_checks) const override {
    auto b = std::make_unique<MyBuilder>(*this, sanity_checks);
    if (categories_ != KeySegmentsExtractor::KeyCategorySet::All()) {
      return std::make_unique<CategoryScopeFilterWrapperBuilder>(categories_,
                                                                 std::move(b));
    } else {
      return b;
    }
  }

  static bool RangeMayMatch(
      const Slice& filter, const Slice& lower_bound_incl,
      const KeySegmentsExtractor::Result& lower_bound_extracted,
      const Slice& upper_bound_excl,
      const KeySegmentsExtractor::Result& upper_bound_extracted) {
    assert(!filter.empty() && (filter[0] == kBytewiseMinMaxFilter ||
                               filter[0] == kRevBytewiseMinMaxFilter));
    if (filter.size() <= 4) {
      // Missing some data
      return true;
    }
    bool reverse = (filter[0] == kRevBytewiseMinMaxFilter);
    bool empty_included = (filter[1] & kEmptySeenFlag) != 0;
    const char* p = filter.data() + 2;
    const char* limit = filter.data() + filter.size();

    FilterInput in;
    p = DeserializeFilterInput(p, limit, &in);
    if (p == nullptr) {
      // Corrupt or unsupported
      return true;
    }

    uint32_t smallest_size;
    p = GetVarint32Ptr(p, limit, &smallest_size);
    if (p == nullptr || static_cast<size_t>(limit - p) <= smallest_size) {
      // Corrupt
      return true;
    }
    Slice smallest = Slice(p, smallest_size);
    p += smallest_size;

    size_t largest_size = static_cast<size_t>(limit - p);
    Slice largest = Slice(p, largest_size);

    Slice lower_bound_input, lower_bound_leadup;
    Slice upper_bound_input, upper_bound_leadup;
    GetFilterInput(in, lower_bound_incl, lower_bound_extracted,
                   &lower_bound_input, &lower_bound_leadup);
    GetFilterInput(in, upper_bound_excl, upper_bound_extracted,
                   &upper_bound_input, &upper_bound_leadup);

    if (lower_bound_leadup.compare(upper_bound_leadup) != 0) {
      // Unable to filter range when bounds have different lead-up to key
      // segment
      return true;
    }

    if (empty_included && lower_bound_input.empty()) {
      // May match on 0-length segment
      return true;
    }
    // TODO: potentially fix upper bound to actually be exclusive, but it's not
    // as simple as changing >= to > below, because it's upper_bound_excl that's
    // exclusive, and the upper_bound_input part extracted from it might not be.

    // May match if both the upper bound and lower bound indicate there could
    // be overlap
    if (reverse) {
      return upper_bound_input.compare(smallest) <= 0 &&
             lower_bound_input.compare(largest) >= 0;
    } else {
      return upper_bound_input.compare(smallest) >= 0 &&
             lower_bound_input.compare(largest) <= 0;
    }
  }

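  // In other words, the filter may match only if the bounds' common segment
  // interval intersects [smallest, largest]; with the reverse comparator the
  // byte-wise comparisons flip because the stored min/max are in reverse
  // order.
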
 protected:
  struct MyBuilder : public SstQueryFilterBuilder {
    MyBuilder(const BytewiseMinMaxSstQueryFilterConfig& _parent,
              bool _sanity_checks)
        : parent(_parent), sanity_checks(_sanity_checks) {}

    void Add(const Slice& key, const KeySegmentsExtractor::Result& extracted,
             const Slice* prev_key,
             const KeySegmentsExtractor::Result* prev_extracted) override {
      Slice input, leadup;
      GetFilterInput(parent.input_, key, extracted, &input, &leadup);

      if (sanity_checks && prev_key && prev_extracted) {
        // Opportunistic checking of segment ordering invariant
        Slice prev_input, prev_leadup;
        GetFilterInput(parent.input_, *prev_key, *prev_extracted, &prev_input,
                       &prev_leadup);

        int compare = prev_leadup.compare(leadup);
        if (compare == 0) {
          // On the same prefix leading up to the segment, the segments must
          // not be out of order.
          compare = prev_input.compare(input);
          if (parent.reverse_ ? compare < 0 : compare > 0) {
            status = Status::Corruption(
                "Ordering invariant violated from 0x" +
                prev_key->ToString(/*hex=*/true) + " with segment 0x" +
                prev_input.ToString(/*hex=*/true) + " to 0x" +
                key.ToString(/*hex=*/true) + " with segment 0x" +
                input.ToString(/*hex=*/true));
            return;
          }
        }
        // NOTE: it is not strictly required that the leadup be ordered, just
        // satisfy the "common segment prefix property" which would be
        // expensive to check
      }

      // Now actually update state for the filter inputs
      // TODO: shorten largest and smallest if appropriate
      if (input.empty()) {
        empty_seen = true;
      } else if (largest.empty()) {
        // Step for first non-empty input
        smallest = largest = input.ToString();
      } else if (input.compare(largest) > 0) {
        largest = input.ToString();
      } else if (input.compare(smallest) < 0) {
        smallest = input.ToString();
      }
    }

    Status GetStatus() const override { return status; }

    size_t GetEncodedLength() const override {
      if (largest.empty()) {
        // Not an interesting filter -> 0 to indicate no filter
        // FIXME: needs unit test
        return 0;
      }
      return 2 + GetFilterInputSerializedLength(parent.input_) +
             VarintLength(parent.reverse_ ? largest.size() : smallest.size()) +
             smallest.size() + largest.size();
    }

    void Finish(std::string& append_to) override {
      assert(status.ok());
      size_t encoded_length = GetEncodedLength();
      if (encoded_length == 0) {
        // Nothing to do
        return;
      }
      size_t old_append_to_size = append_to.size();
      append_to.reserve(old_append_to_size + encoded_length);
      append_to.push_back(parent.reverse_ ? kRevBytewiseMinMaxFilter
                                          : kBytewiseMinMaxFilter);

      append_to.push_back(empty_seen ? kEmptySeenFlag : 0);

      SerializeFilterInput(&append_to, parent.input_);

      auto& minv = parent.reverse_ ? largest : smallest;
      auto& maxv = parent.reverse_ ? smallest : largest;
      PutVarint32(&append_to, static_cast<uint32_t>(minv.size()));
      append_to.append(minv);
      // The end of `maxv` is given by the end of the filter
      append_to.append(maxv);
      assert(append_to.size() == old_append_to_size + encoded_length);
    }

    const BytewiseMinMaxSstQueryFilterConfig& parent;
    const bool sanity_checks;
    // Smallest and largest segment seen, excluding the empty segment which
    // is tracked separately. "Reverse" from parent is only applied at
    // serialization time, for efficiency.
    std::string smallest;
    std::string largest;
    bool empty_seen = false;

    // Only for sanity checks
    Status status;
  };

  bool reverse_;

 private:
  static constexpr char kEmptySeenFlag = 0x1;
};

const SstQueryFilterConfigs kEmptyNotFoundSQFC{};

class SstQueryFilterConfigsManagerImpl : public SstQueryFilterConfigsManager {
 public:
  using ConfigVersionMap = std::map<FilteringVersion, SstQueryFilterConfigs>;

  Status Populate(const Data& data) {
    if (data.empty()) {
      return Status::OK();
    }
    // Populate only once
    assert(min_ver_ == 0 && max_ver_ == 0);
    min_ver_ = max_ver_ = data.begin()->first;

    FilteringVersion prev_ver = 0;
    bool first_entry = true;
    for (const auto& ver_info : data) {
      if (ver_info.first == 0) {
        return Status::InvalidArgument(
            "Filtering version 0 is reserved for empty configuration and may "
            "not be overridden");
      }
      if (first_entry) {
        min_ver_ = ver_info.first;
        first_entry = false;
      } else if (ver_info.first != prev_ver + 1) {
        return Status::InvalidArgument(
            "Filtering versions must increase by 1 without repeating: " +
            std::to_string(prev_ver) + " -> " + std::to_string(ver_info.first));
      }
      max_ver_ = ver_info.first;
      UnorderedSet<std::string> names_seen_this_ver;
      for (const auto& config : ver_info.second) {
        if (!names_seen_this_ver.insert(config.first).second) {
          return Status::InvalidArgument(
              "Duplicate name in filtering version " +
              std::to_string(ver_info.first) + ": " + config.first);
        }
        auto& ver_map = name_map_[config.first];
        ver_map[ver_info.first] = config.second;
        if (config.second.extractor) {
          extractor_map_[config.second.extractor->GetId()] =
              config.second.extractor;
        }
      }
      prev_ver = ver_info.first;
    }
    return Status::OK();
  }

  struct MyCollector : public TablePropertiesCollector {
    // Keeps a reference to `configs` which should be kept alive by
    // SstQueryFilterConfigsManagerImpl, which should be kept alive by
    // any factories
    // TODO: sanity_checks option
    explicit MyCollector(const SstQueryFilterConfigs& configs,
                         const SstQueryFilterConfigsManagerImpl& _parent)
        : parent(_parent),
          extractor(configs.extractor.get()),
          sanity_checks(true) {
      for (const auto& c : configs.filters) {
        builders.push_back(
            static_cast<SstQueryFilterConfigImpl&>(*c).NewBuilder(
                sanity_checks));
      }
    }

    Status AddUserKey(const Slice& key, const Slice& /*value*/,
                      EntryType /*type*/, SequenceNumber /*seq*/,
                      uint64_t /*file_size*/) override {
      // FIXME later: `key` might contain user timestamp. That should be
      // exposed properly in a future update to TablePropertiesCollector
      extracted.Reset();
      if (extractor) {
        extractor->Extract(key, KeySegmentsExtractor::kFullUserKey, &extracted);
        if (UNLIKELY(extracted.category >=
                     KeySegmentsExtractor::kMinErrorCategory)) {
          // TODO: proper failure scopes
          Status s = Status::Corruption(
              "Extractor returned error category from key 0x" +
              Slice(key).ToString(/*hex=*/true));
          overall_status.UpdateIfOk(s);
          return s;
        }
        assert(extracted.category <= KeySegmentsExtractor::kMaxUsableCategory);

        bool new_category = categories_seen.Add(extracted.category);
        if (sanity_checks) {
          // Opportunistic checking of category ordering invariant
          if (!first_key) {
            if (prev_extracted.category != extracted.category &&
                !new_category) {
              Status s = Status::Corruption(
                  "Category ordering invariant violated from key 0x" +
                  Slice(prev_key).ToString(/*hex=*/true) + " to 0x" +
                  key.ToString(/*hex=*/true));
              overall_status.UpdateIfOk(s);
              return s;
            }
          }
        }
      }
      for (const auto& b : builders) {
        if (first_key) {
          b->Add(key, extracted, nullptr, nullptr);
        } else {
          Slice prev_key_slice = Slice(prev_key);
          b->Add(key, extracted, &prev_key_slice, &prev_extracted);
        }
      }
      prev_key.assign(key.data(), key.size());
      std::swap(prev_extracted, extracted);
      first_key = false;
      return Status::OK();
    }
    Status Finish(UserCollectedProperties* properties) override {
      assert(properties != nullptr);

      if (!overall_status.ok()) {
        return overall_status;
      }

      size_t total_size = 1;
      autovector<std::pair<SstQueryFilterBuilder&, size_t>> filters_to_finish;
      // Need to determine number of filters before serializing them. Might
      // as well determine full length also.
      for (const auto& b : builders) {
        Status s = b->GetStatus();
        if (s.ok()) {
          size_t len = b->GetEncodedLength();
          if (len > 0) {
            total_size += VarintLength(len) + len;
            filters_to_finish.emplace_back(*b, len);
          }
        } else {
          // FIXME: no way to report partial failure without getting
          // remaining filters thrown out
        }
      }
      total_size += VarintLength(filters_to_finish.size());
      if (filters_to_finish.empty()) {
        // No filters to add
        return Status::OK();
      }
      // Length of the last filter is omitted
      total_size -= VarintLength(filters_to_finish.back().second);

      // Need to determine size of
      // kExtrAndCatFilterWrapper if used
      std::string extractor_id;
      if (extractor) {
        extractor_id = extractor->GetId();
        // identifier byte
        total_size += 1;
        // fields of the wrapper
        total_size += VarintLength(extractor_id.size()) + extractor_id.size() +
                      VarintLength(CategorySetToUint(categories_seen));
        // outer layer will have just 1 filter in its count (added here)
        // and this filter wrapper will have filters_to_finish.size()
        // (added above).
        total_size += VarintLength(1);
      }

      std::string filters;
      filters.reserve(total_size);

      // Leave room for drastic changes in the future.
      filters.push_back(kSchemaVersion);

      if (extractor) {
        // Wrap everything in a kExtrAndCatFilterWrapper
        // TODO in future: put whole key filters outside of this wrapper.
        // Also TODO in future: order the filters starting with broadest
        // applicability.

        // Just one top-level filter (wrapper). Because it's last, we don't
        // need to encode its length.
        PutVarint64(&filters, 1);
        // The filter(s) wrapper itself
        filters.push_back(kExtrAndCatFilterWrapper);
        PutVarint64(&filters, extractor_id.size());
        filters += extractor_id;
        PutVarint64(&filters, CategorySetToUint(categories_seen));
      }

      PutVarint64(&filters, filters_to_finish.size());

      for (const auto& e : filters_to_finish) {
        // Encode filter length, except last filter
        if (&e != &filters_to_finish.back()) {
          PutVarint64(&filters, e.second);
        }
        // Encode filter
        e.first.Finish(filters);
      }
      if (filters.size() != total_size) {
        assert(false);
        return Status::Corruption(
            "Internal inconsistency building SST query filters");
      }

      (*properties)[kTablePropertyName] = std::move(filters);
      return Status::OK();
    }
    UserCollectedProperties GetReadableProperties() const override {
      // TODO?
      return {};
    }
    const char* Name() const override {
      // placeholder
      return "SstQueryFilterConfigsImpl::MyCollector";
    }

    Status overall_status;
    const SstQueryFilterConfigsManagerImpl& parent;
    const KeySegmentsExtractor* const extractor;
    const bool sanity_checks;
    std::vector<std::shared_ptr<SstQueryFilterBuilder>> builders;
    bool first_key = true;
    std::string prev_key;
    KeySegmentsExtractor::Result extracted;
    KeySegmentsExtractor::Result prev_extracted;
    KeySegmentsExtractor::KeyCategorySet categories_seen;
  };

  struct RangeQueryFilterReader {
    Slice lower_bound_incl;
    Slice upper_bound_excl;
    const KeySegmentsExtractor* extractor;
    const UnorderedMap<std::string,
                       std::shared_ptr<const KeySegmentsExtractor>>&
        extractor_map;

    struct State {
      KeySegmentsExtractor::Result lb_extracted;
      KeySegmentsExtractor::Result ub_extracted;
    };

    bool MayMatch_CategoryScopeFilterWrapper(Slice wrapper,
                                             State& state) const {
      assert(!wrapper.empty() && wrapper[0] == kCategoryScopeFilterWrapper);

      // Regardless of the filter values (which we assume is not all
      // categories; that should skip the wrapper), we need upper bound and
      // lower bound to be in the same category to do any range filtering.
      // (There could be another category in range between the bounds.)
      if (state.lb_extracted.category != state.ub_extracted.category) {
        // Can't filter between categories
        return true;
      }

      const char* p = wrapper.data() + 1;
      const char* limit = wrapper.data() + wrapper.size();

      uint64_t cats_raw;
      p = GetVarint64Ptr(p, limit, &cats_raw);
      if (p == nullptr) {
        // Missing categories
        return true;
      }
      KeySegmentsExtractor::KeyCategorySet categories =
          UintToCategorySet(cats_raw);

      // Check category against those in scope
      if (!categories.Contains(state.lb_extracted.category)) {
        // Can't filter this category
        return true;
      }

      // Process the wrapped filters
      return MayMatch(Slice(p, limit - p), &state);
    }

    bool MayMatch_ExtrAndCatFilterWrapper(Slice wrapper) const {
      assert(!wrapper.empty() && wrapper[0] == kExtrAndCatFilterWrapper);
      if (wrapper.size() <= 4) {
        // Missing some data
        // (1 byte marker, >= 1 byte name length, >= 1 byte name, >= 1 byte
        // categories, ...)
        return true;
      }
      const char* p = wrapper.data() + 1;
      const char* limit = wrapper.data() + wrapper.size();
      uint64_t name_len;
      p = GetVarint64Ptr(p, limit, &name_len);
      if (p == nullptr || name_len == 0 ||
          static_cast<size_t>(limit - p) < name_len) {
        // Missing some data
        return true;
      }
      Slice name(p, name_len);
      p += name_len;
      const KeySegmentsExtractor* ex = nullptr;
      if (extractor && name == Slice(extractor->GetId())) {
        ex = extractor;
      } else {
        auto it = extractor_map.find(name.ToString());
        if (it != extractor_map.end()) {
          ex = it->second.get();
        } else {
          // Extractor mismatch / not found
          // TODO future: try to get the extractor from the ObjectRegistry
          return true;
        }
      }

      // TODO future: cache extraction?

      // Ready to run extractor
      assert(ex);
      State state;
      ex->Extract(lower_bound_incl, KeySegmentsExtractor::kInclusiveLowerBound,
                  &state.lb_extracted);
      if (UNLIKELY(state.lb_extracted.category >=
                   KeySegmentsExtractor::kMinErrorCategory)) {
        // TODO? Report problem
        // No filtering
        return true;
      }
      assert(state.lb_extracted.category <=
             KeySegmentsExtractor::kMaxUsableCategory);

      ex->Extract(upper_bound_excl, KeySegmentsExtractor::kExclusiveUpperBound,
                  &state.ub_extracted);
      if (UNLIKELY(state.ub_extracted.category >=
                   KeySegmentsExtractor::kMinErrorCategory)) {
        // TODO? Report problem
        // No filtering
        return true;
      }
      assert(state.ub_extracted.category <=
             KeySegmentsExtractor::kMaxUsableCategory);

      uint64_t cats_raw;
      p = GetVarint64Ptr(p, limit, &cats_raw);
      if (p == nullptr) {
        // Missing categories
        return true;
      }
      KeySegmentsExtractor::KeyCategorySet categories =
          UintToCategorySet(cats_raw);

      // Can only filter out based on category if upper and lower bound have
      // the same category. (Each category is contiguous by key order, but we
      // don't know the order between categories.)
      if (state.lb_extracted.category == state.ub_extracted.category &&
          !categories.Contains(state.lb_extracted.category)) {
        // Filtered out
        return false;
      }

      // Process the wrapped filters
      return MayMatch(Slice(p, limit - p), &state);
    }

    bool MayMatch(Slice filters, State* state = nullptr) const {
      const char* p = filters.data();
      const char* limit = p + filters.size();
      uint64_t filter_count;
      p = GetVarint64Ptr(p, limit, &filter_count);
      if (p == nullptr || filter_count == 0) {
        // TODO? Report problem
        // No filtering
        return true;
      }

      for (size_t i = 0; i < filter_count; ++i) {
        uint64_t filter_len;
        if (i + 1 == filter_count) {
          // Last filter
          filter_len = static_cast<uint64_t>(limit - p);
        } else {
          p = GetVarint64Ptr(p, limit, &filter_len);
          if (p == nullptr || filter_len == 0 ||
              static_cast<size_t>(limit - p) < filter_len) {
            // TODO? Report problem
            // No filtering
            return true;
          }
        }
        Slice filter = Slice(p, filter_len);
        p += filter_len;
        if (filter.empty()) {
          // Corrupt (e.g. truncated last filter)
          // No filtering
          return true;
        }
        bool may_match = true;
        char type = filter[0];
        switch (type) {
          case kExtrAndCatFilterWrapper:
            may_match = MayMatch_ExtrAndCatFilterWrapper(filter);
            break;
          case kCategoryScopeFilterWrapper:
            if (state == nullptr) {
              // TODO? Report problem
              // No filtering
              return true;
            }
            may_match = MayMatch_CategoryScopeFilterWrapper(filter, *state);
            break;
          case kBytewiseMinMaxFilter:
          case kRevBytewiseMinMaxFilter:
            if (state == nullptr) {
              // TODO? Report problem
              // No filtering
              return true;
            }
            may_match = BytewiseMinMaxSstQueryFilterConfig::RangeMayMatch(
                filter, lower_bound_incl, state->lb_extracted, upper_bound_excl,
                state->ub_extracted);
            break;
          default:
            // Unknown filter type
            // TODO? Report problem
            break;
        }
        if (!may_match) {
          // Successfully filtered
          return false;
        }
      }

      // Wasn't filtered
      return true;
    }
  };

  struct MyFactory : public Factory {
    explicit MyFactory(
        std::shared_ptr<const SstQueryFilterConfigsManagerImpl> _parent,
        const std::string& _configs_name)
        : parent(std::move(_parent)),
          ver_map(parent->GetVerMap(_configs_name)),
          configs_name(_configs_name) {}

    TablePropertiesCollector* CreateTablePropertiesCollector(
        TablePropertiesCollectorFactory::Context /*context*/) override {
      auto& configs = GetConfigs();
      if (configs.IsEmptyNotFound()) {
        return nullptr;
      }
      return new MyCollector(configs, *parent);
    }
    const char* Name() const override {
      // placeholder
      return "SstQueryFilterConfigsManagerImpl::MyFactory";
    }

    Status SetFilteringVersion(FilteringVersion ver) override {
      if (ver > 0 && ver < parent->min_ver_) {
        return Status::InvalidArgument(
            "Filtering version is before earliest known configuration: " +
            std::to_string(ver) + " < " + std::to_string(parent->min_ver_));
      }
      if (ver > parent->max_ver_) {
        return Status::InvalidArgument(
            "Filtering version is after latest known configuration: " +
            std::to_string(ver) + " > " + std::to_string(parent->max_ver_));
      }
      version.StoreRelaxed(ver);
      return Status::OK();
    }
    FilteringVersion GetFilteringVersion() const override {
      return version.LoadRelaxed();
    }
    const std::string& GetConfigsName() const override { return configs_name; }
    const SstQueryFilterConfigs& GetConfigs() const override {
      FilteringVersion ver = version.LoadRelaxed();
      if (ver == 0) {
        // Special case
        return kEmptyNotFoundSQFC;
      }
      assert(ver >= parent->min_ver_);
      assert(ver <= parent->max_ver_);
      auto it = ver_map.upper_bound(ver);
      if (it == ver_map.begin()) {
        return kEmptyNotFoundSQFC;
      } else {
        --it;
        return it->second;
      }
    }

    // The buffers pointed to by the Slices must live as long as any read
    // operations using this table filter function.
    std::function<bool(const TableProperties&)> GetTableFilterForRangeQuery(
        Slice lower_bound_incl, Slice upper_bound_excl) const override {
      // TODO: cache extractor results between SST files, assuming most will
      // use the same version
      return
          [rqf = RangeQueryFilterReader{
               lower_bound_incl, upper_bound_excl, GetConfigs().extractor.get(),
               parent->extractor_map_}](const TableProperties& props) -> bool {
            auto it = props.user_collected_properties.find(kTablePropertyName);
            if (it == props.user_collected_properties.end()) {
              // No filtering
              return true;
            }
            auto& filters = it->second;
            // Parse the serialized filters string
            if (filters.size() < 2 || filters[0] != kSchemaVersion) {
              // TODO? Report problem
              // No filtering
              return true;
            }
            return rqf.MayMatch(Slice(filters.data() + 1, filters.size() - 1));
          };
    }

    const std::shared_ptr<const SstQueryFilterConfigsManagerImpl> parent;
    const ConfigVersionMap& ver_map;
    const std::string configs_name;
    RelaxedAtomic<FilteringVersion> version;
  };

  Status MakeSharedFactory(const std::string& configs_name,
                           FilteringVersion ver,
                           std::shared_ptr<Factory>* out) const override {
    auto obj = std::make_shared<MyFactory>(
        static_cast_with_check<const SstQueryFilterConfigsManagerImpl>(
            shared_from_this()),
        configs_name);
    Status s = obj->SetFilteringVersion(ver);
    if (s.ok()) {
      *out = std::move(obj);
    }
    return s;
  }

  const ConfigVersionMap& GetVerMap(const std::string& configs_name) const {
    static const ConfigVersionMap kEmptyMap;
    auto it = name_map_.find(configs_name);
    if (it == name_map_.end()) {
      return kEmptyMap;
    }
    return it->second;
  }

 private:
  static const std::string kTablePropertyName;
  static constexpr char kSchemaVersion = 1;

  UnorderedMap<std::string, ConfigVersionMap> name_map_;
  UnorderedMap<std::string, std::shared_ptr<const KeySegmentsExtractor>>
      extractor_map_;
  FilteringVersion min_ver_ = 0;
  FilteringVersion max_ver_ = 0;
};

// SstQueryFilterConfigsManagerImpl
const std::string SstQueryFilterConfigsManagerImpl::kTablePropertyName =
    "rocksdb.sqfc";
}  // namespace

std::shared_ptr<const KeySegmentsExtractor>
MakeSharedCappedKeySegmentsExtractor(const std::vector<size_t>& byte_widths) {
  std::vector<uint32_t> byte_widths_checked;
  byte_widths_checked.resize(byte_widths.size());
  size_t final_end = 0;
  for (size_t i = 0; i < byte_widths.size(); ++i) {
    final_end += byte_widths[i];
    if (byte_widths[i] > UINT32_MAX / 2 || final_end > UINT32_MAX) {
      // Better to fail here (the caller will likely crash on the nullptr)
      // than to proceed unsafely
      return nullptr;
    }
    byte_widths_checked[i] = static_cast<uint32_t>(byte_widths[i]);
  }

  switch (byte_widths_checked.size()) {
    case 0:
      return std::make_shared<SemiStaticCappedKeySegmentsExtractor<0>>(
          byte_widths_checked.data());
    case 1:
      return std::make_shared<SemiStaticCappedKeySegmentsExtractor<1>>(
          byte_widths_checked.data());
    case 2:
      return std::make_shared<SemiStaticCappedKeySegmentsExtractor<2>>(
          byte_widths_checked.data());
    case 3:
      return std::make_shared<SemiStaticCappedKeySegmentsExtractor<3>>(
          byte_widths_checked.data());
    case 4:
      return std::make_shared<SemiStaticCappedKeySegmentsExtractor<4>>(
          byte_widths_checked.data());
    case 5:
      return std::make_shared<SemiStaticCappedKeySegmentsExtractor<5>>(
          byte_widths_checked.data());
    case 6:
      return std::make_shared<SemiStaticCappedKeySegmentsExtractor<6>>(
          byte_widths_checked.data());
    default:
      return std::make_shared<DynamicCappedKeySegmentsExtractor>(
          byte_widths_checked);
  }
}

bool SstQueryFilterConfigs::IsEmptyNotFound() const {
  return this == &kEmptyNotFoundSQFC;
}

std::shared_ptr<SstQueryFilterConfig> MakeSharedBytewiseMinMaxSQFC(
    FilterInput input, KeySegmentsExtractor::KeyCategorySet categories) {
  return std::make_shared<BytewiseMinMaxSstQueryFilterConfig>(
      input, categories,
      /*reverse=*/false);
}

std::shared_ptr<SstQueryFilterConfig> MakeSharedReverseBytewiseMinMaxSQFC(
    FilterInput input, KeySegmentsExtractor::KeyCategorySet categories) {
  return std::make_shared<BytewiseMinMaxSstQueryFilterConfig>(input, categories,
                                                              /*reverse=*/true);
}

Status SstQueryFilterConfigsManager::MakeShared(
    const Data& data, std::shared_ptr<SstQueryFilterConfigsManager>* out) {
  auto obj = std::make_shared<SstQueryFilterConfigsManagerImpl>();
  Status s = obj->Populate(data);
  if (s.ok()) {
    *out = std::move(obj);
  }
  return s;
}

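// Typical usage (a sketch; see rocksdb/experimental.h for the exact types):
// build a Data mapping of filtering version -> named SstQueryFilterConfigs,
// call SstQueryFilterConfigsManager::MakeShared(data, &manager), then
// manager->MakeSharedFactory(configs_name, version, &factory). Installing
// the factory as a table properties collector factory writes filters at
// flush/compaction time; at read time,
// factory->GetTableFilterForRangeQuery(lower_bound, upper_bound) produces a
// table filter that skips SST files whose filters rule out the range.
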
}  // namespace ROCKSDB_NAMESPACE::experimental