From 11943e8b2794bd4c1c3155c16a9f44fbb77c4ae9 Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Fri, 7 Oct 2022 14:11:23 -0700 Subject: [PATCH] Exclude timestamp when checking compaction boundaries (#10787) Summary: When checking if a range [start, end) overlaps with a compaction whose range is [start1, end1), always exclude timestamp from start, end, start1 and end1, otherwise some versions of one user key may be compacted to bottommost layer while others remain in the original level. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10787 Test Plan: make check Reviewed By: ltamasi Differential Revision: D40187672 Pulled By: ltamasi fbshipit-source-id: 81226267fd3e33ffa79665c62abadf2ebec45496 --- db/compaction/compaction.cc | 16 +++++++++++----- db/compaction/compaction.h | 6 ++++++ db/compaction/compaction_iterator.h | 4 ++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index 9dffb08991..81bdcbe7a0 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -342,6 +342,8 @@ bool Compaction::SupportsPerKeyPlacement() const { int Compaction::GetPenultimateLevel() const { return penultimate_level_; } +// smallest_key and largest_key include timestamps if user-defined timestamp is +// enabled. bool Compaction::OverlapPenultimateLevelOutputRange( const Slice& smallest_key, const Slice& largest_key) const { if (!SupportsPerKeyPlacement()) { @@ -350,11 +352,13 @@ bool Compaction::OverlapPenultimateLevelOutputRange( const Comparator* ucmp = input_vstorage_->InternalComparator()->user_comparator(); - return ucmp->Compare(smallest_key, penultimate_level_largest_user_key_) <= - 0 && - ucmp->Compare(largest_key, penultimate_level_smallest_user_key_) >= 0; + return ucmp->CompareWithoutTimestamp( + smallest_key, penultimate_level_largest_user_key_) <= 0 && + ucmp->CompareWithoutTimestamp( + largest_key, penultimate_level_smallest_user_key_) >= 0; } +// key includes timestamp if user-defined timestamp is enabled. bool Compaction::WithinPenultimateLevelOutputRange(const Slice& key) const { if (!SupportsPerKeyPlacement()) { return false; @@ -363,8 +367,10 @@ bool Compaction::WithinPenultimateLevelOutputRange(const Slice& key) const { const Comparator* ucmp = input_vstorage_->InternalComparator()->user_comparator(); - return ucmp->Compare(key, penultimate_level_smallest_user_key_) >= 0 && - ucmp->Compare(key, penultimate_level_largest_user_key_) <= 0; + return ucmp->CompareWithoutTimestamp( + key, penultimate_level_smallest_user_key_) >= 0 && + ucmp->CompareWithoutTimestamp( + key, penultimate_level_largest_user_key_) <= 0; } bool Compaction::InputCompressionMatchesOutput() const { diff --git a/db/compaction/compaction.h b/db/compaction/compaction.h index 22c30c9c34..b073ed61dd 100644 --- a/db/compaction/compaction.h +++ b/db/compaction/compaction.h @@ -319,6 +319,8 @@ class Compaction { // Return true if the given range is overlap with penultimate level output // range. + // Both smallest_key and largest_key include timestamps if user-defined + // timestamp is enabled. bool OverlapPenultimateLevelOutputRange(const Slice& smallest_key, const Slice& largest_key) const; @@ -328,6 +330,7 @@ class Compaction { // If per_key_placement is not supported, always return false. // TODO: currently it doesn't support moving data from the last level to the // penultimate level + // key includes timestamp if user-defined timestamp is enabled. bool WithinPenultimateLevelOutputRange(const Slice& key) const; CompactionReason compaction_reason() const { return compaction_reason_; } @@ -474,9 +477,11 @@ class Compaction { TablePropertiesCollection output_table_properties_; // smallest user keys in compaction + // includes timestamp if user-defined timestamp is enabled. Slice smallest_user_key_; // largest user keys in compaction + // includes timestamp if user-defined timestamp is enabled. Slice largest_user_key_; // Reason for compaction @@ -497,6 +502,7 @@ class Compaction { const int penultimate_level_; // Key range for penultimate level output + // includes timestamp if user-defined timestamp is enabled. Slice penultimate_level_smallest_user_key_; Slice penultimate_level_largest_user_key_; }; diff --git a/db/compaction/compaction_iterator.h b/db/compaction/compaction_iterator.h index c337f76e03..c5b7b67bd8 100644 --- a/db/compaction/compaction_iterator.h +++ b/db/compaction/compaction_iterator.h @@ -88,6 +88,7 @@ class CompactionIterator { virtual int number_levels() const = 0; + // Result includes timestamp if user-defined timestamp is enabled. virtual Slice GetLargestUserKey() const = 0; virtual bool allow_ingest_behind() const = 0; @@ -108,6 +109,7 @@ class CompactionIterator { virtual bool SupportsPerKeyPlacement() const = 0; + // `key` includes timestamp if user-defined timestamp is enabled. virtual bool WithinPenultimateLevelOutputRange(const Slice& key) const = 0; }; @@ -133,6 +135,7 @@ class CompactionIterator { int number_levels() const override { return compaction_->number_levels(); } + // Result includes timestamp if user-defined timestamp is enabled. Slice GetLargestUserKey() const override { return compaction_->GetLargestUserKey(); } @@ -173,6 +176,7 @@ class CompactionIterator { // Check if key is within penultimate level output range, to see if it's // safe to output to the penultimate level for per_key_placement feature. + // `key` includes timestamp if user-defined timestamp is enabled. bool WithinPenultimateLevelOutputRange(const Slice& key) const override { return compaction_->WithinPenultimateLevelOutputRange(key); }