diff --git a/db/dbformat_test.cc b/db/dbformat_test.cc index 3b6190d92b..ab31e5a6f0 100644 --- a/db/dbformat_test.cc +++ b/db/dbformat_test.cc @@ -23,17 +23,17 @@ static std::string IKey(const std::string& user_key, uint64_t seq, } static std::string Shorten(const std::string& s, const std::string& l) { - std::string result = s; - ShortenedIndexBuilder::FindShortestInternalKeySeparator(*BytewiseComparator(), - &result, l); - return result; + std::string scratch; + return ShortenedIndexBuilder::FindShortestInternalKeySeparator( + *BytewiseComparator(), s, l, &scratch) + .ToString(); } static std::string ShortSuccessor(const std::string& s) { - std::string result = s; - ShortenedIndexBuilder::FindShortInternalKeySuccessor(*BytewiseComparator(), - &result); - return result; + std::string scratch; + return ShortenedIndexBuilder::FindShortInternalKeySuccessor( + *BytewiseComparator(), s, &scratch) + .ToString(); } static void TestKey(const std::string& key, uint64_t seq, ValueType vt) { diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 87f0bd61c1..c59097b84d 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -293,6 +293,7 @@ struct BlockBasedTableBuilder::Rep { InternalKeySliceTransform internal_prefix_transform; std::unique_ptr index_builder; + std::string index_separator_scratch; PartitionedIndexBuilder* p_index_builder_ = nullptr; std::string last_key; @@ -1049,8 +1050,8 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { if (r->IsParallelCompressionEnabled()) { r->pc_rep->curr_block_keys->Clear(); } else { - r->index_builder->AddIndexEntry(&r->last_key, &key, - r->pending_handle); + r->index_builder->AddIndexEntry(r->last_key, &key, r->pending_handle, + &r->index_separator_scratch); } } } @@ -1485,14 +1486,15 @@ void BlockBasedTableBuilder::BGWorkWriteMaybeCompressedBlock() { ++r->props.num_data_blocks; if (block_rep->first_key_in_next_block == nullptr) { - r->index_builder->AddIndexEntry(&(block_rep->keys->Back()), nullptr, - r->pending_handle); + r->index_builder->AddIndexEntry(block_rep->keys->Back(), nullptr, + r->pending_handle, + &r->index_separator_scratch); } else { Slice first_key_in_next_block = Slice(*block_rep->first_key_in_next_block); - r->index_builder->AddIndexEntry(&(block_rep->keys->Back()), - &first_key_in_next_block, - r->pending_handle); + r->index_builder->AddIndexEntry( + block_rep->keys->Back(), &first_key_in_next_block, r->pending_handle, + &r->index_separator_scratch); } r->pc_rep->ReapBlock(block_rep); @@ -1987,9 +1989,9 @@ void BlockBasedTableBuilder::EnterUnbuffered() { Slice* first_key_in_next_block_ptr = &first_key_in_next_block; iter->SeekToLast(); - std::string last_key = iter->key().ToString(); - r->index_builder->AddIndexEntry(&last_key, first_key_in_next_block_ptr, - r->pending_handle); + r->index_builder->AddIndexEntry( + iter->key(), first_key_in_next_block_ptr, r->pending_handle, + &r->index_separator_scratch); } } std::swap(iter, next_block_iter); @@ -2025,7 +2027,8 @@ Status BlockBasedTableBuilder::Finish() { // block, we will finish writing all index entries first. if (ok() && !empty_data_block) { r->index_builder->AddIndexEntry( - &r->last_key, nullptr /* no next data block */, r->pending_handle); + r->last_key, nullptr /* no next data block */, r->pending_handle, + &r->index_separator_scratch); } } diff --git a/table/block_based/index_builder.cc b/table/block_based/index_builder.cc index 98d084b344..a5a34d65b6 100644 --- a/table/block_based/index_builder.cc +++ b/table/block_based/index_builder.cc @@ -73,37 +73,47 @@ IndexBuilder* IndexBuilder::CreateIndexBuilder( return result; } -void ShortenedIndexBuilder::FindShortestInternalKeySeparator( - const Comparator& comparator, std::string* start, const Slice& limit) { +Slice ShortenedIndexBuilder::FindShortestInternalKeySeparator( + const Comparator& comparator, const Slice& start, const Slice& limit, + std::string* scratch) { // Attempt to shorten the user portion of the key - Slice user_start = ExtractUserKey(*start); + Slice user_start = ExtractUserKey(start); Slice user_limit = ExtractUserKey(limit); - std::string tmp(user_start.data(), user_start.size()); - comparator.FindShortestSeparator(&tmp, user_limit); - if (tmp.size() <= user_start.size() && - comparator.Compare(user_start, tmp) < 0) { + scratch->assign(user_start.data(), user_start.size()); + comparator.FindShortestSeparator(scratch, user_limit); + assert(comparator.Compare(user_start, *scratch) <= 0); + assert(comparator.Compare(user_start, user_limit) >= 0 || + comparator.Compare(*scratch, user_limit) < 0); + if (scratch->size() <= user_start.size() && + comparator.Compare(user_start, *scratch) < 0) { // User key has become shorter physically, but larger logically. // Tack on the earliest possible number to the shortened user key. - PutFixed64(&tmp, + PutFixed64(scratch, PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); - assert(InternalKeyComparator(&comparator).Compare(*start, tmp) < 0); - assert(InternalKeyComparator(&comparator).Compare(tmp, limit) < 0); - start->swap(tmp); + assert(InternalKeyComparator(&comparator).Compare(start, *scratch) < 0); + assert(InternalKeyComparator(&comparator).Compare(*scratch, limit) < 0); + return *scratch; + } else { + return start; } } -void ShortenedIndexBuilder::FindShortInternalKeySuccessor( - const Comparator& comparator, std::string* key) { - Slice user_key = ExtractUserKey(*key); - std::string tmp(user_key.data(), user_key.size()); - comparator.FindShortSuccessor(&tmp); - if (tmp.size() <= user_key.size() && comparator.Compare(user_key, tmp) < 0) { +Slice ShortenedIndexBuilder::FindShortInternalKeySuccessor( + const Comparator& comparator, const Slice& key, std::string* scratch) { + Slice user_key = ExtractUserKey(key); + scratch->assign(user_key.data(), user_key.size()); + comparator.FindShortSuccessor(scratch); + assert(comparator.Compare(user_key, *scratch) <= 0); + if (scratch->size() <= user_key.size() && + comparator.Compare(user_key, *scratch) < 0) { // User key has become shorter physically, but larger logically. // Tack on the earliest possible number to the shortened user key. - PutFixed64(&tmp, + PutFixed64(scratch, PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek)); - assert(InternalKeyComparator(&comparator).Compare(*key, tmp) < 0); - key->swap(tmp); + assert(InternalKeyComparator(&comparator).Compare(key, *scratch) < 0); + return *scratch; + } else { + return key; } } @@ -135,7 +145,6 @@ PartitionedIndexBuilder::PartitionedIndexBuilder( BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */, 0.75 /* data_block_hash_table_util_ratio */, ts_sz, persist_user_defined_timestamps, true /* is_user_key */), - sub_index_builder_(nullptr), table_opt_(table_opt), // We start by false. After each partition we revise the value based on // what the sub_index_builder has decided. If the feature is disabled @@ -146,13 +155,9 @@ PartitionedIndexBuilder::PartitionedIndexBuilder( seperator_is_key_plus_seq_(false), use_value_delta_encoding_(use_value_delta_encoding) {} -PartitionedIndexBuilder::~PartitionedIndexBuilder() { - delete sub_index_builder_; -} - void PartitionedIndexBuilder::MakeNewSubIndexBuilder() { assert(sub_index_builder_ == nullptr); - sub_index_builder_ = new ShortenedIndexBuilder( + sub_index_builder_ = std::make_unique( comparator_, table_opt_.index_block_restart_interval, table_opt_.format_version, use_value_delta_encoding_, table_opt_.index_shortening, /* include_first_key */ false, ts_sz_, @@ -180,33 +185,35 @@ void PartitionedIndexBuilder::RequestPartitionCut() { partition_cut_requested_ = true; } -void PartitionedIndexBuilder::AddIndexEntry( - std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, const BlockHandle& block_handle) { +Slice PartitionedIndexBuilder::AddIndexEntry( + const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, const BlockHandle& block_handle, + std::string* separator_scratch) { // Note: to avoid two consecuitive flush in the same method call, we do not // check flush policy when adding the last key if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys if (sub_index_builder_ == nullptr) { MakeNewSubIndexBuilder(); + // Reserve next partition entry, where we will modify the key and + // eventually set the value + entries_.push_back({{}, {}}); } - sub_index_builder_->AddIndexEntry(last_key_in_current_block, - first_key_in_next_block, block_handle); + auto sep = sub_index_builder_->AddIndexEntry( + last_key_in_current_block, first_key_in_next_block, block_handle, + separator_scratch); if (!seperator_is_key_plus_seq_ && sub_index_builder_->seperator_is_key_plus_seq_) { - // then we need to apply it to all sub-index builders and reset - // flush_policy to point to Block Builder of sub_index_builder_ that store - // internal keys. + // We need to apply !seperator_is_key_plus_seq to all sub-index builders seperator_is_key_plus_seq_ = true; - flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( - table_opt_.metadata_block_size, table_opt_.block_size_deviation, - sub_index_builder_->index_block_builder_)); + // Would associate flush_policy with the appropriate builder, but it won't + // be used again with no more keys + flush_policy_.reset(); } - sub_index_last_key_ = std::string(*last_key_in_current_block); - entries_.push_back( - {sub_index_last_key_, - std::unique_ptr(sub_index_builder_)}); - sub_index_builder_ = nullptr; + entries_.back().key.assign(sep.data(), sep.size()); + assert(entries_.back().value == nullptr); + std::swap(entries_.back().value, sub_index_builder_); cut_filter_block = true; + return sep; } else { // apply flush policy only to non-empty sub_index_builder_ if (sub_index_builder_ != nullptr) { @@ -214,31 +221,33 @@ void PartitionedIndexBuilder::AddIndexEntry( block_handle.EncodeTo(&handle_encoding); bool do_flush = partition_cut_requested_ || - flush_policy_->Update(*last_key_in_current_block, handle_encoding); + flush_policy_->Update(last_key_in_current_block, handle_encoding); if (do_flush) { - entries_.push_back( - {sub_index_last_key_, - std::unique_ptr(sub_index_builder_)}); + assert(entries_.back().value == nullptr); + std::swap(entries_.back().value, sub_index_builder_); cut_filter_block = true; - sub_index_builder_ = nullptr; } } if (sub_index_builder_ == nullptr) { MakeNewSubIndexBuilder(); + // Reserve next partition entry, where we will modify the key and + // eventually set the value + entries_.push_back({{}, {}}); } - sub_index_builder_->AddIndexEntry(last_key_in_current_block, - first_key_in_next_block, block_handle); - sub_index_last_key_ = std::string(*last_key_in_current_block); + auto sep = sub_index_builder_->AddIndexEntry( + last_key_in_current_block, first_key_in_next_block, block_handle, + separator_scratch); + entries_.back().key.assign(sep.data(), sep.size()); if (!seperator_is_key_plus_seq_ && sub_index_builder_->seperator_is_key_plus_seq_) { - // then we need to apply it to all sub-index builders and reset - // flush_policy to point to Block Builder of sub_index_builder_ that store - // internal keys. + // We need to apply !seperator_is_key_plus_seq to all sub-index builders seperator_is_key_plus_seq_ = true; + // And use a flush_policy with the appropriate builder flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( table_opt_.metadata_block_size, table_opt_.block_size_deviation, sub_index_builder_->index_block_builder_)); } + return sep; } } diff --git a/table/block_based/index_builder.h b/table/block_based/index_builder.h index be690d7997..99b348b2ff 100644 --- a/table/block_based/index_builder.h +++ b/table/block_based/index_builder.h @@ -57,19 +57,24 @@ class IndexBuilder { virtual ~IndexBuilder() = default; // Add a new index entry to index block. + // // To allow further optimization, we provide `last_key_in_current_block` and // `first_key_in_next_block`, based on which the specific implementation can // determine the best index key to be used for the index block. // Called before the OnKeyAdded() call for first_key_in_next_block. - // @last_key_in_current_block: this parameter maybe overridden with the value - // "substitute key". + // @last_key_in_current_block: TODO lifetime details // @first_key_in_next_block: it will be nullptr if the entry being added is // the last one in the table - // + // @separator_scratch: a scratch buffer to back a computed separator between + // those, as needed. May be modified on each call. + // @return: the key or separator stored in the index, which could be + // last_key_in_current_block or a computed separator backed by + // separator_scratch or last_key_in_current_block. // REQUIRES: Finish() has not yet been called. - virtual void AddIndexEntry(std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle) = 0; + virtual Slice AddIndexEntry(const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle, + std::string* separator_scratch) = 0; // This method will be called whenever a key is added. The subclasses may // override OnKeyAdded() if they need to collect additional information. @@ -181,29 +186,35 @@ class ShortenedIndexBuilder : public IndexBuilder { } } - void AddIndexEntry(std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle) override { + Slice AddIndexEntry(const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle, + std::string* separator_scratch) override { + Slice separator; if (first_key_in_next_block != nullptr) { if (shortening_mode_ != BlockBasedTableOptions::IndexShorteningMode::kNoShortening) { - FindShortestInternalKeySeparator(*comparator_->user_comparator(), - last_key_in_current_block, - *first_key_in_next_block); + separator = FindShortestInternalKeySeparator( + *comparator_->user_comparator(), last_key_in_current_block, + *first_key_in_next_block, separator_scratch); + } else { + separator = last_key_in_current_block; } if (!seperator_is_key_plus_seq_ && - ShouldUseKeyPlusSeqAsSeparator(*last_key_in_current_block, + ShouldUseKeyPlusSeqAsSeparator(last_key_in_current_block, *first_key_in_next_block)) { seperator_is_key_plus_seq_ = true; } } else { if (shortening_mode_ == BlockBasedTableOptions::IndexShorteningMode:: kShortenSeparatorsAndSuccessor) { - FindShortInternalKeySuccessor(*comparator_->user_comparator(), - last_key_in_current_block); + separator = FindShortInternalKeySuccessor( + *comparator_->user_comparator(), last_key_in_current_block, + separator_scratch); + } else { + separator = last_key_in_current_block; } } - auto sep = Slice(*last_key_in_current_block); assert(!include_first_key_ || !current_block_first_internal_key_.empty()); // When UDT should not be persisted, the index block builders take care of @@ -241,13 +252,15 @@ class ShortenedIndexBuilder : public IndexBuilder { // away the UDT from key in index block as data block does the same thing. // What are the implications if a "FindShortInternalKeySuccessor" // optimization is provided. - index_block_builder_.Add(sep, encoded_entry, &delta_encoded_entry_slice); + index_block_builder_.Add(separator, encoded_entry, + &delta_encoded_entry_slice); if (!seperator_is_key_plus_seq_) { - index_block_builder_without_seq_.Add(ExtractUserKey(sep), encoded_entry, - &delta_encoded_entry_slice); + index_block_builder_without_seq_.Add( + ExtractUserKey(separator), encoded_entry, &delta_encoded_entry_slice); } current_block_first_internal_key_.clear(); + return separator; } using IndexBuilder::Finish; @@ -271,12 +284,14 @@ class ShortenedIndexBuilder : public IndexBuilder { // Changes *key to a short string >= *key. // - static void FindShortestInternalKeySeparator(const Comparator& comparator, - std::string* start, - const Slice& limit); + static Slice FindShortestInternalKeySeparator(const Comparator& comparator, + const Slice& start, + const Slice& limit, + std::string* scratch); - static void FindShortInternalKeySuccessor(const Comparator& comparator, - std::string* key); + static Slice FindShortInternalKeySuccessor(const Comparator& comparator, + const Slice& key, + std::string* scratch); friend class PartitionedIndexBuilder; @@ -333,12 +348,14 @@ class HashIndexBuilder : public IndexBuilder { ts_sz, persist_user_defined_timestamps), hash_key_extractor_(hash_key_extractor) {} - void AddIndexEntry(std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle) override { + Slice AddIndexEntry(const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle, + std::string* separator_scratch) override { ++current_restart_index_; - primary_index_builder_.AddIndexEntry(last_key_in_current_block, - first_key_in_next_block, block_handle); + return primary_index_builder_.AddIndexEntry( + last_key_in_current_block, first_key_in_next_block, block_handle, + separator_scratch); } void OnKeyAdded(const Slice& key) override { @@ -441,11 +458,10 @@ class PartitionedIndexBuilder : public IndexBuilder { bool use_value_delta_encoding, size_t ts_sz, bool persist_user_defined_timestamps); - ~PartitionedIndexBuilder() override; - - void AddIndexEntry(std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle) override; + Slice AddIndexEntry(const Slice& last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle, + std::string* separator_scratch) override; Status Finish(IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) override; @@ -463,7 +479,10 @@ class PartitionedIndexBuilder : public IndexBuilder { return false; } - std::string& GetPartitionKey() { return sub_index_last_key_; } + const std::string& GetPartitionKey() { + static const std::string kEmptyKey; + return entries_.empty() ? kEmptyKey : entries_.back().key; + } // Called when an external entity (such as filter partition builder) request // cutting the next partition @@ -489,13 +508,16 @@ class PartitionedIndexBuilder : public IndexBuilder { std::string key; std::unique_ptr value; }; - std::list entries_; // list of partitioned indexes and their keys + // List of partitioned indexes and their keys. Note that when + // sub_index_builder_ is not null (during construction), there + // will be a placeholder entry at the back of this list tracking + // the possible key for that next entry. + std::list entries_; BlockBuilder index_block_builder_; // top-level index builder BlockBuilder index_block_builder_without_seq_; // same for user keys // the active partition index builder - ShortenedIndexBuilder* sub_index_builder_; + std::unique_ptr sub_index_builder_; // the last key in the active partition index builder - std::string sub_index_last_key_; std::unique_ptr flush_policy_; // true if Finish is called once but not complete yet. bool finishing_indexes = false; diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 6c2f77a5ba..c2c5405cf1 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -108,8 +108,8 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock( if (filter_construction_status.ok()) { filter_construction_status = filter_bits_builder_->MaybePostVerify(filter); } - std::string& index_key = p_index_builder_->GetPartitionKey(); - filters.push_back({index_key, std::move(filter_data), filter}); + filters.push_back( + {p_index_builder_->GetPartitionKey(), std::move(filter_data), filter}); if (!filter_construction_status.ok() && partitioned_filters_construction_status_.ok()) { partitioned_filters_construction_status_ = filter_construction_status; diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index 1da7ed5354..8d2dd62c3b 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -305,7 +305,8 @@ class PartitionedFilterBlockTest std::string key = std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep()); BlockHandle dont_care_block_handle(1, 1); - builder->AddIndexEntry(&key, nullptr, dont_care_block_handle); + std::string scratch; + builder->AddIndexEntry(key, nullptr, dont_care_block_handle, &scratch); } void CutABlock(PartitionedIndexBuilder* builder, const std::string& user_key, @@ -317,7 +318,8 @@ class PartitionedFilterBlockTest *InternalKey(next_user_key, 0, ValueType::kTypeValue).rep()); BlockHandle dont_care_block_handle(1, 1); Slice slice = Slice(next_key.data(), next_key.size()); - builder->AddIndexEntry(&key, &slice, dont_care_block_handle); + std::string scratch; + builder->AddIndexEntry(key, &slice, dont_care_block_handle, &scratch); } int CountNumOfIndexPartitions(PartitionedIndexBuilder* builder) {