Deallocate payload of BlockBasedTableBuilder::Rep::FilterBlockBuilder earlier for Full/PartitionedFilter (#9070)

Summary:
Note: This PR is the 1st part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073).

Context:
Previously, the payload (i.e, filter data) within `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object is not deallocated until `BlockBasedTableBuilder` is deallocated, despite it is no longer useful after its related `filter_content` being written.
- Transferred the payload (i.e, the filter data) out of `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object
- For PartitionedFilter:
   - Unified `filters` and `filter_gc` lists into one `std::deque<FilterEntry> filters` by adding a new field `last_filter_entry_key` and storing the `std::unique_ptr filter_data` with the `Slice filter` in the same entry
   - Reset `last_filter_data` in the case where `filters` is empty, which should be as by then we would've finish using all the `Slice filter`
- Deallocated the payload by going out of scope as soon as we're done with using the `filter_content` associated with the payload
- This is an internal interface change at the level of `FilterBlockBuilder::Finish()`, which leads to touching the inherited interface in `BlockBasedFilterBlockBuilder`. But for that, the payload transferring is ignored.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9070

Test Plan: - The main focus is to catch segment fault error during `FilterBlockBuilder::Finish()` and `BlockBasedTableBuilder::Finish()` and interface mismatch. Relying on existing CI tests is enough as `assert(false)` was temporarily added to verify the new logic of transferring ownership indeed run

Reviewed By: pdillinger

Differential Revision: D31884933

Pulled By: hx235

fbshipit-source-id: f73ecfbea13788d4fc058013ace27230110b52f4
This commit is contained in:
Hui Xiao 2021-11-04 13:29:09 -07:00 committed by Facebook GitHub Bot
parent a64c8ca7a8
commit 1ababeb76a
10 changed files with 65 additions and 24 deletions

View File

@ -25,6 +25,9 @@
* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%.
* `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
### Performance Improvements
* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070)
## 6.26.0 (2021-10-20)
### Bug Fixes
* Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets.

View File

@ -117,9 +117,10 @@ inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
}
}
Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) {
// In this impl we ignore BlockHandle
Slice BlockBasedFilterBlockBuilder::Finish(
const BlockHandle& /*tmp*/, Status* status,
std::unique_ptr<const char[]>* /* filter_data */) {
// In this impl we ignore BlockHandle and filter_data
*status = Status::OK();
if (!start_.empty()) {

View File

@ -49,7 +49,9 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
return start_.empty() && filter_offsets_.empty();
}
virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
virtual Slice Finish(
const BlockHandle& tmp, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
using FilterBlockBuilder::Finish;
private:

View File

@ -1542,8 +1542,17 @@ void BlockBasedTableBuilder::WriteFilterBlock(
rep_->filter_builder->EstimateEntriesAdded();
Status s = Status::Incomplete();
while (ok() && s.IsIncomplete()) {
// filter_data is used to store the transferred filter data payload from
// FilterBlockBuilder and deallocate the payload by going out of scope.
// Otherwise, the payload will unnecessarily remain until
// BlockBasedTableBuilder is deallocated.
//
// See FilterBlockBuilder::Finish() for more on the difference in
// transferred filter data payload among different FilterBlockBuilder
// subtypes.
std::unique_ptr<const char[]> filter_data;
Slice filter_content =
rep_->filter_builder->Finish(filter_block_handle, &s);
rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data);
assert(s.ok() || s.IsIncomplete());
rep_->props.filter_size += filter_content.size();
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle,

View File

@ -73,7 +73,14 @@ class FilterBlockBuilder {
assert(dont_care_status.ok());
return ret;
}
virtual Slice Finish(const BlockHandle& tmp, Status* status) = 0;
// If filter_data is not nullptr, Finish() may transfer ownership of
// underlying filter data to the caller, so that it can be freed as soon as
// possible.
virtual Slice Finish(
const BlockHandle& tmp /* only used in PartitionedFilterBlock as
last_partition_block_handle */
,
Status* status, std::unique_ptr<const char[]>* filter_data = nullptr) = 0;
};
// A FilterBlockReader is used to parse filter from SST table.

View File

@ -101,14 +101,17 @@ void FullFilterBlockBuilder::Reset() {
last_prefix_recorded_ = false;
}
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) {
Slice FullFilterBlockBuilder::Finish(
const BlockHandle& /*tmp*/, Status* status,
std::unique_ptr<const char[]>* filter_data) {
Reset();
// In this impl we ignore BlockHandle
*status = Status::OK();
if (any_added_) {
any_added_ = false;
return filter_bits_builder_->Finish(&filter_data_);
Slice filter_content =
filter_bits_builder_->Finish(filter_data ? filter_data : &filter_data_);
return filter_content;
}
return Slice();
}

View File

@ -54,7 +54,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
virtual void Add(const Slice& key_without_ts) override;
virtual bool IsEmpty() const override { return !any_added_; }
virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
virtual Slice Finish(
const BlockHandle& tmp, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
using FilterBlockBuilder::Finish;
protected:

View File

@ -73,7 +73,6 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
if (!p_index_builder_->ShouldCutFilterBlock()) {
return;
}
filter_gc.push_back(std::unique_ptr<const char[]>(nullptr));
// Add the prefix of the next key before finishing the partition without
// updating last_prefix_str_. This hack, fixes a bug with format_verison=3
@ -88,9 +87,10 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
}
total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
Slice filter = filter_bits_builder_->Finish(&filter_gc.back());
std::unique_ptr<const char[]> filter_data;
Slice filter = filter_bits_builder_->Finish(&filter_data);
std::string& index_key = p_index_builder_->GetPartitionKey();
filters.push_back({index_key, filter});
filters.push_back({index_key, filter, std::move(filter_data)});
keys_added_to_partition_ = 0;
Reset();
}
@ -110,10 +110,10 @@ size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() {
}
Slice PartitionedFilterBlockBuilder::Finish(
const BlockHandle& last_partition_block_handle, Status* status) {
const BlockHandle& last_partition_block_handle, Status* status,
std::unique_ptr<const char[]>* filter_data) {
if (finishing_filters == true) {
// Record the handle of the last written filter block in the index
FilterEntry& last_entry = filters.front();
std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding);
std::string handle_delta_encoding;
@ -122,14 +122,13 @@ Slice PartitionedFilterBlockBuilder::Finish(
last_partition_block_handle.size() - last_encoded_handle_.size());
last_encoded_handle_ = last_partition_block_handle;
const Slice handle_delta_encoding_slice(handle_delta_encoding);
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
index_on_filter_block_builder_.Add(last_filter_entry_key, handle_encoding,
&handle_delta_encoding_slice);
if (!p_index_builder_->seperator_is_key_plus_seq()) {
index_on_filter_block_builder_without_seq_.Add(
ExtractUserKey(last_entry.key), handle_encoding,
ExtractUserKey(last_filter_entry_key), handle_encoding,
&handle_delta_encoding_slice);
}
filters.pop_front();
} else {
MaybeCutAFilterBlock(nullptr);
}
@ -137,6 +136,7 @@ Slice PartitionedFilterBlockBuilder::Finish(
// partitions
if (UNLIKELY(filters.empty())) {
*status = Status::OK();
last_filter_data.reset();
if (finishing_filters) {
// Simplest to just add them all at the end
total_added_in_built_ = 0;
@ -154,7 +154,15 @@ Slice PartitionedFilterBlockBuilder::Finish(
// indicate we expect more calls to Finish
*status = Status::Incomplete();
finishing_filters = true;
return filters.front().filter;
last_filter_entry_key = filters.front().key;
Slice filter = filters.front().filter;
last_filter_data = std::move(filters.front().filter_data);
if (filter_data != nullptr) {
*filter_data = std::move(last_filter_data);
}
filters.pop_front();
return filter;
}
}

View File

@ -5,6 +5,7 @@
#pragma once
#include <deque>
#include <list>
#include <string>
#include <unordered_map>
@ -36,8 +37,9 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
void Add(const Slice& key) override;
size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& last_partition_block_handle,
Status* status) override;
virtual Slice Finish(
const BlockHandle& last_partition_block_handle, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
private:
// Filter data
@ -47,10 +49,13 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
struct FilterEntry {
std::string key;
Slice filter;
std::unique_ptr<const char[]> filter_data;
};
std::list<FilterEntry> filters; // list of partitioned indexes and their keys
std::deque<FilterEntry> filters; // list of partitioned filters and keys used
// in building the index
std::string last_filter_entry_key;
std::unique_ptr<const char[]> last_filter_data;
std::unique_ptr<IndexBuilder> value;
std::vector<std::unique_ptr<const char[]>> filter_gc;
bool finishing_filters =
false; // true if Finish is called once but not complete yet.
// The policy of when cut a filter block and Finish it

View File

@ -136,8 +136,9 @@ class PartitionedFilterBlockTest
BlockHandle bh;
Status status;
Slice slice;
std::unique_ptr<const char[]> filter_data;
do {
slice = builder->Finish(bh, &status);
slice = builder->Finish(bh, &status, &filter_data);
bh = Write(slice);
} while (status.IsIncomplete());