mirror of https://github.com/facebook/rocksdb.git
Deallocate payload of BlockBasedTableBuilder::Rep::FilterBlockBuilder earlier for Full/PartitionedFilter (#9070)
Summary: Note: This PR is the 1st part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073). Context: Previously, the payload (i.e., the filter data) within the `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object was not deallocated until `BlockBasedTableBuilder` was deallocated, even though it is no longer useful after its related `filter_content` has been written. - Transferred the payload (i.e., the filter data) out of the `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object - For PartitionedFilter: - Unified the `filters` and `filter_gc` lists into one `std::deque<FilterEntry> filters` by adding a new field `last_filter_entry_key` and storing the `std::unique_ptr filter_data` with the `Slice filter` in the same entry - Reset `last_filter_data` in the case where `filters` is empty, which should be safe, as by then we would have finished using all the `Slice filter` - Deallocated the payload by letting it go out of scope as soon as we're done using the `filter_content` associated with the payload - This is an internal interface change at the level of `FilterBlockBuilder::Finish()`, which leads to touching the inherited interface in `BlockBasedFilterBlockBuilder`. For that builder, however, the payload transfer is ignored. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9070 Test Plan: - The main focus is to catch segmentation faults during `FilterBlockBuilder::Finish()` and `BlockBasedTableBuilder::Finish()`, as well as interface mismatches. Relying on existing CI tests is enough, as `assert(false)` was temporarily added to verify that the new ownership-transfer logic indeed runs Reviewed By: pdillinger Differential Revision: D31884933 Pulled By: hx235 fbshipit-source-id: f73ecfbea13788d4fc058013ace27230110b52f4
This commit is contained in:
parent
a64c8ca7a8
commit
1ababeb76a
|
@ -25,6 +25,9 @@
|
|||
* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like the directory fsync reason in `DirFsyncOptions`. File systems like btrfs use that to skip the directory fsync when creating a new file or, when renaming a file, to fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%.
|
||||
* `DB::Open()` is not going to be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
|
||||
|
||||
### Performance Improvements
|
||||
* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070)
|
||||
|
||||
## 6.26.0 (2021-10-20)
|
||||
### Bug Fixes
|
||||
* Fixes a bug in direct IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets.
|
||||
|
|
|
@ -117,9 +117,10 @@ inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
|
|||
}
|
||||
}
|
||||
|
||||
Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
|
||||
Status* status) {
|
||||
// In this impl we ignore BlockHandle
|
||||
Slice BlockBasedFilterBlockBuilder::Finish(
|
||||
const BlockHandle& /*tmp*/, Status* status,
|
||||
std::unique_ptr<const char[]>* /* filter_data */) {
|
||||
// In this impl we ignore BlockHandle and filter_data
|
||||
*status = Status::OK();
|
||||
|
||||
if (!start_.empty()) {
|
||||
|
|
|
@ -49,7 +49,9 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
|
|||
return start_.empty() && filter_offsets_.empty();
|
||||
}
|
||||
virtual size_t EstimateEntriesAdded() override;
|
||||
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
|
||||
virtual Slice Finish(
|
||||
const BlockHandle& tmp, Status* status,
|
||||
std::unique_ptr<const char[]>* filter_data = nullptr) override;
|
||||
using FilterBlockBuilder::Finish;
|
||||
|
||||
private:
|
||||
|
|
|
@ -1542,8 +1542,17 @@ void BlockBasedTableBuilder::WriteFilterBlock(
|
|||
rep_->filter_builder->EstimateEntriesAdded();
|
||||
Status s = Status::Incomplete();
|
||||
while (ok() && s.IsIncomplete()) {
|
||||
// filter_data is used to store the transferred filter data payload from
|
||||
// FilterBlockBuilder and deallocate the payload by going out of scope.
|
||||
// Otherwise, the payload will unnecessarily remain until
|
||||
// BlockBasedTableBuilder is deallocated.
|
||||
//
|
||||
// See FilterBlockBuilder::Finish() for more on the difference in
|
||||
// transferred filter data payload among different FilterBlockBuilder
|
||||
// subtypes.
|
||||
std::unique_ptr<const char[]> filter_data;
|
||||
Slice filter_content =
|
||||
rep_->filter_builder->Finish(filter_block_handle, &s);
|
||||
rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data);
|
||||
assert(s.ok() || s.IsIncomplete());
|
||||
rep_->props.filter_size += filter_content.size();
|
||||
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle,
|
||||
|
|
|
@ -73,7 +73,14 @@ class FilterBlockBuilder {
|
|||
assert(dont_care_status.ok());
|
||||
return ret;
|
||||
}
|
||||
virtual Slice Finish(const BlockHandle& tmp, Status* status) = 0;
|
||||
// If filter_data is not nullptr, Finish() may transfer ownership of
|
||||
// underlying filter data to the caller, so that it can be freed as soon as
|
||||
// possible.
|
||||
virtual Slice Finish(
|
||||
const BlockHandle& tmp /* only used in PartitionedFilterBlock as
|
||||
last_partition_block_handle */
|
||||
,
|
||||
Status* status, std::unique_ptr<const char[]>* filter_data = nullptr) = 0;
|
||||
};
|
||||
|
||||
// A FilterBlockReader is used to parse filter from SST table.
|
||||
|
|
|
@ -101,14 +101,17 @@ void FullFilterBlockBuilder::Reset() {
|
|||
last_prefix_recorded_ = false;
|
||||
}
|
||||
|
||||
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
|
||||
Status* status) {
|
||||
Slice FullFilterBlockBuilder::Finish(
|
||||
const BlockHandle& /*tmp*/, Status* status,
|
||||
std::unique_ptr<const char[]>* filter_data) {
|
||||
Reset();
|
||||
// In this impl we ignore BlockHandle
|
||||
*status = Status::OK();
|
||||
if (any_added_) {
|
||||
any_added_ = false;
|
||||
return filter_bits_builder_->Finish(&filter_data_);
|
||||
Slice filter_content =
|
||||
filter_bits_builder_->Finish(filter_data ? filter_data : &filter_data_);
|
||||
return filter_content;
|
||||
}
|
||||
return Slice();
|
||||
}
|
||||
|
|
|
@ -54,7 +54,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
|||
virtual void Add(const Slice& key_without_ts) override;
|
||||
virtual bool IsEmpty() const override { return !any_added_; }
|
||||
virtual size_t EstimateEntriesAdded() override;
|
||||
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
|
||||
virtual Slice Finish(
|
||||
const BlockHandle& tmp, Status* status,
|
||||
std::unique_ptr<const char[]>* filter_data = nullptr) override;
|
||||
using FilterBlockBuilder::Finish;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -73,7 +73,6 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
|
|||
if (!p_index_builder_->ShouldCutFilterBlock()) {
|
||||
return;
|
||||
}
|
||||
filter_gc.push_back(std::unique_ptr<const char[]>(nullptr));
|
||||
|
||||
// Add the prefix of the next key before finishing the partition without
|
||||
// updating last_prefix_str_. This hack fixes a bug with format_version=3
|
||||
|
@ -88,9 +87,10 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
|
|||
}
|
||||
|
||||
total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
|
||||
Slice filter = filter_bits_builder_->Finish(&filter_gc.back());
|
||||
std::unique_ptr<const char[]> filter_data;
|
||||
Slice filter = filter_bits_builder_->Finish(&filter_data);
|
||||
std::string& index_key = p_index_builder_->GetPartitionKey();
|
||||
filters.push_back({index_key, filter});
|
||||
filters.push_back({index_key, filter, std::move(filter_data)});
|
||||
keys_added_to_partition_ = 0;
|
||||
Reset();
|
||||
}
|
||||
|
@ -110,10 +110,10 @@ size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() {
|
|||
}
|
||||
|
||||
Slice PartitionedFilterBlockBuilder::Finish(
|
||||
const BlockHandle& last_partition_block_handle, Status* status) {
|
||||
const BlockHandle& last_partition_block_handle, Status* status,
|
||||
std::unique_ptr<const char[]>* filter_data) {
|
||||
if (finishing_filters == true) {
|
||||
// Record the handle of the last written filter block in the index
|
||||
FilterEntry& last_entry = filters.front();
|
||||
std::string handle_encoding;
|
||||
last_partition_block_handle.EncodeTo(&handle_encoding);
|
||||
std::string handle_delta_encoding;
|
||||
|
@ -122,14 +122,13 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||
last_partition_block_handle.size() - last_encoded_handle_.size());
|
||||
last_encoded_handle_ = last_partition_block_handle;
|
||||
const Slice handle_delta_encoding_slice(handle_delta_encoding);
|
||||
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
|
||||
index_on_filter_block_builder_.Add(last_filter_entry_key, handle_encoding,
|
||||
&handle_delta_encoding_slice);
|
||||
if (!p_index_builder_->seperator_is_key_plus_seq()) {
|
||||
index_on_filter_block_builder_without_seq_.Add(
|
||||
ExtractUserKey(last_entry.key), handle_encoding,
|
||||
ExtractUserKey(last_filter_entry_key), handle_encoding,
|
||||
&handle_delta_encoding_slice);
|
||||
}
|
||||
filters.pop_front();
|
||||
} else {
|
||||
MaybeCutAFilterBlock(nullptr);
|
||||
}
|
||||
|
@ -137,6 +136,7 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||
// partitions
|
||||
if (UNLIKELY(filters.empty())) {
|
||||
*status = Status::OK();
|
||||
last_filter_data.reset();
|
||||
if (finishing_filters) {
|
||||
// Simplest to just add them all at the end
|
||||
total_added_in_built_ = 0;
|
||||
|
@ -154,7 +154,15 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||
// indicate we expect more calls to Finish
|
||||
*status = Status::Incomplete();
|
||||
finishing_filters = true;
|
||||
return filters.front().filter;
|
||||
|
||||
last_filter_entry_key = filters.front().key;
|
||||
Slice filter = filters.front().filter;
|
||||
last_filter_data = std::move(filters.front().filter_data);
|
||||
if (filter_data != nullptr) {
|
||||
*filter_data = std::move(last_filter_data);
|
||||
}
|
||||
filters.pop_front();
|
||||
return filter;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
@ -36,8 +37,9 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||
void Add(const Slice& key) override;
|
||||
size_t EstimateEntriesAdded() override;
|
||||
|
||||
virtual Slice Finish(const BlockHandle& last_partition_block_handle,
|
||||
Status* status) override;
|
||||
virtual Slice Finish(
|
||||
const BlockHandle& last_partition_block_handle, Status* status,
|
||||
std::unique_ptr<const char[]>* filter_data = nullptr) override;
|
||||
|
||||
private:
|
||||
// Filter data
|
||||
|
@ -47,10 +49,13 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||
struct FilterEntry {
|
||||
std::string key;
|
||||
Slice filter;
|
||||
std::unique_ptr<const char[]> filter_data;
|
||||
};
|
||||
std::list<FilterEntry> filters; // list of partitioned indexes and their keys
|
||||
std::deque<FilterEntry> filters; // list of partitioned filters and keys used
|
||||
// in building the index
|
||||
std::string last_filter_entry_key;
|
||||
std::unique_ptr<const char[]> last_filter_data;
|
||||
std::unique_ptr<IndexBuilder> value;
|
||||
std::vector<std::unique_ptr<const char[]>> filter_gc;
|
||||
bool finishing_filters =
|
||||
false; // true if Finish is called once but not complete yet.
|
||||
// The policy of when to cut a filter block and Finish it
|
||||
|
|
|
@ -136,8 +136,9 @@ class PartitionedFilterBlockTest
|
|||
BlockHandle bh;
|
||||
Status status;
|
||||
Slice slice;
|
||||
std::unique_ptr<const char[]> filter_data;
|
||||
do {
|
||||
slice = builder->Finish(bh, &status);
|
||||
slice = builder->Finish(bh, &status, &filter_data);
|
||||
bh = Write(slice);
|
||||
} while (status.IsIncomplete());
|
||||
|
||||
|
|
Loading…
Reference in New Issue