mirror of https://github.com/facebook/rocksdb.git
Change the semantics of blob_garbage_collection_force_threshold to provide better control over space amp (#13022)
Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/13022 Currently, `blob_garbage_collection_force_threshold` applies to the oldest batch of blob files, which is typically only a small subset of the blob files currently eligible for garbage collection. This can result in a form of head-of-line blocking: no GC-triggered compactions will be scheduled if the oldest batch does not currently exceed the threshold, even if a lot of higher-numbered blob files do. This can in turn lead to high space amplification that exceeds the soft bound implicit in the force threshold (e.g. 50% would suggest a space amp of <2 and 75% would imply a space amp of <4). The patch changes the semantics of this configuration threshold to apply to the entire set of blob files that are eligible for garbage collection based on `blob_garbage_collection_age_cutoff`. This provides more intuitive semantics for the option and can provide a better write amp/space amp trade-off. (Note that GC-triggered compactions still pick the same SST files as before, so triggered GC still targets the oldest blob files.) Reviewed By: jowlyzhang Differential Revision: D62977860 fbshipit-source-id: a999f31fe9cdda313de513f0e7a6fc707424d4a3
This commit is contained in:
parent
98c33cb8e3
commit
54ace7f340
|
@ -3772,14 +3772,17 @@ void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC(
|
|||
return;
|
||||
}
|
||||
|
||||
// Compute the sum of total and garbage bytes over the oldest batch of blob
|
||||
// files. The oldest batch is defined as the set of blob files which are
|
||||
// kept alive by the same SSTs as the very oldest one. Here is a toy example.
|
||||
// Let's assume we have three SSTs 1, 2, and 3, and four blob files 10, 11,
|
||||
// 12, and 13. Also, let's say SSTs 1 and 2 both rely on blob file 10 and
|
||||
// potentially some higher-numbered ones, while SST 3 relies on blob file 12
|
||||
// and potentially some higher-numbered ones. Then, the SST to oldest blob
|
||||
// file mapping is as follows:
|
||||
// Compute the sum of total and garbage bytes over the batch of blob files
|
||||
// currently eligible for garbage collection based on
|
||||
// blob_garbage_collection_age_cutoff, and if the garbage ratio exceeds
|
||||
// blob_garbage_collection_force_threshold, schedule compaction for the
|
||||
// SST files that reference the oldest batch of blob files. Here is a toy
|
||||
// example. Let's assume we have three SSTs 1, 2, and 3, and four blob files
|
||||
// 10, 11, 12, and 13, which correspond to the range that is eligible for GC
|
||||
// and satisfy the garbage ratio threshold. Also, let's say SSTs 1 and 2 both
|
||||
// rely on blob file 10 and potentially some higher-numbered ones, while SST 3
|
||||
// relies on blob file 12 and potentially some higher-numbered ones. Then, the
|
||||
// SST to oldest blob file mapping is as follows:
|
||||
//
|
||||
// SST file number Oldest blob file number
|
||||
// 1 10
|
||||
|
@ -3797,11 +3800,6 @@ void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC(
|
|||
//
|
||||
// Then, the oldest batch of blob files consists of blob files 10 and 11,
|
||||
// and we can get rid of them by forcing the compaction of SSTs 1 and 2.
|
||||
//
|
||||
// Note that the overall ratio of garbage computed for the batch has to exceed
|
||||
// blob_garbage_collection_force_threshold and the entire batch has to be
|
||||
// eligible for GC according to blob_garbage_collection_age_cutoff in order
|
||||
// for us to schedule any compactions.
|
||||
const auto& oldest_meta = blob_files_.front();
|
||||
assert(oldest_meta);
|
||||
|
||||
|
@ -3818,25 +3816,10 @@ void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC(
|
|||
const auto& meta = blob_files_[count];
|
||||
assert(meta);
|
||||
|
||||
if (!meta->GetLinkedSsts().empty()) {
|
||||
// Found the beginning of the next batch of blob files
|
||||
break;
|
||||
}
|
||||
|
||||
sum_total_blob_bytes += meta->GetTotalBlobBytes();
|
||||
sum_garbage_blob_bytes += meta->GetGarbageBlobBytes();
|
||||
}
|
||||
|
||||
if (count < blob_files_.size()) {
|
||||
const auto& meta = blob_files_[count];
|
||||
assert(meta);
|
||||
|
||||
if (meta->GetLinkedSsts().empty()) {
|
||||
// Some files in the oldest batch are not eligible for GC
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (sum_garbage_blob_bytes <
|
||||
blob_garbage_collection_force_threshold * sum_total_blob_bytes) {
|
||||
return;
|
||||
|
|
|
@ -727,20 +727,7 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) {
|
|||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Part of the oldest batch of blob files (specifically, #12 and #13) is
|
||||
// ineligible for GC due to the age cutoff
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.5;
|
||||
constexpr double force_threshold = 0.0;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(
|
||||
age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true);
|
||||
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff but its overall garbage ratio
|
||||
// is below threshold
|
||||
// Overall garbage ratio of eligible files is below threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 1.0;
|
||||
|
@ -751,8 +738,7 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) {
|
|||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff and its overall garbage ratio
|
||||
// meets threshold
|
||||
// Overall garbage ratio of eligible files meets threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 1.0;
|
||||
|
@ -878,20 +864,7 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) {
|
|||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Part of the oldest batch of blob files (specifically, the second file) is
|
||||
// ineligible for GC due to the age cutoff
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.25;
|
||||
constexpr double force_threshold = 0.0;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(
|
||||
age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true);
|
||||
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff but its overall garbage ratio
|
||||
// is below threshold
|
||||
// Overall garbage ratio of eligible files is below threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.5;
|
||||
|
@ -902,8 +875,7 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) {
|
|||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff and its overall garbage ratio
|
||||
// meets threshold
|
||||
// Overall garbage ratio of eligible files meets threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.5;
|
||||
|
@ -929,48 +901,6 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) {
|
|||
ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
|
||||
ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]);
|
||||
}
|
||||
|
||||
// Now try the last two cases again with a greater than necessary age cutoff
|
||||
|
||||
// Oldest batch is eligible based on age cutoff but its overall garbage ratio
|
||||
// is below threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.75;
|
||||
constexpr double force_threshold = 0.6;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(
|
||||
age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true);
|
||||
|
||||
ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty());
|
||||
}
|
||||
|
||||
// Oldest batch is eligible based on age cutoff and its overall garbage ratio
|
||||
// meets threshold
|
||||
|
||||
{
|
||||
constexpr double age_cutoff = 0.75;
|
||||
constexpr double force_threshold = 0.5;
|
||||
vstorage_.ComputeFilesMarkedForForcedBlobGC(
|
||||
age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true);
|
||||
|
||||
auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC();
|
||||
ASSERT_EQ(ssts_to_be_compacted.size(), 2);
|
||||
|
||||
std::sort(ssts_to_be_compacted.begin(), ssts_to_be_compacted.end(),
|
||||
[](const std::pair<int, FileMetaData*>& lhs,
|
||||
const std::pair<int, FileMetaData*>& rhs) {
|
||||
assert(lhs.second);
|
||||
assert(rhs.second);
|
||||
return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber();
|
||||
});
|
||||
|
||||
const autovector<std::pair<int, FileMetaData*>>
|
||||
expected_ssts_to_be_compacted{{level, level_files[0]},
|
||||
{level, level_files[1]}};
|
||||
|
||||
ASSERT_EQ(ssts_to_be_compacted[0], expected_ssts_to_be_compacted[0]);
|
||||
ASSERT_EQ(ssts_to_be_compacted[1], expected_ssts_to_be_compacted[1]);
|
||||
}
|
||||
}
|
||||
|
||||
class VersionStorageInfoTimestampTest : public VersionStorageInfoTestBase {
|
||||
|
|
|
@ -497,7 +497,7 @@ DEFINE_double(blob_garbage_collection_force_threshold,
|
|||
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
||||
.blob_garbage_collection_force_threshold,
|
||||
"[Integrated BlobDB] The threshold for the ratio of garbage in "
|
||||
"the oldest blob files for forcing garbage collection.");
|
||||
"the eligible blob files for forcing garbage collection.");
|
||||
|
||||
DEFINE_uint64(blob_compaction_readahead_size,
|
||||
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
||||
|
|
|
@ -929,13 +929,12 @@ struct AdvancedColumnFamilyOptions {
|
|||
// Dynamically changeable through the SetOptions() API
|
||||
double blob_garbage_collection_age_cutoff = 0.25;
|
||||
|
||||
// If the ratio of garbage in the oldest blob files exceeds this threshold,
|
||||
// targeted compactions are scheduled in order to force garbage collecting
|
||||
// the blob files in question, assuming they are all eligible based on the
|
||||
// value of blob_garbage_collection_age_cutoff above. This option is
|
||||
// currently only supported with leveled compactions.
|
||||
// Note that enable_blob_garbage_collection has to be set in order for this
|
||||
// option to have any effect.
|
||||
// If the ratio of garbage in the blob files currently eligible for garbage
|
||||
// collection exceeds this threshold, targeted compactions are scheduled in
|
||||
// order to force garbage collecting the oldest blob files. This option is
|
||||
// currently only supported with leveled compactions. Note that
|
||||
// enable_blob_garbage_collection has to be set in order for this option to
|
||||
// have any effect.
|
||||
//
|
||||
// Default: 1.0
|
||||
//
|
||||
|
|
|
@ -733,11 +733,10 @@ public interface AdvancedMutableColumnFamilyOptionsInterface<
|
|||
double blobGarbageCollectionAgeCutoff();
|
||||
|
||||
/**
|
||||
* If the ratio of garbage in the oldest blob files exceeds this threshold,
|
||||
* targeted compactions are scheduled in order to force garbage collecting
|
||||
* the blob files in question, assuming they are all eligible based on the
|
||||
* value of {@link #blobGarbageCollectionAgeCutoff} above. This option is
|
||||
* currently only supported with leveled compactions.
|
||||
* If the ratio of garbage in the blob files currently eligible for garbage
|
||||
* collection exceeds this threshold, targeted compactions are scheduled in
|
||||
* order to force garbage collecting the oldest blob files. This option is
|
||||
* currently only supported with leveled compactions.
|
||||
* <p>
|
||||
* Note that {@link #enableBlobGarbageCollection} has to be set in order for this
|
||||
* option to have any effect.
|
||||
|
|
|
@ -1204,11 +1204,10 @@ public class ColumnFamilyOptions
|
|||
}
|
||||
|
||||
/**
|
||||
* If the ratio of garbage in the oldest blob files exceeds this threshold,
|
||||
* targeted compactions are scheduled in order to force garbage collecting
|
||||
* the blob files in question, assuming they are all eligible based on the
|
||||
* value of {@link #blobGarbageCollectionAgeCutoff} above. This option is
|
||||
* currently only supported with leveled compactions.
|
||||
* If the ratio of garbage in the blob files currently eligible for garbage
|
||||
* collection exceeds this threshold, targeted compactions are scheduled in
|
||||
* order to force garbage collecting the oldest blob files. This option is
|
||||
* currently only supported with leveled compactions.
|
||||
* <p>
|
||||
* Note that {@link #enableBlobGarbageCollection} has to be set in order for this
|
||||
* option to have any effect.
|
||||
|
|
|
@ -1104,7 +1104,7 @@ DEFINE_double(blob_garbage_collection_force_threshold,
|
|||
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
||||
.blob_garbage_collection_force_threshold,
|
||||
"[Integrated BlobDB] The threshold for the ratio of garbage in "
|
||||
"the oldest blob files for forcing garbage collection.");
|
||||
"the eligible blob files for forcing garbage collection.");
|
||||
|
||||
DEFINE_uint64(blob_compaction_readahead_size,
|
||||
ROCKSDB_NAMESPACE::AdvancedColumnFamilyOptions()
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Changed the semantics of the BlobDB configuration option `blob_garbage_collection_force_threshold` to define a threshold for the overall garbage ratio of all blob files currently eligible for garbage collection (according to `blob_garbage_collection_age_cutoff`). This can provide better control over space amplification at the cost of slightly higher write amplification.
|
Loading…
Reference in New Issue