mirror of https://github.com/facebook/rocksdb.git
Include estimated bytes deleted by range tombstones in compensated file size (#10734)
Summary: Compensate file sizes in compaction picking so that files with range tombstones are preferred and get compacted down earlier, as they tend to delete a lot of data. This PR adds a `compensated_range_deletion_size` field in FileMetaData that is computed during Flush/Compaction and persisted in MANIFEST. This value is added to `compensated_file_size`, which is used for compaction picking. Currently, for a file in level L, `compensated_range_deletion_size` is set to the estimated bytes deleted by the range tombstones of this file in all levels > L. This helps to reduce space amp when data in older levels is covered by range tombstones in level L. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10734 Test Plan: - Added unit tests. - Ran a benchmark to check whether the above definition of `compensated_range_deletion_size` reduces space amp as intended, without affecting write amp too much. The experiment setup is favorable for this optimization: large range tombstones issued infrequently. Command used: ``` ./db_bench -benchmarks=fillrandom,waitforcompaction,stats,levelstats -use_existing_db=false -avoid_flush_during_recovery=true -write_buffer_size=33554432 -level_compaction_dynamic_level_bytes=true -max_background_jobs=8 -max_bytes_for_level_base=134217728 -target_file_size_base=33554432 -writes_per_range_tombstone=500000 -range_tombstone_width=5000000 -num=50000000 -benchmark_write_rate_limit=8388608 -threads=16 -duration=1800 --max_num_range_tombstones=1000000000 ``` In this experiment, each thread wrote 16 range tombstones over the duration of 30 minutes; each range tombstone has width 5M, which is 10% of the key space width. Results show that this PR generates a smaller DB size. 
Compaction stats from this PR: ``` Level Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ L0 2/0 31.54 MB 0.5 0.0 0.0 0.0 8.4 8.4 0.0 1.0 0.0 63.4 135.56 110.94 544 0.249 0 0 0.0 0.0 L4 3/0 96.55 MB 0.8 18.5 6.7 11.8 18.4 6.6 0.0 2.7 65.3 64.9 290.08 284.03 108 2.686 284M 1957K 0.0 0.0 L5 15/0 404.41 MB 1.0 19.1 7.7 11.4 18.8 7.4 0.3 2.5 66.6 65.7 292.93 285.34 220 1.332 293M 3808K 0.0 0.0 L6 143/0 4.12 GB 0.0 45.0 7.5 37.5 41.6 4.1 0.0 5.5 71.2 65.9 647.00 632.66 251 2.578 739M 47M 0.0 0.0 Sum 163/0 4.64 GB 0.0 82.6 21.9 60.7 87.2 26.5 0.3 10.4 61.9 65.4 1365.58 1312.97 1123 1.216 1318M 52M 0.0 0.0 ``` Compaction stats from main: ``` Level Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ L0 0/0 0.00 KB 0.0 0.0 0.0 0.0 8.4 8.4 0.0 1.0 0.0 60.5 142.12 115.89 569 0.250 0 0 0.0 0.0 L4 3/0 85.68 MB 1.0 17.7 6.8 10.9 17.6 6.7 0.0 2.6 62.7 62.3 289.05 281.79 112 2.581 272M 2309K 0.0 0.0 L5 11/0 293.73 MB 1.0 18.8 7.5 11.2 18.5 7.2 0.5 2.5 64.9 63.9 296.07 288.50 220 1.346 288M 4365K 0.0 0.0 L6 130/0 3.94 GB 0.0 51.5 7.6 43.9 47.9 3.9 0.0 6.3 67.2 62.4 784.95 765.92 258 3.042 848M 51M 0.0 0.0 Sum 144/0 4.31 GB 0.0 88.0 21.9 66.0 92.3 26.3 0.5 11.0 59.6 62.5 1512.19 1452.09 1159 1.305 1409M 58M 0.0 0.0``` Reviewed By: ajkr Differential Revision: D39834713 Pulled By: cbi42 fbshipit-source-id: fe9341040b8704a8fbb10cad5cf5c43e962c7e6b
This commit is contained in:
parent
02f2b20864
commit
cc6f323705
|
@ -71,8 +71,9 @@ Status BuildTable(
|
|||
int job_id, const Env::IOPriority io_priority,
|
||||
TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
|
||||
const std::string* full_history_ts_low,
|
||||
BlobFileCompletionCallback* blob_callback, uint64_t* num_input_entries,
|
||||
uint64_t* memtable_payload_bytes, uint64_t* memtable_garbage_bytes) {
|
||||
BlobFileCompletionCallback* blob_callback, Version* version,
|
||||
uint64_t* num_input_entries, uint64_t* memtable_payload_bytes,
|
||||
uint64_t* memtable_garbage_bytes) {
|
||||
assert((tboptions.column_family_id ==
|
||||
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
|
||||
tboptions.column_family_name.empty());
|
||||
|
@ -246,9 +247,17 @@ Status BuildTable(
|
|||
auto tombstone = range_del_it->Tombstone();
|
||||
auto kv = tombstone.Serialize();
|
||||
builder->Add(kv.first.Encode(), kv.second);
|
||||
meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
|
||||
tombstone.seq_,
|
||||
InternalKey tombstone_end = tombstone.SerializeEndKey();
|
||||
meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_,
|
||||
tboptions.internal_comparator);
|
||||
if (version) {
|
||||
SizeApproximationOptions approx_opts;
|
||||
approx_opts.files_size_error_margin = 0.1;
|
||||
meta->compensated_range_deletion_size += versions->ApproximateSize(
|
||||
approx_opts, version, kv.first.Encode(), tombstone_end.Encode(),
|
||||
0 /* start_level */, -1 /* end_level */,
|
||||
TableReaderCaller::kFlush);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "db/range_tombstone_fragmenter.h"
|
||||
#include "db/seqno_to_time_mapping.h"
|
||||
#include "db/table_properties_collector.h"
|
||||
#include "db/version_set.h"
|
||||
#include "logging/event_logger.h"
|
||||
#include "options/cf_options.h"
|
||||
#include "rocksdb/comparator.h"
|
||||
|
@ -70,7 +71,7 @@ extern Status BuildTable(
|
|||
Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
|
||||
const std::string* full_history_ts_low = nullptr,
|
||||
BlobFileCompletionCallback* blob_callback = nullptr,
|
||||
uint64_t* num_input_entries = nullptr,
|
||||
Version* version = nullptr, uint64_t* num_input_entries = nullptr,
|
||||
uint64_t* memtable_payload_bytes = nullptr,
|
||||
uint64_t* memtable_garbage_bytes = nullptr);
|
||||
|
||||
|
|
|
@ -386,7 +386,8 @@ class CompactionJobTestBase : public testing::Test {
|
|||
oldest_blob_file_number, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime,
|
||||
versions_->GetColumnFamilySet()->GetDefault()->NewEpochNumber(),
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2,
|
||||
0);
|
||||
|
||||
mutex_.Lock();
|
||||
EXPECT_OK(
|
||||
|
|
|
@ -525,7 +525,8 @@ Status CompactionOutputs::AddRangeDels(
|
|||
ucmp->CompareWithoutTimestamp(*lower_bound, kv.second) < 0);
|
||||
// Range tombstone is not supported by output validator yet.
|
||||
builder_->Add(kv.first.Encode(), kv.second);
|
||||
InternalKey smallest_candidate = std::move(kv.first);
|
||||
InternalKey tombstone_start = std::move(kv.first);
|
||||
InternalKey smallest_candidate{tombstone_start};
|
||||
if (lower_bound != nullptr &&
|
||||
ucmp->CompareWithoutTimestamp(smallest_candidate.user_key(),
|
||||
*lower_bound) <= 0) {
|
||||
|
@ -594,7 +595,8 @@ Status CompactionOutputs::AddRangeDels(
|
|||
smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
|
||||
}
|
||||
}
|
||||
InternalKey largest_candidate = tombstone.SerializeEndKey();
|
||||
InternalKey tombstone_end = tombstone.SerializeEndKey();
|
||||
InternalKey largest_candidate{tombstone_end};
|
||||
if (upper_bound != nullptr &&
|
||||
ucmp->CompareWithoutTimestamp(*upper_bound,
|
||||
largest_candidate.user_key()) <= 0) {
|
||||
|
@ -636,6 +638,24 @@ Status CompactionOutputs::AddRangeDels(
|
|||
#endif
|
||||
meta.UpdateBoundariesForRange(smallest_candidate, largest_candidate,
|
||||
tombstone.seq_, icmp);
|
||||
if (!bottommost_level) {
|
||||
// Range tombstones are truncated at file boundaries
|
||||
if (icmp.Compare(tombstone_start, meta.smallest) < 0) {
|
||||
tombstone_start = meta.smallest;
|
||||
}
|
||||
if (icmp.Compare(tombstone_end, meta.largest) > 0) {
|
||||
tombstone_end = meta.largest;
|
||||
}
|
||||
SizeApproximationOptions approx_opts;
|
||||
approx_opts.files_size_error_margin = 0.1;
|
||||
auto approximate_covered_size =
|
||||
compaction_->input_version()->version_set()->ApproximateSize(
|
||||
approx_opts, compaction_->input_version(),
|
||||
tombstone_start.Encode(), tombstone_end.Encode(),
|
||||
compaction_->output_level() + 1 /* start_level */,
|
||||
-1 /* end_level */, kCompaction);
|
||||
meta.compensated_range_deletion_size += approximate_covered_size;
|
||||
}
|
||||
// The smallest key in a file is used for range tombstone truncation, so
|
||||
// it cannot have a seqnum of 0 (unless the smallest data key in a file
|
||||
// has a seqnum of 0). Otherwise, the truncated tombstone may expose
|
||||
|
|
|
@ -148,7 +148,7 @@ class CompactionPickerTestBase : public testing::Test {
|
|||
smallest_seq, largest_seq, marked_for_compact, temperature,
|
||||
kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
f->compensated_file_size =
|
||||
(compensated_file_size != 0) ? compensated_file_size : file_size;
|
||||
f->oldest_ancester_time = oldest_ancestor_time;
|
||||
|
@ -2873,7 +2873,6 @@ TEST_F(CompactionPickerTest, IntraL0MaxCompactionBytesHit) {
|
|||
ASSERT_EQ(0, compaction->output_level());
|
||||
}
|
||||
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) {
|
||||
const uint64_t kFileSize = 100000;
|
||||
|
|
|
@ -1747,7 +1747,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|||
f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
|
||||
f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
|
||||
f->oldest_ancester_time, f->file_creation_time, f->epoch_number,
|
||||
f->file_checksum, f->file_checksum_func_name, f->unique_id);
|
||||
f->file_checksum, f->file_checksum_func_name, f->unique_id,
|
||||
f->compensated_range_deletion_size);
|
||||
}
|
||||
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
|
||||
"[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
|
||||
|
@ -3388,7 +3389,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|||
f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
|
||||
f->oldest_blob_file_number, f->oldest_ancester_time,
|
||||
f->file_creation_time, f->epoch_number, f->file_checksum,
|
||||
f->file_checksum_func_name, f->unique_id);
|
||||
f->file_checksum_func_name, f->unique_id,
|
||||
f->compensated_range_deletion_size);
|
||||
|
||||
ROCKS_LOG_BUFFER(
|
||||
log_buffer,
|
||||
|
|
|
@ -137,7 +137,8 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
|
|||
f->marked_for_compaction, f->temperature,
|
||||
f->oldest_blob_file_number, f->oldest_ancester_time,
|
||||
f->file_creation_time, f->epoch_number, f->file_checksum,
|
||||
f->file_checksum_func_name, f->unique_id);
|
||||
f->file_checksum_func_name, f->unique_id,
|
||||
f->compensated_range_deletion_size);
|
||||
}
|
||||
|
||||
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
|
||||
|
|
|
@ -1550,6 +1550,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
|
|||
0 /* file_creation_time */, db_id_, db_session_id_,
|
||||
0 /* target_file_size */, meta.fd.GetNumber());
|
||||
SeqnoToTimeMapping empty_seqno_time_mapping;
|
||||
Version* version = cfd->current();
|
||||
version->Ref();
|
||||
s = BuildTable(
|
||||
dbname_, versions_.get(), immutable_db_options_, tboptions,
|
||||
file_options_for_compaction_, cfd->table_cache(), iter.get(),
|
||||
|
@ -1559,7 +1561,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
|
|||
io_tracer_, BlobFileCreationReason::kRecovery,
|
||||
empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
|
||||
nullptr /* table_properties */, write_hint,
|
||||
nullptr /*full_history_ts_low*/, &blob_callback_);
|
||||
nullptr /*full_history_ts_low*/, &blob_callback_, version);
|
||||
version->Unref();
|
||||
LogFlush(immutable_db_options_.info_log);
|
||||
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
|
||||
"[%s] [WriteLevel0TableForRecovery]"
|
||||
|
@ -1583,13 +1586,14 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
|
|||
constexpr int level = 0;
|
||||
|
||||
if (s.ok() && has_output) {
|
||||
edit->AddFile(
|
||||
level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(),
|
||||
meta.smallest, meta.largest, meta.fd.smallest_seqno,
|
||||
meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature,
|
||||
meta.oldest_blob_file_number, meta.oldest_ancester_time,
|
||||
meta.file_creation_time, meta.epoch_number, meta.file_checksum,
|
||||
meta.file_checksum_func_name, meta.unique_id);
|
||||
edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(),
|
||||
meta.fd.GetFileSize(), meta.smallest, meta.largest,
|
||||
meta.fd.smallest_seqno, meta.fd.largest_seqno,
|
||||
meta.marked_for_compaction, meta.temperature,
|
||||
meta.oldest_blob_file_number, meta.oldest_ancester_time,
|
||||
meta.file_creation_time, meta.epoch_number,
|
||||
meta.file_checksum, meta.file_checksum_func_name,
|
||||
meta.unique_id, meta.compensated_range_deletion_size);
|
||||
|
||||
for (const auto& blob : blob_file_additions) {
|
||||
edit->AddBlobFile(blob);
|
||||
|
|
|
@ -479,7 +479,10 @@ TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
|
|||
std::vector<std::string> values;
|
||||
// Write 100KB (100 values, each 1K)
|
||||
for (int k = 0; k < kNumPerFile; k++) {
|
||||
values.push_back(rnd.RandomString(990));
|
||||
// For the highest level, use smaller value size such that it does not
|
||||
// prematurely cause auto compaction due to range tombstone adding
|
||||
// additional compensated file size
|
||||
values.push_back(rnd.RandomString((i == kNumLevels - 2) ? 600 : 990));
|
||||
ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
|
||||
}
|
||||
// put extra key to trigger flush
|
||||
|
@ -492,7 +495,13 @@ TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
|
|||
}
|
||||
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||
ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
|
||||
if (i == kNumLevels - 2) {
|
||||
// For the highest level, value size is smaller (see Put() above),
|
||||
// so output file number is smaller.
|
||||
ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 2);
|
||||
} else {
|
||||
ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
|
||||
}
|
||||
}
|
||||
// Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions
|
||||
// happen since input level > 0; (2) range deletions are not dropped since
|
||||
|
@ -3004,6 +3013,110 @@ TEST_F(DBRangeDelTest, RangeTombstoneRespectIterateUpperBound) {
|
|||
ASSERT_OK(iter->status());
|
||||
}
|
||||
|
||||
// Checks that a file containing a range tombstone gets a non-zero
// compensated_range_deletion_size, both when the file is written by a flush
// and when it is written by a compaction. Auto compactions are disabled so
// that the test itself decides which level each file sits in.
TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesize) {
|
||||
Options opts = CurrentOptions();
|
||||
opts.disable_auto_compactions = true;
|
||||
DestroyAndReopen(opts);
|
||||

||||
std::vector<std::string> values;
|
||||
Random rnd(301);
|
||||
// file in L2
|
||||
values.push_back(rnd.RandomString(1 << 10));
|
||||
ASSERT_OK(Put("a", values.back()));
|
||||
values.push_back(rnd.RandomString(1 << 10));
|
||||
ASSERT_OK(Put("b", values.back()));
|
||||
ASSERT_OK(Flush());
|
||||
MoveFilesToLevel(2);
|
||||
uint64_t l2_size = 0;
|
||||
// Record the size reported for ["a", "c"), the range holding the keys that
|
||||
// were just moved to L2. NOTE(review): Size() is a fixture helper not shown
|
||||
// here; presumably an approximate-size query -- confirm.
|
||||
ASSERT_OK(Size("a", "c", 0 /* cf */, &l2_size));
|
||||
ASSERT_GT(l2_size, 0);
|
||||
// file in L1
|
||||
values.push_back(rnd.RandomString(1 << 10));
|
||||
ASSERT_OK(Put("d", values.back()));
|
||||
values.push_back(rnd.RandomString(1 << 10));
|
||||
ASSERT_OK(Put("e", values.back()));
|
||||
ASSERT_OK(Flush());
|
||||
MoveFilesToLevel(1);
|
||||
uint64_t l1_size = 0;
|
||||
// Same measurement for ["d", "f"), the range holding the L1 file's keys.
|
||||
ASSERT_OK(Size("d", "f", 0 /* cf */, &l1_size));
|
||||
ASSERT_GT(l1_size, 0);
|
||||

||||
// Write a range tombstone over ["a", "f"), which spans every key written
|
||||
// above, and flush it so that it ends up in its own L0 file.
|
||||
ASSERT_OK(
|
||||
db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "f"));
|
||||
ASSERT_OK(Flush());
|
||||
// Range deletion compensated size computed during flush time
|
||||
std::vector<std::vector<FileMetaData>> level_to_files;
|
||||
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
|
||||
&level_to_files);
|
||||
ASSERT_EQ(level_to_files[0].size(), 1);
|
||||
// The L0 file holding the tombstone is expected to be compensated by the
|
||||
// sizes measured for both lower levels; the two data files carry no
|
||||
// compensation.
|
||||
ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
|
||||
l1_size + l2_size);
|
||||
ASSERT_EQ(level_to_files[1].size(), 1);
|
||||
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, 0);
|
||||
ASSERT_EQ(level_to_files[2].size(), 1);
|
||||
ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0);
|
||||

||||
// Range deletion compensated size computed during compaction time
|
||||
// Trivial move is disallowed so that the compaction actually rewrites the
|
||||
// L0 file instead of just relinking it into L1.
|
||||
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
|
||||
true /* disallow_trivial_move */));
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||
ASSERT_EQ(NumTableFilesAtLevel(1), 1);
|
||||
ASSERT_EQ(NumTableFilesAtLevel(2), 1);
|
||||
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
|
||||
&level_to_files);
|
||||
ASSERT_EQ(level_to_files[1].size(), 1);
|
||||
// The compaction output in L1 is expected to be compensated only by the
|
||||
// size measured for L2.
|
||||
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
|
||||
ASSERT_EQ(level_to_files[2].size(), 1);
|
||||
ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0);
|
||||
}
|
||||
|
||||
// Checks that the compensated_range_deletion_size values observed on the L0
// and L1 files before Reopen() are observed unchanged afterwards. Auto
// compactions are disabled so the three-level layout built here stays put.
TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesizePersistDuringReopen) {
|
||||
Options opts = CurrentOptions();
|
||||
opts.disable_auto_compactions = true;
|
||||
DestroyAndReopen(opts);
|
||||

||||
std::vector<std::string> values;
|
||||
Random rnd(301);
|
||||
// Data file with keys "a" and "b", placed in L2.
|
||||
values.push_back(rnd.RandomString(1 << 10));
|
||||
ASSERT_OK(Put("a", values.back()));
|
||||
values.push_back(rnd.RandomString(1 << 10));
|
||||
ASSERT_OK(Put("b", values.back()));
|
||||
ASSERT_OK(Flush());
|
||||
MoveFilesToLevel(2);
|
||||

||||
// File holding only a range tombstone over ["a", "c"), placed in L1.
|
||||
ASSERT_OK(
|
||||
db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
|
||||
ASSERT_OK(Flush());
|
||||
MoveFilesToLevel(1);
|
||||

||||
// File holding only a range tombstone over ["a", "z"), left in L0.
|
||||
ASSERT_OK(
|
||||
db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
|
||||
ASSERT_OK(Flush());
|
||||

||||
std::vector<std::vector<FileMetaData>> level_to_files;
|
||||
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
|
||||
&level_to_files);
|
||||
ASSERT_EQ(level_to_files[0].size(), 1);
|
||||
ASSERT_EQ(level_to_files[1].size(), 1);
|
||||
ASSERT_EQ(level_to_files[2].size(), 1);
|
||||
// Expected values here are taken from the files' own sizes as recorded in
|
||||
// their metadata.
|
||||
uint64_t l2_size = level_to_files[2][0].fd.GetFileSize();
|
||||
uint64_t l1_size = level_to_files[1][0].fd.GetFileSize();
|
||||
ASSERT_GT(l2_size, 0);
|
||||
ASSERT_GT(l1_size, 0);
|
||||
ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
|
||||
l1_size + l2_size);
|
||||
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
|
||||

||||
// Reopen and re-read the file metadata; the same values must be reported.
|
||||
Reopen(opts);
|
||||
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
|
||||
&level_to_files);
|
||||
ASSERT_EQ(level_to_files[0].size(), 1);
|
||||
ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size,
|
||||
l1_size + l2_size);
|
||||
ASSERT_EQ(level_to_files[1].size(), 1);
|
||||
ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size);
|
||||
}
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
|
|
@ -113,7 +113,8 @@ Status UpdateManifestForFilesState(
|
|||
lf->fd.largest_seqno, lf->marked_for_compaction, temp,
|
||||
lf->oldest_blob_file_number, lf->oldest_ancester_time,
|
||||
lf->file_creation_time, lf->epoch_number, lf->file_checksum,
|
||||
lf->file_checksum_func_name, lf->unique_id);
|
||||
lf->file_checksum_func_name, lf->unique_id,
|
||||
lf->compensated_range_deletion_size);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -473,7 +473,7 @@ Status ExternalSstFileIngestionJob::Run() {
|
|||
ingestion_options_.ingest_behind
|
||||
? kReservedEpochNumberForFileIngestedBehind
|
||||
: cfd_->NewEpochNumber(),
|
||||
f.file_checksum, f.file_checksum_func_name, f.unique_id);
|
||||
f.file_checksum, f.file_checksum_func_name, f.unique_id, 0);
|
||||
f_metadata.temperature = f.file_temperature;
|
||||
edit_.AddFile(f.picked_level, f_metadata);
|
||||
}
|
||||
|
|
|
@ -941,7 +941,7 @@ Status FlushJob::WriteLevel0Table() {
|
|||
cfd_->internal_stats(), &io_s, io_tracer_,
|
||||
BlobFileCreationReason::kFlush, seqno_to_time_mapping_, event_logger_,
|
||||
job_context_->job_id, io_priority, &table_properties_, write_hint,
|
||||
full_history_ts_low, blob_callback_, &num_input_entries,
|
||||
full_history_ts_low, blob_callback_, base_, &num_input_entries,
|
||||
&memtable_payload_bytes, &memtable_garbage_bytes);
|
||||
// TODO: Cleanup io_status in BuildTable and table builders
|
||||
assert(!s.ok() || io_s.ok());
|
||||
|
@ -1003,8 +1003,7 @@ Status FlushJob::WriteLevel0Table() {
|
|||
meta_.oldest_blob_file_number, meta_.oldest_ancester_time,
|
||||
meta_.file_creation_time, meta_.epoch_number,
|
||||
meta_.file_checksum, meta_.file_checksum_func_name,
|
||||
meta_.unique_id);
|
||||
|
||||
meta_.unique_id, meta_.compensated_range_deletion_size);
|
||||
edit_->SetBlobFileAdditions(std::move(blob_file_additions));
|
||||
}
|
||||
#ifndef ROCKSDB_LITE
|
||||
|
|
|
@ -143,7 +143,7 @@ Status ImportColumnFamilyJob::Run() {
|
|||
file_metadata.smallest_seqno, file_metadata.largest_seqno, false,
|
||||
file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time,
|
||||
current_time, file_metadata.epoch_number, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, f.unique_id);
|
||||
kUnknownFileChecksumFuncName, f.unique_id, 0);
|
||||
s = dummy_version_builder.Apply(&dummy_version_edit);
|
||||
}
|
||||
if (s.ok()) {
|
||||
|
|
|
@ -665,7 +665,8 @@ class Repairer {
|
|||
table->meta.temperature, table->meta.oldest_blob_file_number,
|
||||
table->meta.oldest_ancester_time, table->meta.file_creation_time,
|
||||
table->meta.epoch_number, table->meta.file_checksum,
|
||||
table->meta.file_checksum_func_name, table->meta.unique_id);
|
||||
table->meta.file_checksum_func_name, table->meta.unique_id,
|
||||
table->meta.compensated_range_deletion_size);
|
||||
}
|
||||
s = dummy_version_builder.Apply(&dummy_edit);
|
||||
if (s.ok()) {
|
||||
|
|
|
@ -73,7 +73,7 @@ class VersionBuilderTest : public testing::Test {
|
|||
/* marked_for_compact */ false, Temperature::kUnknown,
|
||||
oldest_blob_file_number, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
f->compensated_file_size = file_size;
|
||||
f->num_entries = num_entries;
|
||||
f->num_deletions = num_deletions;
|
||||
|
@ -130,12 +130,13 @@ class VersionBuilderTest : public testing::Test {
|
|||
constexpr SequenceNumber largest_seqno = 300;
|
||||
constexpr bool marked_for_compaction = false;
|
||||
|
||||
edit->AddFile(
|
||||
level, table_file_number, path_id, file_size, GetInternalKey(smallest),
|
||||
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
||||
marked_for_compaction, Temperature::kUnknown, blob_file_number,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
edit->AddFile(level, table_file_number, path_id, file_size,
|
||||
GetInternalKey(smallest), GetInternalKey(largest),
|
||||
smallest_seqno, largest_seqno, marked_for_compaction,
|
||||
Temperature::kUnknown, blob_file_number,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
epoch_number, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
}
|
||||
|
||||
void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) {
|
||||
|
@ -186,7 +187,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) {
|
|||
2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.DeleteFile(3, 27U);
|
||||
|
||||
EnvOptions env_options;
|
||||
|
@ -233,7 +234,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) {
|
|||
3, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
version_edit.DeleteFile(0, 1U);
|
||||
version_edit.DeleteFile(0, 88U);
|
||||
|
||||
|
@ -283,7 +285,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) {
|
|||
4, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.DeleteFile(0, 1U);
|
||||
version_edit.DeleteFile(0, 88U);
|
||||
version_edit.DeleteFile(4, 6U);
|
||||
|
@ -319,27 +321,27 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) {
|
|||
2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
EnvOptions env_options;
|
||||
constexpr TableCache* table_cache = nullptr;
|
||||
|
@ -378,27 +380,27 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) {
|
|||
2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit.AddFile(
|
||||
2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
ASSERT_OK(version_builder.Apply(&version_edit));
|
||||
|
||||
VersionEdit version_edit2;
|
||||
|
@ -406,14 +408,14 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) {
|
|||
2, 808, 0, 100U, GetInternalKey("901"), GetInternalKey("950"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
version_edit2.DeleteFile(2, 616);
|
||||
version_edit2.DeleteFile(2, 636);
|
||||
version_edit.AddFile(
|
||||
2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200,
|
||||
false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
ASSERT_OK(version_builder.Apply(&version_edit2));
|
||||
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
|
||||
|
@ -524,7 +526,7 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) {
|
|||
GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno,
|
||||
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
ASSERT_OK(builder.Apply(&addition));
|
||||
|
||||
|
@ -573,7 +575,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) {
|
|||
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
||||
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
const Status s = builder.Apply(&edit);
|
||||
ASSERT_TRUE(s.IsCorruption());
|
||||
|
@ -609,7 +611,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) {
|
|||
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
||||
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
ASSERT_OK(builder.Apply(&edit));
|
||||
|
||||
|
@ -622,7 +624,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) {
|
|||
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
||||
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
const Status s = builder.Apply(&other_edit);
|
||||
ASSERT_TRUE(s.IsCorruption());
|
||||
|
@ -658,7 +660,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) {
|
|||
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
||||
marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
ASSERT_OK(builder.Apply(&addition));
|
||||
|
||||
|
@ -1231,7 +1233,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) {
|
|||
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
||||
marked_for_compaction, Temperature::kUnknown, blob_file_number,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime, 2 /*epoch_number*/,
|
||||
checksum_value, checksum_method, kNullUniqueId64x2);
|
||||
checksum_value, checksum_method, kNullUniqueId64x2, 0);
|
||||
edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
|
||||
checksum_method, checksum_value);
|
||||
|
||||
|
@ -1319,7 +1321,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) {
|
|||
/* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
|
||||
edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0,
|
||||
/* file_size */ 100, /* smallest */ GetInternalKey("801"),
|
||||
|
@ -1329,7 +1331,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) {
|
|||
/* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000,
|
||||
/* total_blob_bytes */ 200000,
|
||||
/* checksum_method */ std::string(),
|
||||
|
@ -1550,7 +1552,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
|
|||
Temperature::kUnknown,
|
||||
/* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
// Add an SST that does not reference any blob files.
|
||||
edit.AddFile(
|
||||
|
@ -1560,7 +1562,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
|
|||
/* largest_seqno */ 2200, /* marked_for_compaction */ false,
|
||||
Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
// Delete a file that references a blob file.
|
||||
edit.DeleteFile(/* level */ 1, /* file_number */ 6);
|
||||
|
@ -1583,7 +1585,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
|
|||
/* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
|
||||
// Trivially move a file that does not reference any blob files.
|
||||
edit.DeleteFile(/* level */ 1, /* file_number */ 13);
|
||||
|
@ -1595,7 +1597,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
|
|||
Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
kUnknownEpochNumber, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
// Add one more SST file that references a blob file, then promptly
|
||||
// delete it in a second version edit before the new version gets saved.
|
||||
|
@ -1609,7 +1611,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
|
|||
/* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, kUnknownEpochNumber,
|
||||
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
|
||||
VersionEdit edit2;
|
||||
|
||||
|
@ -1710,7 +1712,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
|
|||
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
version_edit_1.AddFile(
|
||||
/* level */ 0, /* file_number */ 2U, /* path_id */ 0,
|
||||
/* file_size */ 100, /* smallest */ GetInternalKey("b", 2),
|
||||
|
@ -1720,7 +1722,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
|
|||
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
|
||||
VersionBuilder version_builder_1(EnvOptions(), &ioptions_,
|
||||
nullptr /* table_cache */, &vstorage_,
|
||||
|
@ -1747,7 +1749,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
|
|||
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
version_edit_2.AddFile(
|
||||
/* level */ 0, /* file_number */ 2U, /* path_id */ 0,
|
||||
/* file_size */ 100, /* smallest */ GetInternalKey("b", 2),
|
||||
|
@ -1757,7 +1759,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) {
|
|||
/* oldest_blob_file_number */ kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
2 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
|
||||
VersionBuilder version_builder_2(EnvOptions(), &ioptions_,
|
||||
nullptr /* table_cache */, &vstorage_,
|
||||
|
|
|
@ -231,6 +231,13 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
|
|||
std::string unique_id_str = EncodeUniqueIdBytes(&unique_id);
|
||||
PutLengthPrefixedSlice(dst, Slice(unique_id_str));
|
||||
}
|
||||
if (f.compensated_range_deletion_size) {
|
||||
PutVarint32(dst, kCompensatedRangeDeletionSize);
|
||||
std::string compensated_range_deletion_size;
|
||||
PutVarint64(&compensated_range_deletion_size,
|
||||
f.compensated_range_deletion_size);
|
||||
PutLengthPrefixedSlice(dst, Slice(compensated_range_deletion_size));
|
||||
}
|
||||
|
||||
TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields",
|
||||
dst);
|
||||
|
@ -404,6 +411,11 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) {
|
|||
return "invalid unique id";
|
||||
}
|
||||
break;
|
||||
case kCompensatedRangeDeletionSize:
|
||||
if (!GetVarint64(&field, &f.compensated_range_deletion_size)) {
|
||||
return "Invalid compensated range deletion size";
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) {
|
||||
// Should not proceed if cannot understand it
|
||||
|
|
|
@ -89,6 +89,7 @@ enum NewFileCustomTag : uint32_t {
|
|||
kMaxTimestamp = 11,
|
||||
kUniqueId = 12,
|
||||
kEpochNumber = 13,
|
||||
kCompensatedRangeDeletionSize = 14,
|
||||
|
||||
// If this bit for the custom tag is set, opening DB should fail if
|
||||
// we don't know this field.
|
||||
|
@ -182,15 +183,22 @@ struct FileMetaData {
|
|||
// Stats for compensating deletion entries during compaction
|
||||
|
||||
// File size compensated by deletion entry.
|
||||
// This is updated in Version::UpdateAccumulatedStats() first time when the
|
||||
// file is created or loaded. After it is updated (!= 0), it is immutable.
|
||||
// This is used to compute a file's compaction priority, and is updated in
|
||||
// Version::ComputeCompensatedSizes() first time when the file is created or
|
||||
// loaded. After it is updated (!= 0), it is immutable.
|
||||
uint64_t compensated_file_size = 0;
|
||||
// These values can mutate, but they can only be read or written from
|
||||
// single-threaded LogAndApply thread
|
||||
uint64_t num_entries = 0; // the number of entries.
|
||||
uint64_t num_deletions = 0; // the number of deletion entries.
|
||||
// The number of deletion entries, including range deletions.
|
||||
uint64_t num_deletions = 0;
|
||||
uint64_t raw_key_size = 0; // total uncompressed key size.
|
||||
uint64_t raw_value_size = 0; // total uncompressed value size.
|
||||
uint64_t num_range_deletions = 0;
|
||||
// This is computed during Flush/Compaction, and is added to
|
||||
// `compensated_file_size`. Currently, this estimates the size of keys in the
|
||||
// next level covered by range tombstones in this file.
|
||||
uint64_t compensated_range_deletion_size = 0;
|
||||
|
||||
int refs = 0; // Reference count
|
||||
|
||||
|
@ -240,10 +248,12 @@ struct FileMetaData {
|
|||
uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
|
||||
uint64_t _epoch_number, const std::string& _file_checksum,
|
||||
const std::string& _file_checksum_func_name,
|
||||
UniqueId64x2 _unique_id)
|
||||
UniqueId64x2 _unique_id,
|
||||
const uint64_t _compensated_range_deletion_size)
|
||||
: fd(file, file_path_id, file_size, smallest_seq, largest_seq),
|
||||
smallest(smallest_key),
|
||||
largest(largest_key),
|
||||
compensated_range_deletion_size(_compensated_range_deletion_size),
|
||||
marked_for_compaction(marked_for_compact),
|
||||
temperature(_temperature),
|
||||
oldest_blob_file_number(oldest_blob_file),
|
||||
|
@ -434,7 +444,8 @@ class VersionEdit {
|
|||
uint64_t oldest_ancester_time, uint64_t file_creation_time,
|
||||
uint64_t epoch_number, const std::string& file_checksum,
|
||||
const std::string& file_checksum_func_name,
|
||||
const UniqueId64x2& unique_id) {
|
||||
const UniqueId64x2& unique_id,
|
||||
const uint64_t compensated_range_deletion_size) {
|
||||
assert(smallest_seqno <= largest_seqno);
|
||||
new_files_.emplace_back(
|
||||
level,
|
||||
|
@ -442,7 +453,8 @@ class VersionEdit {
|
|||
smallest_seqno, largest_seqno, marked_for_compaction,
|
||||
temperature, oldest_blob_file_number, oldest_ancester_time,
|
||||
file_creation_time, epoch_number, file_checksum,
|
||||
file_checksum_func_name, unique_id));
|
||||
file_checksum_func_name, unique_id,
|
||||
compensated_range_deletion_size));
|
||||
if (!HasLastSequence() || largest_seqno > GetLastSequence()) {
|
||||
SetLastSequence(largest_seqno);
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ TEST_F(VersionEditTest, EncodeDecode) {
|
|||
kBig + 500 + i, kBig + 600 + i, false, Temperature::kUnknown,
|
||||
kInvalidBlobFileNumber, 888, 678,
|
||||
kBig + 300 + i /* epoch_number */, "234", "crc32c",
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
edit.DeleteFile(4, kBig + 700 + i);
|
||||
}
|
||||
|
||||
|
@ -65,24 +65,24 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) {
|
|||
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
300 /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
|
||||
InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
|
||||
kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
301 /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue),
|
||||
InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502,
|
||||
kBig + 602, true, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
666, 888, 302 /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex),
|
||||
InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503,
|
||||
kBig + 603, true, Temperature::kUnknown, 1001,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
303 /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
edit.DeleteFile(4, 700);
|
||||
|
||||
|
@ -123,12 +123,12 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) {
|
|||
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
300 /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
|
||||
InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
|
||||
kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
686, 868, 301 /* epoch_number */, "234", "crc32c",
|
||||
kNullUniqueId64x2);
|
||||
kNullUniqueId64x2, 0);
|
||||
edit.DeleteFile(4, 700);
|
||||
|
||||
edit.SetComparatorName("foo");
|
||||
|
@ -177,7 +177,7 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) {
|
|||
kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
300 /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
|
||||
edit.SetComparatorName("foo");
|
||||
edit.SetLogNumber(kBig + 100);
|
||||
|
@ -208,7 +208,7 @@ TEST_F(VersionEditTest, EncodeEmptyFile) {
|
|||
Temperature::kUnknown, kInvalidBlobFileNumber,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
1 /*epoch_number*/, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
std::string buffer;
|
||||
ASSERT_TRUE(!edit.EncodeTo(&buffer));
|
||||
}
|
||||
|
|
|
@ -2960,7 +2960,7 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
|
|||
file_meta->num_deletions = tp->num_deletions;
|
||||
file_meta->raw_value_size = tp->raw_value_size;
|
||||
file_meta->raw_key_size = tp->raw_key_size;
|
||||
|
||||
file_meta->num_range_deletions = tp->num_range_deletions;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -3062,11 +3062,15 @@ void VersionStorageInfo::ComputeCompensatedSizes() {
|
|||
// size of deletion entries in a stable workload, the deletion
|
||||
// compensation logic might introduce unwanted effect which changes the
|
||||
// shape of LSM tree.
|
||||
if (file_meta->num_deletions * 2 >= file_meta->num_entries) {
|
||||
if ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 >=
|
||||
file_meta->num_entries) {
|
||||
file_meta->compensated_file_size +=
|
||||
(file_meta->num_deletions * 2 - file_meta->num_entries) *
|
||||
((file_meta->num_deletions - file_meta->num_range_deletions) * 2 -
|
||||
file_meta->num_entries) *
|
||||
average_value_size * kDeletionWeightOnCompaction;
|
||||
}
|
||||
file_meta->compensated_file_size +=
|
||||
file_meta->compensated_range_deletion_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6215,7 +6219,8 @@ Status VersionSet::WriteCurrentStateToManifest(
|
|||
f->marked_for_compaction, f->temperature,
|
||||
f->oldest_blob_file_number, f->oldest_ancester_time,
|
||||
f->file_creation_time, f->epoch_number, f->file_checksum,
|
||||
f->file_checksum_func_name, f->unique_id);
|
||||
f->file_checksum_func_name, f->unique_id,
|
||||
f->compensated_range_deletion_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6293,8 +6298,9 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
|
|||
const int num_non_empty_levels = vstorage->num_non_empty_levels();
|
||||
end_level = (end_level == -1) ? num_non_empty_levels
|
||||
: std::min(end_level, num_non_empty_levels);
|
||||
|
||||
assert(start_level <= end_level);
|
||||
if (end_level <= start_level) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Outline of the optimization that uses options.files_size_error_margin.
|
||||
// When approximating the files total size that is used to store a keys range,
|
||||
|
|
|
@ -51,7 +51,7 @@ class GenerateLevelFilesBriefTest : public testing::Test {
|
|||
largest_seq, /* marked_for_compact */ false, Temperature::kUnknown,
|
||||
kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
|
||||
kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
files_.push_back(f);
|
||||
}
|
||||
|
||||
|
@ -143,16 +143,19 @@ class VersionStorageInfoTestBase : public testing::Test {
|
|||
|
||||
void Add(int level, uint32_t file_number, const char* smallest,
|
||||
const char* largest, uint64_t file_size = 0,
|
||||
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
|
||||
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber,
|
||||
uint64_t compensated_range_deletion_size = 0) {
|
||||
constexpr SequenceNumber dummy_seq = 0;
|
||||
|
||||
Add(level, file_number, GetInternalKey(smallest, dummy_seq),
|
||||
GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number);
|
||||
GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number,
|
||||
compensated_range_deletion_size);
|
||||
}
|
||||
|
||||
void Add(int level, uint32_t file_number, const InternalKey& smallest,
|
||||
const InternalKey& largest, uint64_t file_size = 0,
|
||||
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
|
||||
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber,
|
||||
uint64_t compensated_range_deletion_size = 0) {
|
||||
assert(level < vstorage_.num_levels());
|
||||
FileMetaData* f = new FileMetaData(
|
||||
file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0,
|
||||
|
@ -160,8 +163,7 @@ class VersionStorageInfoTestBase : public testing::Test {
|
|||
Temperature::kUnknown, oldest_blob_file_number,
|
||||
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
||||
kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
||||
kNullUniqueId64x2);
|
||||
f->compensated_file_size = file_size;
|
||||
kNullUniqueId64x2, compensated_range_deletion_size);
|
||||
vstorage_.AddFile(level, f);
|
||||
}
|
||||
|
||||
|
@ -2136,6 +2138,17 @@ TEST_F(VersionSetTest, AtomicGroupWithWalEdits) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(VersionStorageInfoTest, AddRangeDeletionCompensatedFileSize) {
|
||||
// Tests that compensated range deletion size is added to compensated file
|
||||
// size.
|
||||
Add(4, 100U, "1", "2", 100U, kInvalidBlobFileNumber, 1000U);
|
||||
|
||||
UpdateVersionStorageInfo();
|
||||
|
||||
auto meta = vstorage_.GetFileMetaDataByNumber(100U);
|
||||
ASSERT_EQ(meta->compensated_file_size, 100U + 1000U);
|
||||
}
|
||||
|
||||
class VersionSetWithTimestampTest : public VersionSetTest {
|
||||
public:
|
||||
static const std::string kNewCfName;
|
||||
|
@ -3242,7 +3255,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
|
|||
file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey,
|
||||
ikey, 0, 0, false, Temperature::kUnknown, 0, 0,
|
||||
0, info.epoch_number, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2,
|
||||
0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3299,7 +3313,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
|
|||
file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
|
||||
largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
|
||||
file_num /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
added_files.emplace_back(0, meta);
|
||||
}
|
||||
WriteFileAdditionAndDeletionToManifest(
|
||||
|
@ -3360,7 +3374,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
|
|||
file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
|
||||
largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
|
||||
file_num /* epoch_number */, kUnknownFileChecksum,
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2);
|
||||
kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
|
||||
added_files.emplace_back(0, meta);
|
||||
}
|
||||
WriteFileAdditionAndDeletionToManifest(
|
||||
|
|
Loading…
Reference in New Issue