diff --git a/db/builder.cc b/db/builder.cc index 9283ffd64d..cb7769e969 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -71,8 +71,9 @@ Status BuildTable( int job_id, const Env::IOPriority io_priority, TableProperties* table_properties, Env::WriteLifeTimeHint write_hint, const std::string* full_history_ts_low, - BlobFileCompletionCallback* blob_callback, uint64_t* num_input_entries, - uint64_t* memtable_payload_bytes, uint64_t* memtable_garbage_bytes) { + BlobFileCompletionCallback* blob_callback, Version* version, + uint64_t* num_input_entries, uint64_t* memtable_payload_bytes, + uint64_t* memtable_garbage_bytes) { assert((tboptions.column_family_id == TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) == tboptions.column_family_name.empty()); @@ -246,9 +247,17 @@ Status BuildTable( auto tombstone = range_del_it->Tombstone(); auto kv = tombstone.Serialize(); builder->Add(kv.first.Encode(), kv.second); - meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(), - tombstone.seq_, + InternalKey tombstone_end = tombstone.SerializeEndKey(); + meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_, tboptions.internal_comparator); + if (version) { + SizeApproximationOptions approx_opts; + approx_opts.files_size_error_margin = 0.1; + meta->compensated_range_deletion_size += versions->ApproximateSize( + approx_opts, version, kv.first.Encode(), tombstone_end.Encode(), + 0 /* start_level */, -1 /* end_level */, + TableReaderCaller::kFlush); + } } } diff --git a/db/builder.h b/db/builder.h index a028fd2ba3..063da5ca9e 100644 --- a/db/builder.h +++ b/db/builder.h @@ -13,6 +13,7 @@ #include "db/range_tombstone_fragmenter.h" #include "db/seqno_to_time_mapping.h" #include "db/table_properties_collector.h" +#include "db/version_set.h" #include "logging/event_logger.h" #include "options/cf_options.h" #include "rocksdb/comparator.h" @@ -70,7 +71,7 @@ extern Status BuildTable( Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET, const std::string* full_history_ts_low = nullptr, BlobFileCompletionCallback* blob_callback = nullptr, - uint64_t* num_input_entries = nullptr, + Version* version = nullptr, uint64_t* num_input_entries = nullptr, uint64_t* memtable_payload_bytes = nullptr, uint64_t* memtable_garbage_bytes = nullptr); diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 8b312ea78d..0f0c5daf77 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -386,7 +386,8 @@ class CompactionJobTestBase : public testing::Test { oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, versions_->GetColumnFamilySet()->GetDefault()->NewEpochNumber(), - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, + 0); mutex_.Lock(); EXPECT_OK( diff --git a/db/compaction/compaction_outputs.cc b/db/compaction/compaction_outputs.cc index 0722215865..e1fa21b4f9 100644 --- a/db/compaction/compaction_outputs.cc +++ b/db/compaction/compaction_outputs.cc @@ -525,7 +525,8 @@ Status CompactionOutputs::AddRangeDels( ucmp->CompareWithoutTimestamp(*lower_bound, kv.second) < 0); // Range tombstone is not supported by output validator yet. builder_->Add(kv.first.Encode(), kv.second); - InternalKey smallest_candidate = std::move(kv.first); + InternalKey tombstone_start = std::move(kv.first); + InternalKey smallest_candidate{tombstone_start}; if (lower_bound != nullptr && ucmp->CompareWithoutTimestamp(smallest_candidate.user_key(), *lower_bound) <= 0) { @@ -594,7 +595,8 @@ Status CompactionOutputs::AddRangeDels( smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion); } } - InternalKey largest_candidate = tombstone.SerializeEndKey(); + InternalKey tombstone_end = tombstone.SerializeEndKey(); + InternalKey largest_candidate{tombstone_end}; if (upper_bound != nullptr && ucmp->CompareWithoutTimestamp(*upper_bound, largest_candidate.user_key()) <= 0) { @@ -636,6 +638,24 @@ Status CompactionOutputs::AddRangeDels( #endif meta.UpdateBoundariesForRange(smallest_candidate, largest_candidate, tombstone.seq_, icmp); + if (!bottommost_level) { + // Range tombstones are truncated at file boundaries + if (icmp.Compare(tombstone_start, meta.smallest) < 0) { + tombstone_start = meta.smallest; + } + if (icmp.Compare(tombstone_end, meta.largest) > 0) { + tombstone_end = meta.largest; + } + SizeApproximationOptions approx_opts; + approx_opts.files_size_error_margin = 0.1; + auto approximate_covered_size = + compaction_->input_version()->version_set()->ApproximateSize( + approx_opts, compaction_->input_version(), + tombstone_start.Encode(), tombstone_end.Encode(), + compaction_->output_level() + 1 /* start_level */, + -1 /* end_level */, kCompaction); + meta.compensated_range_deletion_size += approximate_covered_size; + } // The smallest key in a file is used for range tombstone truncation, so // it cannot have a seqnum of 0 (unless the smallest data key in a file // has a seqnum of 0). Otherwise, the truncated tombstone may expose diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index dfc508fc5b..865518cb20 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -148,7 +148,7 @@ class CompactionPickerTestBase : public testing::Test { smallest_seq, largest_seq, marked_for_compact, temperature, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); f->compensated_file_size = (compensated_file_size != 0) ? compensated_file_size : file_size; f->oldest_ancester_time = oldest_ancestor_time; @@ -2873,7 +2873,6 @@ TEST_F(CompactionPickerTest, IntraL0MaxCompactionBytesHit) { ASSERT_EQ(0, compaction->output_level()); } - #ifndef ROCKSDB_LITE TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) { const uint64_t kFileSize = 100000; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 095336f643..95ee948eb2 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1747,7 +1747,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno, f->marked_for_compaction, f->temperature, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, - f->file_checksum, f->file_checksum_func_name, f->unique_id); + f->file_checksum, f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size); } ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] Apply version edit:\n%s", cfd->GetName().c_str(), @@ -3388,7 +3389,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, f->fd.largest_seqno, f->marked_for_compaction, f->temperature, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, - f->file_checksum_func_name, f->unique_id); + f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size); ROCKS_LOG_BUFFER( log_buffer, diff --git a/db/db_impl/db_impl_experimental.cc b/db/db_impl/db_impl_experimental.cc index 035fdbd412..2f732c1e47 100644 --- a/db/db_impl/db_impl_experimental.cc +++ b/db/db_impl/db_impl_experimental.cc @@ -137,7 +137,8 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) { f->marked_for_compaction, f->temperature, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, - f->file_checksum_func_name, f->unique_id); + f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size); } status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index f626df9ed3..3263e94bce 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1550,6 +1550,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, 0 /* file_creation_time */, db_id_, db_session_id_, 0 /* target_file_size */, meta.fd.GetNumber()); SeqnoToTimeMapping empty_seqno_time_mapping; + Version* version = cfd->current(); + version->Ref(); s = BuildTable( dbname_, versions_.get(), immutable_db_options_, tboptions, file_options_for_compaction_, cfd->table_cache(), iter.get(), @@ -1559,7 +1561,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, io_tracer_, BlobFileCreationReason::kRecovery, empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH, nullptr /* table_properties */, write_hint, - nullptr /*full_history_ts_low*/, &blob_callback_); + nullptr /*full_history_ts_low*/, &blob_callback_, version); + version->Unref(); LogFlush(immutable_db_options_.info_log); ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] [WriteLevel0TableForRecovery]" @@ -1583,13 +1586,14 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, constexpr int level = 0; if (s.ok() && has_output) { - edit->AddFile( - level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(), - meta.smallest, meta.largest, meta.fd.smallest_seqno, - meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature, - meta.oldest_blob_file_number, meta.oldest_ancester_time, - meta.file_creation_time, meta.epoch_number, meta.file_checksum, - meta.file_checksum_func_name, meta.unique_id); + edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(), + meta.fd.GetFileSize(), meta.smallest, meta.largest, + meta.fd.smallest_seqno, meta.fd.largest_seqno, + meta.marked_for_compaction, meta.temperature, + meta.oldest_blob_file_number, meta.oldest_ancester_time, + meta.file_creation_time, meta.epoch_number, + meta.file_checksum, meta.file_checksum_func_name, + meta.unique_id, meta.compensated_range_deletion_size); for (const auto& blob : blob_file_additions) { edit->AddBlobFile(blob); diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 3d468a1c0a..e6f4e0e4d1 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -479,7 +479,10 @@ TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) { std::vector values; // Write 100KB (100 values, each 1K) for (int k = 0; k < kNumPerFile; k++) { - values.push_back(rnd.RandomString(990)); + // For the highest level, use smaller value size such that it does not + // prematurely cause auto compaction due to range tombstone adding + // additional compensated file size + values.push_back(rnd.RandomString((i == kNumLevels - 2) ? 600 : 990)); ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k])); } // put extra key to trigger flush @@ -492,7 +495,13 @@ TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) { } ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1); + if (i == kNumLevels - 2) { + // For the highest level, value size is smaller (see Put() above), + // so output file number is smaller. + ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 2); + } else { + ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1); + } } // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions // happen since input level > 0; (2) range deletions are not dropped since @@ -3004,6 +3013,110 @@ TEST_F(DBRangeDelTest, RangeTombstoneRespectIterateUpperBound) { ASSERT_OK(iter->status()); } +TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesize) { + Options opts = CurrentOptions(); + opts.disable_auto_compactions = true; + DestroyAndReopen(opts); + + std::vector values; + Random rnd(301); + // file in L2 + values.push_back(rnd.RandomString(1 << 10)); + ASSERT_OK(Put("a", values.back())); + values.push_back(rnd.RandomString(1 << 10)); + ASSERT_OK(Put("b", values.back())); + ASSERT_OK(Flush()); + MoveFilesToLevel(2); + uint64_t l2_size = 0; + ASSERT_OK(Size("a", "c", 0 /* cf */, &l2_size)); + ASSERT_GT(l2_size, 0); + // file in L1 + values.push_back(rnd.RandomString(1 << 10)); + ASSERT_OK(Put("d", values.back())); + values.push_back(rnd.RandomString(1 << 10)); + ASSERT_OK(Put("e", values.back())); + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + uint64_t l1_size = 0; + ASSERT_OK(Size("d", "f", 0 /* cf */, &l1_size)); + ASSERT_GT(l1_size, 0); + + ASSERT_OK( + db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "f")); + ASSERT_OK(Flush()); + // Range deletion compensated size computed during flush time + std::vector> level_to_files; + dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), + &level_to_files); + ASSERT_EQ(level_to_files[0].size(), 1); + ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size, + l1_size + l2_size); + ASSERT_EQ(level_to_files[1].size(), 1); + ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, 0); + ASSERT_EQ(level_to_files[2].size(), 1); + ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0); + + // Range deletion compensated size computed during compaction time + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, + true /* disallow_trivial_move */)); + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + ASSERT_EQ(NumTableFilesAtLevel(1), 1); + ASSERT_EQ(NumTableFilesAtLevel(2), 1); + dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), + &level_to_files); + ASSERT_EQ(level_to_files[1].size(), 1); + ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size); + ASSERT_EQ(level_to_files[2].size(), 1); + ASSERT_EQ(level_to_files[2][0].compensated_range_deletion_size, 0); +} + +TEST_F(DBRangeDelTest, RangetombesoneCompensateFilesizePersistDuringReopen) { + Options opts = CurrentOptions(); + opts.disable_auto_compactions = true; + DestroyAndReopen(opts); + + std::vector values; + Random rnd(301); + values.push_back(rnd.RandomString(1 << 10)); + ASSERT_OK(Put("a", values.back())); + values.push_back(rnd.RandomString(1 << 10)); + ASSERT_OK(Put("b", values.back())); + ASSERT_OK(Flush()); + MoveFilesToLevel(2); + + ASSERT_OK( + db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c")); + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + + ASSERT_OK( + db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z")); + ASSERT_OK(Flush()); + + std::vector> level_to_files; + dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), + &level_to_files); + ASSERT_EQ(level_to_files[0].size(), 1); + ASSERT_EQ(level_to_files[1].size(), 1); + ASSERT_EQ(level_to_files[2].size(), 1); + uint64_t l2_size = level_to_files[2][0].fd.GetFileSize(); + uint64_t l1_size = level_to_files[1][0].fd.GetFileSize(); + ASSERT_GT(l2_size, 0); + ASSERT_GT(l1_size, 0); + ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size, + l1_size + l2_size); + ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size); + + Reopen(opts); + dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), + &level_to_files); + ASSERT_EQ(level_to_files[0].size(), 1); + ASSERT_EQ(level_to_files[0][0].compensated_range_deletion_size, + l1_size + l2_size); + ASSERT_EQ(level_to_files[1].size(), 1); + ASSERT_EQ(level_to_files[1][0].compensated_range_deletion_size, l2_size); +} + #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE diff --git a/db/experimental.cc b/db/experimental.cc index cb6286b1f4..20b5daa970 100644 --- a/db/experimental.cc +++ b/db/experimental.cc @@ -113,7 +113,8 @@ Status UpdateManifestForFilesState( lf->fd.largest_seqno, lf->marked_for_compaction, temp, lf->oldest_blob_file_number, lf->oldest_ancester_time, lf->file_creation_time, lf->epoch_number, lf->file_checksum, - lf->file_checksum_func_name, lf->unique_id); + lf->file_checksum_func_name, lf->unique_id, + lf->compensated_range_deletion_size); } } } else { diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index dfb9672683..849f98e874 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -473,7 +473,7 @@ Status ExternalSstFileIngestionJob::Run() { ingestion_options_.ingest_behind ? kReservedEpochNumberForFileIngestedBehind : cfd_->NewEpochNumber(), - f.file_checksum, f.file_checksum_func_name, f.unique_id); + f.file_checksum, f.file_checksum_func_name, f.unique_id, 0); f_metadata.temperature = f.file_temperature; edit_.AddFile(f.picked_level, f_metadata); } diff --git a/db/flush_job.cc b/db/flush_job.cc index c63ccec3e2..ac84da4cae 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -941,7 +941,7 @@ Status FlushJob::WriteLevel0Table() { cfd_->internal_stats(), &io_s, io_tracer_, BlobFileCreationReason::kFlush, seqno_to_time_mapping_, event_logger_, job_context_->job_id, io_priority, &table_properties_, write_hint, - full_history_ts_low, blob_callback_, &num_input_entries, + full_history_ts_low, blob_callback_, base_, &num_input_entries, &memtable_payload_bytes, &memtable_garbage_bytes); // TODO: Cleanup io_status in BuildTable and table builders assert(!s.ok() || io_s.ok()); @@ -1003,8 +1003,7 @@ Status FlushJob::WriteLevel0Table() { meta_.oldest_blob_file_number, meta_.oldest_ancester_time, meta_.file_creation_time, meta_.epoch_number, meta_.file_checksum, meta_.file_checksum_func_name, - meta_.unique_id); - + meta_.unique_id, meta_.compensated_range_deletion_size); edit_->SetBlobFileAdditions(std::move(blob_file_additions)); } #ifndef ROCKSDB_LITE diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index c59ef11ab5..17ad044a7e 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -143,7 +143,7 @@ Status ImportColumnFamilyJob::Run() { file_metadata.smallest_seqno, file_metadata.largest_seqno, false, file_metadata.temperature, kInvalidBlobFileNumber, oldest_ancester_time, current_time, file_metadata.epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, f.unique_id); + kUnknownFileChecksumFuncName, f.unique_id, 0); s = dummy_version_builder.Apply(&dummy_version_edit); } if (s.ok()) { diff --git a/db/repair.cc b/db/repair.cc index ae26f9c6f4..ddec43e9b6 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -665,7 +665,8 @@ class Repairer { table->meta.temperature, table->meta.oldest_blob_file_number, table->meta.oldest_ancester_time, table->meta.file_creation_time, table->meta.epoch_number, table->meta.file_checksum, - table->meta.file_checksum_func_name, table->meta.unique_id); + table->meta.file_checksum_func_name, table->meta.unique_id, + table->meta.compensated_range_deletion_size); } s = dummy_version_builder.Apply(&dummy_edit); if (s.ok()) { diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index ed276c65f8..611dee774b 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -73,7 +73,7 @@ class VersionBuilderTest : public testing::Test { /* marked_for_compact */ false, Temperature::kUnknown, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); f->compensated_file_size = file_size; f->num_entries = num_entries; f->num_deletions = num_deletions; @@ -130,12 +130,13 @@ class VersionBuilderTest : public testing::Test { constexpr SequenceNumber largest_seqno = 300; constexpr bool marked_for_compaction = false; - edit->AddFile( - level, table_file_number, path_id, file_size, GetInternalKey(smallest), - GetInternalKey(largest), smallest_seqno, largest_seqno, - marked_for_compaction, Temperature::kUnknown, blob_file_number, - kUnknownOldestAncesterTime, kUnknownFileCreationTime, epoch_number, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + edit->AddFile(level, table_file_number, path_id, file_size, + GetInternalKey(smallest), GetInternalKey(largest), + smallest_seqno, largest_seqno, marked_for_compaction, + Temperature::kUnknown, blob_file_number, + kUnknownOldestAncesterTime, kUnknownFileCreationTime, + epoch_number, kUnknownFileChecksum, + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); } void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) { @@ -186,7 +187,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) { 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.DeleteFile(3, 27U); EnvOptions env_options; @@ -233,7 +234,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { 3, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); + version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); @@ -283,7 +285,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { 4, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); version_edit.DeleteFile(4, 6U); @@ -319,27 +321,27 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; @@ -378,27 +380,27 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { 2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit.AddFile( 2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); ASSERT_OK(version_builder.Apply(&version_edit)); VersionEdit version_edit2; @@ -406,14 +408,14 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { 2, 808, 0, 100U, GetInternalKey("901"), GetInternalKey("950"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); version_edit2.DeleteFile(2, 616); version_edit2.DeleteFile(2, 636); version_edit.AddFile( 2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); ASSERT_OK(version_builder.Apply(&version_edit2)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -524,7 +526,7 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); ASSERT_OK(builder.Apply(&addition)); @@ -573,7 +575,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) { GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); @@ -609,7 +611,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); ASSERT_OK(builder.Apply(&edit)); @@ -622,7 +624,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); const Status s = builder.Apply(&other_edit); ASSERT_TRUE(s.IsCorruption()); @@ -658,7 +660,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, - kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); ASSERT_OK(builder.Apply(&addition)); @@ -1231,7 +1233,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, Temperature::kUnknown, blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 2 /*epoch_number*/, - checksum_value, checksum_method, kNullUniqueId64x2); + checksum_value, checksum_method, kNullUniqueId64x2, 0); edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); @@ -1319,7 +1321,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { /* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("801"), @@ -1329,7 +1331,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { /* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000, /* total_blob_bytes */ 200000, /* checksum_method */ std::string(), @@ -1550,7 +1552,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { Temperature::kUnknown, /* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); // Add an SST that does not reference any blob files. edit.AddFile( @@ -1560,7 +1562,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* largest_seqno */ 2200, /* marked_for_compaction */ false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); // Delete a file that references a blob file. edit.DeleteFile(/* level */ 1, /* file_number */ 6); @@ -1583,7 +1585,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); // Trivially move a file that does not reference any blob files. edit.DeleteFile(/* level */ 1, /* file_number */ 13); @@ -1595,7 +1597,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); // Add one more SST file that references a blob file, then promptly // delete it in a second version edit before the new version gets saved. @@ -1609,7 +1611,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); VersionEdit edit2; @@ -1710,7 +1712,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); version_edit_1.AddFile( /* level */ 0, /* file_number */ 2U, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("b", 2), @@ -1720,7 +1722,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); VersionBuilder version_builder_1(EnvOptions(), &ioptions_, nullptr /* table_cache */, &vstorage_, @@ -1747,7 +1749,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 1 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); version_edit_2.AddFile( /* level */ 0, /* file_number */ 2U, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("b", 2), @@ -1757,7 +1759,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { /* oldest_blob_file_number */ kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 2 /* epoch_number */, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); + kNullUniqueId64x2, 0); VersionBuilder version_builder_2(EnvOptions(), &ioptions_, nullptr /* table_cache */, &vstorage_, diff --git a/db/version_edit.cc b/db/version_edit.cc index df52260778..ecddaa49cc 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -231,6 +231,13 @@ bool VersionEdit::EncodeTo(std::string* dst) const { std::string unique_id_str = EncodeUniqueIdBytes(&unique_id); PutLengthPrefixedSlice(dst, Slice(unique_id_str)); } + if (f.compensated_range_deletion_size) { + PutVarint32(dst, kCompensatedRangeDeletionSize); + std::string compensated_range_deletion_size; + PutVarint64(&compensated_range_deletion_size, + f.compensated_range_deletion_size); + PutLengthPrefixedSlice(dst, Slice(compensated_range_deletion_size)); + } TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields", dst); @@ -404,6 +411,11 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) { return "invalid unique id"; } break; + case kCompensatedRangeDeletionSize: + if (!GetVarint64(&field, &f.compensated_range_deletion_size)) { + return "Invalid compensated range deletion size"; + } + break; default: if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) { // Should not proceed if cannot understand it diff --git a/db/version_edit.h b/db/version_edit.h index cfc5f14e57..8be5f4f520 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -89,6 +89,7 @@ enum NewFileCustomTag : uint32_t { kMaxTimestamp = 11, kUniqueId = 12, kEpochNumber = 13, + kCompensatedRangeDeletionSize = 14, // If this bit for the custom tag is set, opening DB should fail if // we don't know this field. @@ -182,15 +183,22 @@ struct FileMetaData { // Stats for compensating deletion entries during compaction // File size compensated by deletion entry. - // This is updated in Version::UpdateAccumulatedStats() first time when the - // file is created or loaded. After it is updated (!= 0), it is immutable. + // This is used to compute a file's compaction priority, and is updated in + // Version::ComputeCompensatedSizes() first time when the file is created or + // loaded. After it is updated (!= 0), it is immutable. uint64_t compensated_file_size = 0; // These values can mutate, but they can only be read or written from // single-threaded LogAndApply thread uint64_t num_entries = 0; // the number of entries. - uint64_t num_deletions = 0; // the number of deletion entries. + // The number of deletion entries, including range deletions. + uint64_t num_deletions = 0; uint64_t raw_key_size = 0; // total uncompressed key size. uint64_t raw_value_size = 0; // total uncompressed value size. + uint64_t num_range_deletions = 0; + // This is computed during Flush/Compaction, and is added to + // `compensated_file_size`. Currently, this estimates the size of keys in the + // next level covered by range tombstones in this file. + uint64_t compensated_range_deletion_size = 0; int refs = 0; // Reference count @@ -240,10 +248,12 @@ struct FileMetaData { uint64_t _oldest_ancester_time, uint64_t _file_creation_time, uint64_t _epoch_number, const std::string& _file_checksum, const std::string& _file_checksum_func_name, - UniqueId64x2 _unique_id) + UniqueId64x2 _unique_id, + const uint64_t _compensated_range_deletion_size) : fd(file, file_path_id, file_size, smallest_seq, largest_seq), smallest(smallest_key), largest(largest_key), + compensated_range_deletion_size(_compensated_range_deletion_size), marked_for_compaction(marked_for_compact), temperature(_temperature), oldest_blob_file_number(oldest_blob_file), @@ -434,7 +444,8 @@ class VersionEdit { uint64_t oldest_ancester_time, uint64_t file_creation_time, uint64_t epoch_number, const std::string& file_checksum, const std::string& file_checksum_func_name, - const UniqueId64x2& unique_id) { + const UniqueId64x2& unique_id, + const uint64_t compensated_range_deletion_size) { assert(smallest_seqno <= largest_seqno); new_files_.emplace_back( level, @@ -442,7 +453,8 @@ class VersionEdit { smallest_seqno, largest_seqno, marked_for_compaction, temperature, oldest_blob_file_number, oldest_ancester_time, file_creation_time, epoch_number, file_checksum, - file_checksum_func_name, unique_id)); + file_checksum_func_name, unique_id, + compensated_range_deletion_size)); if (!HasLastSequence() || largest_seqno > GetLastSequence()) { SetLastSequence(largest_seqno); } diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc index 7571291e22..1fa6c00549 100644 --- a/db/version_edit_test.cc +++ b/db/version_edit_test.cc @@ -45,7 +45,7 @@ TEST_F(VersionEditTest, EncodeDecode) { kBig + 500 + i, kBig + 600 + i, false, Temperature::kUnknown, kInvalidBlobFileNumber, 888, 678, kBig + 300 + i /* epoch_number */, "234", "crc32c", - kNullUniqueId64x2); + kNullUniqueId64x2, 0); edit.DeleteFile(4, kBig + 700 + i); } @@ -65,24 +65,24 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 301 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue), InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502, kBig + 602, true, Temperature::kUnknown, kInvalidBlobFileNumber, 666, 888, 302 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex), InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503, kBig + 603, true, Temperature::kUnknown, 1001, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 303 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); edit.DeleteFile(4, 700); @@ -123,12 +123,12 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber, 686, 868, 301 /* epoch_number */, "234", "crc32c", - kNullUniqueId64x2); + kNullUniqueId64x2, 0); edit.DeleteFile(4, 700); edit.SetComparatorName("foo"); @@ -177,7 +177,7 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) { kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); edit.SetComparatorName("foo"); edit.SetLogNumber(kBig + 100); @@ -208,7 +208,7 @@ TEST_F(VersionEditTest, EncodeEmptyFile) { Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 1 /*epoch_number*/, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); std::string buffer; ASSERT_TRUE(!edit.EncodeTo(&buffer)); } diff --git a/db/version_set.cc b/db/version_set.cc index 8a8fa1d75b..be1db7ba3e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2960,7 +2960,7 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) { file_meta->num_deletions = tp->num_deletions; file_meta->raw_value_size = tp->raw_value_size; file_meta->raw_key_size = tp->raw_key_size; - + file_meta->num_range_deletions = tp->num_range_deletions; return true; } @@ -3062,11 +3062,15 @@ void VersionStorageInfo::ComputeCompensatedSizes() { // size of deletion entries in a stable workload, the deletion // compensation logic might introduce unwanted effet which changes the // shape of LSM tree. - if (file_meta->num_deletions * 2 >= file_meta->num_entries) { + if ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 >= + file_meta->num_entries) { file_meta->compensated_file_size += - (file_meta->num_deletions * 2 - file_meta->num_entries) * + ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 - + file_meta->num_entries) * average_value_size * kDeletionWeightOnCompaction; } + file_meta->compensated_file_size += + file_meta->compensated_range_deletion_size; } } } @@ -6215,7 +6219,8 @@ Status VersionSet::WriteCurrentStateToManifest( f->marked_for_compaction, f->temperature, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, f->epoch_number, f->file_checksum, - f->file_checksum_func_name, f->unique_id); + f->file_checksum_func_name, f->unique_id, + f->compensated_range_deletion_size); } } @@ -6293,8 +6298,9 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options, const int num_non_empty_levels = vstorage->num_non_empty_levels(); end_level = (end_level == -1) ? num_non_empty_levels : std::min(end_level, num_non_empty_levels); - - assert(start_level <= end_level); + if (end_level <= start_level) { + return 0; + } // Outline of the optimization that uses options.files_size_error_margin. // When approximating the files total size that is used to store a keys range, diff --git a/db/version_set_test.cc b/db/version_set_test.cc index c179f7a6a6..9234a4d880 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -51,7 +51,7 @@ class GenerateLevelFilesBriefTest : public testing::Test { largest_seq, /* marked_for_compact */ false, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); files_.push_back(f); } @@ -143,16 +143,19 @@ class VersionStorageInfoTestBase : public testing::Test { void Add(int level, uint32_t file_number, const char* smallest, const char* largest, uint64_t file_size = 0, - uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) { + uint64_t oldest_blob_file_number = kInvalidBlobFileNumber, + uint64_t compensated_range_deletion_size = 0) { constexpr SequenceNumber dummy_seq = 0; Add(level, file_number, GetInternalKey(smallest, dummy_seq), - GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number); + GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number, + compensated_range_deletion_size); } void Add(int level, uint32_t file_number, const InternalKey& smallest, const InternalKey& largest, uint64_t file_size = 0, - uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) { + uint64_t oldest_blob_file_number = kInvalidBlobFileNumber, + uint64_t compensated_range_deletion_size = 0) { assert(level < vstorage_.num_levels()); FileMetaData* f = new FileMetaData( file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0, @@ -160,8 +163,7 @@ class VersionStorageInfoTestBase : public testing::Test { Temperature::kUnknown, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName, - kNullUniqueId64x2); - f->compensated_file_size = file_size; + kNullUniqueId64x2, compensated_range_deletion_size); vstorage_.AddFile(level, f); } @@ -2136,6 +2138,17 @@ TEST_F(VersionSetTest, AtomicGroupWithWalEdits) { } } +TEST_F(VersionStorageInfoTest, AddRangeDeletionCompensatedFileSize) { + // Tests that compensated range deletion size is added to compensated file + // size. + Add(4, 100U, "1", "2", 100U, kInvalidBlobFileNumber, 1000U); + + UpdateVersionStorageInfo(); + + auto meta = vstorage_.GetFileMetaDataByNumber(100U); + ASSERT_EQ(meta->compensated_file_size, 100U + 1000U); +} + class VersionSetWithTimestampTest : public VersionSetTest { public: static const std::string kNewCfName; @@ -3242,7 +3255,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey, ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0, info.epoch_number, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, + 0); } } @@ -3299,7 +3313,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) { file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0, file_num /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest( @@ -3360,7 +3374,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) { file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0, file_num /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest(