From f22557886e3ae57438ed1204a2978a7ff0c08ec0 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Wed, 16 Oct 2024 19:20:37 -0700 Subject: [PATCH] Fix Compaction Stats (#13071) Summary: Compaction stats code is not so straightforward to understand. Here's a bit of context for this PR and why this change was made. - **CompactionStats (compaction_stats_.stats):** Internal stats about the compaction used for logging and public metrics. - **CompactionJobStats (compaction_job_stats_)**: The public stats at job level. It's part of Compaction event listener and included in the CompactionResult. - **CompactionOutputsStats**: output stats only. resides in CompactionOutputs. It gets aggregated toward the CompactionStats (internal stats). The internal stats, `compaction_stats_.stats`, has the output information recorded from the compaction iterator, but it does not have any input information (input records, input output files) until `UpdateCompactionStats()` gets called. We cannot simply call `UpdateCompactionStats()` to fill in the input information in the remote compaction (which is a subcompaction of the primary host's compaction) because the `compaction->inputs()` have the full list of input files and `UpdateCompactionStats()` takes the entire list of records in all files. `num_input_records` gets double-counted if multiple sub-compactions are submitted to the remote worker. The job level stats (in the case of remote compaction, it's subcompaction level stat), `compaction_job_stats_`, has the correct input records, but has no output information. We can use `UpdateCompactionJobStats(compaction_stats_.stats)` to set the output information (num_output_records, num_output_files, etc.) from the `compaction_stats_.stats`, but it also sets all other fields including the input information which sets all back to 0. 
Therefore, we are overriding `UpdateCompactionJobStats()` in remote worker only to update job level stats, `compaction_job_stats_`, with output information of the internal stats. Basically, we are merging the aggregated output info from the internal stats and aggregated input info from the compaction job stats. In this PR we are also fixing how we are setting `is_remote_compaction` in CompactionJobStats. - OnCompactionBegin event, if options.compaction_service is set, `is_remote_compaction=true` for all compactions except for trivial moves - OnCompactionCompleted event, if any of the sub_compactions were done remotely, compaction level stats' `is_remote_compaction` will be true Other minor changes - num_output_records is already available in CompactionJobStats. No need to store separately in CompactionResult. - total_bytes is not needed. - Renamed `SubcompactionState::AggregateCompactionStats()` to `SubcompactionState::AggregateCompactionOutputStats()` to make it clear that it's only aggregating output stats. - Renamed `SetTotalBytes()` to `AddBytesWritten()` to make it clearer that it's adding total written bytes from the compaction output. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/13071 Test Plan: Unit Tests added and updated ``` ./compaction_service_test ``` Reviewed By: anand1976 Differential Revision: D64479657 Pulled By: jaykorean fbshipit-source-id: a7a776a00dc718abae95d856b661bcbafd3b0ed5 --- db/compaction/compaction_job.cc | 7 ++ db/compaction/compaction_job.h | 11 +- db/compaction/compaction_job_test.cc | 3 +- db/compaction/compaction_outputs.h | 3 +- db/compaction/compaction_service_job.cc | 64 ++++++---- db/compaction/compaction_service_test.cc | 141 +++++++++++++++++++++++ db/compaction/compaction_state.cc | 2 +- db/compaction/subcompaction_state.cc | 2 +- db/compaction/subcompaction_state.h | 2 +- db/db_impl/db_impl_compaction_flush.cc | 10 ++ db/db_secondary_test.cc | 2 +- util/compaction_job_stats_impl.cc | 2 + 12 files changed, 213 insertions(+), 36 deletions(-) diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index b4b4eeacee..8a4f9a109f 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -806,6 +806,12 @@ Status CompactionJob::Run() { } } + // Before the compaction starts, is_remote_compaction was set to true if + // compaction_service is set. We now know whether each sub_compaction was + // done remotely or not. Reset is_remote_compaction back to false and allow + // AggregateCompactionStats() to set the right value. + compaction_job_stats_->is_remote_compaction = false; + // Finish up all bookkeeping to unify the subcompaction results. 
compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_); uint64_t num_input_range_del = 0; @@ -1084,6 +1090,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { } // fallback to local compaction assert(comp_status == CompactionServiceJobStatus::kUseLocal); + sub_compact->compaction_job_stats.is_remote_compaction = false; } uint64_t prev_cpu_micros = db_options_.clock->CPUMicros(); diff --git a/db/compaction/compaction_job.h b/db/compaction/compaction_job.h index 588c0a0c69..69895eac38 100644 --- a/db/compaction/compaction_job.h +++ b/db/compaction/compaction_job.h @@ -209,6 +209,8 @@ class CompactionJob { // Returns true iff compaction_stats_.stats.num_input_records and // num_input_range_del are calculated successfully. bool UpdateCompactionStats(uint64_t* num_input_range_del = nullptr); + virtual void UpdateCompactionJobStats( + const InternalStats::CompactionStats& stats) const; void LogCompaction(); virtual void RecordCompactionIOStats(); void CleanupCompaction(); @@ -279,8 +281,7 @@ class CompactionJob { bool* compaction_released); Status OpenCompactionOutputFile(SubcompactionState* sub_compact, CompactionOutputs& outputs); - void UpdateCompactionJobStats( - const InternalStats::CompactionStats& stats) const; + void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats, CompactionJobStats* compaction_job_stats = nullptr); @@ -458,9 +459,6 @@ struct CompactionServiceResult { // location of the output files std::string output_path; - // some statistics about the compaction - uint64_t num_output_records = 0; - uint64_t total_bytes = 0; uint64_t bytes_read = 0; uint64_t bytes_written = 0; CompactionJobStats stats; @@ -506,6 +504,9 @@ class CompactionServiceCompactionJob : private CompactionJob { protected: void RecordCompactionIOStats() override; + void UpdateCompactionJobStats( + const InternalStats::CompactionStats& stats) const override; + private: // Get table file name in output_path std::string 
GetTableFileName(uint64_t file_number) override; diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index f9ea675eff..acf8828b05 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -1679,8 +1679,7 @@ TEST_F(CompactionJobTest, ResultSerialization) { } result.output_level = rnd.Uniform(10); result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen)); - result.num_output_records = rnd64.Uniform(UINT64_MAX); - result.total_bytes = rnd64.Uniform(UINT64_MAX); + result.stats.num_output_records = rnd64.Uniform(UINT64_MAX); result.bytes_read = 123; result.bytes_written = rnd64.Uniform(UINT64_MAX); result.stats.elapsed_micros = rnd64.Uniform(UINT64_MAX); diff --git a/db/compaction/compaction_outputs.h b/db/compaction/compaction_outputs.h index 1b02fb0e9e..51d378ff01 100644 --- a/db/compaction/compaction_outputs.h +++ b/db/compaction/compaction_outputs.h @@ -62,8 +62,9 @@ class CompactionOutputs { } // TODO: Remove it when remote compaction support tiered compaction - void SetTotalBytes(uint64_t bytes) { stats_.bytes_written += bytes; } + void AddBytesWritten(uint64_t bytes) { stats_.bytes_written += bytes; } void SetNumOutputRecords(uint64_t num) { stats_.num_output_records = num; } + void SetNumOutputFiles(uint64_t num) { stats_.num_output_files = num; } // TODO: Move the BlobDB builder into CompactionOutputs const std::vector& GetBlobFileAdditions() const { diff --git a/db/compaction/compaction_service_job.cc b/db/compaction/compaction_service_job.cc index 178e5bde43..a39dff3f9f 100644 --- a/db/compaction/compaction_service_job.cc +++ b/db/compaction/compaction_service_job.cc @@ -215,8 +215,10 @@ CompactionJob::ProcessKeyValueCompactionWithCompactionService( } sub_compact->compaction_job_stats = compaction_result.stats; sub_compact->Current().SetNumOutputRecords( - compaction_result.num_output_records); - sub_compact->Current().SetTotalBytes(compaction_result.total_bytes); + 
compaction_result.stats.num_output_records); + sub_compact->Current().SetNumOutputFiles( + compaction_result.stats.num_output_files); + sub_compact->Current().AddBytesWritten(compaction_result.bytes_written); RecordTick(stats_, REMOTE_COMPACT_READ_BYTES, compaction_result.bytes_read); RecordTick(stats_, REMOTE_COMPACT_WRITE_BYTES, compaction_result.bytes_written); @@ -236,6 +238,18 @@ void CompactionServiceCompactionJob::RecordCompactionIOStats() { CompactionJob::RecordCompactionIOStats(); } +void CompactionServiceCompactionJob::UpdateCompactionJobStats( + const InternalStats::CompactionStats& stats) const { + compaction_job_stats_->elapsed_micros = stats.micros; + + // output information only in remote compaction + compaction_job_stats_->total_output_bytes = stats.bytes_written; + compaction_job_stats_->total_output_bytes_blob = stats.bytes_written_blob; + compaction_job_stats_->num_output_records = stats.num_output_records; + compaction_job_stats_->num_output_files = stats.num_output_files; + compaction_job_stats_->num_output_files_blob = stats.num_output_files_blob; +} + CompactionServiceCompactionJob::CompactionServiceCompactionJob( int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, const MutableDBOptions& mutable_db_options, const FileOptions& file_options, @@ -290,6 +304,9 @@ Status CompactionServiceCompactionJob::Run() { log_buffer_->FlushBufferToLog(); LogCompaction(); + + compaction_result_->stats.Reset(); + const uint64_t start_micros = db_options_.clock->NowMicros(); c->GetOrInitInputTableProperties(); @@ -331,19 +348,33 @@ Status CompactionServiceCompactionJob::Run() { status = io_s; } - // Finish up all book-keeping to unify the subcompaction results - compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_); - UpdateCompactionStats(); - RecordCompactionIOStats(); - LogFlush(db_options_.info_log); compact_->status = status; compact_->status.PermitUncheckedError(); - // Build compaction result + // Build 
Compaction Job Stats + + // 1. Aggregate CompactionOutputStats into Internal Compaction Stats + // (compaction_stats_) and aggregate Compaction Job Stats + // (compaction_job_stats_) from the sub compactions + compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_); + + // 2. Update the Output information in the Compaction Job Stats with + // aggregated Internal Compaction Stats. + UpdateCompactionJobStats(compaction_stats_.stats); + + // 3. Set fields that are not propagated as part of aggregations above + compaction_result_->stats.is_manual_compaction = c->is_manual_compaction(); + compaction_result_->stats.is_full_compaction = c->is_full_compaction(); + compaction_result_->stats.is_remote_compaction = true; + + // 4. Update IO Stats that are not part of the aggregations above (bytes_read, + // bytes_written) + RecordCompactionIOStats(); + + // Build Output compaction_result_->output_level = compact_->compaction->output_level(); compaction_result_->output_path = output_path_; - compaction_result_->stats.is_remote_compaction = true; for (const auto& output_file : sub_compact->GetOutputs()) { auto& meta = output_file.meta; compaction_result_->output_files.emplace_back( @@ -357,13 +388,6 @@ Status CompactionServiceCompactionJob::Run() { TEST_SYNC_POINT_CALLBACK("CompactionServiceCompactionJob::Run:0", &compaction_result_); - - InternalStats::CompactionStatsFull compaction_stats; - sub_compact->AggregateCompactionStats(compaction_stats); - compaction_result_->num_output_records = - compaction_stats.stats.num_output_records; - compaction_result_->total_bytes = compaction_stats.TotalBytesWritten(); - return status; } @@ -726,14 +750,6 @@ static std::unordered_map cs_result_type_info = { {offsetof(struct CompactionServiceResult, output_path), OptionType::kEncodedString, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, - {"num_output_records", - {offsetof(struct CompactionServiceResult, num_output_records), - OptionType::kUInt64T, 
OptionVerificationType::kNormal, - OptionTypeFlags::kNone}}, - {"total_bytes", - {offsetof(struct CompactionServiceResult, total_bytes), - OptionType::kUInt64T, OptionVerificationType::kNormal, - OptionTypeFlags::kNone}}, {"bytes_read", {offsetof(struct CompactionServiceResult, bytes_read), OptionType::kUInt64T, OptionVerificationType::kNormal, diff --git a/db/compaction/compaction_service_test.cc b/db/compaction/compaction_service_test.cc index 4f767c865f..ad3fa1a5c8 100644 --- a/db/compaction/compaction_service_test.cc +++ b/db/compaction/compaction_service_test.cc @@ -349,7 +349,27 @@ TEST_F(CompactionServiceTest, BasicCompactions) { } else { ASSERT_OK(result.status); } + ASSERT_GE(result.stats.elapsed_micros, 1); + ASSERT_GE(result.stats.cpu_micros, 1); + + ASSERT_EQ(20, result.stats.num_output_records); + ASSERT_EQ(result.output_files.size(), result.stats.num_output_files); + + uint64_t total_size = 0; + for (auto output_file : result.output_files) { + std::string file_name = result.output_path + "/" + output_file.file_name; + + uint64_t file_size = 0; + ASSERT_OK(options.env->GetFileSize(file_name, &file_size)); + ASSERT_GT(file_size, 0); + total_size += file_size; + } + ASSERT_EQ(total_size, result.stats.total_output_bytes); + ASSERT_TRUE(result.stats.is_remote_compaction); + ASSERT_TRUE(result.stats.is_manual_compaction); + ASSERT_FALSE(result.stats.is_full_compaction); + Close(); } @@ -527,6 +547,127 @@ TEST_F(CompactionServiceTest, PreservedOptionsRemoteCompaction) { ASSERT_TRUE(result.stats.is_remote_compaction); } +class EventVerifier : public EventListener { + public: + explicit EventVerifier(uint64_t expected_num_input_records, + size_t expected_num_input_files, + uint64_t expected_num_output_records, + size_t expected_num_output_files, + const std::string& expected_smallest_output_key_prefix, + const std::string& expected_largest_output_key_prefix, + bool expected_is_remote_compaction_on_begin, + bool expected_is_remote_compaction_on_complete) + : 
expected_num_input_records_(expected_num_input_records), + expected_num_input_files_(expected_num_input_files), + expected_num_output_records_(expected_num_output_records), + expected_num_output_files_(expected_num_output_files), + expected_smallest_output_key_prefix_( + expected_smallest_output_key_prefix), + expected_largest_output_key_prefix_(expected_largest_output_key_prefix), + expected_is_remote_compaction_on_begin_( + expected_is_remote_compaction_on_begin), + expected_is_remote_compaction_on_complete_( + expected_is_remote_compaction_on_complete) {} + void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override { + ASSERT_EQ(expected_num_input_files_, ci.input_files.size()); + ASSERT_EQ(expected_num_input_files_, ci.input_file_infos.size()); + ASSERT_EQ(expected_is_remote_compaction_on_begin_, + ci.stats.is_remote_compaction); + ASSERT_TRUE(ci.stats.is_manual_compaction); + ASSERT_FALSE(ci.stats.is_full_compaction); + } + void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override { + ASSERT_GT(ci.stats.elapsed_micros, 0); + ASSERT_GT(ci.stats.cpu_micros, 0); + ASSERT_EQ(expected_num_input_records_, ci.stats.num_input_records); + ASSERT_EQ(expected_num_input_files_, ci.stats.num_input_files); + ASSERT_EQ(expected_num_output_records_, ci.stats.num_output_records); + ASSERT_EQ(expected_num_output_files_, ci.stats.num_output_files); + ASSERT_EQ(expected_smallest_output_key_prefix_, + ci.stats.smallest_output_key_prefix); + ASSERT_EQ(expected_largest_output_key_prefix_, + ci.stats.largest_output_key_prefix); + ASSERT_GT(ci.stats.total_input_bytes, 0); + ASSERT_GT(ci.stats.total_output_bytes, 0); + ASSERT_EQ(ci.stats.num_input_records, + ci.stats.num_output_records + ci.stats.num_records_replaced); + ASSERT_EQ(expected_is_remote_compaction_on_complete_, + ci.stats.is_remote_compaction); + ASSERT_TRUE(ci.stats.is_manual_compaction); + ASSERT_FALSE(ci.stats.is_full_compaction); + } + + private: + uint64_t expected_num_input_records_; 
+ size_t expected_num_input_files_; + uint64_t expected_num_output_records_; + size_t expected_num_output_files_; + std::string expected_smallest_output_key_prefix_; + std::string expected_largest_output_key_prefix_; + bool expected_is_remote_compaction_on_begin_; + bool expected_is_remote_compaction_on_complete_; +}; + +TEST_F(CompactionServiceTest, VerifyStats) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + auto event_verifier = std::make_shared( + 30 /* expected_num_input_records */, 3 /* expected_num_input_files */, + 20 /* expected_num_output_records */, 1 /* expected_num_output_files */, + "key00000" /* expected_smallest_output_key_prefix */, + "key00001" /* expected_largest_output_key_prefix */, + true /* expected_is_remote_compaction_on_begin */, + true /* expected_is_remote_compaction_on_complete */); + options.listeners.push_back(event_verifier); + ReopenWithCompactionService(&options); + GenerateTestData(); + + auto my_cs = GetCompactionService(); + + std::string start_str = Key(0); + std::string end_str = Key(1); + Slice start(start_str); + Slice end(end_str); + uint64_t comp_num = my_cs->GetCompactionNum(); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end)); + ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1); + VerifyTestData(); + + CompactionServiceResult result; + my_cs->GetResult(&result); + ASSERT_OK(result.status); + ASSERT_TRUE(result.stats.is_manual_compaction); + ASSERT_TRUE(result.stats.is_remote_compaction); +} + +TEST_F(CompactionServiceTest, VerifyStatsLocalFallback) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + auto event_verifier = std::make_shared( + 30 /* expected_num_input_records */, 3 /* expected_num_input_files */, + 20 /* expected_num_output_records */, 1 /* expected_num_output_files */, + "key00000" /* expected_smallest_output_key_prefix */, + "key00001" /* expected_largest_output_key_prefix */, + true /* 
expected_is_remote_compaction_on_begin */, + false /* expected_is_remote_compaction_on_complete */); + options.listeners.push_back(event_verifier); + ReopenWithCompactionService(&options); + GenerateTestData(); + + auto my_cs = GetCompactionService(); + my_cs->OverrideStartStatus(CompactionServiceJobStatus::kUseLocal); + + std::string start_str = Key(0); + std::string end_str = Key(1); + Slice start(start_str); + Slice end(end_str); + uint64_t comp_num = my_cs->GetCompactionNum(); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end)); + // Remote Compaction did not happen + ASSERT_EQ(my_cs->GetCompactionNum(), comp_num); + VerifyTestData(); +} + TEST_F(CompactionServiceTest, CorruptedOutput) { Options options = CurrentOptions(); options.disable_auto_compactions = true; diff --git a/db/compaction/compaction_state.cc b/db/compaction/compaction_state.cc index ee4b0c1896..bf016d04b6 100644 --- a/db/compaction/compaction_state.cc +++ b/db/compaction/compaction_state.cc @@ -39,7 +39,7 @@ void CompactionState::AggregateCompactionStats( InternalStats::CompactionStatsFull& compaction_stats, CompactionJobStats& compaction_job_stats) { for (const auto& sc : sub_compact_states) { - sc.AggregateCompactionStats(compaction_stats); + sc.AggregateCompactionOutputStats(compaction_stats); compaction_job_stats.Add(sc.compaction_job_stats); } } diff --git a/db/compaction/subcompaction_state.cc b/db/compaction/subcompaction_state.cc index 0c56471e92..aae446351f 100644 --- a/db/compaction/subcompaction_state.cc +++ b/db/compaction/subcompaction_state.cc @@ -13,7 +13,7 @@ #include "rocksdb/sst_partitioner.h" namespace ROCKSDB_NAMESPACE { -void SubcompactionState::AggregateCompactionStats( +void SubcompactionState::AggregateCompactionOutputStats( InternalStats::CompactionStatsFull& compaction_stats) const { compaction_stats.stats.Add(compaction_outputs_.stats_); if (HasPenultimateLevelOutputs()) { diff --git a/db/compaction/subcompaction_state.h 
b/db/compaction/subcompaction_state.h index b933a62a51..252fdfb8a1 100644 --- a/db/compaction/subcompaction_state.h +++ b/db/compaction/subcompaction_state.h @@ -179,7 +179,7 @@ class SubcompactionState { void Cleanup(Cache* cache); - void AggregateCompactionStats( + void AggregateCompactionOutputStats( InternalStats::CompactionStatsFull& compaction_stats) const; CompactionOutputs& Current() const { diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 3c72052336..c058b518b3 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -3561,6 +3561,14 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, is_manual && manual_compaction->disallow_trivial_move; CompactionJobStats compaction_job_stats; + // Set is_remote_compaction to true on CompactionBegin Event if + // compaction_service is set except for trivial moves. We do not know whether + // remote compaction will actually be successfully scheduled, or fall back to + // local at this time. CompactionCompleted event will tell the truth where + // the compaction actually happened. 
+ compaction_job_stats.is_remote_compaction = + immutable_db_options().compaction_service != nullptr; + Status status; if (!error_handler_.IsBGWorkStopped()) { if (shutting_down_.load(std::memory_order_acquire)) { @@ -3786,6 +3794,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION); compaction_job_stats.num_input_files = c->num_input_files(0); + // Trivial moves do not get compacted remotely + compaction_job_stats.is_remote_compaction = false; NotifyOnCompactionBegin(c->column_family_data(), c.get(), status, compaction_job_stats, job_context->job_id); diff --git a/db/db_secondary_test.cc b/db/db_secondary_test.cc index 6c33d41dfe..ba1ed5c951 100644 --- a/db/db_secondary_test.cc +++ b/db/db_secondary_test.cc @@ -244,7 +244,7 @@ TEST_F(DBSecondaryTest, SimpleInternalCompaction) { ASSERT_EQ(largest.user_key().ToString(), "foo"); ASSERT_EQ(result.output_level, 1); ASSERT_EQ(result.output_path, this->secondary_path_); - ASSERT_EQ(result.num_output_records, 2); + ASSERT_EQ(result.stats.num_output_records, 2); ASSERT_GT(result.bytes_written, 0); ASSERT_OK(result.status); } diff --git a/util/compaction_job_stats_impl.cc b/util/compaction_job_stats_impl.cc index 37e39987e0..11ab63b620 100644 --- a/util/compaction_job_stats_impl.cc +++ b/util/compaction_job_stats_impl.cc @@ -89,6 +89,8 @@ void CompactionJobStats::Add(const CompactionJobStats& stats) { num_single_del_fallthru += stats.num_single_del_fallthru; num_single_del_mismatch += stats.num_single_del_mismatch; + + is_remote_compaction |= stats.is_remote_compaction; }