From bf937cf15b8f034491c724c9b6df3d4909b76d8f Mon Sep 17 00:00:00 2001 From: Yi Wu Date: Thu, 1 Mar 2018 17:50:54 -0800 Subject: [PATCH] Add "rocksdb.live-sst-files-size" DB property Summary: Add "rocksdb.live-sst-files-size" DB property which only includes files of the latest version. The existing "rocksdb.total-sst-files-size" includes files from all versions and thus includes files that are obsolete but not yet deleted. I'm going to use this new property to cap blob db sst + blob files size. Closes https://github.com/facebook/rocksdb/pull/3548 Differential Revision: D7116939 Pulled By: yiwu-arbug fbshipit-source-id: c6a52e45ce0f24ef78708156e1a923c1dd6bc79a --- HISTORY.md | 1 + db/column_family.cc | 4 ++++ db/column_family.h | 1 + db/db_impl.h | 4 ++-- db/db_properties_test.cc | 51 ++++++++++++++++++++++++++++++++++++++++ db/internal_stats.cc | 15 ++++++++++-- db/internal_stats.h | 1 + db/version_set.cc | 9 +++++++ db/version_set.h | 2 ++ include/rocksdb/db.h | 5 ++++ 10 files changed, 89 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 7c2d70d96e..3a7fed228f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ ### New Features * Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables. +* Add "rocksdb.live-sst-files-size" DB property to return total bytes of all SST files belonging to the latest LSM tree. 
## 5.12.0 (2/14/2018) ### Public API Change diff --git a/db/column_family.cc b/db/column_family.cc index 983be9e412..b1182e85a1 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -846,6 +846,10 @@ uint64_t ColumnFamilyData::GetTotalSstFilesSize() const { return VersionSet::GetTotalSstFilesSize(dummy_versions_); } +uint64_t ColumnFamilyData::GetLiveSstFilesSize() const { + return current_->GetSstFilesSize(); +} + MemTable* ColumnFamilyData::ConstructNewMemtable( const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) { return new MemTable(internal_comparator_, ioptions_, mutable_cf_options, diff --git a/db/column_family.h b/db/column_family.h index 51a88d9d8d..0101b06f35 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -244,6 +244,7 @@ class ColumnFamilyData { void SetCurrent(Version* _current); uint64_t GetNumLiveVersions() const; // REQUIRE: DB mutex held uint64_t GetTotalSstFilesSize() const; // REQUIRE: DB mutex held + uint64_t GetLiveSstFilesSize() const; // REQUIRE: DB mutex held void SetMemtable(MemTable* new_mem) { uint64_t memtable_id = last_memtable_id_.fetch_add(1) + 1; new_mem->SetID(memtable_id); diff --git a/db/db_impl.h b/db/db_impl.h index a9417bd7c8..e42accfeb3 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -1367,8 +1367,8 @@ class DBImpl : public DB { return Env::WLTH_SHORT; } - // When set, we use a separate queue for writes that dont write to memtable. In - // 2PC these are the writes at Prepare phase. + // When set, we use a separate queue for writes that dont write to memtable. + // In 2PC these are the writes at Prepare phase. 
const bool two_write_queues_; const bool manual_wal_flush_; // Increase the sequence number after writing each batch, whether memtable is diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index 0da64b1365..9bd587f123 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -14,6 +14,7 @@ #include "db/db_test_util.h" #include "port/stack_trace.h" +#include "rocksdb/listener.h" #include "rocksdb/options.h" #include "rocksdb/perf_context.h" #include "rocksdb/perf_level.h" @@ -1383,6 +1384,56 @@ TEST_F(DBPropertiesTest, EstimateOldestKeyTime) { Close(); } +TEST_F(DBPropertiesTest, SstFilesSize) { + struct TestListener : public EventListener { + void OnCompactionCompleted(DB* db, + const CompactionJobInfo& /*info*/) override { + assert(callback_triggered == false); + assert(size_before_compaction > 0); + callback_triggered = true; + uint64_t total_sst_size = 0; + uint64_t live_sst_size = 0; + bool ok = db->GetIntProperty(DB::Properties::kTotalSstFilesSize, + &total_sst_size); + ASSERT_TRUE(ok); + // total_sst_size includes files before and after compaction. + ASSERT_GT(total_sst_size, size_before_compaction); + ok = + db->GetIntProperty(DB::Properties::kLiveSstFilesSize, &live_sst_size); + ASSERT_TRUE(ok); + // live_sst_size only includes files after compaction. 
+ ASSERT_GT(live_sst_size, 0); + ASSERT_LT(live_sst_size, size_before_compaction); + } + + uint64_t size_before_compaction = 0; + bool callback_triggered = false; + }; + std::shared_ptr listener = std::make_shared(); + + Options options; + options.disable_auto_compactions = true; + options.listeners.push_back(listener); + Reopen(options); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(Put("key" + ToString(i), std::string(1000, 'v'))); + } + ASSERT_OK(Flush()); + for (int i = 0; i < 5; i++) { + ASSERT_OK(Delete("key" + ToString(i))); + } + ASSERT_OK(Flush()); + uint64_t sst_size; + bool ok = db_->GetIntProperty(DB::Properties::kTotalSstFilesSize, &sst_size); + ASSERT_TRUE(ok); + ASSERT_GT(sst_size, 0); + listener->size_before_compaction = sst_size; + // Compact to clean all keys and trigger listener. + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_TRUE(listener->callback_triggered); +} + #endif // ROCKSDB_LITE } // namespace rocksdb diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 52ed4b4d93..359e52c307 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -233,6 +233,7 @@ static const std::string estimate_live_data_size = "estimate-live-data-size"; static const std::string min_log_number_to_keep = "min-log-number-to-keep"; static const std::string base_level = "base-level"; static const std::string total_sst_files_size = "total-sst-files-size"; +static const std::string live_sst_files_size = "live-sst-files-size"; static const std::string estimate_pending_comp_bytes = "estimate-pending-compaction-bytes"; static const std::string aggregated_table_properties = @@ -307,6 +308,8 @@ const std::string DB::Properties::kMinLogNumberToKeep = rocksdb_prefix + min_log_number_to_keep; const std::string DB::Properties::kTotalSstFilesSize = rocksdb_prefix + total_sst_files_size; +const std::string DB::Properties::kLiveSstFilesSize = + rocksdb_prefix + live_sst_files_size; const std::string DB::Properties::kBaseLevel = 
rocksdb_prefix + base_level; const std::string DB::Properties::kEstimatePendingCompactionBytes = rocksdb_prefix + estimate_pending_comp_bytes; @@ -405,6 +408,8 @@ const std::unordered_map {false, nullptr, &InternalStats::HandleBaseLevel, nullptr}}, {DB::Properties::kTotalSstFilesSize, {false, nullptr, &InternalStats::HandleTotalSstFilesSize, nullptr}}, + {DB::Properties::kLiveSstFilesSize, + {false, nullptr, &InternalStats::HandleLiveSstFilesSize, nullptr}}, {DB::Properties::kEstimatePendingCompactionBytes, {false, nullptr, &InternalStats::HandleEstimatePendingCompactionBytes, nullptr}}, @@ -733,12 +738,18 @@ bool InternalStats::HandleBaseLevel(uint64_t* value, DBImpl* db, return true; } -bool InternalStats::HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, - Version* version) { +bool InternalStats::HandleTotalSstFilesSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { *value = cfd_->GetTotalSstFilesSize(); return true; } +bool InternalStats::HandleLiveSstFilesSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->GetLiveSstFilesSize(); + return true; +} + bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* db, Version* version) { diff --git a/db/internal_stats.h b/db/internal_stats.h index dea9c09872..481c6d32f9 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -467,6 +467,7 @@ class InternalStats { Version* version); bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version); bool HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, Version* version); + bool HandleLiveSstFilesSize(uint64_t* value, DBImpl* db, Version* version); bool HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* db, Version* version); bool HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db, diff --git a/db/version_set.cc b/db/version_set.cc index 0de142740f..2b6c4c6292 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -891,6 +891,15 @@ void 
Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { } } +uint64_t Version::GetSstFilesSize() { + uint64_t sst_files_size = 0; + for (int level = 0; level < storage_info_.num_levels_; level++) { + for (const auto& file_meta : storage_info_.LevelFiles(level)) { + sst_files_size += file_meta->fd.GetFileSize(); + } + } + return sst_files_size; +} uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const { // Estimation will be inaccurate when: diff --git a/db/version_set.h b/db/version_set.h index ea6e4e88a7..688bf1183a 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -613,6 +613,8 @@ class Version { void GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta); + uint64_t GetSstFilesSize(); + private: Env* env_; friend class VersionSet; diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 909d33890a..94f72786ae 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -576,6 +576,10 @@ class DB { // WARNING: may slow down online queries if there are too many files. static const std::string kTotalSstFilesSize; + // "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST + // files belonging to the latest LSM tree. + static const std::string kLiveSstFilesSize; + // "rocksdb.base-level" - returns number of level to which L0 data will be // compacted. static const std::string kBaseLevel; @@ -651,6 +655,7 @@ class DB { // "rocksdb.estimate-live-data-size" // "rocksdb.min-log-number-to-keep" // "rocksdb.total-sst-files-size" + // "rocksdb.live-sst-files-size" // "rocksdb.base-level" // "rocksdb.estimate-pending-compaction-bytes" // "rocksdb.num-running-compactions"