diff --git a/HISTORY.md b/HISTORY.md index df2e99e5ae..e16c279bed 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ file_creation_time of the oldest SST file in the DB. ### New Features * Universal compaction to support options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold. +* `GetLiveFilesMetaData` and `GetColumnFamilyMetaData` now expose the file number of SST files as well as the oldest blob file referenced by each SST. ### Performance Improvements * For 64-bit hashing, RocksDB is standardizing on a slightly modified preview version of XXH3. This function is now used for many non-persisted hashes, along with fastrange64() in place of the modulus operator, and some benchmarks show a slight improvement. diff --git a/db/db_test.cc b/db/db_test.cc index c39345d8ea..56f7b3dbf7 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -24,6 +24,7 @@ #endif #include "cache/lru_cache.h" +#include "db/blob_index.h" #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/dbformat.h" @@ -1019,39 +1020,132 @@ TEST_F(DBTest, FailMoreDbPaths) { ASSERT_TRUE(TryReopen(options).IsNotSupported()); } -void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) { +void CheckColumnFamilyMeta( + const ColumnFamilyMetaData& cf_meta, + const std::vector<std::vector<FileMetaData>>& files_by_level) { + ASSERT_EQ(cf_meta.name, kDefaultColumnFamilyName); + ASSERT_EQ(cf_meta.levels.size(), files_by_level.size()); + uint64_t cf_size = 0; - uint64_t cf_csize = 0; size_t file_count = 0; - for (auto level_meta : cf_meta.levels) { + + for (size_t i = 0; i < cf_meta.levels.size(); ++i) { + const auto& level_meta_from_cf = cf_meta.levels[i]; + const auto& level_meta_from_files = files_by_level[i]; + + ASSERT_EQ(level_meta_from_cf.level, i); + ASSERT_EQ(level_meta_from_cf.files.size(), level_meta_from_files.size()); + + file_count += level_meta_from_cf.files.size(); + uint64_t level_size = 0; - uint64_t level_csize = 0; - file_count +=
level_meta.files.size(); - for (auto file_meta : level_meta.files) { - level_size += file_meta.size; + for (size_t j = 0; j < level_meta_from_cf.files.size(); ++j) { + const auto& file_meta_from_cf = level_meta_from_cf.files[j]; + const auto& file_meta_from_files = level_meta_from_files[j]; + + level_size += file_meta_from_cf.size; + + ASSERT_EQ(file_meta_from_cf.file_number, + file_meta_from_files.fd.GetNumber()); + ASSERT_EQ(file_meta_from_cf.file_number, + TableFileNameToNumber(file_meta_from_cf.name)); + ASSERT_EQ(file_meta_from_cf.size, file_meta_from_files.fd.file_size); + ASSERT_EQ(file_meta_from_cf.smallest_seqno, + file_meta_from_files.fd.smallest_seqno); + ASSERT_EQ(file_meta_from_cf.largest_seqno, + file_meta_from_files.fd.largest_seqno); + ASSERT_EQ(file_meta_from_cf.smallestkey, + file_meta_from_files.smallest.user_key().ToString()); + ASSERT_EQ(file_meta_from_cf.largestkey, + file_meta_from_files.largest.user_key().ToString()); + ASSERT_EQ(file_meta_from_cf.oldest_blob_file_number, + file_meta_from_files.oldest_blob_file_number); } - ASSERT_EQ(level_meta.size, level_size); + + ASSERT_EQ(level_meta_from_cf.size, level_size); cf_size += level_size; - cf_csize += level_csize; } + ASSERT_EQ(cf_meta.file_count, file_count); ASSERT_EQ(cf_meta.size, cf_size); } +void CheckLiveFilesMeta( + const std::vector<LiveFileMetaData>& live_file_meta, + const std::vector<std::vector<FileMetaData>>& files_by_level) { + size_t total_file_count = 0; + for (const auto& f : files_by_level) { + total_file_count += f.size(); + } + + ASSERT_EQ(live_file_meta.size(), total_file_count); + + int level = 0; + int i = 0; + + for (const auto& meta : live_file_meta) { + if (level != meta.level) { + level = meta.level; + i = 0; + } + + ASSERT_LT(i, files_by_level[level].size()); + + const auto& expected_meta = files_by_level[level][i]; + + ASSERT_EQ(meta.column_family_name, kDefaultColumnFamilyName); + ASSERT_EQ(meta.file_number, expected_meta.fd.GetNumber()); + ASSERT_EQ(meta.file_number, TableFileNameToNumber(meta.name)); +
ASSERT_EQ(meta.size, expected_meta.fd.file_size); + ASSERT_EQ(meta.smallest_seqno, expected_meta.fd.smallest_seqno); + ASSERT_EQ(meta.largest_seqno, expected_meta.fd.largest_seqno); + ASSERT_EQ(meta.smallestkey, expected_meta.smallest.user_key().ToString()); + ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString()); + ASSERT_EQ(meta.oldest_blob_file_number, + expected_meta.oldest_blob_file_number); + + ++i; + } +} + #ifndef ROCKSDB_LITE -TEST_F(DBTest, ColumnFamilyMetaDataTest) { +TEST_F(DBTest, MetaDataTest) { Options options = CurrentOptions(); options.create_if_missing = true; + options.disable_auto_compactions = true; DestroyAndReopen(options); Random rnd(301); int key_index = 0; - ColumnFamilyMetaData cf_meta; for (int i = 0; i < 100; ++i) { - GenerateNewFile(&rnd, &key_index); - db_->GetColumnFamilyMetaData(&cf_meta); - CheckColumnFamilyMeta(cf_meta); + // Add a single blob reference to each file + std::string blob_index; + BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000, + /* offset */ 1234, /* size */ 5678, kNoCompression); + + WriteBatch batch; + ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index), + blob_index)); + ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); + + ++key_index; + + // Fill up the rest of the file with random values. 
+ GenerateNewFile(&rnd, &key_index, /* nowait */ true); + + Flush(); } + + std::vector<std::vector<FileMetaData>> files_by_level; + dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level); + + ColumnFamilyMetaData cf_meta; + db_->GetColumnFamilyMetaData(&cf_meta); + CheckColumnFamilyMeta(cf_meta, files_by_level); + + std::vector<LiveFileMetaData> live_file_meta; + db_->GetLiveFilesMetaData(&live_file_meta); + CheckLiveFilesMeta(live_file_meta, files_by_level); } namespace { diff --git a/db/version_set.cc b/db/version_set.cc index 61d140a6fb..a73806b819 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1451,16 +1451,14 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) { assert(!ioptions->cf_paths.empty()); file_path = ioptions->cf_paths.back().path; } + const uint64_t file_number = file->fd.GetNumber(); files.emplace_back(SstFileMetaData{ - MakeTableFileName("", file->fd.GetNumber()), - file_path, - static_cast<size_t>(file->fd.GetFileSize()), - file->fd.smallest_seqno, - file->fd.largest_seqno, - file->smallest.user_key().ToString(), + MakeTableFileName("", file_number), file_number, file_path, + static_cast<size_t>(file->fd.GetFileSize()), file->fd.smallest_seqno, + file->fd.largest_seqno, file->smallest.user_key().ToString(), file->largest.user_key().ToString(), file->stats.num_reads_sampled.load(std::memory_order_relaxed), - file->being_compacted}); + file->being_compacted, file->oldest_blob_file_number}); files.back().num_entries = file->num_entries; files.back().num_deletions = file->num_deletions; level_size += file->fd.GetFileSize(); @@ -5393,7 +5391,9 @@ void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) { assert(!cfd->ioptions()->cf_paths.empty()); filemetadata.db_path = cfd->ioptions()->cf_paths.back().path; } - filemetadata.name = MakeTableFileName("", file->fd.GetNumber()); + const uint64_t file_number = file->fd.GetNumber(); + filemetadata.name = MakeTableFileName("", file_number); + filemetadata.file_number = file_number; filemetadata.level = level;
filemetadata.size = static_cast<size_t>(file->fd.GetFileSize()); filemetadata.smallestkey = file->smallest.user_key().ToString(); @@ -5405,6 +5405,7 @@ void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) { filemetadata.being_compacted = file->being_compacted; filemetadata.num_entries = file->num_entries; filemetadata.num_deletions = file->num_deletions; + filemetadata.oldest_blob_file_number = file->oldest_blob_file_number; metadata->push_back(filemetadata); } } diff --git a/include/rocksdb/metadata.h b/include/rocksdb/metadata.h index 7b251eb720..52b5657c3b 100644 --- a/include/rocksdb/metadata.h +++ b/include/rocksdb/metadata.h @@ -55,25 +55,24 @@ struct LevelMetaData { struct SstFileMetaData { SstFileMetaData() : size(0), - name(""), - db_path(""), + file_number(0), smallest_seqno(0), largest_seqno(0), - smallestkey(""), - largestkey(""), num_reads_sampled(0), being_compacted(false), num_entries(0), - num_deletions(0) {} + num_deletions(0), + oldest_blob_file_number(0) {} - SstFileMetaData(const std::string& _file_name, const std::string& _path, - size_t _size, SequenceNumber _smallest_seqno, - SequenceNumber _largest_seqno, + SstFileMetaData(const std::string& _file_name, uint64_t _file_number, + const std::string& _path, size_t _size, + SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno, const std::string& _smallestkey, const std::string& _largestkey, uint64_t _num_reads_sampled, - bool _being_compacted) + bool _being_compacted, uint64_t _oldest_blob_file_number) : size(_size), name(_file_name), + file_number(_file_number), db_path(_path), smallest_seqno(_smallest_seqno), largest_seqno(_largest_seqno), @@ -82,12 +81,15 @@ struct SstFileMetaData { num_reads_sampled(_num_reads_sampled), being_compacted(_being_compacted), num_entries(0), - num_deletions(0) {} + num_deletions(0), + oldest_blob_file_number(_oldest_blob_file_number) {} // File size in bytes. size_t size; // The name of the file. std::string name; + // The id of the file.
+ uint64_t file_number; // The full path where the file locates. std::string db_path; @@ -100,6 +102,9 @@ struct SstFileMetaData { uint64_t num_entries; uint64_t num_deletions; + + uint64_t oldest_blob_file_number; // The id of the oldest blob file + // referenced by the file. }; // The full set of metadata associated with each SST file. diff --git a/utilities/blob_db/blob_db_impl_filesnapshot.cc b/utilities/blob_db/blob_db_impl_filesnapshot.cc index e74396a33d..4544ecb5a2 100644 --- a/utilities/blob_db/blob_db_impl_filesnapshot.cc +++ b/utilities/blob_db/blob_db_impl_filesnapshot.cc @@ -94,9 +94,10 @@ void BlobDBImpl::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) { auto blob_file = bfile_pair.second; LiveFileMetaData filemetadata; filemetadata.size = static_cast<size_t>(blob_file->GetFileSize()); + const uint64_t file_number = blob_file->BlobFileNumber(); // Path should be relative to db_name, but begin with slash. - filemetadata.name = - BlobFileName("", bdb_options_.blob_dir, blob_file->BlobFileNumber()); + filemetadata.name = BlobFileName("", bdb_options_.blob_dir, file_number); + filemetadata.file_number = file_number; auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily()); filemetadata.column_family_name = cfh->GetName(); metadata->emplace_back(filemetadata); diff --git a/utilities/blob_db/blob_db_test.cc b/utilities/blob_db/blob_db_test.cc index c45001c44a..b8e6766146 100644 --- a/utilities/blob_db/blob_db_test.cc +++ b/utilities/blob_db/blob_db_test.cc @@ -999,6 +999,7 @@ TEST_F(BlobDBTest, GetLiveFilesMetaData) { // Path should be relative to db_name, but begin with slash.
std::string filename = "/blob_dir/000001.blob"; ASSERT_EQ(filename, metadata[0].name); + ASSERT_EQ(1, metadata[0].file_number); ASSERT_EQ("default", metadata[0].column_family_name); std::vector<std::string> livefile; uint64_t mfs; diff --git a/utilities/checkpoint/checkpoint_impl.cc b/utilities/checkpoint/checkpoint_impl.cc index 6d025662e0..35344a988a 100644 --- a/utilities/checkpoint/checkpoint_impl.cc +++ b/utilities/checkpoint/checkpoint_impl.cc @@ -420,11 +420,14 @@ Status CheckpointImpl::ExportColumnFamily( LiveFileMetaData live_file_metadata; live_file_metadata.size = file_metadata.size; live_file_metadata.name = std::move(file_metadata.name); + live_file_metadata.file_number = file_metadata.file_number; live_file_metadata.db_path = export_dir; live_file_metadata.smallest_seqno = file_metadata.smallest_seqno; live_file_metadata.largest_seqno = file_metadata.largest_seqno; live_file_metadata.smallestkey = std::move(file_metadata.smallestkey); live_file_metadata.largestkey = std::move(file_metadata.largestkey); + live_file_metadata.oldest_blob_file_number = + file_metadata.oldest_blob_file_number; live_file_metadata.level = level_metadata.level; result_metadata->files.push_back(live_file_metadata); }