mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-29 09:36:17 +00:00
Add file number/oldest referenced blob file number to {Sst,Live}FileMetaData (#6011)
Summary: The patch exposes the file numbers of the SSTs, as well as the number of the oldest blob file each SST contains a reference to, through the GetColumnFamilyMetaData/GetLiveFilesMetaData interfaces. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6011 Test Plan: Fixed and extended the existing unit tests. (The earlier ColumnFamilyMetaDataTest wasn't really testing anything because the generated memtables were never flushed, so the metadata structure was essentially empty.) Differential Revision: D18361697 Pulled By: ltamasi fbshipit-source-id: d5ed1d94ac70858b84393c48711441ddfe1251e9
This commit is contained in:
parent
07a0ad3c29
commit
f80050fa8f
|
@ -8,6 +8,7 @@ file_creation_time of the oldest SST file in the DB.
|
|||
|
||||
### New Features
|
||||
* Universal compaction to support options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold.
|
||||
* `GetLiveFilesMetaData` and `GetColumnFamilyMetaData` now expose the file number of each SST file as well as the file number of the oldest blob file referenced by that SST.
|
||||
|
||||
### Performance Improvements
|
||||
* For 64-bit hashing, RocksDB is standardizing on a slightly modified preview version of XXH3. This function is now used for many non-persisted hashes, along with fastrange64() in place of the modulus operator, and some benchmarks show a slight improvement.
|
||||
|
|
122
db/db_test.cc
122
db/db_test.cc
|
@ -24,6 +24,7 @@
|
|||
#endif
|
||||
|
||||
#include "cache/lru_cache.h"
|
||||
#include "db/blob_index.h"
|
||||
#include "db/db_impl/db_impl.h"
|
||||
#include "db/db_test_util.h"
|
||||
#include "db/dbformat.h"
|
||||
|
@ -1019,39 +1020,132 @@ TEST_F(DBTest, FailMoreDbPaths) {
|
|||
ASSERT_TRUE(TryReopen(options).IsNotSupported());
|
||||
}
|
||||
|
||||
void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) {
|
||||
void CheckColumnFamilyMeta(
|
||||
const ColumnFamilyMetaData& cf_meta,
|
||||
const std::vector<std::vector<FileMetaData>>& files_by_level) {
|
||||
ASSERT_EQ(cf_meta.name, kDefaultColumnFamilyName);
|
||||
ASSERT_EQ(cf_meta.levels.size(), files_by_level.size());
|
||||
|
||||
uint64_t cf_size = 0;
|
||||
uint64_t cf_csize = 0;
|
||||
size_t file_count = 0;
|
||||
for (auto level_meta : cf_meta.levels) {
|
||||
|
||||
for (size_t i = 0; i < cf_meta.levels.size(); ++i) {
|
||||
const auto& level_meta_from_cf = cf_meta.levels[i];
|
||||
const auto& level_meta_from_files = files_by_level[i];
|
||||
|
||||
ASSERT_EQ(level_meta_from_cf.level, i);
|
||||
ASSERT_EQ(level_meta_from_cf.files.size(), level_meta_from_files.size());
|
||||
|
||||
file_count += level_meta_from_cf.files.size();
|
||||
|
||||
uint64_t level_size = 0;
|
||||
uint64_t level_csize = 0;
|
||||
file_count += level_meta.files.size();
|
||||
for (auto file_meta : level_meta.files) {
|
||||
level_size += file_meta.size;
|
||||
for (size_t j = 0; j < level_meta_from_cf.files.size(); ++j) {
|
||||
const auto& file_meta_from_cf = level_meta_from_cf.files[j];
|
||||
const auto& file_meta_from_files = level_meta_from_files[j];
|
||||
|
||||
level_size += file_meta_from_cf.size;
|
||||
|
||||
ASSERT_EQ(file_meta_from_cf.file_number,
|
||||
file_meta_from_files.fd.GetNumber());
|
||||
ASSERT_EQ(file_meta_from_cf.file_number,
|
||||
TableFileNameToNumber(file_meta_from_cf.name));
|
||||
ASSERT_EQ(file_meta_from_cf.size, file_meta_from_files.fd.file_size);
|
||||
ASSERT_EQ(file_meta_from_cf.smallest_seqno,
|
||||
file_meta_from_files.fd.smallest_seqno);
|
||||
ASSERT_EQ(file_meta_from_cf.largest_seqno,
|
||||
file_meta_from_files.fd.largest_seqno);
|
||||
ASSERT_EQ(file_meta_from_cf.smallestkey,
|
||||
file_meta_from_files.smallest.user_key().ToString());
|
||||
ASSERT_EQ(file_meta_from_cf.largestkey,
|
||||
file_meta_from_files.largest.user_key().ToString());
|
||||
ASSERT_EQ(file_meta_from_cf.oldest_blob_file_number,
|
||||
file_meta_from_files.oldest_blob_file_number);
|
||||
}
|
||||
ASSERT_EQ(level_meta.size, level_size);
|
||||
|
||||
ASSERT_EQ(level_meta_from_cf.size, level_size);
|
||||
cf_size += level_size;
|
||||
cf_csize += level_csize;
|
||||
}
|
||||
|
||||
ASSERT_EQ(cf_meta.file_count, file_count);
|
||||
ASSERT_EQ(cf_meta.size, cf_size);
|
||||
}
|
||||
|
||||
void CheckLiveFilesMeta(
|
||||
const std::vector<LiveFileMetaData>& live_file_meta,
|
||||
const std::vector<std::vector<FileMetaData>>& files_by_level) {
|
||||
size_t total_file_count = 0;
|
||||
for (const auto& f : files_by_level) {
|
||||
total_file_count += f.size();
|
||||
}
|
||||
|
||||
ASSERT_EQ(live_file_meta.size(), total_file_count);
|
||||
|
||||
int level = 0;
|
||||
int i = 0;
|
||||
|
||||
for (const auto& meta : live_file_meta) {
|
||||
if (level != meta.level) {
|
||||
level = meta.level;
|
||||
i = 0;
|
||||
}
|
||||
|
||||
ASSERT_LT(i, files_by_level[level].size());
|
||||
|
||||
const auto& expected_meta = files_by_level[level][i];
|
||||
|
||||
ASSERT_EQ(meta.column_family_name, kDefaultColumnFamilyName);
|
||||
ASSERT_EQ(meta.file_number, expected_meta.fd.GetNumber());
|
||||
ASSERT_EQ(meta.file_number, TableFileNameToNumber(meta.name));
|
||||
ASSERT_EQ(meta.size, expected_meta.fd.file_size);
|
||||
ASSERT_EQ(meta.smallest_seqno, expected_meta.fd.smallest_seqno);
|
||||
ASSERT_EQ(meta.largest_seqno, expected_meta.fd.largest_seqno);
|
||||
ASSERT_EQ(meta.smallestkey, expected_meta.smallest.user_key().ToString());
|
||||
ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString());
|
||||
ASSERT_EQ(meta.oldest_blob_file_number,
|
||||
expected_meta.oldest_blob_file_number);
|
||||
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
TEST_F(DBTest, ColumnFamilyMetaDataTest) {
|
||||
TEST_F(DBTest, MetaDataTest) {
|
||||
Options options = CurrentOptions();
|
||||
options.create_if_missing = true;
|
||||
options.disable_auto_compactions = true;
|
||||
DestroyAndReopen(options);
|
||||
|
||||
Random rnd(301);
|
||||
int key_index = 0;
|
||||
ColumnFamilyMetaData cf_meta;
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
GenerateNewFile(&rnd, &key_index);
|
||||
db_->GetColumnFamilyMetaData(&cf_meta);
|
||||
CheckColumnFamilyMeta(cf_meta);
|
||||
// Add a single blob reference to each file
|
||||
std::string blob_index;
|
||||
BlobIndex::EncodeBlob(&blob_index, /* blob_file_number */ i + 1000,
|
||||
/* offset */ 1234, /* size */ 5678, kNoCompression);
|
||||
|
||||
WriteBatch batch;
|
||||
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, Key(key_index),
|
||||
blob_index));
|
||||
ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
|
||||
|
||||
++key_index;
|
||||
|
||||
// Fill up the rest of the file with random values.
|
||||
GenerateNewFile(&rnd, &key_index, /* nowait */ true);
|
||||
|
||||
Flush();
|
||||
}
|
||||
|
||||
std::vector<std::vector<FileMetaData>> files_by_level;
|
||||
dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level);
|
||||
|
||||
ColumnFamilyMetaData cf_meta;
|
||||
db_->GetColumnFamilyMetaData(&cf_meta);
|
||||
CheckColumnFamilyMeta(cf_meta, files_by_level);
|
||||
|
||||
std::vector<LiveFileMetaData> live_file_meta;
|
||||
db_->GetLiveFilesMetaData(&live_file_meta);
|
||||
CheckLiveFilesMeta(live_file_meta, files_by_level);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
|
|
@ -1451,16 +1451,14 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) {
|
|||
assert(!ioptions->cf_paths.empty());
|
||||
file_path = ioptions->cf_paths.back().path;
|
||||
}
|
||||
const uint64_t file_number = file->fd.GetNumber();
|
||||
files.emplace_back(SstFileMetaData{
|
||||
MakeTableFileName("", file->fd.GetNumber()),
|
||||
file_path,
|
||||
static_cast<size_t>(file->fd.GetFileSize()),
|
||||
file->fd.smallest_seqno,
|
||||
file->fd.largest_seqno,
|
||||
file->smallest.user_key().ToString(),
|
||||
MakeTableFileName("", file_number), file_number, file_path,
|
||||
static_cast<size_t>(file->fd.GetFileSize()), file->fd.smallest_seqno,
|
||||
file->fd.largest_seqno, file->smallest.user_key().ToString(),
|
||||
file->largest.user_key().ToString(),
|
||||
file->stats.num_reads_sampled.load(std::memory_order_relaxed),
|
||||
file->being_compacted});
|
||||
file->being_compacted, file->oldest_blob_file_number});
|
||||
files.back().num_entries = file->num_entries;
|
||||
files.back().num_deletions = file->num_deletions;
|
||||
level_size += file->fd.GetFileSize();
|
||||
|
@ -5393,7 +5391,9 @@ void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
|
|||
assert(!cfd->ioptions()->cf_paths.empty());
|
||||
filemetadata.db_path = cfd->ioptions()->cf_paths.back().path;
|
||||
}
|
||||
filemetadata.name = MakeTableFileName("", file->fd.GetNumber());
|
||||
const uint64_t file_number = file->fd.GetNumber();
|
||||
filemetadata.name = MakeTableFileName("", file_number);
|
||||
filemetadata.file_number = file_number;
|
||||
filemetadata.level = level;
|
||||
filemetadata.size = static_cast<size_t>(file->fd.GetFileSize());
|
||||
filemetadata.smallestkey = file->smallest.user_key().ToString();
|
||||
|
@ -5405,6 +5405,7 @@ void VersionSet::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
|
|||
filemetadata.being_compacted = file->being_compacted;
|
||||
filemetadata.num_entries = file->num_entries;
|
||||
filemetadata.num_deletions = file->num_deletions;
|
||||
filemetadata.oldest_blob_file_number = file->oldest_blob_file_number;
|
||||
metadata->push_back(filemetadata);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,25 +55,24 @@ struct LevelMetaData {
|
|||
struct SstFileMetaData {
|
||||
SstFileMetaData()
|
||||
: size(0),
|
||||
name(""),
|
||||
db_path(""),
|
||||
file_number(0),
|
||||
smallest_seqno(0),
|
||||
largest_seqno(0),
|
||||
smallestkey(""),
|
||||
largestkey(""),
|
||||
num_reads_sampled(0),
|
||||
being_compacted(false),
|
||||
num_entries(0),
|
||||
num_deletions(0) {}
|
||||
num_deletions(0),
|
||||
oldest_blob_file_number(0) {}
|
||||
|
||||
SstFileMetaData(const std::string& _file_name, const std::string& _path,
|
||||
size_t _size, SequenceNumber _smallest_seqno,
|
||||
SequenceNumber _largest_seqno,
|
||||
SstFileMetaData(const std::string& _file_name, uint64_t _file_number,
|
||||
const std::string& _path, size_t _size,
|
||||
SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno,
|
||||
const std::string& _smallestkey,
|
||||
const std::string& _largestkey, uint64_t _num_reads_sampled,
|
||||
bool _being_compacted)
|
||||
bool _being_compacted, uint64_t _oldest_blob_file_number)
|
||||
: size(_size),
|
||||
name(_file_name),
|
||||
file_number(_file_number),
|
||||
db_path(_path),
|
||||
smallest_seqno(_smallest_seqno),
|
||||
largest_seqno(_largest_seqno),
|
||||
|
@ -82,12 +81,15 @@ struct SstFileMetaData {
|
|||
num_reads_sampled(_num_reads_sampled),
|
||||
being_compacted(_being_compacted),
|
||||
num_entries(0),
|
||||
num_deletions(0) {}
|
||||
num_deletions(0),
|
||||
oldest_blob_file_number(_oldest_blob_file_number) {}
|
||||
|
||||
// File size in bytes.
|
||||
size_t size;
|
||||
// The name of the file.
|
||||
std::string name;
|
||||
// The id of the file.
|
||||
uint64_t file_number;
|
||||
// The full path where the file is located.
|
||||
std::string db_path;
|
||||
|
||||
|
@ -100,6 +102,9 @@ struct SstFileMetaData {
|
|||
|
||||
uint64_t num_entries;
|
||||
uint64_t num_deletions;
|
||||
|
||||
uint64_t oldest_blob_file_number; // The id of the oldest blob file
|
||||
// referenced by the file.
|
||||
};
|
||||
|
||||
// The full set of metadata associated with each SST file.
|
||||
|
|
|
@ -94,9 +94,10 @@ void BlobDBImpl::GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) {
|
|||
auto blob_file = bfile_pair.second;
|
||||
LiveFileMetaData filemetadata;
|
||||
filemetadata.size = static_cast<size_t>(blob_file->GetFileSize());
|
||||
const uint64_t file_number = blob_file->BlobFileNumber();
|
||||
// Path should be relative to db_name, but begin with slash.
|
||||
filemetadata.name =
|
||||
BlobFileName("", bdb_options_.blob_dir, blob_file->BlobFileNumber());
|
||||
filemetadata.name = BlobFileName("", bdb_options_.blob_dir, file_number);
|
||||
filemetadata.file_number = file_number;
|
||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily());
|
||||
filemetadata.column_family_name = cfh->GetName();
|
||||
metadata->emplace_back(filemetadata);
|
||||
|
|
|
@ -999,6 +999,7 @@ TEST_F(BlobDBTest, GetLiveFilesMetaData) {
|
|||
// Path should be relative to db_name, but begin with slash.
|
||||
std::string filename = "/blob_dir/000001.blob";
|
||||
ASSERT_EQ(filename, metadata[0].name);
|
||||
ASSERT_EQ(1, metadata[0].file_number);
|
||||
ASSERT_EQ("default", metadata[0].column_family_name);
|
||||
std::vector<std::string> livefile;
|
||||
uint64_t mfs;
|
||||
|
|
|
@ -420,11 +420,14 @@ Status CheckpointImpl::ExportColumnFamily(
|
|||
LiveFileMetaData live_file_metadata;
|
||||
live_file_metadata.size = file_metadata.size;
|
||||
live_file_metadata.name = std::move(file_metadata.name);
|
||||
live_file_metadata.file_number = file_metadata.file_number;
|
||||
live_file_metadata.db_path = export_dir;
|
||||
live_file_metadata.smallest_seqno = file_metadata.smallest_seqno;
|
||||
live_file_metadata.largest_seqno = file_metadata.largest_seqno;
|
||||
live_file_metadata.smallestkey = std::move(file_metadata.smallestkey);
|
||||
live_file_metadata.largestkey = std::move(file_metadata.largestkey);
|
||||
live_file_metadata.oldest_blob_file_number =
|
||||
file_metadata.oldest_blob_file_number;
|
||||
live_file_metadata.level = level_metadata.level;
|
||||
result_metadata->files.push_back(live_file_metadata);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue