mirror of https://github.com/facebook/rocksdb.git
Pass the size of blob files to SstFileManager during DB open (#10062)
Summary: RocksDB uses the (no longer aptly named) SST file manager (see https://github.com/facebook/rocksdb/wiki/Managing-Disk-Space-Utilization) to track and potentially limit the space used by SST and blob files (as well as to rate-limit the deletion of these data files). The SST file manager tracks the SST and blob file sizes in an in-memory hash map, which has to be rebuilt during DB open. File sizes can be generally obtained by querying the file system; however, there is a performance optimization possibility here since the sizes of SST and blob files are also tracked in the RocksDB MANIFEST, so we can simply pass the file sizes stored there instead of consulting the file system for each file. Currently, this optimization is only implemented for SST files; we would like to extend it to blob files as well. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10062 Test Plan: Add unit tests for the change to the test suite ltamasi riversand963 akankshamahajan15 Reviewed By: ltamasi Differential Revision: D36726621 Pulled By: gangliao fbshipit-source-id: 4010dc46ef7306142f1c2e0d1c3bf75b196ef82a
This commit is contained in:
parent
8c4ea7b851
commit
e228515740
|
@ -1930,21 +1930,24 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
|
|||
// vast majority of all files), we'll pass the size to SstFileManager.
|
||||
// For all other files SstFileManager will query the size from filesystem.
|
||||
|
||||
std::vector<LiveFileMetaData> metadata;
|
||||
|
||||
// TODO: Once GetLiveFilesMetaData supports blob files, update the logic
|
||||
// below to get known_file_sizes for blob files.
|
||||
impl->mutex_.Lock();
|
||||
impl->versions_->GetLiveFilesMetaData(&metadata);
|
||||
impl->mutex_.Unlock();
|
||||
std::vector<ColumnFamilyMetaData> metadata;
|
||||
impl->GetAllColumnFamilyMetaData(&metadata);
|
||||
|
||||
std::unordered_map<std::string, uint64_t> known_file_sizes;
|
||||
for (const auto& md : metadata) {
|
||||
std::string name = md.name;
|
||||
if (!name.empty() && name[0] == '/') {
|
||||
name = name.substr(1);
|
||||
for (const auto& lmd : md.levels) {
|
||||
for (const auto& fmd : lmd.files) {
|
||||
known_file_sizes[fmd.relative_filename] = fmd.size;
|
||||
}
|
||||
}
|
||||
for (const auto& bmd : md.blob_files) {
|
||||
std::string name = bmd.blob_file_name;
|
||||
// The BlobMetaData.blob_file_name may start with "/".
|
||||
if (!name.empty() && name[0] == '/') {
|
||||
name = name.substr(1);
|
||||
}
|
||||
known_file_sizes[name] = bmd.blob_file_size;
|
||||
}
|
||||
known_file_sizes[name] = md.size;
|
||||
}
|
||||
|
||||
std::vector<std::string> paths;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/db_test_util.h"
|
||||
#include "env/mock_env.h"
|
||||
#include "file/sst_file_manager_impl.h"
|
||||
#include "port/port.h"
|
||||
#include "port/stack_trace.h"
|
||||
|
@ -961,7 +962,6 @@ TEST_F(DBSSTTest, OpenDBWithExistingTrash) {
|
|||
ASSERT_NOK(env_->FileExists(dbname_ + "/" + "003.sst.trash"));
|
||||
}
|
||||
|
||||
|
||||
// Create a DB with 2 db_paths, and generate multiple files in the 2
|
||||
// db_paths using CompactRangeOptions, make sure that files that were
|
||||
// deleted from first db_path were deleted using DeleteScheduler and
|
||||
|
@ -1233,7 +1233,9 @@ TEST_F(DBSSTTest, CancellingCompactionsWorks) {
|
|||
ASSERT_GT(completed_compactions, 0);
|
||||
ASSERT_EQ(sfm->GetCompactionsReservedSize(), 0);
|
||||
// Make sure the stat is bumped
|
||||
ASSERT_GT(dbfull()->immutable_db_options().statistics.get()->getTickerCount(COMPACTION_CANCELLED), 0);
|
||||
ASSERT_GT(dbfull()->immutable_db_options().statistics.get()->getTickerCount(
|
||||
COMPACTION_CANCELLED),
|
||||
0);
|
||||
ASSERT_EQ(0,
|
||||
dbfull()->immutable_db_options().statistics.get()->getTickerCount(
|
||||
FILES_MARKED_TRASH));
|
||||
|
@ -1629,6 +1631,45 @@ TEST_F(DBSSTTest, GetTotalSstFilesSize) {
|
|||
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
|
||||
}
|
||||
|
||||
// Checks that reopening a DB that has blob files does not trigger a
// MockFileSystem::GetFileSize call for any file whose name contains ".blob".
// The test writes and flushes data with blob files enabled, closes the DB,
// installs a sync point callback that records such calls, reopens the DB and
// asserts that the callback never saw a ".blob" file name.
TEST_F(DBSSTTest, OpenDBWithoutGetFileSizeInvocations) {
|
||||
Options options = CurrentOptions();
|
||||
// The env is owned by this test; `options` only holds a raw pointer to it.
// NOTE(review): presumably MockEnv is used so that file system calls go
// through MockFileSystem, where the sync point below lives -- confirm.
std::unique_ptr<MockEnv> env{MockEnv::Create(Env::Default())};
|
||||
options.env = env.get();
|
||||
options.disable_auto_compactions = true;
|
||||
options.compression = kNoCompression;
|
||||
options.enable_blob_files = true;
|
||||
options.blob_file_size = 32; // create one blob per file
|
||||
options.skip_checking_sst_file_sizes_on_db_open = true;
|
||||

||||
DestroyAndReopen(options);
|
||||
// Generate 5 files in L0
|
||||
// Each outer iteration writes the same 10 keys with a per-iteration value
// and then flushes.
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < 10; j++) {
|
||||
std::string val = "val_file_" + std::to_string(i);
|
||||
ASSERT_OK(Put(Key(j), val));
|
||||
}
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
// Close before the callback is installed, so that only the Reopen() below
// is observed.
Close();
|
||||

||||
// Set to true by the callback if GetFileSize is invoked on a ".blob" file.
bool is_get_file_size_called = false;
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"MockFileSystem::GetFileSize:CheckFileType", [&](void* arg) {
|
||||
// `arg` is treated as a pointer to the std::string file name handed to
// the sync point.
std::string* filename = reinterpret_cast<std::string*>(arg);
|
||||
if (filename->find(".blob") != std::string::npos) {
|
||||
is_get_file_size_called = true;
|
||||
}
|
||||
});
|
||||

||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
Reopen(options);
|
||||
// No GetFileSize call on a ".blob" file may have happened during Reopen().
ASSERT_FALSE(is_get_file_size_called);
|
||||
// Remove the callback, which captures locals of this test by reference.
SyncPoint::GetInstance()->DisableProcessing();
|
||||
SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||

||||
Destroy(options);
|
||||
}
|
||||
|
||||
TEST_F(DBSSTTest, GetTotalSstFilesSizeVersionsFilesShared) {
|
||||
Options options = CurrentOptions();
|
||||
options.disable_auto_compactions = true;
|
||||
|
|
|
@ -878,6 +878,7 @@ IOStatus MockFileSystem::GetFileSize(const std::string& fname,
|
|||
uint64_t* file_size,
|
||||
IODebugContext* /*dbg*/) {
|
||||
auto fn = NormalizeMockPath(fname);
|
||||
TEST_SYNC_POINT_CALLBACK("MockFileSystem::GetFileSize:CheckFileType", &fn);
|
||||
MutexLock lock(&mutex_);
|
||||
auto iter = file_map_.find(fn);
|
||||
if (iter == file_map_.end()) {
|
||||
|
|
|
@ -1516,8 +1516,8 @@ class DB {
|
|||
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
|
||||
}
|
||||
|
||||
// Obtains the LSM-tree meta data of all column families of the DB,
|
||||
// including metadata for each live table (SST) file in the DB.
|
||||
// Obtains the LSM-tree meta data of all column families of the DB, including
|
||||
// metadata for each live table (SST) file and each blob file in the DB.
|
||||
virtual void GetAllColumnFamilyMetaData(
|
||||
std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
|
||||
|
||||
|
|
|
@ -232,7 +232,7 @@ struct ColumnFamilyMetaData {
|
|||
uint64_t blob_file_size = 0;
|
||||
// The number of blob files in this column family.
|
||||
size_t blob_file_count = 0;
|
||||
// The metadata of the blobs in this column family
|
||||
// The metadata of the blobs in this column family.
|
||||
std::vector<BlobMetaData> blob_files;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue