mirror of https://github.com/facebook/rocksdb.git
Don't wait for indirect flush in read-only DB (#10569)
Summary: Some APIs for getting live files, which are used by Checkpoint and BackupEngine, can optionally trigger and wait for a flush. These would deadlock when used on a read-only DB. Here we fix that by assuming the user wants the overall operation to succeed and is OK without flushing (because the DB is read-only). Follow-up work: the same or other issues can be hit by directly invoking some DB functions that are clearly not appropriate for read-only instance, but are not covered by overrides in DBImplReadOnly and CompactedDBImpl. These should be fixed to avoid similar problems on accidental misuse. (Long term, it would be nice to have a DBReadOnly class without those members, like BackupEngineReadOnly.) Pull Request resolved: https://github.com/facebook/rocksdb/pull/10569 Test Plan: tests updated to catch regression (hang before the fix) Reviewed By: riversand963 Differential Revision: D38995759 Pulled By: pdillinger fbshipit-source-id: f5f8bc7123e13cb45bd393dd974d7d6eda20bc68
This commit is contained in:
parent
5532b462c4
commit
c5afbbfe4b
|
@ -1,6 +1,7 @@
|
|||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
### Bug Fixes
|
||||
* Fixed a hang when an operation such as `GetLiveFiles` or `CreateNewBackup` is asked to trigger and wait for memtable flush on a read-only DB. Such indirect requests for memtable flush are now ignored on a read-only DB.
|
||||
* Fixed bug where `FlushWAL(true /* sync */)` (used by `GetLiveFilesStorageInfo()`, which is used by checkpoint and backup) could cause parallel writes at the tail of a WAL file to never be synced.
|
||||
* Fix periodic_task unable to re-register the same task type, which may cause `SetOptions()` fail to update periodical_task time like: `stats_dump_period_sec`, `stats_persist_period_sec`.
|
||||
|
||||
|
|
|
@ -253,6 +253,7 @@ TEST_F(DBBasicTest, CompactedDB) {
|
|||
ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, 'b')));
|
||||
ASSERT_OK(Put("eee", DummyString(kFileSize / 2, 'e')));
|
||||
ASSERT_OK(Flush());
|
||||
ASSERT_OK(Put("something_not_flushed", "x"));
|
||||
Close();
|
||||
|
||||
ASSERT_OK(ReadOnlyReopen(options));
|
||||
|
@ -260,6 +261,20 @@ TEST_F(DBBasicTest, CompactedDB) {
|
|||
s = Put("new", "value");
|
||||
ASSERT_EQ(s.ToString(),
|
||||
"Not implemented: Not supported operation in read only mode.");
|
||||
|
||||
// TODO: validate that other write ops return NotImplemented
|
||||
// (DBImplReadOnly is missing some overrides)
|
||||
|
||||
// Ensure no deadlock on flush triggered by another API function
|
||||
// (Old deadlock bug depends on something_not_flushed above.)
|
||||
std::vector<std::string> files;
|
||||
uint64_t manifest_file_size;
|
||||
ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true));
|
||||
LiveFilesStorageInfoOptions lfsi_opts;
|
||||
lfsi_opts.wal_size_for_flush = 0; // always
|
||||
std::vector<LiveFileStorageInfo> files2;
|
||||
ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsi_opts, &files2));
|
||||
|
||||
Close();
|
||||
|
||||
// Full compaction
|
||||
|
@ -290,6 +305,13 @@ TEST_F(DBBasicTest, CompactedDB) {
|
|||
ASSERT_EQ(DummyString(kFileSize / 2, 'j'), Get("jjj"));
|
||||
ASSERT_EQ("NOT_FOUND", Get("kkk"));
|
||||
|
||||
// TODO: validate that other write ops return NotImplemented
|
||||
// (CompactedDB is missing some overrides)
|
||||
|
||||
// Ensure no deadlock on flush triggered by another API function
|
||||
ASSERT_OK(db_->GetLiveFiles(files, &manifest_file_size, /*flush*/ true));
|
||||
ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsi_opts, &files2));
|
||||
|
||||
// MultiGet
|
||||
std::vector<std::string> values;
|
||||
std::vector<Status> status_list = dbfull()->MultiGet(
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
// TODO: Share common structure with DBImplSecondary and DBImplReadOnly
|
||||
class CompactedDBImpl : public DBImpl {
|
||||
public:
|
||||
CompactedDBImpl(const DBOptions& options, const std::string& dbname);
|
||||
|
@ -127,6 +128,17 @@ class CompactedDBImpl : public DBImpl {
|
|||
return Status::NotSupported("Not supported in compacted db mode.");
|
||||
}
|
||||
|
||||
// FIXME: some missing overrides for more "write" functions
|
||||
// Share with DBImplReadOnly?
|
||||
|
||||
protected:
|
||||
#ifndef ROCKSDB_LITE
|
||||
Status FlushForGetLiveFiles() override {
|
||||
// No-op for read-only DB
|
||||
return Status::OK();
|
||||
}
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
private:
|
||||
friend class DB;
|
||||
inline size_t FindFile(const Slice& key);
|
||||
|
|
|
@ -1387,7 +1387,7 @@ class DBImpl : public DB {
|
|||
void NotifyOnExternalFileIngested(
|
||||
ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job);
|
||||
|
||||
Status FlushForGetLiveFiles();
|
||||
virtual Status FlushForGetLiveFiles();
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
void NewThreadStatusCfInfo(ColumnFamilyData* cfd) const;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
// TODO: Share common structure with CompactedDBImpl and DBImplSecondary
|
||||
class DBImplReadOnly : public DBImpl {
|
||||
public:
|
||||
DBImplReadOnly(const DBOptions& options, const std::string& dbname);
|
||||
|
@ -141,6 +142,16 @@ class DBImplReadOnly : public DBImpl {
|
|||
return Status::NotSupported("Not supported operation in read only mode.");
|
||||
}
|
||||
|
||||
// FIXME: some missing overrides for more "write" functions
|
||||
|
||||
protected:
|
||||
#ifndef ROCKSDB_LITE
|
||||
Status FlushForGetLiveFiles() override {
|
||||
// No-op for read-only DB
|
||||
return Status::OK();
|
||||
}
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
private:
|
||||
// A "helper" function for DB::OpenForReadOnly without column families
|
||||
// to reduce unnecessary I/O
|
||||
|
|
|
@ -71,6 +71,7 @@ class LogReaderContainer {
|
|||
// The secondary instance can be opened using `DB::OpenAsSecondary`. After
|
||||
// that, it can call `DBImplSecondary::TryCatchUpWithPrimary` to make best
|
||||
// effort attempts to catch up with the primary.
|
||||
// TODO: Share common structure with CompactedDBImpl and DBImplReadOnly
|
||||
class DBImplSecondary : public DBImpl {
|
||||
public:
|
||||
DBImplSecondary(const DBOptions& options, const std::string& dbname,
|
||||
|
@ -268,6 +269,13 @@ class DBImplSecondary : public DBImpl {
|
|||
#endif // NDEBUG
|
||||
|
||||
protected:
|
||||
#ifndef ROCKSDB_LITE
|
||||
Status FlushForGetLiveFiles() override {
|
||||
// No-op for read-only DB
|
||||
return Status::OK();
|
||||
}
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
// ColumnFamilyCollector is a write batch handler which does nothing
|
||||
// except recording unique column family IDs
|
||||
class ColumnFamilyCollector : public WriteBatch::Handler {
|
||||
|
|
|
@ -1562,9 +1562,10 @@ class DB {
|
|||
// The valid size of the manifest file is returned in manifest_file_size.
|
||||
// The manifest file is an ever growing file, but only the portion specified
|
||||
// by manifest_file_size is valid for this snapshot. Setting flush_memtable
|
||||
// to true does Flush before recording the live files. Setting flush_memtable
|
||||
// to false is useful when we don't want to wait for flush which may have to
|
||||
// wait for compaction to complete taking an indeterminate time.
|
||||
// to true does Flush before recording the live files (unless DB is
|
||||
// read-only). Setting flush_memtable to false is useful when we don't want
|
||||
// to wait for flush which may have to wait for compaction to complete
|
||||
// taking an indeterminate time.
|
||||
//
|
||||
// NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate
|
||||
// a lossless backup, GetLiveFilesStorageInfo() is strongly recommended
|
||||
|
|
|
@ -2094,7 +2094,8 @@ struct LiveFilesStorageInfoOptions {
|
|||
// Whether to populate FileStorageInfo::file_checksum* or leave blank
|
||||
bool include_checksum_info = false;
|
||||
// Flushes memtables if total size in bytes of live WAL files is >= this
|
||||
// number. Default: always force a flush without checking sizes.
|
||||
// number (and DB is not read-only).
|
||||
// Default: always force a flush without checking sizes.
|
||||
uint64_t wal_size_for_flush = 0;
|
||||
};
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
|
|
@ -2984,9 +2984,12 @@ TEST_F(BackupEngineTest, ReadOnlyBackupEngine) {
|
|||
DestroyDBWithoutCheck(dbname_, options_);
|
||||
OpenDBAndBackupEngine(true);
|
||||
FillDB(db_.get(), 0, 100);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
|
||||
// Also test read-only DB with CreateNewBackup and flush=true (no flush)
|
||||
CloseAndReopenDB(/*read_only*/ true);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ true));
|
||||
CloseAndReopenDB(/*read_only*/ false);
|
||||
FillDB(db_.get(), 100, 200);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ true));
|
||||
CloseDBAndBackupEngine();
|
||||
DestroyDBWithoutCheck(dbname_, options_);
|
||||
|
||||
|
|
Loading…
Reference in New Issue