[CF] Adaptation of GetLiveFiles for CF

Summary: Even if user flushes the memtables before getting live files, we still can't guarantee that new data didn't come in (to already-flushed memtables). If we want backups to provide consistent view of the database, we still need to get WAL files.

Test Plan: backupable_db_test

Reviewers: dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16299
This commit is contained in:
Igor Canadi 2014-02-25 13:16:59 -08:00
parent 5a91746277
commit dc277f0ab7
3 changed files with 28 additions and 8 deletions

View file

@ -60,18 +60,35 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
*manifest_file_size = 0;
mutex_.Lock();
if (flush_memtable) {
// flush all dirty data to disk.
Status status = Flush(FlushOptions());
autovector<ColumnFamilyData*> to_delete;
Status status;
for (auto cfd : *versions_->GetColumnFamilySet()) {
cfd->Ref();
mutex_.Unlock();
status = FlushMemTable(cfd, FlushOptions());
mutex_.Lock();
if (cfd->Unref()) {
to_delete.push_back(cfd);
}
if (!status.ok()) {
break;
}
}
for (auto cfd : to_delete) {
delete cfd;
}
if (!status.ok()) {
mutex_.Unlock();
Log(options_.info_log, "Cannot Flush data %s\n",
status.ToString().c_str());
return status;
}
}
MutexLock l(&mutex_);
// Make a set of all of the live *.sst files
std::set<uint64_t> live;
for (auto cfd : *versions_->GetColumnFamilySet()) {
@ -93,6 +110,7 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
// find length of manifest file while holding the mutex lock
*manifest_file_size = versions_->ManifestFileSize();
mutex_.Unlock();
return Status::OK();
}

View file

@ -394,9 +394,12 @@ class DB {
// Setting flush_memtable to true does Flush before recording the live files.
// Setting flush_memtable to false is useful when we don't want to wait for
// flush which may have to wait for compaction to complete taking an
// indeterminate time. But this will have to use GetSortedWalFiles after
// GetLiveFiles to compensate for memtables missed in this snapshot due to the
// absence of Flush, by WAL files to recover the database consistently later
// indeterminate time.
//
// In case you have multiple column families, even if flush_memtable is true,
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
// for new data that arrived to already-flushed column families while other
// column families were flushing
virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size,
bool flush_memtable = true) = 0;

View file

@ -311,8 +311,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
// this will return live_files prefixed with "/"
s = db->GetLiveFiles(live_files, &manifest_file_size, flush_before_backup);
}
// if we didn't flush before backup, we need to also get WAL files
if (s.ok() && !flush_before_backup) {
if (s.ok()) {
// returns file names prefixed with "/"
s = db->GetSortedWalFiles(live_wal_files);
}