mirror of https://github.com/facebook/rocksdb.git
Enable flushing memtables from arbitrary column families
Summary: Removed default_cfd_ from all flush code paths. This means we can now flush memtables from arbitrary column families! Test Plan: Added a new unit test. Reviewers: dhruba, haobo. CC: leveldb. Differential Revision: https://reviews.facebook.net/D15789
This commit is contained in:
parent
9ca638a86d
commit
3615f534d1
|
@ -66,6 +66,9 @@ class ColumnFamilyTest {
|
||||||
Status Merge(int cf, const string& key, const string& value) {
|
Status Merge(int cf, const string& key, const string& value) {
|
||||||
return db_->Merge(WriteOptions(), handles_[cf], Slice(key), Slice(value));
|
return db_->Merge(WriteOptions(), handles_[cf], Slice(key), Slice(value));
|
||||||
}
|
}
|
||||||
|
// Test helper: flushes the column family whose handle is stored at index
// `cf` in handles_, using default-constructed FlushOptions, and returns the
// Status reported by db_->Flush().
Status Flush(int cf) {
  FlushOptions flush_options;
  return db_->Flush(flush_options, handles_[cf]);
}
|
||||||
|
|
||||||
string Get(int cf, const string& key) {
|
string Get(int cf, const string& key) {
|
||||||
ReadOptions options;
|
ReadOptions options;
|
||||||
|
@ -238,6 +241,40 @@ TEST(ColumnFamilyTest, IgnoreRecoveredLog) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes keys into three column families ("default", "one", "two"), flushes
// each column family explicitly, and then checks that every key is readable
// from the column family it was written to (and from no other) across
// repeated close/reopen cycles.
TEST(ColumnFamilyTest, FlushTest) {
  // Create the extra column families, then reopen with all three handles.
  ASSERT_OK(Open({"default"}));
  CreateColumnFamilies({"one", "two"});
  Close();
  ASSERT_OK(Open({"default", "one", "two"}));

  // "foo" is written twice in column family 0; the later value must win.
  ASSERT_OK(Put(0, "foo", "v1"));
  ASSERT_OK(Put(0, "bar", "v2"));
  ASSERT_OK(Put(1, "mirko", "v3"));
  ASSERT_OK(Put(0, "foo", "v2"));
  ASSERT_OK(Put(2, "fodor", "v5"));

  // Flush every column family. The returned Status is checked so that a
  // failed flush fails the test instead of being silently ignored.
  for (int i = 0; i < 3; ++i) {
    ASSERT_OK(Flush(i));
  }
  Close();
  ASSERT_OK(Open({"default", "one", "two"}));

  // Verify the contents three times, reopening the DB after the first two
  // passes.
  for (int iter = 0; iter <= 2; ++iter) {
    ASSERT_EQ("v2", Get(0, "foo"));
    ASSERT_EQ("v2", Get(0, "bar"));
    ASSERT_EQ("v3", Get(1, "mirko"));
    ASSERT_EQ("v5", Get(2, "fodor"));
    // Keys must not be visible in column families they were not written to.
    ASSERT_EQ("NOT_FOUND", Get(0, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(2, "foo"));
    if (iter <= 1) {
      // reopen
      Close();
      ASSERT_OK(Open({"default", "one", "two"}));
    }
  }
  Close();
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
151
db/db_impl.cc
151
db/db_impl.cc
|
@ -317,8 +317,12 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
||||||
|
|
||||||
DBImpl::~DBImpl() {
|
DBImpl::~DBImpl() {
|
||||||
// Wait for background work to finish
|
// Wait for background work to finish
|
||||||
if (flush_on_destroy_ && default_cfd_->mem()->GetFirstSequenceNumber() != 0) {
|
if (flush_on_destroy_) {
|
||||||
FlushMemTable(FlushOptions());
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||||
|
if (cfd->mem()->GetFirstSequenceNumber() != 0) {
|
||||||
|
FlushMemTable(cfd, FlushOptions());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
shutting_down_.Release_Store(this); // Any non-nullptr value is ok
|
shutting_down_.Release_Store(this); // Any non-nullptr value is ok
|
||||||
|
@ -979,6 +983,9 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
|
||||||
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||||
if (cfd->mem()->ApproximateMemoryUsage() >
|
if (cfd->mem()->ApproximateMemoryUsage() >
|
||||||
cfd->options()->write_buffer_size) {
|
cfd->options()->write_buffer_size) {
|
||||||
|
// If this asserts, it means that ColumnFamilyMemTablesImpl failed in
|
||||||
|
// filtering updates to already-flushed column families
|
||||||
|
assert(cfd->GetLogNumber() <= log_number);
|
||||||
auto iter = version_edits.find(cfd->GetID());
|
auto iter = version_edits.find(cfd->GetID());
|
||||||
assert(iter != version_edits.end());
|
assert(iter != version_edits.end());
|
||||||
VersionEdit* edit = &iter->second;
|
VersionEdit* edit = &iter->second;
|
||||||
|
@ -1001,8 +1008,20 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
|
||||||
assert(iter != version_edits.end());
|
assert(iter != version_edits.end());
|
||||||
VersionEdit* edit = &iter->second;
|
VersionEdit* edit = &iter->second;
|
||||||
|
|
||||||
// flush the final memtable
|
if (cfd->GetLogNumber() > log_number) {
|
||||||
status = WriteLevel0TableForRecovery(cfd->mem(), edit);
|
// Column family cfd has already flushed the data
|
||||||
|
// from log_number. Memtable has to be empty because
|
||||||
|
// we filter the updates based on log_number
|
||||||
|
// (in ColumnFamilyMemTablesImpl)
|
||||||
|
assert(cfd->mem()->GetFirstSequenceNumber() == 0);
|
||||||
|
assert(edit->NumEntries() == 0);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// flush the final memtable (if non-empty)
|
||||||
|
if (cfd->mem()->GetFirstSequenceNumber() != 0) {
|
||||||
|
status = WriteLevel0TableForRecovery(cfd->mem(), edit);
|
||||||
|
}
|
||||||
// we still want to clear the memtable, even if the recovery failed
|
// we still want to clear the memtable, even if the recovery failed
|
||||||
cfd->CreateNewMemtable();
|
cfd->CreateNewMemtable();
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
|
@ -1016,6 +1035,12 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
|
||||||
// Since we already recovered log_number, we want all logs
|
// Since we already recovered log_number, we want all logs
|
||||||
// with numbers `<= log_number` (includes this one) to be ignored
|
// with numbers `<= log_number` (includes this one) to be ignored
|
||||||
edit->SetLogNumber(log_number + 1);
|
edit->SetLogNumber(log_number + 1);
|
||||||
|
// we must mark the next log number as used, even though it's
|
||||||
|
// not actually used. that is because VersionSet assumes
|
||||||
|
// VersionSet::next_file_number_ always to be strictly greater than any
|
||||||
|
// log
|
||||||
|
// number
|
||||||
|
versions_->MarkFileNumberUsed(log_number + 1);
|
||||||
status = versions_->LogAndApply(cfd, edit, &mutex_);
|
status = versions_->LogAndApply(cfd, edit, &mutex_);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
return status;
|
return status;
|
||||||
|
@ -1077,8 +1102,8 @@ Status DBImpl::WriteLevel0TableForRecovery(MemTable* mem, VersionEdit* edit) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
|
||||||
Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
std::vector<MemTable*>& mems, VersionEdit* edit,
|
||||||
uint64_t* filenumber) {
|
uint64_t* filenumber) {
|
||||||
mutex_.AssertHeld();
|
mutex_.AssertHeld();
|
||||||
const uint64_t start_micros = env_->NowMicros();
|
const uint64_t start_micros = env_->NowMicros();
|
||||||
|
@ -1090,7 +1115,7 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
||||||
const SequenceNumber newest_snapshot = snapshots_.GetNewest();
|
const SequenceNumber newest_snapshot = snapshots_.GetNewest();
|
||||||
const SequenceNumber earliest_seqno_in_memtable =
|
const SequenceNumber earliest_seqno_in_memtable =
|
||||||
mems[0]->GetFirstSequenceNumber();
|
mems[0]->GetFirstSequenceNumber();
|
||||||
Version* base = default_cfd_->current();
|
Version* base = cfd->current();
|
||||||
base->Ref(); // it is likely that we do not need this reference
|
base->Ref(); // it is likely that we do not need this reference
|
||||||
Status s;
|
Status s;
|
||||||
{
|
{
|
||||||
|
@ -1127,7 +1152,7 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
||||||
|
|
||||||
|
|
||||||
// re-acquire the most current version
|
// re-acquire the most current version
|
||||||
base = default_cfd_->current();
|
base = cfd->current();
|
||||||
|
|
||||||
// There could be multiple threads writing to its own level-0 file.
|
// There could be multiple threads writing to its own level-0 file.
|
||||||
// The pending_outputs cannot be cleared here, otherwise this newly
|
// The pending_outputs cannot be cleared here, otherwise this newly
|
||||||
|
@ -1149,7 +1174,7 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
||||||
// threads could be concurrently producing compacted files for
|
// threads could be concurrently producing compacted files for
|
||||||
// that key range.
|
// that key range.
|
||||||
if (base != nullptr && options_.max_background_compactions <= 1 &&
|
if (base != nullptr && options_.max_background_compactions <= 1 &&
|
||||||
options_.compaction_style == kCompactionStyleLevel) {
|
cfd->options()->compaction_style == kCompactionStyleLevel) {
|
||||||
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
|
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
|
||||||
}
|
}
|
||||||
edit->AddFile(level, meta.number, meta.file_size,
|
edit->AddFile(level, meta.number, meta.file_size,
|
||||||
|
@ -1165,12 +1190,13 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
Status DBImpl::FlushMemTableToOutputFile(ColumnFamilyData* cfd,
|
||||||
|
bool* madeProgress,
|
||||||
DeletionState& deletion_state) {
|
DeletionState& deletion_state) {
|
||||||
mutex_.AssertHeld();
|
mutex_.AssertHeld();
|
||||||
assert(default_cfd_->imm()->size() != 0);
|
assert(cfd->imm()->size() != 0);
|
||||||
|
|
||||||
if (!default_cfd_->imm()->IsFlushPending()) {
|
if (!cfd->imm()->IsFlushPending()) {
|
||||||
Log(options_.info_log, "FlushMemTableToOutputFile already in progress");
|
Log(options_.info_log, "FlushMemTableToOutputFile already in progress");
|
||||||
return Status::IOError("FlushMemTableToOutputFile already in progress");
|
return Status::IOError("FlushMemTableToOutputFile already in progress");
|
||||||
}
|
}
|
||||||
|
@ -1178,7 +1204,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
||||||
// Save the contents of the earliest memtable as a new Table
|
// Save the contents of the earliest memtable as a new Table
|
||||||
uint64_t file_number;
|
uint64_t file_number;
|
||||||
std::vector<MemTable*> mems;
|
std::vector<MemTable*> mems;
|
||||||
default_cfd_->imm()->PickMemtablesToFlush(&mems);
|
cfd->imm()->PickMemtablesToFlush(&mems);
|
||||||
if (mems.empty()) {
|
if (mems.empty()) {
|
||||||
Log(options_.info_log, "Nothing in memstore to flush");
|
Log(options_.info_log, "Nothing in memstore to flush");
|
||||||
return Status::IOError("Nothing in memstore to flush");
|
return Status::IOError("Nothing in memstore to flush");
|
||||||
|
@ -1193,9 +1219,8 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
||||||
edit->SetPrevLogNumber(0);
|
edit->SetPrevLogNumber(0);
|
||||||
// SetLogNumber(log_num) indicates logs with number smaller than log_num
|
// SetLogNumber(log_num) indicates logs with number smaller than log_num
|
||||||
// will no longer be picked up for recovery.
|
// will no longer be picked up for recovery.
|
||||||
edit->SetLogNumber(
|
edit->SetLogNumber(mems.back()->GetNextLogNumber());
|
||||||
mems.back()->GetNextLogNumber()
|
edit->SetColumnFamily(cfd->GetID());
|
||||||
);
|
|
||||||
|
|
||||||
std::vector<uint64_t> logs_to_delete;
|
std::vector<uint64_t> logs_to_delete;
|
||||||
for (auto mem : mems) {
|
for (auto mem : mems) {
|
||||||
|
@ -1203,7 +1228,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
||||||
}
|
}
|
||||||
|
|
||||||
// This will release and re-acquire the mutex.
|
// This will release and re-acquire the mutex.
|
||||||
Status s = WriteLevel0Table(mems, edit, &file_number);
|
Status s = WriteLevel0Table(cfd, mems, edit, &file_number);
|
||||||
|
|
||||||
if (s.ok() && shutting_down_.Acquire_Load()) {
|
if (s.ok() && shutting_down_.Acquire_Load()) {
|
||||||
s = Status::IOError(
|
s = Status::IOError(
|
||||||
|
@ -1212,13 +1237,13 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace immutable memtable with the generated Table
|
// Replace immutable memtable with the generated Table
|
||||||
s = default_cfd_->imm()->InstallMemtableFlushResults(
|
s = cfd->imm()->InstallMemtableFlushResults(
|
||||||
default_cfd_, mems, versions_.get(), s, &mutex_, options_.info_log.get(),
|
cfd, mems, versions_.get(), s, &mutex_, options_.info_log.get(),
|
||||||
file_number, pending_outputs_, &deletion_state.memtables_to_free,
|
file_number, pending_outputs_, &deletion_state.memtables_to_free,
|
||||||
db_directory_.get());
|
db_directory_.get());
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
InstallSuperVersion(default_cfd_, deletion_state);
|
InstallSuperVersion(cfd, deletion_state);
|
||||||
if (madeProgress) {
|
if (madeProgress) {
|
||||||
*madeProgress = 1;
|
*madeProgress = 1;
|
||||||
}
|
}
|
||||||
|
@ -1239,7 +1264,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
||||||
Status DBImpl::CompactRange(const ColumnFamilyHandle& column_family,
|
Status DBImpl::CompactRange(const ColumnFamilyHandle& column_family,
|
||||||
const Slice* begin, const Slice* end,
|
const Slice* begin, const Slice* end,
|
||||||
bool reduce_level, int target_level) {
|
bool reduce_level, int target_level) {
|
||||||
Status s = FlushMemTable(FlushOptions());
|
Status s = FlushMemTable(default_cfd_, FlushOptions());
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
LogFlush(options_.info_log);
|
LogFlush(options_.info_log);
|
||||||
return s;
|
return s;
|
||||||
|
@ -1382,8 +1407,12 @@ uint64_t DBImpl::CurrentVersionNumber() const {
|
||||||
|
|
||||||
Status DBImpl::Flush(const FlushOptions& options,
|
Status DBImpl::Flush(const FlushOptions& options,
|
||||||
const ColumnFamilyHandle& column_family) {
|
const ColumnFamilyHandle& column_family) {
|
||||||
Status status = FlushMemTable(options);
|
mutex_.Lock();
|
||||||
return status;
|
auto cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family.id);
|
||||||
|
mutex_.Unlock();
|
||||||
|
assert(cfd != nullptr);
|
||||||
|
|
||||||
|
return FlushMemTable(cfd, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
SequenceNumber DBImpl::GetLatestSequenceNumber() const {
|
SequenceNumber DBImpl::GetLatestSequenceNumber() const {
|
||||||
|
@ -1657,35 +1686,36 @@ Status DBImpl::TEST_CompactRange(int level,
|
||||||
return RunManualCompaction(level, output_level, begin, end);
|
return RunManualCompaction(level, output_level, begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::FlushMemTable(const FlushOptions& options) {
|
Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
||||||
|
const FlushOptions& options) {
|
||||||
// nullptr batch means just wait for earlier writes to be done
|
// nullptr batch means just wait for earlier writes to be done
|
||||||
Status s = Write(WriteOptions(), nullptr);
|
Status s = Write(WriteOptions(), nullptr);
|
||||||
if (s.ok() && options.wait) {
|
if (s.ok() && options.wait) {
|
||||||
// Wait until the compaction completes
|
// Wait until the compaction completes
|
||||||
s = WaitForFlushMemTable();
|
s = WaitForFlushMemTable(cfd);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::WaitForFlushMemTable() {
|
Status DBImpl::WaitForFlushMemTable(ColumnFamilyData* cfd) {
|
||||||
Status s;
|
Status s;
|
||||||
// Wait until the compaction completes
|
// Wait until the compaction completes
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
while (default_cfd_->imm()->size() > 0 && bg_error_.ok()) {
|
while (cfd->imm()->size() > 0 && bg_error_.ok()) {
|
||||||
bg_cv_.Wait();
|
bg_cv_.Wait();
|
||||||
}
|
}
|
||||||
if (default_cfd_->imm()->size() != 0) {
|
if (!bg_error_.ok()) {
|
||||||
s = bg_error_;
|
s = bg_error_;
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::TEST_FlushMemTable() {
|
Status DBImpl::TEST_FlushMemTable() {
|
||||||
return FlushMemTable(FlushOptions());
|
return FlushMemTable(default_cfd_, FlushOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::TEST_WaitForFlushMemTable() {
|
Status DBImpl::TEST_WaitForFlushMemTable() {
|
||||||
return WaitForFlushMemTable();
|
return WaitForFlushMemTable(default_cfd_);
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::TEST_WaitForCompact() {
|
Status DBImpl::TEST_WaitForCompact() {
|
||||||
|
@ -1710,19 +1740,31 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
||||||
} else if (shutting_down_.Acquire_Load()) {
|
} else if (shutting_down_.Acquire_Load()) {
|
||||||
// DB is being deleted; no more background compactions
|
// DB is being deleted; no more background compactions
|
||||||
} else {
|
} else {
|
||||||
bool is_flush_pending = default_cfd_->imm()->IsFlushPending();
|
bool is_flush_pending = false;
|
||||||
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||||
|
if (cfd->imm()->IsFlushPending()) {
|
||||||
|
is_flush_pending = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (is_flush_pending &&
|
if (is_flush_pending &&
|
||||||
(bg_flush_scheduled_ < options_.max_background_flushes)) {
|
(bg_flush_scheduled_ < options_.max_background_flushes)) {
|
||||||
// memtable flush needed
|
// memtable flush needed
|
||||||
bg_flush_scheduled_++;
|
bg_flush_scheduled_++;
|
||||||
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
|
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
|
||||||
}
|
}
|
||||||
|
bool is_compaction_needed = false;
|
||||||
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||||
|
if (cfd->current()->NeedsCompaction()) {
|
||||||
|
is_compaction_needed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Schedule BGWorkCompaction if there's a compaction pending (or a memtable
|
// Schedule BGWorkCompaction if there's a compaction pending (or a memtable
|
||||||
// flush, but the HIGH pool is not enabled). Do it only if
|
// flush, but the HIGH pool is not enabled). Do it only if
|
||||||
// max_background_compactions hasn't been reached and, in case
|
// max_background_compactions hasn't been reached and, in case
|
||||||
// bg_manual_only_ > 0, if it's a manual compaction.
|
// bg_manual_only_ > 0, if it's a manual compaction.
|
||||||
if ((manual_compaction_ || default_cfd_->current()->NeedsCompaction() ||
|
if ((manual_compaction_ || is_compaction_needed ||
|
||||||
(is_flush_pending && (options_.max_background_flushes <= 0))) &&
|
(is_flush_pending && (options_.max_background_flushes <= 0))) &&
|
||||||
bg_compaction_scheduled_ < options_.max_background_compactions &&
|
bg_compaction_scheduled_ < options_.max_background_compactions &&
|
||||||
(!bg_manual_only_ || manual_compaction_)) {
|
(!bg_manual_only_ || manual_compaction_)) {
|
||||||
|
@ -1744,11 +1786,14 @@ void DBImpl::BGWorkCompaction(void* db) {
|
||||||
Status DBImpl::BackgroundFlush(bool* madeProgress,
|
Status DBImpl::BackgroundFlush(bool* madeProgress,
|
||||||
DeletionState& deletion_state) {
|
DeletionState& deletion_state) {
|
||||||
Status stat;
|
Status stat;
|
||||||
while (stat.ok() && default_cfd_->imm()->IsFlushPending()) {
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||||
Log(options_.info_log,
|
while (stat.ok() && cfd->imm()->IsFlushPending()) {
|
||||||
"BackgroundCallFlush doing FlushMemTableToOutputFile, flush slots available %d",
|
Log(options_.info_log,
|
||||||
options_.max_background_flushes - bg_flush_scheduled_);
|
"BackgroundCallFlush doing FlushMemTableToOutputFile with column "
|
||||||
stat = FlushMemTableToOutputFile(madeProgress, deletion_state);
|
"family %u, flush slots available %d",
|
||||||
|
cfd->GetID(), options_.max_background_flushes - bg_flush_scheduled_);
|
||||||
|
stat = FlushMemTableToOutputFile(cfd, madeProgress, deletion_state);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return stat;
|
return stat;
|
||||||
}
|
}
|
||||||
|
@ -1871,20 +1916,24 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove memtable flush from formal compaction
|
// TODO: remove memtable flush from formal compaction
|
||||||
while (default_cfd_->imm()->IsFlushPending()) {
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||||
Log(options_.info_log,
|
while (cfd->imm()->IsFlushPending()) {
|
||||||
"BackgroundCompaction doing FlushMemTableToOutputFile, compaction slots "
|
Log(options_.info_log,
|
||||||
"available %d",
|
"BackgroundCompaction doing FlushMemTableToOutputFile with column "
|
||||||
options_.max_background_compactions - bg_compaction_scheduled_);
|
"family %d, compaction slots available %d",
|
||||||
Status stat = FlushMemTableToOutputFile(madeProgress, deletion_state);
|
cfd->GetID(),
|
||||||
if (!stat.ok()) {
|
options_.max_background_compactions - bg_compaction_scheduled_);
|
||||||
if (is_manual) {
|
Status stat =
|
||||||
manual_compaction_->status = stat;
|
FlushMemTableToOutputFile(cfd, madeProgress, deletion_state);
|
||||||
manual_compaction_->done = true;
|
if (!stat.ok()) {
|
||||||
manual_compaction_->in_progress = false;
|
if (is_manual) {
|
||||||
manual_compaction_ = nullptr;
|
manual_compaction_->status = stat;
|
||||||
|
manual_compaction_->done = true;
|
||||||
|
manual_compaction_->in_progress = false;
|
||||||
|
manual_compaction_ = nullptr;
|
||||||
|
}
|
||||||
|
return stat;
|
||||||
}
|
}
|
||||||
return stat;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2285,7 +2334,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
|
||||||
LogFlush(options_.info_log);
|
LogFlush(options_.info_log);
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
if (default_cfd_->imm()->IsFlushPending()) {
|
if (default_cfd_->imm()->IsFlushPending()) {
|
||||||
FlushMemTableToOutputFile(nullptr, deletion_state);
|
FlushMemTableToOutputFile(default_cfd_, nullptr, deletion_state);
|
||||||
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary
|
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary
|
||||||
}
|
}
|
||||||
mutex_.Unlock();
|
mutex_.Unlock();
|
||||||
|
|
10
db/db_impl.h
10
db/db_impl.h
|
@ -286,7 +286,7 @@ class DBImpl : public DB {
|
||||||
|
|
||||||
// Flush the in-memory write buffer to storage. Switches to a new
|
// Flush the in-memory write buffer to storage. Switches to a new
|
||||||
// log-file/memtable and writes a new descriptor iff successful.
|
// log-file/memtable and writes a new descriptor iff successful.
|
||||||
Status FlushMemTableToOutputFile(bool* madeProgress,
|
Status FlushMemTableToOutputFile(ColumnFamilyData* cfd, bool* madeProgress,
|
||||||
DeletionState& deletion_state);
|
DeletionState& deletion_state);
|
||||||
|
|
||||||
Status RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
|
Status RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
|
||||||
|
@ -298,8 +298,8 @@ class DBImpl : public DB {
|
||||||
// for the entire period. The second method WriteLevel0Table supports
|
// for the entire period. The second method WriteLevel0Table supports
|
||||||
// concurrent flush memtables to storage.
|
// concurrent flush memtables to storage.
|
||||||
Status WriteLevel0TableForRecovery(MemTable* mem, VersionEdit* edit);
|
Status WriteLevel0TableForRecovery(MemTable* mem, VersionEdit* edit);
|
||||||
Status WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
Status WriteLevel0Table(ColumnFamilyData* cfd, std::vector<MemTable*>& mems,
|
||||||
uint64_t* filenumber);
|
VersionEdit* edit, uint64_t* filenumber);
|
||||||
|
|
||||||
uint64_t SlowdownAmount(int n, double bottom, double top);
|
uint64_t SlowdownAmount(int n, double bottom, double top);
|
||||||
Status MakeRoomForWrite(ColumnFamilyData* cfd,
|
Status MakeRoomForWrite(ColumnFamilyData* cfd,
|
||||||
|
@ -308,10 +308,10 @@ class DBImpl : public DB {
|
||||||
autovector<WriteBatch*>* write_batch_group);
|
autovector<WriteBatch*>* write_batch_group);
|
||||||
|
|
||||||
// Force current memtable contents to be flushed.
|
// Force current memtable contents to be flushed.
|
||||||
Status FlushMemTable(const FlushOptions& options);
|
Status FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& options);
|
||||||
|
|
||||||
// Wait for memtable flushed
|
// Wait for memtable flushed
|
||||||
Status WaitForFlushMemTable();
|
Status WaitForFlushMemTable(ColumnFamilyData* cfd);
|
||||||
|
|
||||||
void MaybeScheduleLogDBDeployStats();
|
void MaybeScheduleLogDBDeployStats();
|
||||||
static void BGLogDBDeployStats(void* db);
|
static void BGLogDBDeployStats(void* db);
|
||||||
|
|
|
@ -1130,10 +1130,12 @@ struct VersionSet::ManifestWriter {
|
||||||
Status status;
|
Status status;
|
||||||
bool done;
|
bool done;
|
||||||
port::CondVar cv;
|
port::CondVar cv;
|
||||||
|
ColumnFamilyData* cfd;
|
||||||
VersionEdit* edit;
|
VersionEdit* edit;
|
||||||
|
|
||||||
explicit ManifestWriter(port::Mutex* mu, VersionEdit* e) :
|
explicit ManifestWriter(port::Mutex* mu, ColumnFamilyData* cfd,
|
||||||
done(false), cv(mu), edit(e) {}
|
VersionEdit* e)
|
||||||
|
: done(false), cv(mu), cfd(cfd), edit(e) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
// A helper class so we can efficiently apply a whole sequence
|
// A helper class so we can efficiently apply a whole sequence
|
||||||
|
@ -1374,7 +1376,6 @@ VersionSet::VersionSet(const std::string& dbname, const Options* options,
|
||||||
next_file_number_(2),
|
next_file_number_(2),
|
||||||
manifest_file_number_(0), // Filled by Recover()
|
manifest_file_number_(0), // Filled by Recover()
|
||||||
last_sequence_(0),
|
last_sequence_(0),
|
||||||
log_number_(0),
|
|
||||||
prev_log_number_(0),
|
prev_log_number_(0),
|
||||||
num_levels_(options_->num_levels),
|
num_levels_(options_->num_levels),
|
||||||
current_version_number_(0),
|
current_version_number_(0),
|
||||||
|
@ -1428,7 +1429,7 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
|
||||||
mu->AssertHeld();
|
mu->AssertHeld();
|
||||||
|
|
||||||
// queue our request
|
// queue our request
|
||||||
ManifestWriter w(mu, edit);
|
ManifestWriter w(mu, column_family_data, edit);
|
||||||
manifest_writers_.push_back(&w);
|
manifest_writers_.push_back(&w);
|
||||||
while (!w.done && &w != manifest_writers_.front()) {
|
while (!w.done && &w != manifest_writers_.front()) {
|
||||||
w.cv.Wait();
|
w.cv.Wait();
|
||||||
|
@ -1447,8 +1448,12 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
|
||||||
assert(manifest_writers_.front() == &w);
|
assert(manifest_writers_.front() == &w);
|
||||||
std::deque<ManifestWriter*>::iterator iter = manifest_writers_.begin();
|
std::deque<ManifestWriter*>::iterator iter = manifest_writers_.begin();
|
||||||
for (; iter != manifest_writers_.end(); ++iter) {
|
for (; iter != manifest_writers_.end(); ++iter) {
|
||||||
|
if ((*iter)->cfd->GetID() != column_family_data->GetID()) {
|
||||||
|
// group commits across column families are not yet supported
|
||||||
|
break;
|
||||||
|
}
|
||||||
last_writer = *iter;
|
last_writer = *iter;
|
||||||
LogAndApplyHelper(&builder, v, last_writer->edit, mu);
|
LogAndApplyHelper(column_family_data, &builder, v, last_writer->edit, mu);
|
||||||
batch_edits.push_back(last_writer->edit);
|
batch_edits.push_back(last_writer->edit);
|
||||||
}
|
}
|
||||||
builder.SaveTo(v);
|
builder.SaveTo(v);
|
||||||
|
@ -1564,7 +1569,6 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
manifest_file_size_ = new_manifest_file_size;
|
manifest_file_size_ = new_manifest_file_size;
|
||||||
AppendVersion(column_family_data, v);
|
AppendVersion(column_family_data, v);
|
||||||
log_number_ = edit->log_number_;
|
|
||||||
column_family_data->SetLogNumber(edit->log_number_);
|
column_family_data->SetLogNumber(edit->log_number_);
|
||||||
prev_log_number_ = edit->prev_log_number_;
|
prev_log_number_ = edit->prev_log_number_;
|
||||||
|
|
||||||
|
@ -1596,15 +1600,16 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VersionSet::LogAndApplyHelper(Builder* builder, Version* v,
|
void VersionSet::LogAndApplyHelper(ColumnFamilyData* cfd, Builder* builder,
|
||||||
VersionEdit* edit, port::Mutex* mu) {
|
Version* v, VersionEdit* edit,
|
||||||
|
port::Mutex* mu) {
|
||||||
mu->AssertHeld();
|
mu->AssertHeld();
|
||||||
|
|
||||||
if (edit->has_log_number_) {
|
if (edit->has_log_number_) {
|
||||||
assert(edit->log_number_ >= log_number_);
|
assert(edit->log_number_ >= cfd->GetLogNumber());
|
||||||
assert(edit->log_number_ < next_file_number_);
|
assert(edit->log_number_ < next_file_number_);
|
||||||
} else {
|
} else {
|
||||||
edit->SetLogNumber(log_number_);
|
edit->SetLogNumber(cfd->GetLogNumber());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!edit->has_prev_log_number_) {
|
if (!edit->has_prev_log_number_) {
|
||||||
|
@ -1754,6 +1759,7 @@ Status VersionSet::Recover(
|
||||||
|
|
||||||
if (edit.has_log_number_) {
|
if (edit.has_log_number_) {
|
||||||
cfd->SetLogNumber(edit.log_number_);
|
cfd->SetLogNumber(edit.log_number_);
|
||||||
|
have_log_number = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if it is not column family add or column family drop,
|
// if it is not column family add or column family drop,
|
||||||
|
@ -1764,11 +1770,6 @@ Status VersionSet::Recover(
|
||||||
builder->second->Apply(&edit);
|
builder->second->Apply(&edit);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (edit.has_log_number_) {
|
|
||||||
log_number = edit.log_number_;
|
|
||||||
have_log_number = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (edit.has_prev_log_number_) {
|
if (edit.has_prev_log_number_) {
|
||||||
prev_log_number = edit.prev_log_number_;
|
prev_log_number = edit.prev_log_number_;
|
||||||
have_prev_log_number = true;
|
have_prev_log_number = true;
|
||||||
|
@ -1828,7 +1829,6 @@ Status VersionSet::Recover(
|
||||||
manifest_file_number_ = next_file;
|
manifest_file_number_ = next_file;
|
||||||
next_file_number_ = next_file + 1;
|
next_file_number_ = next_file + 1;
|
||||||
last_sequence_ = last_sequence;
|
last_sequence_ = last_sequence;
|
||||||
log_number_ = log_number;
|
|
||||||
prev_log_number_ = prev_log_number;
|
prev_log_number_ = prev_log_number;
|
||||||
|
|
||||||
Log(options_->info_log, "Recovered from manifest file:%s succeeded,"
|
Log(options_->info_log, "Recovered from manifest file:%s succeeded,"
|
||||||
|
@ -1839,7 +1839,7 @@ Status VersionSet::Recover(
|
||||||
(unsigned long)manifest_file_number_,
|
(unsigned long)manifest_file_number_,
|
||||||
(unsigned long)next_file_number_,
|
(unsigned long)next_file_number_,
|
||||||
(unsigned long)last_sequence_,
|
(unsigned long)last_sequence_,
|
||||||
(unsigned long)log_number_,
|
(unsigned long)log_number,
|
||||||
(unsigned long)prev_log_number_);
|
(unsigned long)prev_log_number_);
|
||||||
|
|
||||||
for (auto cfd : *column_family_set_) {
|
for (auto cfd : *column_family_set_) {
|
||||||
|
@ -2041,7 +2041,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (edit.has_log_number_) {
|
if (edit.has_log_number_) {
|
||||||
log_number = edit.log_number_;
|
log_number = std::max(log_number, edit.log_number_);
|
||||||
have_log_number = true;
|
have_log_number = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2090,7 +2090,6 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
|
||||||
manifest_file_number_ = next_file;
|
manifest_file_number_ = next_file;
|
||||||
next_file_number_ = next_file + 1;
|
next_file_number_ = next_file + 1;
|
||||||
last_sequence_ = last_sequence;
|
last_sequence_ = last_sequence;
|
||||||
log_number_ = log_number;
|
|
||||||
prev_log_number_ = prev_log_number;
|
prev_log_number_ = prev_log_number;
|
||||||
|
|
||||||
printf("manifest_file_number %lu next_file_number %lu last_sequence "
|
printf("manifest_file_number %lu next_file_number %lu last_sequence "
|
||||||
|
|
|
@ -344,10 +344,6 @@ class VersionSet {
|
||||||
// Mark the specified file number as used.
|
// Mark the specified file number as used.
|
||||||
void MarkFileNumberUsed(uint64_t number);
|
void MarkFileNumberUsed(uint64_t number);
|
||||||
|
|
||||||
// Return the current log file number. This is the biggest log_number from
|
|
||||||
// all column families
|
|
||||||
uint64_t LogNumber() const { return log_number_; }
|
|
||||||
|
|
||||||
// Return the log file number for the log file that is currently
|
// Return the log file number for the log file that is currently
|
||||||
// being compacted, or zero if there is no such log file.
|
// being compacted, or zero if there is no such log file.
|
||||||
uint64_t PrevLogNumber() const { return prev_log_number_; }
|
uint64_t PrevLogNumber() const { return prev_log_number_; }
|
||||||
|
@ -468,7 +464,6 @@ class VersionSet {
|
||||||
uint64_t next_file_number_;
|
uint64_t next_file_number_;
|
||||||
uint64_t manifest_file_number_;
|
uint64_t manifest_file_number_;
|
||||||
std::atomic<uint64_t> last_sequence_;
|
std::atomic<uint64_t> last_sequence_;
|
||||||
uint64_t log_number_;
|
|
||||||
uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
|
uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
|
||||||
|
|
||||||
int num_levels_;
|
int num_levels_;
|
||||||
|
@ -502,8 +497,8 @@ class VersionSet {
|
||||||
VersionSet(const VersionSet&);
|
VersionSet(const VersionSet&);
|
||||||
void operator=(const VersionSet&);
|
void operator=(const VersionSet&);
|
||||||
|
|
||||||
void LogAndApplyHelper(Builder*b, Version* v,
|
void LogAndApplyHelper(ColumnFamilyData* cfd, Builder* b, Version* v,
|
||||||
VersionEdit* edit, port::Mutex* mu);
|
VersionEdit* edit, port::Mutex* mu);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
Loading…
Reference in New Issue