Don't roll empty logs

Summary:
With multiple column families, especially when manual Flush is executed, we might roll the log file, although the current log file is empty (no data has been written to the log).

After the diff, we won't create new log file if current is empty.

Next, I will write an algorithm that will flush column families that reference old log files (i.e., that weren't flushed in a while)

Test Plan: Added an unit test. Confirmed that unit test failes in master

Reviewers: dhruba, haobo, ljin, sdong

Reviewed By: ljin

CC: leveldb

Differential Revision: https://reviews.facebook.net/D17631
This commit is contained in:
Igor Canadi 2014-04-15 09:57:25 -07:00
parent c87ed0942c
commit e6acb874cd
4 changed files with 81 additions and 31 deletions

View file

@ -29,12 +29,31 @@ std::string RandomString(Random* rnd, int len) {
}
} // anonymous namespace
// counts how many operations were performed
class EnvCounter : public EnvWrapper {
public:
explicit EnvCounter(Env* base)
: EnvWrapper(base), num_new_writable_file_(0) {}
int GetNumberOfNewWritableFileCalls() {
return num_new_writable_file_;
}
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
const EnvOptions& soptions) {
++num_new_writable_file_;
return EnvWrapper::NewWritableFile(f, r, soptions);
}
private:
int num_new_writable_file_;
};
class ColumnFamilyTest {
public:
ColumnFamilyTest() : rnd_(139) {
env_ = Env::Default();
env_ = new EnvCounter(Env::Default());
dbname_ = test::TmpDir() + "/column_family_test";
db_options_.create_if_missing = true;
db_options_.env = env_;
DestroyDB(dbname_, Options(db_options_, column_family_options_));
}
@ -299,7 +318,7 @@ class ColumnFamilyTest {
DBOptions db_options_;
std::string dbname_;
DB* db_ = nullptr;
Env* env_;
EnvCounter* env_;
Random rnd_;
};
@ -895,6 +914,25 @@ TEST(ColumnFamilyTest, ReadOnlyDBTest) {
ASSERT_TRUE(!s.ok());
}
TEST(ColumnFamilyTest, DontRollEmptyLogs) {
Open();
CreateColumnFamiliesAndReopen({"one", "two", "three", "four"});
for (int i = 0; i < handles_.size(); ++i) {
PutRandomData(i, 10, 100);
}
int num_writable_file_start = env_->GetNumberOfNewWritableFileCalls();
// this will trigger the flushes
ASSERT_OK(db_->Write(WriteOptions(), nullptr));
for (int i = 0; i < 4; ++i) {
dbfull()->TEST_WaitForFlushMemTable(handles_[i]);
}
int total_new_writable_files =
env_->GetNumberOfNewWritableFileCalls() - num_writable_file_start;
ASSERT_EQ(total_new_writable_files, handles_.size() + 1);
}
} // namespace rocksdb
int main(int argc, char** argv) {

View file

@ -349,6 +349,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname)
shutting_down_(nullptr),
bg_cv_(&mutex_),
logfile_number_(0),
log_empty_(true),
default_cf_handle_(nullptr),
tmp_batch_(),
bg_schedule_needed_(false),
@ -3785,6 +3786,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
RecordTick(options_.statistics.get(), WAL_FILE_SYNCED, 1);
RecordTick(options_.statistics.get(), WAL_FILE_BYTES, log_entry.size());
if (status.ok() && options.sync) {
log_empty_ = false;
if (options_.use_fsync) {
StopWatch(env_, options_.statistics.get(), WAL_FILE_SYNC_MICROS);
status = log_->file()->Fsync();
@ -4057,57 +4059,66 @@ Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) {
// Attempt to switch to a new memtable and trigger flush of old.
// Do this without holding the dbmutex lock.
assert(versions_->PrevLogNumber() == 0);
uint64_t new_log_number = versions_->NewFileNumber();
bool creating_new_log = !log_empty_;
uint64_t new_log_number =
creating_new_log ? versions_->NewFileNumber() : logfile_number_;
SuperVersion* new_superversion = nullptr;
mutex_.Unlock();
{
DelayLoggingAndReset();
s = env_->NewWritableFile(LogFileName(options_.wal_dir, new_log_number),
&lfile,
env_->OptimizeForLogWrite(storage_options_));
if (creating_new_log) {
s = env_->NewWritableFile(
LogFileName(options_.wal_dir, new_log_number), &lfile,
env_->OptimizeForLogWrite(storage_options_));
if (s.ok()) {
// Our final size should be less than write_buffer_size
// (compression, etc) but err on the side of caution.
lfile->SetPreallocationBlockSize(1.1 *
cfd->options()->write_buffer_size);
new_log = new log::Writer(std::move(lfile));
}
}
if (s.ok()) {
// Our final size should be less than write_buffer_size
// (compression, etc) but err on the side of caution.
lfile->SetPreallocationBlockSize(1.1 *
cfd->options()->write_buffer_size);
new_log = new log::Writer(std::move(lfile));
new_mem = new MemTable(cfd->internal_comparator(), *cfd->options());
new_superversion = new SuperVersion();
}
Log(options_.info_log,
"New memtable created with log file: #%lu\n",
(unsigned long)new_log_number);
}
mutex_.Lock();
if (!s.ok()) {
// how do we fail if we're not creating new log?
assert(creating_new_log);
// Avoid chewing through file number space in a tight loop.
versions_->ReuseFileNumber(new_log_number);
assert(!new_mem);
assert(!new_log);
break;
}
logfile_number_ = new_log_number;
assert(new_log != nullptr);
// TODO(icanadi) delete outside of mutex
delete log_.release();
log_.reset(new_log);
if (creating_new_log) {
logfile_number_ = new_log_number;
assert(new_log != nullptr);
// TODO(icanadi) delete outside of mutex
delete log_.release();
log_.reset(new_log);
log_empty_ = true;
alive_log_files_.push_back(logfile_number_);
for (auto cfd : *versions_->GetColumnFamilySet()) {
// all this is just optimization to delete logs that
// are no longer needed -- if CF is empty, that means it
// doesn't need that particular log to stay alive, so we just
// advance the log number. no need to persist this in the manifest
if (cfd->mem()->GetFirstSequenceNumber() == 0 &&
cfd->imm()->size() == 0) {
cfd->SetLogNumber(logfile_number_);
}
}
}
cfd->mem()->SetNextLogNumber(logfile_number_);
cfd->imm()->Add(cfd->mem());
if (force) {
cfd->imm()->FlushRequested();
}
new_mem->Ref();
alive_log_files_.push_back(logfile_number_);
for (auto cfd : *versions_->GetColumnFamilySet()) {
// all this is just optimization to delete logs that
// are no longer needed -- if CF is empty, that means it
// doesn't need that particular log to stay alive, so we just
// advance the log number. no need to persist this in the manifest
if (cfd->mem()->GetFirstSequenceNumber() == 0 &&
cfd->imm()->size() == 0) {
cfd->SetLogNumber(logfile_number_);
}
}
cfd->SetMemtable(new_mem);
Log(options_.info_log,
"[CF %" PRIu32 "] New memtable created with log file: #%lu\n",

View file

@ -421,6 +421,7 @@ class DBImpl : public DB {
port::CondVar bg_cv_; // Signalled when background work finishes
uint64_t logfile_number_;
unique_ptr<log::Writer> log_;
bool log_empty_;
ColumnFamilyHandleImpl* default_cf_handle_;
unique_ptr<ColumnFamilyMemTablesImpl> column_family_memtables_;
std::deque<uint64_t> alive_log_files_;

View file

@ -1742,7 +1742,7 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
}
}
if (max_log_number_in_batch != 0) {
assert(column_family_data->GetLogNumber() < max_log_number_in_batch);
assert(column_family_data->GetLogNumber() <= max_log_number_in_batch);
column_family_data->SetLogNumber(max_log_number_in_batch);
}
AppendVersion(column_family_data, v);