log flush reason for better debugging experience

Summary:
It's always a mystery from the logs why flush was triggered -- user triggered it manually, WriteBufferManager triggered it,  logs were full, write buffer was full, etc.
This PR logs Flush reason whenever a flush is scheduled.
Closes https://github.com/facebook/rocksdb/pull/3401

Differential Revision: D6788142

Pulled By: miasantreble

fbshipit-source-id: a867e54d493c06adf5172bd36a180fb3faae3511
This commit is contained in:
Zhongyi Xie 2018-02-09 12:09:55 -08:00 committed by Facebook Github Bot
parent e78715c29a
commit 945f618ba5
10 changed files with 80 additions and 23 deletions

View file

@ -381,6 +381,7 @@ ColumnFamilyData::ColumnFamilyData(
next_(nullptr), next_(nullptr),
prev_(nullptr), prev_(nullptr),
log_number_(0), log_number_(0),
flush_reason_(FlushReason::kUnknown),
column_family_set_(column_family_set), column_family_set_(column_family_set),
pending_flush_(false), pending_flush_(false),
pending_compaction_(false), pending_compaction_(false),

View file

@ -202,6 +202,10 @@ class ColumnFamilyData {
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; } void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
uint64_t GetLogNumber() const { return log_number_; } uint64_t GetLogNumber() const { return log_number_; }
void SetFlushReason(FlushReason flush_reason) {
flush_reason_ = flush_reason;
}
FlushReason GetFlushReason() const { return flush_reason_; }
// thread-safe // thread-safe
const EnvOptions* soptions() const; const EnvOptions* soptions() const;
const ImmutableCFOptions* ioptions() const { return &ioptions_; } const ImmutableCFOptions* ioptions() const { return &ioptions_; }
@ -404,6 +408,8 @@ class ColumnFamilyData {
// recovered from // recovered from
uint64_t log_number_; uint64_t log_number_;
FlushReason flush_reason_;
// An object that keeps all the compaction stats // An object that keeps all the compaction stats
// and picks the next compaction // and picks the next compaction
std::unique_ptr<CompactionPicker> compaction_picker_; std::unique_ptr<CompactionPicker> compaction_picker_;

View file

@ -93,7 +93,7 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
} }
cfd->Ref(); cfd->Ref();
mutex_.Unlock(); mutex_.Unlock();
status = FlushMemTable(cfd, FlushOptions()); status = FlushMemTable(cfd, FlushOptions(), FlushReason::kGetLiveFiles);
TEST_SYNC_POINT("DBImpl::GetLiveFiles:1"); TEST_SYNC_POINT("DBImpl::GetLiveFiles:1");
TEST_SYNC_POINT("DBImpl::GetLiveFiles:2"); TEST_SYNC_POINT("DBImpl::GetLiveFiles:2");
mutex_.Lock(); mutex_.Lock();

View file

@ -258,7 +258,7 @@ void DBImpl::CancelAllBackgroundWork(bool wait) {
if (!cfd->IsDropped() && cfd->initialized() && !cfd->mem()->IsEmpty()) { if (!cfd->IsDropped() && cfd->initialized() && !cfd->mem()->IsEmpty()) {
cfd->Ref(); cfd->Ref();
mutex_.Unlock(); mutex_.Unlock();
FlushMemTable(cfd, FlushOptions()); FlushMemTable(cfd, FlushOptions(), FlushReason::kShutDown);
mutex_.Lock(); mutex_.Lock();
cfd->Unref(); cfd->Unref();
} }
@ -2826,7 +2826,9 @@ Status DBImpl::IngestExternalFile(
&need_flush); &need_flush);
if (status.ok() && need_flush) { if (status.ok() && need_flush) {
mutex_.Unlock(); mutex_.Unlock();
status = FlushMemTable(cfd, FlushOptions(), true /* writes_stopped */); status = FlushMemTable(cfd, FlushOptions(),
FlushReason::kExternalFileIngestion,
true /* writes_stopped */);
mutex_.Lock(); mutex_.Lock();
} }
} }

View file

@ -819,7 +819,7 @@ class DBImpl : public DB {
// Force current memtable contents to be flushed. // Force current memtable contents to be flushed.
Status FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& options, Status FlushMemTable(ColumnFamilyData* cfd, const FlushOptions& options,
bool writes_stopped = false); FlushReason flush_reason, bool writes_stopped = false);
// Wait for memtable flushed. // Wait for memtable flushed.
// If flush_memtable_id is non-null, wait until the memtable with the ID // If flush_memtable_id is non-null, wait until the memtable with the ID
@ -881,7 +881,7 @@ class DBImpl : public DB {
ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name); ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);
void MaybeScheduleFlushOrCompaction(); void MaybeScheduleFlushOrCompaction();
void SchedulePendingFlush(ColumnFamilyData* cfd); void SchedulePendingFlush(ColumnFamilyData* cfd, FlushReason flush_reason);
void SchedulePendingCompaction(ColumnFamilyData* cfd); void SchedulePendingCompaction(ColumnFamilyData* cfd);
void SchedulePendingPurge(std::string fname, FileType type, uint64_t number, void SchedulePendingPurge(std::string fname, FileType type, uint64_t number,
uint32_t path_id, int job_id); uint32_t path_id, int job_id);
@ -920,7 +920,7 @@ class DBImpl : public DB {
// helper functions for adding and removing from flush & compaction queues // helper functions for adding and removing from flush & compaction queues
void AddToCompactionQueue(ColumnFamilyData* cfd); void AddToCompactionQueue(ColumnFamilyData* cfd);
ColumnFamilyData* PopFirstFromCompactionQueue(); ColumnFamilyData* PopFirstFromCompactionQueue();
void AddToFlushQueue(ColumnFamilyData* cfd); void AddToFlushQueue(ColumnFamilyData* cfd, FlushReason flush_reason);
ColumnFamilyData* PopFirstFromFlushQueue(); ColumnFamilyData* PopFirstFromFlushQueue();
// helper function to call after some of the logs_ were synced // helper function to call after some of the logs_ were synced

View file

@ -23,6 +23,7 @@
#include "util/sync_point.h" #include "util/sync_point.h"
namespace rocksdb { namespace rocksdb {
Status DBImpl::SyncClosedLogs(JobContext* job_context) { Status DBImpl::SyncClosedLogs(JobContext* job_context) {
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start"); TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
mutex_.AssertHeld(); mutex_.AssertHeld();
@ -222,6 +223,7 @@ void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
info.smallest_seqno = file_meta->smallest_seqno; info.smallest_seqno = file_meta->smallest_seqno;
info.largest_seqno = file_meta->largest_seqno; info.largest_seqno = file_meta->largest_seqno;
info.table_properties = prop; info.table_properties = prop;
info.flush_reason = cfd->GetFlushReason();
for (auto listener : immutable_db_options_.listeners) { for (auto listener : immutable_db_options_.listeners) {
listener->OnFlushBegin(this, info); listener->OnFlushBegin(this, info);
} }
@ -266,6 +268,7 @@ void DBImpl::NotifyOnFlushCompleted(ColumnFamilyData* cfd,
info.smallest_seqno = file_meta->smallest_seqno; info.smallest_seqno = file_meta->smallest_seqno;
info.largest_seqno = file_meta->largest_seqno; info.largest_seqno = file_meta->largest_seqno;
info.table_properties = prop; info.table_properties = prop;
info.flush_reason = cfd->GetFlushReason();
for (auto listener : immutable_db_options_.listeners) { for (auto listener : immutable_db_options_.listeners) {
listener->OnFlushCompleted(this, info); listener->OnFlushCompleted(this, info);
} }
@ -287,7 +290,7 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
auto cfd = cfh->cfd(); auto cfd = cfh->cfd();
bool exclusive = options.exclusive_manual_compaction; bool exclusive = options.exclusive_manual_compaction;
Status s = FlushMemTable(cfd, FlushOptions()); Status s = FlushMemTable(cfd, FlushOptions(), FlushReason::kManualCompaction);
if (!s.ok()) { if (!s.ok()) {
LogFlush(immutable_db_options_.info_log); LogFlush(immutable_db_options_.info_log);
return s; return s;
@ -812,7 +815,8 @@ Status DBImpl::Flush(const FlushOptions& flush_options,
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family); auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual flush start.", ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual flush start.",
cfh->GetName().c_str()); cfh->GetName().c_str());
Status s = FlushMemTable(cfh->cfd(), flush_options); Status s =
FlushMemTable(cfh->cfd(), flush_options, FlushReason::kManualCompaction);
ROCKS_LOG_INFO(immutable_db_options_.info_log, ROCKS_LOG_INFO(immutable_db_options_.info_log,
"[%s] Manual flush finished, status: %s\n", "[%s] Manual flush finished, status: %s\n",
cfh->GetName().c_str(), s.ToString().c_str()); cfh->GetName().c_str(), s.ToString().c_str());
@ -949,7 +953,7 @@ Status DBImpl::RunManualCompaction(ColumnFamilyData* cfd, int input_level,
Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
const FlushOptions& flush_options, const FlushOptions& flush_options,
bool writes_stopped) { FlushReason flush_reason, bool writes_stopped) {
Status s; Status s;
uint64_t flush_memtable_id = 0; uint64_t flush_memtable_id = 0;
{ {
@ -978,7 +982,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
cfd->imm()->FlushRequested(); cfd->imm()->FlushRequested();
// schedule flush // schedule flush
SchedulePendingFlush(cfd); SchedulePendingFlush(cfd, flush_reason);
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
} }
@ -1134,11 +1138,12 @@ ColumnFamilyData* DBImpl::PopFirstFromCompactionQueue() {
return cfd; return cfd;
} }
void DBImpl::AddToFlushQueue(ColumnFamilyData* cfd) { void DBImpl::AddToFlushQueue(ColumnFamilyData* cfd, FlushReason flush_reason) {
assert(!cfd->pending_flush()); assert(!cfd->pending_flush());
cfd->Ref(); cfd->Ref();
flush_queue_.push_back(cfd); flush_queue_.push_back(cfd);
cfd->set_pending_flush(true); cfd->set_pending_flush(true);
cfd->SetFlushReason(flush_reason);
} }
ColumnFamilyData* DBImpl::PopFirstFromFlushQueue() { ColumnFamilyData* DBImpl::PopFirstFromFlushQueue() {
@ -1147,12 +1152,14 @@ ColumnFamilyData* DBImpl::PopFirstFromFlushQueue() {
flush_queue_.pop_front(); flush_queue_.pop_front();
assert(cfd->pending_flush()); assert(cfd->pending_flush());
cfd->set_pending_flush(false); cfd->set_pending_flush(false);
// TODO: need to unset flush reason?
return cfd; return cfd;
} }
void DBImpl::SchedulePendingFlush(ColumnFamilyData* cfd) { void DBImpl::SchedulePendingFlush(ColumnFamilyData* cfd,
FlushReason flush_reason) {
if (!cfd->pending_flush() && cfd->imm()->IsFlushPending()) { if (!cfd->pending_flush() && cfd->imm()->IsFlushPending()) {
AddToFlushQueue(cfd); AddToFlushQueue(cfd, flush_reason);
++unscheduled_flushes_; ++unscheduled_flushes_;
} }
} }
@ -1929,7 +1936,7 @@ void DBImpl::InstallSuperVersionAndScheduleWork(
// Whenever we install new SuperVersion, we might need to issue new flushes or // Whenever we install new SuperVersion, we might need to issue new flushes or
// compactions. // compactions.
SchedulePendingFlush(cfd); SchedulePendingFlush(cfd, FlushReason::kSuperVersionChange);
SchedulePendingCompaction(cfd); SchedulePendingCompaction(cfd);
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();

View file

@ -103,7 +103,7 @@ Status DBImpl::TEST_FlushMemTable(bool wait, ColumnFamilyHandle* cfh) {
auto cfhi = reinterpret_cast<ColumnFamilyHandleImpl*>(cfh); auto cfhi = reinterpret_cast<ColumnFamilyHandleImpl*>(cfh);
cfd = cfhi->cfd(); cfd = cfhi->cfd();
} }
return FlushMemTable(cfd, fo); return FlushMemTable(cfd, fo, FlushReason::kTest);
} }
Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) { Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) {

View file

@ -1019,7 +1019,7 @@ Status DBImpl::SwitchWAL(WriteContext* write_context) {
break; break;
} }
cfd->imm()->FlushRequested(); cfd->imm()->FlushRequested();
SchedulePendingFlush(cfd); SchedulePendingFlush(cfd, FlushReason::kWriteBufferManager);
} }
} }
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
@ -1065,7 +1065,7 @@ Status DBImpl::HandleWriteBufferFull(WriteContext* write_context) {
status = SwitchMemtable(cfd_picked, write_context); status = SwitchMemtable(cfd_picked, write_context);
if (status.ok()) { if (status.ok()) {
cfd_picked->imm()->FlushRequested(); cfd_picked->imm()->FlushRequested();
SchedulePendingFlush(cfd_picked); SchedulePendingFlush(cfd_picked, FlushReason::kWriteBufferFull);
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
} }
} }

View file

@ -54,6 +54,32 @@
namespace rocksdb { namespace rocksdb {
const char* GetFlushReasonString (FlushReason flush_reason) {
switch (flush_reason) {
case FlushReason::kUnknown:
return "Unknown";
case FlushReason::kGetLiveFiles:
return "Get Live Files";
case FlushReason::kShutDown:
return "Shut down";
case FlushReason::kExternalFileIngestion:
return "External File Ingestion";
case FlushReason::kManualCompaction:
return "Manual Compaction";
case FlushReason::kWriteBufferManager:
return "Write Buffer Manager";
case FlushReason::kWriteBufferFull:
return "Write Buffer Full";
case FlushReason::kTest:
return "Test";
case FlushReason::kSuperVersionChange:
return "SuperVersion Change";
default:
return "Invalid";
}
}
FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd, FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
const ImmutableDBOptions& db_options, const ImmutableDBOptions& db_options,
const MutableCFOptions& mutable_cf_options, const MutableCFOptions& mutable_cf_options,
@ -278,12 +304,13 @@ Status FlushJob::WriteLevel0Table() {
total_memory_usage += m->ApproximateMemoryUsage(); total_memory_usage += m->ApproximateMemoryUsage();
} }
event_logger_->Log() << "job" << job_context_->job_id << "event" event_logger_->Log()
<< "flush_started" << "job" << job_context_->job_id << "event"
<< "num_memtables" << mems_.size() << "num_entries" << "flush_started"
<< total_num_entries << "num_deletes" << "num_memtables" << mems_.size() << "num_entries" << total_num_entries
<< total_num_deletes << "memory_usage" << "num_deletes" << total_num_deletes << "memory_usage"
<< total_memory_usage; << total_memory_usage << "flush_reason"
<< GetFlushReasonString(cfd_->GetFlushReason());
{ {
ScopedArenaIterator iter( ScopedArenaIterator iter(

View file

@ -82,6 +82,18 @@ enum class CompactionReason {
kBottommostFiles, kBottommostFiles,
}; };
enum class FlushReason : int {
kUnknown = 0x00,
kGetLiveFiles = 0x01,
kShutDown = 0x02,
kExternalFileIngestion = 0x03,
kManualCompaction = 0x04,
kWriteBufferManager = 0x05,
kWriteBufferFull = 0x06,
kTest = 0x07,
kSuperVersionChange = 0x08,
};
enum class BackgroundErrorReason { enum class BackgroundErrorReason {
kFlush, kFlush,
kCompaction, kCompaction,
@ -143,6 +155,8 @@ struct FlushJobInfo {
SequenceNumber largest_seqno; SequenceNumber largest_seqno;
// Table properties of the table being flushed // Table properties of the table being flushed
TableProperties table_properties; TableProperties table_properties;
FlushReason flush_reason;
}; };
struct CompactionJobInfo { struct CompactionJobInfo {