mirror of https://github.com/facebook/rocksdb.git
Slowdown when writing to the last write buffer
Summary: Now if inserting to mem table is much faster than writing to files, there is no mechanism users can rely on to avoid stopping for reaching options.max_write_buffer_number. With the commit, if there are more than four maximum write buffers configured, we slow down to the rate of options.delayed_write_rate while we reach the last one. Test Plan: 1. Add a new unit test. 2. Run db_bench with ./db_bench --benchmarks=fillrandom --num=10000000 --max_background_flushes=6 --batch_size=32 -max_write_buffer_number=4 --delayed_write_rate=500000 --statistics based on hard drive and see stopping is avoided with the commit. Reviewers: yhchiang, IslamAbdelRahman, anthony, rven, kradhakrishnan, igor Reviewed By: igor Subscribers: MarkCallaghan, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D52047
This commit is contained in:
parent
6b2a3ac92c
commit
d72b31774e
|
@ -4,6 +4,7 @@
|
|||
### Public API Changes
|
||||
* Change names in CompactionPri and add a new one.
|
||||
* Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit.
|
||||
* If options.max_write_buffer_number > 3, writes will be slowed down when writing to the last write buffer to delay a full stop.
|
||||
|
||||
## 4.3.0 (12/8/2015)
|
||||
### New Features
|
||||
|
|
|
@ -447,6 +447,16 @@ void ColumnFamilyData::RecalculateWriteStallConditions(
|
|||
"(waiting for flush), max_write_buffer_number is set to %d",
|
||||
name_.c_str(), imm()->NumNotFlushed(),
|
||||
mutable_cf_options.max_write_buffer_number);
|
||||
} else if (mutable_cf_options.max_write_buffer_number > 3 &&
|
||||
imm()->NumNotFlushed() >=
|
||||
mutable_cf_options.max_write_buffer_number - 1) {
|
||||
write_controller_token_ = write_controller->GetDelayToken();
|
||||
internal_stats_->AddCFStats(InternalStats::MEMTABLE_SLOWDOWN, 1);
|
||||
Log(InfoLogLevel::WARN_LEVEL, ioptions_.info_log,
|
||||
"[%s] Stalling writes because we have %d immutable memtables "
|
||||
"(waiting for flush), max_write_buffer_number is set to %d",
|
||||
name_.c_str(), imm()->NumNotFlushed(),
|
||||
mutable_cf_options.max_write_buffer_number);
|
||||
} else if (vstorage->l0_delay_trigger_count() >=
|
||||
mutable_cf_options.level0_stop_writes_trigger) {
|
||||
write_controller_token_ = write_controller->GetStopToken();
|
||||
|
|
|
@ -9144,6 +9144,42 @@ TEST_F(DBTest, SoftLimit) {
|
|||
sleeping_task_low.WakeUp();
|
||||
sleeping_task_low.WaitUntilDone();
|
||||
}
|
||||
|
||||
TEST_F(DBTest, LastWriteBufferDelay) {
|
||||
Options options;
|
||||
options.env = env_;
|
||||
options = CurrentOptions(options);
|
||||
options.write_buffer_size = 100000;
|
||||
options.max_write_buffer_number = 4;
|
||||
options.delayed_write_rate = 20000;
|
||||
options.compression = kNoCompression;
|
||||
options.disable_auto_compactions = true;
|
||||
int kNumKeysPerMemtable = 3;
|
||||
options.memtable_factory.reset(
|
||||
new SpecialSkipListFactory(kNumKeysPerMemtable));
|
||||
|
||||
Reopen(options);
|
||||
test::SleepingBackgroundTask sleeping_task;
|
||||
// Block flushes
|
||||
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
|
||||
Env::Priority::HIGH);
|
||||
sleeping_task.WaitUntilSleeping();
|
||||
|
||||
// Create 3 L0 files, making score of L0 to be 3.
|
||||
for (int i = 0; i < 3; i++) {
|
||||
// Fill one mem table
|
||||
for (int j = 0; j < kNumKeysPerMemtable; j++) {
|
||||
Put(Key(j), "");
|
||||
}
|
||||
ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
|
||||
}
|
||||
// Inserting a new entry would create a new mem table, triggering slow down.
|
||||
Put(Key(0), "");
|
||||
ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());
|
||||
|
||||
sleeping_task.WakeUp();
|
||||
sleeping_task.WaitUntilDone();
|
||||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
TEST_F(DBTest, FailWhenCompressionNotSupportedTest) {
|
||||
|
|
|
@ -700,7 +700,7 @@ void InternalStats::DumpCFStats(std::string* value) {
|
|||
cf_stats_count_[LEVEL0_NUM_FILES_TOTAL] +
|
||||
cf_stats_count_[SOFT_PENDING_COMPACTION_BYTES_LIMIT] +
|
||||
cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT] +
|
||||
cf_stats_count_[MEMTABLE_COMPACTION];
|
||||
cf_stats_count_[MEMTABLE_COMPACTION] + cf_stats_count_[MEMTABLE_SLOWDOWN];
|
||||
// Stats summary across levels
|
||||
PrintLevelStats(buf, sizeof(buf), "Sum", total_files,
|
||||
total_files_being_compacted, total_file_size, 0, w_amp,
|
||||
|
@ -734,6 +734,8 @@ void InternalStats::DumpCFStats(std::string* value) {
|
|||
" slowdown for pending_compaction_bytes, "
|
||||
"%" PRIu64
|
||||
" memtable_compaction, "
|
||||
"%" PRIu64
|
||||
" memtable_slowdown, "
|
||||
"interval %" PRIu64 " total count\n",
|
||||
cf_stats_count_[LEVEL0_SLOWDOWN_TOTAL],
|
||||
cf_stats_count_[LEVEL0_SLOWDOWN_WITH_COMPACTION],
|
||||
|
@ -742,6 +744,7 @@ void InternalStats::DumpCFStats(std::string* value) {
|
|||
cf_stats_count_[HARD_PENDING_COMPACTION_BYTES_LIMIT],
|
||||
cf_stats_count_[SOFT_PENDING_COMPACTION_BYTES_LIMIT],
|
||||
cf_stats_count_[MEMTABLE_COMPACTION],
|
||||
cf_stats_count_[MEMTABLE_SLOWDOWN],
|
||||
total_stall_count - cf_stats_snapshot_.stall_count);
|
||||
value->append(buf);
|
||||
|
||||
|
|
|
@ -86,6 +86,7 @@ class InternalStats {
|
|||
LEVEL0_SLOWDOWN_TOTAL,
|
||||
LEVEL0_SLOWDOWN_WITH_COMPACTION,
|
||||
MEMTABLE_COMPACTION,
|
||||
MEMTABLE_SLOWDOWN,
|
||||
LEVEL0_NUM_FILES_TOTAL,
|
||||
LEVEL0_NUM_FILES_WITH_COMPACTION,
|
||||
SOFT_PENDING_COMPACTION_BYTES_LIMIT,
|
||||
|
@ -343,6 +344,7 @@ class InternalStats {
|
|||
LEVEL0_SLOWDOWN_TOTAL,
|
||||
LEVEL0_SLOWDOWN_WITH_COMPACTION,
|
||||
MEMTABLE_COMPACTION,
|
||||
MEMTABLE_SLOWDOWN,
|
||||
LEVEL0_NUM_FILES_TOTAL,
|
||||
LEVEL0_NUM_FILES_WITH_COMPACTION,
|
||||
SOFT_PENDING_COMPACTION_BYTES_LIMIT,
|
||||
|
|
|
@ -256,6 +256,9 @@ struct ColumnFamilyOptions {
|
|||
// The default and the minimum number is 2, so that when 1 write buffer
|
||||
// is being flushed to storage, new writes can continue to the other
|
||||
// write buffer.
|
||||
// If max_write_buffer_number > 3, writing will be slowed down to
|
||||
// options.delayed_write_rate if we are writing to the last write buffer
|
||||
// allowed.
|
||||
//
|
||||
// Default: 2
|
||||
//
|
||||
|
|
Loading…
Reference in New Issue