diff --git a/.gitignore b/.gitignore index a4cddf141a..974991fd8d 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ build_tools/VALGRIND_LOGS/ coverage/COVERAGE_REPORT .gdbhistory .phutil_module_cache +tags diff --git a/db/compaction.cc b/db/compaction.cc index 48866a799f..bafb5b4ea9 100644 --- a/db/compaction.cc +++ b/db/compaction.cc @@ -213,6 +213,7 @@ static void FileSizeSummary(unsigned long long sz, char* output, int len) { static int InputSummary(std::vector& files, char* output, int len) { + *output = '\0'; int write = 0; for (unsigned int i = 0; i < files.size(); i++) { int sz = len - write; @@ -249,9 +250,7 @@ void Compaction::Summary(char* output, int len) { return; } - if (inputs_[1].size()) { - write += InputSummary(inputs_[1], output+write, len-write); - } + write += InputSummary(inputs_[1], output+write, len-write); if (write < 0 || write >= len) { return; } diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc index c58f699e41..30e187a191 100644 --- a/db/compaction_picker.cc +++ b/db/compaction_picker.cc @@ -178,7 +178,11 @@ bool CompactionPicker::ExpandWhileOverlapping(Compaction* c) { // If, after the expansion, there are files that are already under // compaction, then we must drop/cancel this compaction. int parent_index = -1; - if (FilesInCompaction(c->inputs_[0]) || + if (c->inputs_[0].empty()) { + Log(options_->info_log, + "ExpandWhileOverlapping() failure because zero input files"); + } + if (c->inputs_[0].empty() || FilesInCompaction(c->inputs_[0]) || (c->level() != c->output_level() && ParentRangeInCompaction(c->input_version_, &smallest, &largest, level, &parent_index))) { @@ -369,6 +373,12 @@ Compaction* LevelCompactionPicker::PickCompaction(Version* version, Compaction* c = nullptr; int level = -1; + // Compute the compactions needed. It is better to do it here + // and also in LogAndApply(), otherwise the values could be stale. 
+ std::vector<uint64_t> size_being_compacted(NumberLevels() - 1); + SizeBeingCompacted(size_being_compacted); + version->ComputeCompactionScore(size_being_compacted); + // We prefer compactions triggered by too much data in a level over + // the compactions triggered by seeks. + // diff --git a/db/db_impl.cc b/db/db_impl.cc index b33a662e01..33ba416f6e 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -10,7 +10,6 @@ #include "db/db_impl.h" #define __STDC_FORMAT_MACROS - #include #include #include @@ -1711,8 +1710,10 @@ Status DBImpl::WaitForFlushMemTable(ColumnFamilyData* cfd) { return s; } -Status DBImpl::TEST_FlushMemTable() { - return FlushMemTable(default_cf_handle_->cfd(), FlushOptions()); +Status DBImpl::TEST_FlushMemTable(bool wait) { + FlushOptions fo; + fo.wait = wait; + return FlushMemTable(default_cf_handle_->cfd(), fo); } Status DBImpl::TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family) { @@ -1851,10 +1852,15 @@ void DBImpl::BackgroundCallFlush() { // case this is an environmental problem and we do not want to // chew up resources for failed compactions for the duration of // the problem. + uint64_t error_cnt = default_cf_handle_->cfd() + ->internal_stats() + ->BumpAndGetBackgroundErrorCount(); bg_cv_.SignalAll(); // In case a waiter can proceed despite the error - Log(options_.info_log, "Waiting after background flush error: %s", - s.ToString().c_str()); mutex_.Unlock(); + Log(options_.info_log, + "Waiting after background flush error: %s, " + "Accumulated background error counts: %" PRIu64, + s.ToString().c_str(), error_cnt); log_buffer.FlushBufferToLog(); LogFlush(options_.info_log); env_->SleepForMicroseconds(1000000); @@ -1925,11 +1931,16 @@ void DBImpl::BackgroundCallCompaction() { // case this is an environmental problem and we do not want to // chew up resources for failed compactions for the duration of // the problem. 
+ uint64_t error_cnt = default_cf_handle_->cfd() + ->internal_stats() + ->BumpAndGetBackgroundErrorCount(); bg_cv_.SignalAll(); // In case a waiter can proceed despite the error mutex_.Unlock(); log_buffer.FlushBufferToLog(); - Log(options_.info_log, "Waiting after background compaction error: %s", - s.ToString().c_str()); + Log(options_.info_log, + "Waiting after background compaction error: %s, " + "Accumulated background error counts: %" PRIu64, + s.ToString().c_str(), error_cnt); LogFlush(options_.info_log); env_->SleepForMicroseconds(1000000); mutex_.Lock(); @@ -3820,8 +3831,10 @@ bool DBImpl::GetProperty(ColumnFamilyHandle* column_family, value->clear(); auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family); auto cfd = cfh->cfd(); + DBPropertyType property_type = GetPropertyType(property); MutexLock l(&mutex_); - return cfd->internal_stats()->GetProperty(property, value, cfd); + return cfd->internal_stats()->GetProperty(property_type, property, value, + cfd); } void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family, diff --git a/db/db_impl.h b/db/db_impl.h index 042b1d2f45..9467c9cdc1 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -143,7 +143,7 @@ class DBImpl : public DB { ColumnFamilyHandle* column_family = nullptr); // Force current memtable contents to be flushed. 
- Status TEST_FlushMemTable(); + Status TEST_FlushMemTable(bool wait = true); // Wait for memtable compaction Status TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family = nullptr); diff --git a/db/db_test.cc b/db/db_test.cc index cce43ae9d4..ef21eb9a7c 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -2206,6 +2206,92 @@ TEST(DBTest, NumImmutableMemTable) { } while (ChangeCompactOptions()); } +class SleepingBackgroundTask { + public: + SleepingBackgroundTask() : bg_cv_(&mutex_), should_sleep_(true) {} + void DoSleep() { + MutexLock l(&mutex_); + while (should_sleep_) { + bg_cv_.Wait(); + } + } + void WakeUp() { + MutexLock l(&mutex_); + should_sleep_ = false; + bg_cv_.SignalAll(); + } + + static void DoSleepTask(void* arg) { + reinterpret_cast(arg)->DoSleep(); + } + + private: + port::Mutex mutex_; + port::CondVar bg_cv_; // Signalled when background work finishes + bool should_sleep_; +}; + +TEST(DBTest, GetProperty) { + // Set sizes to both background thread pool to be 1 and block them. 
+ env_->SetBackgroundThreads(1, Env::HIGH); + env_->SetBackgroundThreads(1, Env::LOW); + SleepingBackgroundTask sleeping_task_low; + env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + SleepingBackgroundTask sleeping_task_high; + env_->Schedule(&SleepingBackgroundTask::DoSleepTask, &sleeping_task_high, + Env::Priority::HIGH); + + Options options = CurrentOptions(); + WriteOptions writeOpt = WriteOptions(); + writeOpt.disableWAL = true; + options.compaction_style = kCompactionStyleUniversal; + options.level0_file_num_compaction_trigger = 1; + options.compaction_options_universal.size_ratio = 50; + options.max_background_compactions = 1; + options.max_background_flushes = 1; + options.max_write_buffer_number = 10; + options.min_write_buffer_number_to_merge = 1; + options.write_buffer_size = 1000000; + Reopen(&options); + + std::string big_value(1000000 * 2, 'x'); + std::string num; + SetPerfLevel(kEnableTime); + + ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value)); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); + ASSERT_EQ(num, "0"); + perf_context.Reset(); + + ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value)); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "1"); + ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value)); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num)); + ASSERT_EQ(num, "2"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); + ASSERT_EQ(num, "1"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); + ASSERT_EQ(num, "0"); + + sleeping_task_high.WakeUp(); + dbfull()->TEST_WaitForFlushMemTable(); + + ASSERT_OK(dbfull()->Put(writeOpt, "k4", big_value)); + 
ASSERT_OK(dbfull()->Put(writeOpt, "k5", big_value)); + dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.mem-table-flush-pending", &num)); + ASSERT_EQ(num, "0"); + ASSERT_TRUE(dbfull()->GetProperty("rocksdb.compaction-pending", &num)); + ASSERT_EQ(num, "1"); + sleeping_task_low.WakeUp(); +} + TEST(DBTest, FLUSH) { do { CreateAndReopenWithCF({"pikachu"}); @@ -4286,6 +4372,11 @@ TEST(DBTest, NoSpace) { dbfull()->TEST_CompactRange(level, nullptr, nullptr); } } + + std::string property_value; + ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); + ASSERT_EQ("5", property_value); + env_->no_space_.Release_Store(nullptr); ASSERT_LT(CountFiles(), num_files + 3); @@ -4294,6 +4385,43 @@ TEST(DBTest, NoSpace) { } while (ChangeCompactOptions()); } +// Check background error counter bumped on flush failures. +TEST(DBTest, NoSpaceFlush) { + do { + Options options = CurrentOptions(); + options.env = env_; + options.max_background_flushes = 1; + Reopen(&options); + + ASSERT_OK(Put("foo", "v1")); + env_->no_space_.Release_Store(env_); // Force out-of-space errors + + std::string property_value; + // Background error count is 0 now. + ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value)); + ASSERT_EQ("0", property_value); + + dbfull()->TEST_FlushMemTable(false); + + // Wait 300 milliseconds or background-errors turned 1 from 0. + int time_to_sleep_limit = 300000; + while (time_to_sleep_limit > 0) { + int to_sleep = (time_to_sleep_limit > 1000) ? 
1000 : time_to_sleep_limit; + time_to_sleep_limit -= to_sleep; + env_->SleepForMicroseconds(to_sleep); + + ASSERT_TRUE( + db_->GetProperty("rocksdb.background-errors", &property_value)); + if (property_value == "1") { + break; + } + } + ASSERT_EQ("1", property_value); + + env_->no_space_.Release_Store(nullptr); + } while (ChangeCompactOptions()); +} + TEST(DBTest, NonWritableFileSystem) { do { Options options = CurrentOptions(); diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 29842eff3a..2988c88b24 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -1,4 +1,3 @@ - // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. @@ -14,285 +13,341 @@ namespace rocksdb { -bool InternalStats::GetProperty(const Slice& property, std::string* value, - ColumnFamilyData* cfd) { +DBPropertyType GetPropertyType(const Slice& property) { Slice in = property; Slice prefix("rocksdb."); - if (!in.starts_with(prefix)) return false; + if (!in.starts_with(prefix)) return kUnknown; in.remove_prefix(prefix.size()); if (in.starts_with("num-files-at-level")) { - in.remove_prefix(strlen("num-files-at-level")); - uint64_t level; - bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); - if (!ok || (int)level >= number_levels_) { - return false; - } else { - char buf[100]; - snprintf(buf, sizeof(buf), "%d", - cfd->current()->NumLevelFiles(static_cast(level))); - *value = buf; - return true; - } + return kNumFilesAtLevel; } else if (in == "levelstats") { - char buf[1000]; - snprintf(buf, sizeof(buf), - "Level Files Size(MB)\n" - "--------------------\n"); - value->append(buf); - - for (int level = 0; level < number_levels_; level++) { - snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level, - cfd->current()->NumLevelFiles(level), - cfd->current()->NumLevelBytes(level) / 1048576.0); - 
value->append(buf); - } - return true; - + return kLevelStats; } else if (in == "stats") { - char buf[1000]; + return kStats; + } else if (in == "sstables") { + return kSsTables; + } else if (in == "num-immutable-mem-table") { + return kNumImmutableMemTable; + } else if (in == "mem-table-flush-pending") { + return kMemtableFlushPending; + } else if (in == "compaction-pending") { + return kCompactionPending; + } else if (in == "background-errors") { + return kBackgroundErrors; + } + return kUnknown; +} - uint64_t wal_bytes = 0; - uint64_t wal_synced = 0; - uint64_t user_bytes_written = 0; - uint64_t write_other = 0; - uint64_t write_self = 0; - uint64_t write_with_wal = 0; - uint64_t total_bytes_written = 0; - uint64_t total_bytes_read = 0; - uint64_t micros_up = env_->NowMicros() - started_at_; - // Add "+1" to make sure seconds_up is > 0 and avoid NaN later - double seconds_up = (micros_up + 1) / 1000000.0; - uint64_t total_slowdown = 0; - uint64_t total_slowdown_count = 0; - uint64_t interval_bytes_written = 0; - uint64_t interval_bytes_read = 0; - uint64_t interval_bytes_new = 0; - double interval_seconds_up = 0; +bool InternalStats::GetProperty(DBPropertyType property_type, + const Slice& property, std::string* value, + ColumnFamilyData* cfd) { + Version* current = cfd->current(); + Slice in = property; - if (statistics_) { - wal_bytes = statistics_->getTickerCount(WAL_FILE_BYTES); - wal_synced = statistics_->getTickerCount(WAL_FILE_SYNCED); - user_bytes_written = statistics_->getTickerCount(BYTES_WRITTEN); - write_other = statistics_->getTickerCount(WRITE_DONE_BY_OTHER); - write_self = statistics_->getTickerCount(WRITE_DONE_BY_SELF); - write_with_wal = statistics_->getTickerCount(WRITE_WITH_WAL); - } - - // Pardon the long line but I think it is easier to read this way. 
- snprintf(buf, sizeof(buf), - " Compactions\n" - "Level Files Size(MB) Score Time(sec) Read(MB) Write(MB) Rn(MB) Rnp1(MB) Wnew(MB) RW-Amplify Read(MB/s) Write(MB/s) Rn Rnp1 Wnp1 NewW Count msComp msStall Ln-stall Stall-cnt\n" - "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n" - ); - value->append(buf); - for (int level = 0; level < number_levels_; level++) { - int files = cfd->current()->NumLevelFiles(level); - if (compaction_stats_[level].micros > 0 || files > 0) { - int64_t bytes_read = compaction_stats_[level].bytes_readn + - compaction_stats_[level].bytes_readnp1; - int64_t bytes_new = compaction_stats_[level].bytes_written - - compaction_stats_[level].bytes_readnp1; - double amplify = (compaction_stats_[level].bytes_readn == 0) - ? 0.0 - : (compaction_stats_[level].bytes_written + - compaction_stats_[level].bytes_readnp1 + - compaction_stats_[level].bytes_readn) / - (double)compaction_stats_[level].bytes_readn; - - total_bytes_read += bytes_read; - total_bytes_written += compaction_stats_[level].bytes_written; - - uint64_t stalls = level == 0 ? (stall_counts_[LEVEL0_SLOWDOWN] + - stall_counts_[LEVEL0_NUM_FILES] + - stall_counts_[MEMTABLE_COMPACTION]) - : stall_leveln_slowdown_count_[level]; - - double stall_us = level == 0 ? 
(stall_micros_[LEVEL0_SLOWDOWN] + - stall_micros_[LEVEL0_NUM_FILES] + - stall_micros_[MEMTABLE_COMPACTION]) - : stall_leveln_slowdown_[level]; - - snprintf(buf, sizeof(buf), - "%3d %8d %8.0f %5.1f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f " - "%10.1f %9.1f %11.1f %8d %8d %8d %8d %8d %8d %9.1f %9.1f " - "%9lu\n", - level, files, cfd->current()->NumLevelBytes(level) / 1048576.0, - cfd->current()->NumLevelBytes(level) / - cfd->compaction_picker()->MaxBytesForLevel(level), - compaction_stats_[level].micros / 1e6, bytes_read / 1048576.0, - compaction_stats_[level].bytes_written / 1048576.0, - compaction_stats_[level].bytes_readn / 1048576.0, - compaction_stats_[level].bytes_readnp1 / 1048576.0, - bytes_new / 1048576.0, amplify, - // +1 to avoid division by 0 - (bytes_read / 1048576.0) / - ((compaction_stats_[level].micros + 1) / 1000000.0), - (compaction_stats_[level].bytes_written / 1048576.0) / - ((compaction_stats_[level].micros + 1) / 1000000.0), - compaction_stats_[level].files_in_leveln, - compaction_stats_[level].files_in_levelnp1, - compaction_stats_[level].files_out_levelnp1, - compaction_stats_[level].files_out_levelnp1 - - compaction_stats_[level].files_in_levelnp1, - compaction_stats_[level].count, - (int)((double)compaction_stats_[level].micros / 1000.0 / - (compaction_stats_[level].count + 1)), - (double)stall_us / 1000.0 / (stalls + 1), stall_us / 1000000.0, - (unsigned long)stalls); - total_slowdown += stall_leveln_slowdown_[level]; - total_slowdown_count += stall_leveln_slowdown_count_[level]; - value->append(buf); + switch (property_type) { + case kNumFilesAtLevel: { + in.remove_prefix(strlen("rocksdb.num-files-at-level")); + uint64_t level; + bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); + if (!ok || (int)level >= number_levels_) { + return false; + } else { + char buf[100]; + snprintf(buf, sizeof(buf), "%d", + current->NumLevelFiles(static_cast(level))); + *value = buf; + return true; } } + case kLevelStats: { + char buf[1000]; + snprintf(buf, 
sizeof(buf), + "Level Files Size(MB)\n" + "--------------------\n"); + value->append(buf); - interval_bytes_new = user_bytes_written - last_stats_.ingest_bytes_; - interval_bytes_read = total_bytes_read - last_stats_.compaction_bytes_read_; - interval_bytes_written = - total_bytes_written - last_stats_.compaction_bytes_written_; - interval_seconds_up = seconds_up - last_stats_.seconds_up_; + for (int level = 0; level < number_levels_; level++) { + snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level, + current->NumLevelFiles(level), + current->NumLevelBytes(level) / 1048576.0); + value->append(buf); + } + return true; + } + case kStats: { + char buf[1000]; - snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n", - seconds_up, interval_seconds_up); - value->append(buf); + uint64_t wal_bytes = 0; + uint64_t wal_synced = 0; + uint64_t user_bytes_written = 0; + uint64_t write_other = 0; + uint64_t write_self = 0; + uint64_t write_with_wal = 0; + uint64_t total_bytes_written = 0; + uint64_t total_bytes_read = 0; + uint64_t micros_up = env_->NowMicros() - started_at_; + // Add "+1" to make sure seconds_up is > 0 and avoid NaN later + double seconds_up = (micros_up + 1) / 1000000.0; + uint64_t total_slowdown = 0; + uint64_t total_slowdown_count = 0; + uint64_t interval_bytes_written = 0; + uint64_t interval_bytes_read = 0; + uint64_t interval_bytes_new = 0; + double interval_seconds_up = 0; - snprintf(buf, sizeof(buf), - "Writes cumulative: %llu total, %llu batches, " - "%.1f per batch, %.2f ingest GB\n", - (unsigned long long)(write_other + write_self), - (unsigned long long)write_self, - (write_other + write_self) / (double)(write_self + 1), - user_bytes_written / (1048576.0 * 1024)); - value->append(buf); + if (statistics_) { + wal_bytes = statistics_->getTickerCount(WAL_FILE_BYTES); + wal_synced = statistics_->getTickerCount(WAL_FILE_SYNCED); + user_bytes_written = statistics_->getTickerCount(BYTES_WRITTEN); + write_other = 
statistics_->getTickerCount(WRITE_DONE_BY_OTHER); + write_self = statistics_->getTickerCount(WRITE_DONE_BY_SELF); + write_with_wal = statistics_->getTickerCount(WRITE_WITH_WAL); + } - snprintf(buf, sizeof(buf), - "WAL cumulative: %llu WAL writes, %llu WAL syncs, " - "%.2f writes per sync, %.2f GB written\n", - (unsigned long long)write_with_wal, (unsigned long long)wal_synced, - write_with_wal / (double)(wal_synced + 1), - wal_bytes / (1048576.0 * 1024)); - value->append(buf); + snprintf( + buf, sizeof(buf), + " Compactions\n" + "Level Files Size(MB) Score Time(sec) Read(MB) Write(MB) Rn(MB) " + " " + "Rnp1(MB) Wnew(MB) RW-Amplify Read(MB/s) Write(MB/s) Rn " + "Rnp1 " + " Wnp1 NewW Count msComp msStall Ln-stall Stall-cnt\n" + "--------------------------------------------------------------------" + "--" + "--------------------------------------------------------------------" + "--" + "----------------------------------------------------------------\n"); + value->append(buf); + for (int level = 0; level < number_levels_; level++) { + int files = current->NumLevelFiles(level); + if (compaction_stats_[level].micros > 0 || files > 0) { + int64_t bytes_read = compaction_stats_[level].bytes_readn + + compaction_stats_[level].bytes_readnp1; + int64_t bytes_new = compaction_stats_[level].bytes_written - + compaction_stats_[level].bytes_readnp1; + double amplify = + (compaction_stats_[level].bytes_readn == 0) + ? 
0.0 + : (compaction_stats_[level].bytes_written + + compaction_stats_[level].bytes_readnp1 + + compaction_stats_[level].bytes_readn) / + (double)compaction_stats_[level].bytes_readn; - snprintf(buf, sizeof(buf), - "Compaction IO cumulative (GB): " - "%.2f new, %.2f read, %.2f write, %.2f read+write\n", - user_bytes_written / (1048576.0 * 1024), - total_bytes_read / (1048576.0 * 1024), - total_bytes_written / (1048576.0 * 1024), - (total_bytes_read + total_bytes_written) / (1048576.0 * 1024)); - value->append(buf); + total_bytes_read += bytes_read; + total_bytes_written += compaction_stats_[level].bytes_written; - snprintf(buf, sizeof(buf), - "Compaction IO cumulative (MB/sec): " - "%.1f new, %.1f read, %.1f write, %.1f read+write\n", - user_bytes_written / 1048576.0 / seconds_up, - total_bytes_read / 1048576.0 / seconds_up, - total_bytes_written / 1048576.0 / seconds_up, - (total_bytes_read + total_bytes_written) / 1048576.0 / seconds_up); - value->append(buf); + uint64_t stalls = level == 0 ? (stall_counts_[LEVEL0_SLOWDOWN] + + stall_counts_[LEVEL0_NUM_FILES] + + stall_counts_[MEMTABLE_COMPACTION]) + : stall_leveln_slowdown_count_[level]; - // +1 to avoid divide by 0 and NaN - snprintf( - buf, sizeof(buf), - "Amplification cumulative: %.1f write, %.1f compaction\n", - (double)(total_bytes_written + wal_bytes) / (user_bytes_written + 1), - (double)(total_bytes_written + total_bytes_read + wal_bytes) / - (user_bytes_written + 1)); - value->append(buf); + double stall_us = level == 0 ? 
(stall_micros_[LEVEL0_SLOWDOWN] + + stall_micros_[LEVEL0_NUM_FILES] + + stall_micros_[MEMTABLE_COMPACTION]) + : stall_leveln_slowdown_[level]; - uint64_t interval_write_other = write_other - last_stats_.write_other_; - uint64_t interval_write_self = write_self - last_stats_.write_self_; + snprintf(buf, sizeof(buf), + "%3d %8d %8.0f %5.1f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f " + "%10.1f %9.1f %11.1f %8d %8d %8d %8d %8d %8d %9.1f %9.1f " + "%9lu\n", + level, files, current->NumLevelBytes(level) / 1048576.0, + current->NumLevelBytes(level) / + cfd->compaction_picker()->MaxBytesForLevel(level), + compaction_stats_[level].micros / 1e6, + bytes_read / 1048576.0, + compaction_stats_[level].bytes_written / 1048576.0, + compaction_stats_[level].bytes_readn / 1048576.0, + compaction_stats_[level].bytes_readnp1 / 1048576.0, + bytes_new / 1048576.0, amplify, + // +1 to avoid division by 0 + (bytes_read / 1048576.0) / + ((compaction_stats_[level].micros + 1) / 1000000.0), + (compaction_stats_[level].bytes_written / 1048576.0) / + ((compaction_stats_[level].micros + 1) / 1000000.0), + compaction_stats_[level].files_in_leveln, + compaction_stats_[level].files_in_levelnp1, + compaction_stats_[level].files_out_levelnp1, + compaction_stats_[level].files_out_levelnp1 - + compaction_stats_[level].files_in_levelnp1, + compaction_stats_[level].count, + (int)((double)compaction_stats_[level].micros / 1000.0 / + (compaction_stats_[level].count + 1)), + (double)stall_us / 1000.0 / (stalls + 1), + stall_us / 1000000.0, (unsigned long)stalls); + total_slowdown += stall_leveln_slowdown_[level]; + total_slowdown_count += stall_leveln_slowdown_count_[level]; + value->append(buf); + } + } - snprintf(buf, sizeof(buf), - "Writes interval: %llu total, %llu batches, " - "%.1f per batch, %.1f ingest MB\n", - (unsigned long long)(interval_write_other + interval_write_self), - (unsigned long long)interval_write_self, - (double)(interval_write_other + interval_write_self) / - (interval_write_self + 1), - 
(user_bytes_written - last_stats_.ingest_bytes_) / 1048576.0); - value->append(buf); + interval_bytes_new = user_bytes_written - last_stats_.ingest_bytes_; + interval_bytes_read = + total_bytes_read - last_stats_.compaction_bytes_read_; + interval_bytes_written = + total_bytes_written - last_stats_.compaction_bytes_written_; + interval_seconds_up = seconds_up - last_stats_.seconds_up_; - uint64_t interval_write_with_wal = - write_with_wal - last_stats_.write_with_wal_; + snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n", + seconds_up, interval_seconds_up); + value->append(buf); - uint64_t interval_wal_synced = wal_synced - last_stats_.wal_synced_; - uint64_t interval_wal_bytes = wal_bytes - last_stats_.wal_bytes_; + snprintf(buf, sizeof(buf), + "Writes cumulative: %llu total, %llu batches, " + "%.1f per batch, %.2f ingest GB\n", + (unsigned long long)(write_other + write_self), + (unsigned long long)write_self, + (write_other + write_self) / (double)(write_self + 1), + user_bytes_written / (1048576.0 * 1024)); + value->append(buf); - snprintf(buf, sizeof(buf), - "WAL interval: %llu WAL writes, %llu WAL syncs, " - "%.2f writes per sync, %.2f MB written\n", - (unsigned long long)interval_write_with_wal, - (unsigned long long)interval_wal_synced, - interval_write_with_wal / (double)(interval_wal_synced + 1), - interval_wal_bytes / (1048576.0 * 1024)); - value->append(buf); + snprintf(buf, sizeof(buf), + "WAL cumulative: %llu WAL writes, %llu WAL syncs, " + "%.2f writes per sync, %.2f GB written\n", + (unsigned long long)write_with_wal, + (unsigned long long)wal_synced, + write_with_wal / (double)(wal_synced + 1), + wal_bytes / (1048576.0 * 1024)); + value->append(buf); - snprintf(buf, sizeof(buf), - "Compaction IO interval (MB): " - "%.2f new, %.2f read, %.2f write, %.2f read+write\n", - interval_bytes_new / 1048576.0, interval_bytes_read / 1048576.0, - interval_bytes_written / 1048576.0, - (interval_bytes_read + interval_bytes_written) / 
1048576.0); - value->append(buf); + snprintf(buf, sizeof(buf), + "Compaction IO cumulative (GB): " + "%.2f new, %.2f read, %.2f write, %.2f read+write\n", + user_bytes_written / (1048576.0 * 1024), + total_bytes_read / (1048576.0 * 1024), + total_bytes_written / (1048576.0 * 1024), + (total_bytes_read + total_bytes_written) / (1048576.0 * 1024)); + value->append(buf); - snprintf(buf, sizeof(buf), - "Compaction IO interval (MB/sec): " - "%.1f new, %.1f read, %.1f write, %.1f read+write\n", - interval_bytes_new / 1048576.0 / interval_seconds_up, - interval_bytes_read / 1048576.0 / interval_seconds_up, - interval_bytes_written / 1048576.0 / interval_seconds_up, - (interval_bytes_read + interval_bytes_written) / 1048576.0 / - interval_seconds_up); - value->append(buf); + snprintf( + buf, sizeof(buf), + "Compaction IO cumulative (MB/sec): " + "%.1f new, %.1f read, %.1f write, %.1f read+write\n", + user_bytes_written / 1048576.0 / seconds_up, + total_bytes_read / 1048576.0 / seconds_up, + total_bytes_written / 1048576.0 / seconds_up, + (total_bytes_read + total_bytes_written) / 1048576.0 / seconds_up); + value->append(buf); - // +1 to avoid divide by 0 and NaN - snprintf( - buf, sizeof(buf), - "Amplification interval: %.1f write, %.1f compaction\n", - (double)(interval_bytes_written + wal_bytes) / (interval_bytes_new + 1), - (double)(interval_bytes_written + interval_bytes_read + wal_bytes) / - (interval_bytes_new + 1)); - value->append(buf); + // +1 to avoid divide by 0 and NaN + snprintf( + buf, sizeof(buf), + "Amplification cumulative: %.1f write, %.1f compaction\n", + (double)(total_bytes_written + wal_bytes) / (user_bytes_written + 1), + (double)(total_bytes_written + total_bytes_read + wal_bytes) / + (user_bytes_written + 1)); + value->append(buf); - snprintf(buf, sizeof(buf), - "Stalls(secs): %.3f level0_slowdown, %.3f level0_numfiles, " - "%.3f memtable_compaction, %.3f leveln_slowdown\n", - stall_micros_[LEVEL0_SLOWDOWN] / 1000000.0, - 
stall_micros_[LEVEL0_NUM_FILES] / 1000000.0, - stall_micros_[MEMTABLE_COMPACTION] / 1000000.0, - total_slowdown / 1000000.0); - value->append(buf); + uint64_t interval_write_other = write_other - last_stats_.write_other_; + uint64_t interval_write_self = write_self - last_stats_.write_self_; - snprintf(buf, sizeof(buf), - "Stalls(count): %lu level0_slowdown, %lu level0_numfiles, " - "%lu memtable_compaction, %lu leveln_slowdown\n", - (unsigned long)stall_counts_[LEVEL0_SLOWDOWN], - (unsigned long)stall_counts_[LEVEL0_NUM_FILES], - (unsigned long)stall_counts_[MEMTABLE_COMPACTION], - (unsigned long)total_slowdown_count); - value->append(buf); + snprintf(buf, sizeof(buf), + "Writes interval: %llu total, %llu batches, " + "%.1f per batch, %.1f ingest MB\n", + (unsigned long long)(interval_write_other + interval_write_self), + (unsigned long long)interval_write_self, + (double)(interval_write_other + interval_write_self) / + (interval_write_self + 1), + (user_bytes_written - last_stats_.ingest_bytes_) / 1048576.0); + value->append(buf); - last_stats_.compaction_bytes_read_ = total_bytes_read; - last_stats_.compaction_bytes_written_ = total_bytes_written; - last_stats_.ingest_bytes_ = user_bytes_written; - last_stats_.seconds_up_ = seconds_up; - last_stats_.wal_bytes_ = wal_bytes; - last_stats_.wal_synced_ = wal_synced; - last_stats_.write_with_wal_ = write_with_wal; - last_stats_.write_other_ = write_other; - last_stats_.write_self_ = write_self; + uint64_t interval_write_with_wal = + write_with_wal - last_stats_.write_with_wal_; - return true; - } else if (in == "sstables") { - *value = cfd->current()->DebugString(); - return true; - } else if (in == "num-immutable-mem-table") { - *value = std::to_string(cfd->imm()->size()); - return true; + uint64_t interval_wal_synced = wal_synced - last_stats_.wal_synced_; + uint64_t interval_wal_bytes = wal_bytes - last_stats_.wal_bytes_; + + snprintf(buf, sizeof(buf), + "WAL interval: %llu WAL writes, %llu WAL syncs, " + "%.2f 
writes per sync, %.2f MB written\n", + (unsigned long long)interval_write_with_wal, + (unsigned long long)interval_wal_synced, + interval_write_with_wal / (double)(interval_wal_synced + 1), + interval_wal_bytes / (1048576.0 * 1024)); + value->append(buf); + + snprintf(buf, sizeof(buf), + "Compaction IO interval (MB): " + "%.2f new, %.2f read, %.2f write, %.2f read+write\n", + interval_bytes_new / 1048576.0, interval_bytes_read / 1048576.0, + interval_bytes_written / 1048576.0, + (interval_bytes_read + interval_bytes_written) / 1048576.0); + value->append(buf); + + snprintf(buf, sizeof(buf), + "Compaction IO interval (MB/sec): " + "%.1f new, %.1f read, %.1f write, %.1f read+write\n", + interval_bytes_new / 1048576.0 / interval_seconds_up, + interval_bytes_read / 1048576.0 / interval_seconds_up, + interval_bytes_written / 1048576.0 / interval_seconds_up, + (interval_bytes_read + interval_bytes_written) / 1048576.0 / + interval_seconds_up); + value->append(buf); + + // +1 to avoid divide by 0 and NaN + snprintf( + buf, sizeof(buf), + "Amplification interval: %.1f write, %.1f compaction\n", + (double)(interval_bytes_written + wal_bytes) / + (interval_bytes_new + 1), + (double)(interval_bytes_written + interval_bytes_read + wal_bytes) / + (interval_bytes_new + 1)); + value->append(buf); + + snprintf(buf, sizeof(buf), + "Stalls(secs): %.3f level0_slowdown, %.3f level0_numfiles, " + "%.3f memtable_compaction, %.3f leveln_slowdown\n", + stall_micros_[LEVEL0_SLOWDOWN] / 1000000.0, + stall_micros_[LEVEL0_NUM_FILES] / 1000000.0, + stall_micros_[MEMTABLE_COMPACTION] / 1000000.0, + total_slowdown / 1000000.0); + value->append(buf); + + snprintf(buf, sizeof(buf), + "Stalls(count): %lu level0_slowdown, %lu level0_numfiles, " + "%lu memtable_compaction, %lu leveln_slowdown\n", + (unsigned long)stall_counts_[LEVEL0_SLOWDOWN], + (unsigned long)stall_counts_[LEVEL0_NUM_FILES], + (unsigned long)stall_counts_[MEMTABLE_COMPACTION], + (unsigned long)total_slowdown_count); + 
value->append(buf); + + last_stats_.compaction_bytes_read_ = total_bytes_read; + last_stats_.compaction_bytes_written_ = total_bytes_written; + last_stats_.ingest_bytes_ = user_bytes_written; + last_stats_.seconds_up_ = seconds_up; + last_stats_.wal_bytes_ = wal_bytes; + last_stats_.wal_synced_ = wal_synced; + last_stats_.write_with_wal_ = write_with_wal; + last_stats_.write_other_ = write_other; + last_stats_.write_self_ = write_self; + + return true; + } + case kSsTables: + *value = current->DebugString(); + return true; + case kNumImmutableMemTable: + *value = std::to_string(cfd->imm()->size()); + return true; + case kMemtableFlushPending: + // Return number of mem tables that are ready to flush (made immutable) + *value = std::to_string(cfd->imm()->IsFlushPending() ? 1 : 0); + return true; + case kCompactionPending: + // 1 if the system has already determined at least one compaction is needed. + // 0 otherwise. + *value = std::to_string(current->NeedsCompaction() ? 1 : 0); + return true; + ///////////// + case kBackgroundErrors: + // Accumulated number of errors in background flushes or compactions. + *value = std::to_string(GetBackgroundErrorCount()); + return true; + ///////// + default: + return false; } - - return false; } } // namespace rocksdb diff --git a/db/internal_stats.h b/db/internal_stats.h index bc590f992c..5922c6c81b 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -19,6 +19,25 @@ class ColumnFamilyData; namespace rocksdb { + +class MemTableList; + +enum DBPropertyType { + kNumFilesAtLevel, // Number of files at a specific level + kLevelStats, // Return number of files and total sizes of each level + kStats, // Return general statistics of DB + kSsTables, // Return a human readable string of current SST files + kNumImmutableMemTable, // Return number of immutable mem tables + kMemtableFlushPending, // Return 1 if mem table flushing is pending, + // otherwise + // 0. + kCompactionPending, // Return 1 if a compaction is pending. 
Otherwise 0. + kBackgroundErrors, // Return accumulated background errors encountered. + kUnknown, +}; + +extern DBPropertyType GetPropertyType(const Slice& property); + class InternalStats { public: enum WriteStallType { @@ -34,6 +53,7 @@ class InternalStats { stall_counts_(WRITE_STALLS_ENUM_MAX, 0), stall_leveln_slowdown_(num_levels, 0), stall_leveln_slowdown_count_(num_levels, 0), + bg_error_count_(0), number_levels_(num_levels), statistics_(statistics), env_(env), @@ -101,8 +121,12 @@ class InternalStats { stall_leveln_slowdown_count_[level] += micros; } - bool GetProperty(const Slice& property, std::string* value, - ColumnFamilyData* cfd); + uint64_t GetBackgroundErrorCount() const { return bg_error_count_; } + + uint64_t BumpAndGetBackgroundErrorCount() { return ++bg_error_count_; } + + bool GetProperty(DBPropertyType property_type, const Slice& property, + std::string* value, ColumnFamilyData* cfd); private: std::vector compaction_stats_; @@ -142,6 +166,13 @@ class InternalStats { std::vector stall_leveln_slowdown_; std::vector stall_leveln_slowdown_count_; + // Total number of background errors encountered. Every time a flush task + // or compaction task fails, this counter is incremented. The failure can + // be caused by any possible reason, including file system errors, out of + // resources, or input file corruption. Failing when retrying the same flush + // or compaction will cause the counter to increase too. + uint64_t bg_error_count_; + int number_levels_; Statistics* statistics_; Env* env_; diff --git a/db/memtable_list.cc b/db/memtable_list.cc index 25971755d7..641ab3ed5f 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -93,7 +93,7 @@ void MemTableListVersion::Remove(MemTable* m) { // Returns true if there is at least one memtable on which flush has // not yet started. 
-bool MemTableList::IsFlushPending() { +bool MemTableList::IsFlushPending() const { if ((flush_requested_ && num_flush_not_started_ >= 1) || (num_flush_not_started_ >= min_write_buffer_number_to_merge_)) { assert(imm_flush_needed.NoBarrier_Load() != nullptr); diff --git a/db/memtable_list.h b/db/memtable_list.h index 5fa10accf1..31f189322d 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -91,7 +91,7 @@ class MemTableList { // Returns true if there is at least one memtable on which flush has // not yet started. - bool IsFlushPending(); + bool IsFlushPending() const; // Returns the earliest memtables that needs to be flushed. The returned // memtables are guaranteed to be in the ascending order of created time. diff --git a/db/version_set.cc b/db/version_set.cc index 03604f4bab..68773e6672 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -469,7 +469,6 @@ Version::Version(ColumnFamilyData* cfd, VersionSet* vset, refs_(0), // cfd is nullptr if Version is dummy num_levels_(cfd == nullptr ? 
0 : cfd->NumberLevels()), - finalized_(false), files_(new std::vector[num_levels_]), files_by_size_(num_levels_), next_file_to_compact_by_size_(num_levels_), @@ -487,13 +486,12 @@ void Version::Get(const ReadOptions& options, GetStats* stats, const Options& db_options, bool* value_found) { - assert(finalized_); Slice ikey = k.internal_key(); Slice user_key = k.user_key(); const Comparator* ucmp = cfd_->internal_comparator().user_comparator(); auto merge_operator = db_options.merge_operator.get(); - auto logger = db_options.info_log; + auto logger = db_options.info_log.get(); assert(status->ok() || status->IsMergeInProgress()); Saver saver; @@ -504,7 +502,7 @@ void Version::Get(const ReadOptions& options, saver.value = value; saver.merge_operator = merge_operator; saver.merge_context = merge_context; - saver.logger = logger.get(); + saver.logger = logger; saver.didIO = false; saver.statistics = db_options.statistics.get(); @@ -627,7 +625,7 @@ void Version::Get(const ReadOptions& options, // do a final merge of nullptr and operands; if (merge_operator->FullMerge(user_key, nullptr, saver.merge_context->GetOperands(), - value, logger.get())) { + value, logger)) { *status = Status::OK(); } else { RecordTick(db_options.statistics.get(), NUMBER_MERGE_FAILURES); @@ -652,16 +650,8 @@ bool Version::UpdateStats(const GetStats& stats) { return false; } -void Version::Finalize(std::vector& size_being_compacted) { - assert(!finalized_); - finalized_ = true; - // Pre-sort level0 for Get() - if (cfd_->options()->compaction_style == kCompactionStyleUniversal) { - std::sort(files_[0].begin(), files_[0].end(), NewestFirstBySeqNo); - } else { - std::sort(files_[0].begin(), files_[0].end(), NewestFirst); - } - +void Version::ComputeCompactionScore( + std::vector& size_being_compacted) { double max_score = 0; int max_score_level = 0; @@ -1408,6 +1398,13 @@ class VersionSet::Builder { } } + // TODO(icanadi) do it in the loop above, which already sorts the files + // Pre-sort level0 for 
Get() + if (cfd_->options()->compaction_style == kCompactionStyleUniversal) { + std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirstBySeqNo); + } else { + std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirst); + } CheckConsistency(v); } @@ -1605,9 +1602,9 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, } if (!edit->IsColumnFamilyManipulation()) { - // The calls to Finalize and UpdateFilesBySize are cpu-heavy + // The calls to ComputeCompactionScore and UpdateFilesBySize are cpu-heavy // and is best called outside the mutex. - v->Finalize(size_being_compacted); + v->ComputeCompactionScore(size_being_compacted); v->UpdateFilesBySize(); } @@ -2040,7 +2037,7 @@ Status VersionSet::Recover( // Install recovered version std::vector size_being_compacted(v->NumberLevels() - 1); cfd->compaction_picker()->SizeBeingCompacted(size_being_compacted); - v->Finalize(size_being_compacted); + v->ComputeCompactionScore(size_being_compacted); v->UpdateFilesBySize(); AppendVersion(cfd, v); } @@ -2373,7 +2370,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname, builder->SaveTo(v); std::vector size_being_compacted(v->NumberLevels() - 1); cfd->compaction_picker()->SizeBeingCompacted(size_being_compacted); - v->Finalize(size_being_compacted); + v->ComputeCompactionScore(size_being_compacted); v->UpdateFilesBySize(); delete builder; @@ -2709,8 +2706,6 @@ ColumnFamilyData* VersionSet::CreateColumnFamily( edit->column_family_name_, edit->column_family_, dummy_versions, options); Version* v = new Version(new_cfd, this, current_version_number_++); - std::vector size_being_compacted(options.num_levels - 1, 0); - v->Finalize(size_being_compacted); AppendVersion(new_cfd, v); new_cfd->CreateNewMemtable(); diff --git a/db/version_set.h b/db/version_set.h index d4473a5040..d3bd97f3f1 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -99,8 +99,9 @@ class Version { // Updates internal structures that keep track of compaction 
scores // We use compaction scores to figure out which compaction to do next - // Also pre-sorts level0 files for Get() - void Finalize(std::vector& size_being_compacted); + // REQUIRES: If Version is not yet saved to current_, it can be called without + // a lock. Once a version is saved to current_, call only with mutex held + void ComputeCompactionScore(std::vector& size_being_compacted); // Reference count management (so Versions do not disappear out from // under live iterators) @@ -234,7 +235,6 @@ class Version { Version* prev_; // Previous version in linked list int refs_; // Number of live refs to this version int num_levels_; // Number of levels - bool finalized_; // True if Finalized is called // List of files per level, files in each level are arranged // in increasing order of keys diff --git a/table/format.h b/table/format.h index ed292347e3..f05fbf890a 100644 --- a/table/format.h +++ b/table/format.h @@ -112,14 +112,10 @@ class Footer { static const uint64_t kInvalidTableMagicNumber = 0; private: - // Set the table_magic_number only when it was not previously - // initialized. Return true on success. - bool set_table_magic_number(uint64_t magic_number) { - if (HasInitializedTableMagicNumber()) { - table_magic_number_ = magic_number; - return true; - } - return false; + // REQUIRES: magic number wasn't initialized. 
+ void set_table_magic_number(uint64_t magic_number) { + assert(!HasInitializedTableMagicNumber()); + table_magic_number_ = magic_number; } // return true if @table_magic_number_ is set to a value different @@ -130,7 +126,7 @@ class Footer { BlockHandle metaindex_handle_; BlockHandle index_handle_; - uint64_t table_magic_number_; + uint64_t table_magic_number_ = 0; }; // Read the footer from file diff --git a/util/env_posix.cc b/util/env_posix.cc index c610c1546e..856d492503 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -356,7 +356,9 @@ class PosixMmapFile : public WritableFile { uint64_t file_offset_; // Offset of base_ in file // Have we done an munmap of unsynced data? bool pending_sync_; +#ifdef ROCKSDB_FALLOCATE_PRESENT bool fallocate_with_keep_size_; +#endif // Roundup x to a multiple of y static size_t Roundup(size_t x, size_t y) { @@ -441,8 +443,10 @@ class PosixMmapFile : public WritableFile { dst_(nullptr), last_sync_(nullptr), file_offset_(0), - pending_sync_(false), - fallocate_with_keep_size_(options.fallocate_with_keep_size) { + pending_sync_(false) { +#ifdef ROCKSDB_FALLOCATE_PRESENT + fallocate_with_keep_size_ = options.fallocate_with_keep_size; +#endif assert((page_size & (page_size - 1)) == 0); assert(options.use_mmap_writes); } @@ -614,7 +618,9 @@ class PosixWritableFile : public WritableFile { bool pending_fsync_; uint64_t last_sync_size_; uint64_t bytes_per_sync_; +#ifdef ROCKSDB_FALLOCATE_PRESENT bool fallocate_with_keep_size_; +#endif public: PosixWritableFile(const std::string& fname, int fd, size_t capacity, @@ -628,8 +634,10 @@ class PosixWritableFile : public WritableFile { pending_sync_(false), pending_fsync_(false), last_sync_size_(0), - bytes_per_sync_(options.bytes_per_sync), - fallocate_with_keep_size_(options.fallocate_with_keep_size) { + bytes_per_sync_(options.bytes_per_sync) { +#ifdef ROCKSDB_FALLOCATE_PRESENT + fallocate_with_keep_size_ = options.fallocate_with_keep_size; +#endif assert(!options.use_mmap_writes); } 
@@ -809,15 +817,19 @@ class PosixRandomRWFile : public RandomRWFile { int fd_; bool pending_sync_; bool pending_fsync_; +#ifdef ROCKSDB_FALLOCATE_PRESENT bool fallocate_with_keep_size_; +#endif public: PosixRandomRWFile(const std::string& fname, int fd, const EnvOptions& options) : filename_(fname), fd_(fd), pending_sync_(false), - pending_fsync_(false), - fallocate_with_keep_size_(options.fallocate_with_keep_size) { + pending_fsync_(false) { +#ifdef ROCKSDB_FALLOCATE_PRESENT + fallocate_with_keep_size_ = options.fallocate_with_keep_size; +#endif assert(!options.use_mmap_writes && !options.use_mmap_reads); }