mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-27 02:44:18 +00:00
734e4acafb
Summary: When we introduced range deletion block, TableCache::Get() and TableCache::NewIterator() each did two table cache lookups, one for range deletion block iterator and another for getting the table reader to which the Get()/NewIterator() is delegated. This extra cache lookup was very CPU-intensive (about 10% overhead in a read-heavy benchmark). We can avoid it by reusing the Cache::Handle created for range deletion block iterator to get the file reader. Closes https://github.com/facebook/rocksdb/pull/1537 Differential Revision: D4201167 Pulled By: ajkr fbshipit-source-id: d33ffd8
2562 lines
84 KiB
C++
2562 lines
84 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "db/db_test_util.h"
|
|
#include "port/stack_trace.h"
|
|
#include "rocksdb/experimental.h"
|
|
#include "rocksdb/utilities/convenience.h"
|
|
#include "util/sync_point.h"
|
|
namespace rocksdb {
|
|
|
|
// SYNC_POINT is not supported in released Windows mode.
|
|
#if !defined(ROCKSDB_LITE)
|
|
|
|
class DBCompactionTest : public DBTestBase {
|
|
public:
|
|
DBCompactionTest() : DBTestBase("/db_compaction_test") {}
|
|
};
|
|
|
|
class DBCompactionTestWithParam
|
|
: public DBTestBase,
|
|
public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
|
|
public:
|
|
DBCompactionTestWithParam() : DBTestBase("/db_compaction_test") {
|
|
max_subcompactions_ = std::get<0>(GetParam());
|
|
exclusive_manual_compaction_ = std::get<1>(GetParam());
|
|
}
|
|
|
|
// Required if inheriting from testing::WithParamInterface<>
|
|
static void SetUpTestCase() {}
|
|
static void TearDownTestCase() {}
|
|
|
|
uint32_t max_subcompactions_;
|
|
bool exclusive_manual_compaction_;
|
|
};
|
|
|
|
namespace {
|
|
|
|
class FlushedFileCollector : public EventListener {
|
|
public:
|
|
FlushedFileCollector() {}
|
|
~FlushedFileCollector() {}
|
|
|
|
virtual void OnFlushCompleted(DB* db, const FlushJobInfo& info) override {
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
flushed_files_.push_back(info.file_path);
|
|
}
|
|
|
|
std::vector<std::string> GetFlushedFiles() {
|
|
std::lock_guard<std::mutex> lock(mutex_);
|
|
std::vector<std::string> result;
|
|
for (auto fname : flushed_files_) {
|
|
result.push_back(fname);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
void ClearFlushedFiles() { flushed_files_.clear(); }
|
|
|
|
private:
|
|
std::vector<std::string> flushed_files_;
|
|
std::mutex mutex_;
|
|
};
|
|
|
|
static const int kCDTValueSize = 1000;
|
|
static const int kCDTKeysPerBuffer = 4;
|
|
static const int kCDTNumLevels = 8;
|
|
Options DeletionTriggerOptions(Options options) {
|
|
options.compression = kNoCompression;
|
|
options.write_buffer_size = kCDTKeysPerBuffer * (kCDTValueSize + 24);
|
|
options.min_write_buffer_number_to_merge = 1;
|
|
options.max_write_buffer_number_to_maintain = 0;
|
|
options.num_levels = kCDTNumLevels;
|
|
options.level0_file_num_compaction_trigger = 1;
|
|
options.target_file_size_base = options.write_buffer_size * 2;
|
|
options.target_file_size_multiplier = 2;
|
|
options.max_bytes_for_level_base =
|
|
options.target_file_size_base * options.target_file_size_multiplier;
|
|
options.max_bytes_for_level_multiplier = 2;
|
|
options.disable_auto_compactions = false;
|
|
return options;
|
|
}
|
|
|
|
bool HaveOverlappingKeyRanges(
|
|
const Comparator* c,
|
|
const SstFileMetaData& a, const SstFileMetaData& b) {
|
|
if (c->Compare(a.smallestkey, b.smallestkey) >= 0) {
|
|
if (c->Compare(a.smallestkey, b.largestkey) <= 0) {
|
|
// b.smallestkey <= a.smallestkey <= b.largestkey
|
|
return true;
|
|
}
|
|
} else if (c->Compare(a.largestkey, b.smallestkey) >= 0) {
|
|
// a.smallestkey < b.smallestkey <= a.largestkey
|
|
return true;
|
|
}
|
|
if (c->Compare(a.largestkey, b.largestkey) <= 0) {
|
|
if (c->Compare(a.largestkey, b.smallestkey) >= 0) {
|
|
// b.smallestkey <= a.largestkey <= b.largestkey
|
|
return true;
|
|
}
|
|
} else if (c->Compare(a.smallestkey, b.largestkey) <= 0) {
|
|
// a.smallestkey <= b.largestkey < a.largestkey
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Identifies all files between level "min_level" and "max_level"
|
|
// which has overlapping key range with "input_file_meta".
|
|
void GetOverlappingFileNumbersForLevelCompaction(
|
|
const ColumnFamilyMetaData& cf_meta,
|
|
const Comparator* comparator,
|
|
int min_level, int max_level,
|
|
const SstFileMetaData* input_file_meta,
|
|
std::set<std::string>* overlapping_file_names) {
|
|
std::set<const SstFileMetaData*> overlapping_files;
|
|
overlapping_files.insert(input_file_meta);
|
|
for (int m = min_level; m <= max_level; ++m) {
|
|
for (auto& file : cf_meta.levels[m].files) {
|
|
for (auto* included_file : overlapping_files) {
|
|
if (HaveOverlappingKeyRanges(
|
|
comparator, *included_file, file)) {
|
|
overlapping_files.insert(&file);
|
|
overlapping_file_names->insert(file.name);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void VerifyCompactionResult(
|
|
const ColumnFamilyMetaData& cf_meta,
|
|
const std::set<std::string>& overlapping_file_numbers) {
|
|
#ifndef NDEBUG
|
|
for (auto& level : cf_meta.levels) {
|
|
for (auto& file : level.files) {
|
|
assert(overlapping_file_numbers.find(file.name) ==
|
|
overlapping_file_numbers.end());
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
const SstFileMetaData* PickFileRandomly(
|
|
const ColumnFamilyMetaData& cf_meta,
|
|
Random* rand,
|
|
int* level = nullptr) {
|
|
auto file_id = rand->Uniform(static_cast<int>(
|
|
cf_meta.file_count)) + 1;
|
|
for (auto& level_meta : cf_meta.levels) {
|
|
if (file_id <= level_meta.files.size()) {
|
|
if (level != nullptr) {
|
|
*level = level_meta.level;
|
|
}
|
|
auto result = rand->Uniform(file_id);
|
|
return &(level_meta.files[result]);
|
|
}
|
|
file_id -= static_cast<uint32_t>(level_meta.files.size());
|
|
}
|
|
assert(false);
|
|
return nullptr;
|
|
}
|
|
} // anonymous namespace
|
|
|
|
// All the TEST_P tests run once with sub_compactions disabled (i.e.
|
|
// options.max_subcompactions = 1) and once with it enabled
|
|
TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) {
|
|
for (int tid = 0; tid < 3; ++tid) {
|
|
uint64_t db_size[2];
|
|
Options options = DeletionTriggerOptions(CurrentOptions());
|
|
options.max_subcompactions = max_subcompactions_;
|
|
|
|
if (tid == 1) {
|
|
// the following only disable stats update in DB::Open()
|
|
// and should not affect the result of this test.
|
|
options.skip_stats_update_on_db_open = true;
|
|
} else if (tid == 2) {
|
|
// third pass with universal compaction
|
|
options.compaction_style = kCompactionStyleUniversal;
|
|
options.num_levels = 1;
|
|
}
|
|
|
|
DestroyAndReopen(options);
|
|
Random rnd(301);
|
|
|
|
const int kTestSize = kCDTKeysPerBuffer * 1024;
|
|
std::vector<std::string> values;
|
|
for (int k = 0; k < kTestSize; ++k) {
|
|
values.push_back(RandomString(&rnd, kCDTValueSize));
|
|
ASSERT_OK(Put(Key(k), values[k]));
|
|
}
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
db_size[0] = Size(Key(0), Key(kTestSize - 1));
|
|
|
|
for (int k = 0; k < kTestSize; ++k) {
|
|
ASSERT_OK(Delete(Key(k)));
|
|
}
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
db_size[1] = Size(Key(0), Key(kTestSize - 1));
|
|
|
|
// must have much smaller db size.
|
|
ASSERT_GT(db_size[0] / 3, db_size[1]);
|
|
}
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, SkipStatsUpdateTest) {
|
|
// This test verify UpdateAccumulatedStats is not on
|
|
// if options.skip_stats_update_on_db_open = true
|
|
// The test will need to be updated if the internal behavior changes.
|
|
|
|
Options options = DeletionTriggerOptions(CurrentOptions());
|
|
options.env = env_;
|
|
DestroyAndReopen(options);
|
|
Random rnd(301);
|
|
|
|
const int kTestSize = kCDTKeysPerBuffer * 512;
|
|
std::vector<std::string> values;
|
|
for (int k = 0; k < kTestSize; ++k) {
|
|
values.push_back(RandomString(&rnd, kCDTValueSize));
|
|
ASSERT_OK(Put(Key(k), values[k]));
|
|
}
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
// Reopen the DB with stats-update disabled
|
|
options.skip_stats_update_on_db_open = true;
|
|
env_->random_file_open_counter_.store(0);
|
|
Reopen(options);
|
|
|
|
// As stats-update is disabled, we expect a very low number of
|
|
// random file open.
|
|
// Note that this number must be changed accordingly if we change
|
|
// the number of files needed to be opened in the DB::Open process.
|
|
const int kMaxFileOpenCount = 10;
|
|
ASSERT_LT(env_->random_file_open_counter_.load(), kMaxFileOpenCount);
|
|
|
|
// Repeat the reopen process, but this time we enable
|
|
// stats-update.
|
|
options.skip_stats_update_on_db_open = false;
|
|
env_->random_file_open_counter_.store(0);
|
|
Reopen(options);
|
|
|
|
// Since we do a normal stats update on db-open, there
|
|
// will be more random open files.
|
|
ASSERT_GT(env_->random_file_open_counter_.load(), kMaxFileOpenCount);
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
|
|
Options options = CurrentOptions();
|
|
options.env = env_;
|
|
options.new_table_reader_for_compaction_inputs = true;
|
|
options.max_open_files = 100;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
DestroyAndReopen(options);
|
|
Random rnd(301);
|
|
|
|
int num_table_cache_lookup = 0;
|
|
int num_new_table_reader = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"TableCache::FindTable:0", [&](void* arg) {
|
|
assert(arg != nullptr);
|
|
bool no_io = *(reinterpret_cast<bool*>(arg));
|
|
if (!no_io) {
|
|
// filter out cases for table properties queries.
|
|
num_table_cache_lookup++;
|
|
}
|
|
});
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"TableCache::GetTableReader:0",
|
|
[&](void* arg) { num_new_table_reader++; });
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
for (int k = 0; k < options.level0_file_num_compaction_trigger; ++k) {
|
|
ASSERT_OK(Put(Key(k), Key(k)));
|
|
ASSERT_OK(Put(Key(10 - k), "bar"));
|
|
if (k < options.level0_file_num_compaction_trigger - 1) {
|
|
num_table_cache_lookup = 0;
|
|
Flush();
|
|
dbfull()->TEST_WaitForCompact();
|
|
// preloading iterator issues one table cache lookup and create
|
|
// a new table reader.
|
|
ASSERT_EQ(num_table_cache_lookup, 1);
|
|
ASSERT_EQ(num_new_table_reader, 1);
|
|
|
|
num_table_cache_lookup = 0;
|
|
num_new_table_reader = 0;
|
|
ASSERT_EQ(Key(k), Get(Key(k)));
|
|
// lookup iterator from table cache and no need to create a new one.
|
|
ASSERT_EQ(num_table_cache_lookup, 1);
|
|
ASSERT_EQ(num_new_table_reader, 0);
|
|
}
|
|
}
|
|
|
|
num_table_cache_lookup = 0;
|
|
num_new_table_reader = 0;
|
|
Flush();
|
|
dbfull()->TEST_WaitForCompact();
|
|
// Preloading iterator issues one table cache lookup and creates
|
|
// a new table reader. One file is created for flush and one for compaction.
|
|
// Compaction inputs make no table cache look-up for data/range deletion
|
|
// iterators
|
|
ASSERT_EQ(num_table_cache_lookup, 2);
|
|
// Create new iterator for:
|
|
// (1) 1 for verifying flush results
|
|
// (2) 3 for compaction input files
|
|
// (3) 1 for verifying compaction results.
|
|
ASSERT_EQ(num_new_table_reader, 5);
|
|
|
|
num_table_cache_lookup = 0;
|
|
num_new_table_reader = 0;
|
|
ASSERT_EQ(Key(1), Get(Key(1)));
|
|
ASSERT_EQ(num_table_cache_lookup, 1);
|
|
ASSERT_EQ(num_new_table_reader, 0);
|
|
|
|
num_table_cache_lookup = 0;
|
|
num_new_table_reader = 0;
|
|
CompactRangeOptions cro;
|
|
cro.change_level = true;
|
|
cro.target_level = 2;
|
|
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
|
|
db_->CompactRange(cro, nullptr, nullptr);
|
|
// Only verifying compaction outputs issues one table cache lookup
|
|
// for both data block and range deletion block).
|
|
ASSERT_EQ(num_table_cache_lookup, 1);
|
|
// One for compaction input, one for verifying compaction results.
|
|
ASSERT_EQ(num_new_table_reader, 2);
|
|
|
|
num_table_cache_lookup = 0;
|
|
num_new_table_reader = 0;
|
|
ASSERT_EQ(Key(1), Get(Key(1)));
|
|
ASSERT_EQ(num_table_cache_lookup, 1);
|
|
ASSERT_EQ(num_new_table_reader, 0);
|
|
|
|
rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) {
|
|
for (int tid = 0; tid < 2; ++tid) {
|
|
uint64_t db_size[3];
|
|
Options options = DeletionTriggerOptions(CurrentOptions());
|
|
options.max_subcompactions = max_subcompactions_;
|
|
|
|
if (tid == 1) {
|
|
// second pass with universal compaction
|
|
options.compaction_style = kCompactionStyleUniversal;
|
|
options.num_levels = 1;
|
|
}
|
|
|
|
DestroyAndReopen(options);
|
|
Random rnd(301);
|
|
|
|
// round 1 --- insert key/value pairs.
|
|
const int kTestSize = kCDTKeysPerBuffer * 512;
|
|
std::vector<std::string> values;
|
|
for (int k = 0; k < kTestSize; ++k) {
|
|
values.push_back(RandomString(&rnd, kCDTValueSize));
|
|
ASSERT_OK(Put(Key(k), values[k]));
|
|
}
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
db_size[0] = Size(Key(0), Key(kTestSize - 1));
|
|
Close();
|
|
|
|
// round 2 --- disable auto-compactions and issue deletions.
|
|
options.create_if_missing = false;
|
|
options.disable_auto_compactions = true;
|
|
Reopen(options);
|
|
|
|
for (int k = 0; k < kTestSize; ++k) {
|
|
ASSERT_OK(Delete(Key(k)));
|
|
}
|
|
db_size[1] = Size(Key(0), Key(kTestSize - 1));
|
|
Close();
|
|
// as auto_compaction is off, we shouldn't see too much reduce
|
|
// in db size.
|
|
ASSERT_LT(db_size[0] / 3, db_size[1]);
|
|
|
|
// round 3 --- reopen db with auto_compaction on and see if
|
|
// deletion compensation still work.
|
|
options.disable_auto_compactions = false;
|
|
Reopen(options);
|
|
// insert relatively small amount of data to trigger auto compaction.
|
|
for (int k = 0; k < kTestSize / 10; ++k) {
|
|
ASSERT_OK(Put(Key(k), values[k]));
|
|
}
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
db_size[2] = Size(Key(0), Key(kTestSize - 1));
|
|
// this time we're expecting significant drop in size.
|
|
ASSERT_GT(db_size[0] / 3, db_size[2]);
|
|
}
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, DisableStatsUpdateReopen) {
|
|
uint64_t db_size[3];
|
|
for (int test = 0; test < 2; ++test) {
|
|
Options options = DeletionTriggerOptions(CurrentOptions());
|
|
options.skip_stats_update_on_db_open = (test == 0);
|
|
|
|
env_->random_read_counter_.Reset();
|
|
DestroyAndReopen(options);
|
|
Random rnd(301);
|
|
|
|
// round 1 --- insert key/value pairs.
|
|
const int kTestSize = kCDTKeysPerBuffer * 512;
|
|
std::vector<std::string> values;
|
|
for (int k = 0; k < kTestSize; ++k) {
|
|
values.push_back(RandomString(&rnd, kCDTValueSize));
|
|
ASSERT_OK(Put(Key(k), values[k]));
|
|
}
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
db_size[0] = Size(Key(0), Key(kTestSize - 1));
|
|
Close();
|
|
|
|
// round 2 --- disable auto-compactions and issue deletions.
|
|
options.create_if_missing = false;
|
|
options.disable_auto_compactions = true;
|
|
|
|
env_->random_read_counter_.Reset();
|
|
Reopen(options);
|
|
|
|
for (int k = 0; k < kTestSize; ++k) {
|
|
ASSERT_OK(Delete(Key(k)));
|
|
}
|
|
db_size[1] = Size(Key(0), Key(kTestSize - 1));
|
|
Close();
|
|
// as auto_compaction is off, we shouldn't see too much reduce
|
|
// in db size.
|
|
ASSERT_LT(db_size[0] / 3, db_size[1]);
|
|
|
|
// round 3 --- reopen db with auto_compaction on and see if
|
|
// deletion compensation still work.
|
|
options.disable_auto_compactions = false;
|
|
Reopen(options);
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
db_size[2] = Size(Key(0), Key(kTestSize - 1));
|
|
|
|
if (options.skip_stats_update_on_db_open) {
|
|
// If update stats on DB::Open is disable, we don't expect
|
|
// deletion entries taking effect.
|
|
ASSERT_LT(db_size[0] / 3, db_size[2]);
|
|
} else {
|
|
// Otherwise, we should see a significant drop in db size.
|
|
ASSERT_GT(db_size[0] / 3, db_size[2]);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
TEST_P(DBCompactionTestWithParam, CompactionTrigger) {
|
|
const int kNumKeysPerFile = 100;
|
|
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 110 << 10; // 110KB
|
|
options.arena_block_size = 4 << 10;
|
|
options.num_levels = 3;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile));
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
Random rnd(301);
|
|
|
|
for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
|
|
num++) {
|
|
std::vector<std::string> values;
|
|
// Write 100KB (100 values, each 1K)
|
|
for (int i = 0; i < kNumKeysPerFile; i++) {
|
|
values.push_back(RandomString(&rnd, 990));
|
|
ASSERT_OK(Put(1, Key(i), values[i]));
|
|
}
|
|
// put extra key to trigger flush
|
|
ASSERT_OK(Put(1, "", ""));
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 1), num + 1);
|
|
}
|
|
|
|
// generate one more file in level-0, and should trigger level-0 compaction
|
|
std::vector<std::string> values;
|
|
for (int i = 0; i < kNumKeysPerFile; i++) {
|
|
values.push_back(RandomString(&rnd, 990));
|
|
ASSERT_OK(Put(1, Key(i), values[i]));
|
|
}
|
|
// put extra key to trigger flush
|
|
ASSERT_OK(Put(1, "", ""));
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
|
|
ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1);
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, BGCompactionsAllowed) {
|
|
// Create several column families. Make compaction triggers in all of them
|
|
// and see number of compactions scheduled to be less than allowed.
|
|
const int kNumKeysPerFile = 100;
|
|
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 110 << 10; // 110KB
|
|
options.arena_block_size = 4 << 10;
|
|
options.num_levels = 3;
|
|
// Should speed up compaction when there are 4 files.
|
|
options.level0_file_num_compaction_trigger = 2;
|
|
options.level0_slowdown_writes_trigger = 20;
|
|
options.soft_pending_compaction_bytes_limit = 1 << 30; // Infinitely large
|
|
options.base_background_compactions = 1;
|
|
options.max_background_compactions = 3;
|
|
options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile));
|
|
|
|
// Block all threads in thread pool.
|
|
const size_t kTotalTasks = 4;
|
|
env_->SetBackgroundThreads(4, Env::LOW);
|
|
test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
|
|
for (size_t i = 0; i < kTotalTasks; i++) {
|
|
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
|
|
&sleeping_tasks[i], Env::Priority::LOW);
|
|
sleeping_tasks[i].WaitUntilSleeping();
|
|
}
|
|
|
|
CreateAndReopenWithCF({"one", "two", "three"}, options);
|
|
|
|
Random rnd(301);
|
|
for (int cf = 0; cf < 4; cf++) {
|
|
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
|
|
for (int i = 0; i < kNumKeysPerFile; i++) {
|
|
ASSERT_OK(Put(cf, Key(i), ""));
|
|
}
|
|
// put extra key to trigger flush
|
|
ASSERT_OK(Put(cf, "", ""));
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[cf]);
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
|
|
}
|
|
}
|
|
|
|
// Now all column families qualify compaction but only one should be
|
|
// scheduled, because no column family hits speed up condition.
|
|
ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
|
|
|
|
// Create two more files for one column family, which triggers speed up
|
|
// condition, three compactions will be scheduled.
|
|
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
|
|
for (int i = 0; i < kNumKeysPerFile; i++) {
|
|
ASSERT_OK(Put(2, Key(i), ""));
|
|
}
|
|
// put extra key to trigger flush
|
|
ASSERT_OK(Put(2, "", ""));
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[2]);
|
|
ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1,
|
|
NumTableFilesAtLevel(0, 2));
|
|
}
|
|
ASSERT_EQ(3, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
|
|
|
|
// Unblock all threads to unblock all compactions.
|
|
for (size_t i = 0; i < kTotalTasks; i++) {
|
|
sleeping_tasks[i].WakeUp();
|
|
sleeping_tasks[i].WaitUntilDone();
|
|
}
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
// Verify number of compactions allowed will come back to 1.
|
|
|
|
for (size_t i = 0; i < kTotalTasks; i++) {
|
|
sleeping_tasks[i].Reset();
|
|
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
|
|
&sleeping_tasks[i], Env::Priority::LOW);
|
|
sleeping_tasks[i].WaitUntilSleeping();
|
|
}
|
|
for (int cf = 0; cf < 4; cf++) {
|
|
for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
|
|
for (int i = 0; i < kNumKeysPerFile; i++) {
|
|
ASSERT_OK(Put(cf, Key(i), ""));
|
|
}
|
|
// put extra key to trigger flush
|
|
ASSERT_OK(Put(cf, "", ""));
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[cf]);
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
|
|
}
|
|
}
|
|
|
|
// Now all column families qualify compaction but only one should be
|
|
// scheduled, because no column family hits speed up condition.
|
|
ASSERT_EQ(1, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
|
|
|
|
for (size_t i = 0; i < kTotalTasks; i++) {
|
|
sleeping_tasks[i].WakeUp();
|
|
sleeping_tasks[i].WaitUntilDone();
|
|
}
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) {
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 100000000; // Large write buffer
|
|
options.max_subcompactions = max_subcompactions_;
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
Random rnd(301);
|
|
|
|
// Write 8MB (80 values, each 100K)
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
|
|
std::vector<std::string> values;
|
|
for (int i = 0; i < 80; i++) {
|
|
values.push_back(RandomString(&rnd, 100000));
|
|
ASSERT_OK(Put(1, Key(i), values[i]));
|
|
}
|
|
|
|
// Reopening moves updates to level-0
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
|
|
true /* disallow trivial move */);
|
|
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
|
|
ASSERT_GT(NumTableFilesAtLevel(1, 1), 1);
|
|
for (int i = 0; i < 80; i++) {
|
|
ASSERT_EQ(Get(1, Key(i)), values[i]);
|
|
}
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, MinorCompactionsHappen) {
|
|
do {
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 10000;
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
const int N = 500;
|
|
|
|
int starting_num_tables = TotalTableFiles(1);
|
|
for (int i = 0; i < N; i++) {
|
|
ASSERT_OK(Put(1, Key(i), Key(i) + std::string(1000, 'v')));
|
|
}
|
|
int ending_num_tables = TotalTableFiles(1);
|
|
ASSERT_GT(ending_num_tables, starting_num_tables);
|
|
|
|
for (int i = 0; i < N; i++) {
|
|
ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i)));
|
|
}
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
|
|
for (int i = 0; i < N; i++) {
|
|
ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i)));
|
|
}
|
|
} while (ChangeCompactOptions());
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, UserKeyCrossFile1) {
|
|
Options options = CurrentOptions();
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
// create first file and flush to l0
|
|
Put("4", "A");
|
|
Put("3", "A");
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
|
|
Put("2", "A");
|
|
Delete("3");
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
ASSERT_EQ("NOT_FOUND", Get("3"));
|
|
|
|
// move both files down to l1
|
|
dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
|
ASSERT_EQ("NOT_FOUND", Get("3"));
|
|
|
|
for (int i = 0; i < 3; i++) {
|
|
Put("2", "B");
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
}
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
ASSERT_EQ("NOT_FOUND", Get("3"));
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, UserKeyCrossFile2) {
|
|
Options options = CurrentOptions();
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
// create first file and flush to l0
|
|
Put("4", "A");
|
|
Put("3", "A");
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
|
|
Put("2", "A");
|
|
SingleDelete("3");
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
ASSERT_EQ("NOT_FOUND", Get("3"));
|
|
|
|
// move both files down to l1
|
|
dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
|
ASSERT_EQ("NOT_FOUND", Get("3"));
|
|
|
|
for (int i = 0; i < 3; i++) {
|
|
Put("2", "B");
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
}
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
ASSERT_EQ("NOT_FOUND", Get("3"));
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, ZeroSeqIdCompaction) {
|
|
Options options = CurrentOptions();
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
|
|
FlushedFileCollector* collector = new FlushedFileCollector();
|
|
options.listeners.emplace_back(collector);
|
|
|
|
// compaction options
|
|
CompactionOptions compact_opt;
|
|
compact_opt.compression = kNoCompression;
|
|
compact_opt.output_file_size_limit = 4096;
|
|
const size_t key_len =
|
|
static_cast<size_t>(compact_opt.output_file_size_limit) / 5;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
std::vector<const Snapshot*> snaps;
|
|
|
|
// create first file and flush to l0
|
|
for (auto& key : {"1", "2", "3", "3", "3", "3"}) {
|
|
Put(key, std::string(key_len, 'A'));
|
|
snaps.push_back(dbfull()->GetSnapshot());
|
|
}
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
|
|
// create second file and flush to l0
|
|
for (auto& key : {"3", "4", "5", "6", "7", "8"}) {
|
|
Put(key, std::string(key_len, 'A'));
|
|
snaps.push_back(dbfull()->GetSnapshot());
|
|
}
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
|
|
// move both files down to l1
|
|
dbfull()->CompactFiles(compact_opt, collector->GetFlushedFiles(), 1);
|
|
|
|
// release snap so that first instance of key(3) can have seqId=0
|
|
for (auto snap : snaps) {
|
|
dbfull()->ReleaseSnapshot(snap);
|
|
}
|
|
|
|
// create 3 files in l0 so to trigger compaction
|
|
for (int i = 0; i < options.level0_file_num_compaction_trigger; i++) {
|
|
Put("2", std::string(1, 'A'));
|
|
Flush();
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
}
|
|
|
|
dbfull()->TEST_WaitForCompact();
|
|
ASSERT_OK(Put("", ""));
|
|
}
|
|
|
|
// Check that writes done during a memtable compaction are recovered
|
|
// if the database is shutdown during the memtable compaction.
|
|
TEST_F(DBCompactionTest, RecoverDuringMemtableCompaction) {
|
|
do {
|
|
Options options = CurrentOptions();
|
|
options.env = env_;
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
// Trigger a long memtable compaction and reopen the database during it
|
|
ASSERT_OK(Put(1, "foo", "v1")); // Goes to 1st log file
|
|
ASSERT_OK(Put(1, "big1", std::string(10000000, 'x'))); // Fills memtable
|
|
ASSERT_OK(Put(1, "big2", std::string(1000, 'y'))); // Triggers compaction
|
|
ASSERT_OK(Put(1, "bar", "v2")); // Goes to new log file
|
|
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
ASSERT_EQ("v1", Get(1, "foo"));
|
|
ASSERT_EQ("v2", Get(1, "bar"));
|
|
ASSERT_EQ(std::string(10000000, 'x'), Get(1, "big1"));
|
|
ASSERT_EQ(std::string(1000, 'y'), Get(1, "big2"));
|
|
} while (ChangeOptions());
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, TrivialMoveOneFile) {
|
|
int32_t trivial_move = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 100000000;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
DestroyAndReopen(options);
|
|
|
|
int32_t num_keys = 80;
|
|
int32_t value_size = 100 * 1024; // 100 KB
|
|
|
|
Random rnd(301);
|
|
std::vector<std::string> values;
|
|
for (int i = 0; i < num_keys; i++) {
|
|
values.push_back(RandomString(&rnd, value_size));
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
|
|
// Reopening moves updates to L0
|
|
Reopen(options);
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 0), 1); // 1 file in L0
|
|
ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // 0 files in L1
|
|
|
|
std::vector<LiveFileMetaData> metadata;
|
|
db_->GetLiveFilesMetaData(&metadata);
|
|
ASSERT_EQ(metadata.size(), 1U);
|
|
LiveFileMetaData level0_file = metadata[0]; // L0 file meta
|
|
|
|
CompactRangeOptions cro;
|
|
cro.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
|
|
// Compaction will initiate a trivial move from L0 to L1
|
|
dbfull()->CompactRange(cro, nullptr, nullptr);
|
|
|
|
// File moved From L0 to L1
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); // 0 files in L0
|
|
ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1); // 1 file in L1
|
|
|
|
metadata.clear();
|
|
db_->GetLiveFilesMetaData(&metadata);
|
|
ASSERT_EQ(metadata.size(), 1U);
|
|
ASSERT_EQ(metadata[0].name /* level1_file.name */, level0_file.name);
|
|
ASSERT_EQ(metadata[0].size /* level1_file.size */, level0_file.size);
|
|
|
|
for (int i = 0; i < num_keys; i++) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
}
|
|
|
|
ASSERT_EQ(trivial_move, 1);
|
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, TrivialMoveNonOverlappingFiles) {
|
|
int32_t trivial_move = 0;
|
|
int32_t non_trivial_move = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial",
|
|
[&](void* arg) { non_trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Options options = CurrentOptions();
|
|
options.disable_auto_compactions = true;
|
|
options.write_buffer_size = 10 * 1024 * 1024;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
|
|
DestroyAndReopen(options);
|
|
// non overlapping ranges
|
|
std::vector<std::pair<int32_t, int32_t>> ranges = {
|
|
{100, 199},
|
|
{300, 399},
|
|
{0, 99},
|
|
{200, 299},
|
|
{600, 699},
|
|
{400, 499},
|
|
{500, 550},
|
|
{551, 599},
|
|
};
|
|
int32_t value_size = 10 * 1024; // 10 KB
|
|
|
|
Random rnd(301);
|
|
std::map<int32_t, std::string> values;
|
|
for (size_t i = 0; i < ranges.size(); i++) {
|
|
for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
|
|
values[j] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(j), values[j]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
}
|
|
|
|
int32_t level0_files = NumTableFilesAtLevel(0, 0);
|
|
ASSERT_EQ(level0_files, ranges.size()); // Multiple files in L0
|
|
ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0); // No files in L1
|
|
|
|
CompactRangeOptions cro;
|
|
cro.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
|
|
// Since data is non-overlapping we expect compaction to initiate
|
|
// a trivial move
|
|
db_->CompactRange(cro, nullptr, nullptr);
|
|
// We expect that all the files were trivially moved from L0 to L1
|
|
ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
|
|
ASSERT_EQ(NumTableFilesAtLevel(1, 0) /* level1_files */, level0_files);
|
|
|
|
for (size_t i = 0; i < ranges.size(); i++) {
|
|
for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
|
|
ASSERT_EQ(Get(Key(j)), values[j]);
|
|
}
|
|
}
|
|
|
|
ASSERT_EQ(trivial_move, 1);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
trivial_move = 0;
|
|
non_trivial_move = 0;
|
|
values.clear();
|
|
DestroyAndReopen(options);
|
|
// Same ranges as above but overlapping
|
|
ranges = {
|
|
{100, 199},
|
|
{300, 399},
|
|
{0, 99},
|
|
{200, 299},
|
|
{600, 699},
|
|
{400, 499},
|
|
{500, 560}, // this range overlap with the next one
|
|
{551, 599},
|
|
};
|
|
for (size_t i = 0; i < ranges.size(); i++) {
|
|
for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
|
|
values[j] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(j), values[j]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
}
|
|
|
|
db_->CompactRange(cro, nullptr, nullptr);
|
|
|
|
for (size_t i = 0; i < ranges.size(); i++) {
|
|
for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
|
|
ASSERT_EQ(Get(Key(j)), values[j]);
|
|
}
|
|
}
|
|
ASSERT_EQ(trivial_move, 0);
|
|
ASSERT_EQ(non_trivial_move, 1);
|
|
|
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, TrivialMoveTargetLevel) {
|
|
int32_t trivial_move = 0;
|
|
int32_t non_trivial_move = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial",
|
|
[&](void* arg) { non_trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Options options = CurrentOptions();
|
|
options.disable_auto_compactions = true;
|
|
options.write_buffer_size = 10 * 1024 * 1024;
|
|
options.num_levels = 7;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
|
|
DestroyAndReopen(options);
|
|
int32_t value_size = 10 * 1024; // 10 KB
|
|
|
|
// Add 2 non-overlapping files
|
|
Random rnd(301);
|
|
std::map<int32_t, std::string> values;
|
|
|
|
// file 1 [0 => 300]
|
|
for (int32_t i = 0; i <= 300; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// file 2 [600 => 700]
|
|
for (int32_t i = 600; i <= 700; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// 2 files in L0
|
|
ASSERT_EQ("2", FilesPerLevel(0));
|
|
CompactRangeOptions compact_options;
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 6;
|
|
compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
// 2 files in L6
|
|
ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0));
|
|
|
|
ASSERT_EQ(trivial_move, 1);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
for (int32_t i = 0; i <= 300; i++) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
}
|
|
for (int32_t i = 600; i <= 700; i++) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
}
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, ManualCompactionPartial) {
|
|
int32_t trivial_move = 0;
|
|
int32_t non_trivial_move = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial",
|
|
[&](void* arg) { non_trivial_move++; });
|
|
bool first = true;
|
|
// Purpose of dependencies:
|
|
// 4 -> 1: ensure the order of two non-trivial compactions
|
|
// 5 -> 2 and 5 -> 3: ensure we do a check before two non-trivial compactions
|
|
// are installed
|
|
rocksdb::SyncPoint::GetInstance()->LoadDependency(
|
|
{{"DBCompaction::ManualPartial:4", "DBCompaction::ManualPartial:1"},
|
|
{"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:2"},
|
|
{"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:3"}});
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* arg) {
|
|
if (first) {
|
|
first = false;
|
|
TEST_SYNC_POINT("DBCompaction::ManualPartial:4");
|
|
TEST_SYNC_POINT("DBCompaction::ManualPartial:3");
|
|
} else { // second non-trivial compaction
|
|
TEST_SYNC_POINT("DBCompaction::ManualPartial:2");
|
|
}
|
|
});
|
|
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 10 * 1024 * 1024;
|
|
options.num_levels = 7;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
options.max_background_compactions = 3;
|
|
options.target_file_size_base = 1 << 23; // 8 MB
|
|
|
|
DestroyAndReopen(options);
|
|
int32_t value_size = 10 * 1024; // 10 KB
|
|
|
|
// Add 2 non-overlapping files
|
|
Random rnd(301);
|
|
std::map<int32_t, std::string> values;
|
|
|
|
// file 1 [0 => 100]
|
|
for (int32_t i = 0; i < 100; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// file 2 [100 => 300]
|
|
for (int32_t i = 100; i < 300; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// 2 files in L0
|
|
ASSERT_EQ("2", FilesPerLevel(0));
|
|
CompactRangeOptions compact_options;
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 6;
|
|
compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
// Trivial move the two non-overlapping files to level 6
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
// 2 files in L6
|
|
ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0));
|
|
|
|
ASSERT_EQ(trivial_move, 1);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
// file 3 [ 0 => 200]
|
|
for (int32_t i = 0; i < 200; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// 1 files in L0
|
|
ASSERT_EQ("1,0,0,0,0,0,2", FilesPerLevel(0));
|
|
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false));
|
|
ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, false));
|
|
ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, nullptr, false));
|
|
ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, false));
|
|
ASSERT_OK(dbfull()->TEST_CompactRange(4, nullptr, nullptr, nullptr, false));
|
|
// 2 files in L6, 1 file in L5
|
|
ASSERT_EQ("0,0,0,0,0,1,2", FilesPerLevel(0));
|
|
|
|
ASSERT_EQ(trivial_move, 6);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
std::thread threads([&] {
|
|
compact_options.change_level = false;
|
|
compact_options.exclusive_manual_compaction = false;
|
|
std::string begin_string = Key(0);
|
|
std::string end_string = Key(199);
|
|
Slice begin(begin_string);
|
|
Slice end(end_string);
|
|
// First non-trivial compaction is triggered
|
|
ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
|
|
});
|
|
|
|
TEST_SYNC_POINT("DBCompaction::ManualPartial:1");
|
|
// file 4 [300 => 400)
|
|
for (int32_t i = 300; i <= 400; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// file 5 [400 => 500)
|
|
for (int32_t i = 400; i <= 500; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// file 6 [500 => 600)
|
|
for (int32_t i = 500; i <= 600; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
// Second non-trivial compaction is triggered
|
|
ASSERT_OK(Flush());
|
|
|
|
// Before two non-trivial compactions are installed, there are 3 files in L0
|
|
ASSERT_EQ("3,0,0,0,0,1,2", FilesPerLevel(0));
|
|
TEST_SYNC_POINT("DBCompaction::ManualPartial:5");
|
|
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
// After two non-trivial compactions are installed, there is 1 file in L6, and
|
|
// 1 file in L1
|
|
ASSERT_EQ("0,1,0,0,0,0,1", FilesPerLevel(0));
|
|
threads.join();
|
|
|
|
for (int32_t i = 0; i < 600; i++) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
}
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, ManualPartialFill) {
|
|
int32_t trivial_move = 0;
|
|
int32_t non_trivial_move = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial",
|
|
[&](void* arg) { non_trivial_move++; });
|
|
bool first = true;
|
|
bool second = true;
|
|
rocksdb::SyncPoint::GetInstance()->LoadDependency(
|
|
{{"DBCompaction::PartialFill:4", "DBCompaction::PartialFill:1"},
|
|
{"DBCompaction::PartialFill:2", "DBCompaction::PartialFill:3"}});
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* arg) {
|
|
if (first) {
|
|
TEST_SYNC_POINT("DBCompaction::PartialFill:4");
|
|
first = false;
|
|
TEST_SYNC_POINT("DBCompaction::PartialFill:3");
|
|
} else if (second) {
|
|
}
|
|
});
|
|
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 10 * 1024 * 1024;
|
|
options.max_bytes_for_level_multiplier = 2;
|
|
options.num_levels = 4;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
options.max_background_compactions = 3;
|
|
|
|
DestroyAndReopen(options);
|
|
int32_t value_size = 10 * 1024; // 10 KB
|
|
|
|
// Add 2 non-overlapping files
|
|
Random rnd(301);
|
|
std::map<int32_t, std::string> values;
|
|
|
|
// file 1 [0 => 100]
|
|
for (int32_t i = 0; i < 100; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// file 2 [100 => 300]
|
|
for (int32_t i = 100; i < 300; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// 2 files in L0
|
|
ASSERT_EQ("2", FilesPerLevel(0));
|
|
CompactRangeOptions compact_options;
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 2;
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
// 2 files in L2
|
|
ASSERT_EQ("0,0,2", FilesPerLevel(0));
|
|
|
|
ASSERT_EQ(trivial_move, 1);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
// file 3 [ 0 => 200]
|
|
for (int32_t i = 0; i < 200; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// 2 files in L2, 1 in L0
|
|
ASSERT_EQ("1,0,2", FilesPerLevel(0));
|
|
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false));
|
|
// 2 files in L2, 1 in L1
|
|
ASSERT_EQ("0,1,2", FilesPerLevel(0));
|
|
|
|
ASSERT_EQ(trivial_move, 2);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
std::thread threads([&] {
|
|
compact_options.change_level = false;
|
|
compact_options.exclusive_manual_compaction = false;
|
|
std::string begin_string = Key(0);
|
|
std::string end_string = Key(199);
|
|
Slice begin(begin_string);
|
|
Slice end(end_string);
|
|
ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
|
|
});
|
|
|
|
TEST_SYNC_POINT("DBCompaction::PartialFill:1");
|
|
// Many files 4 [300 => 4300)
|
|
for (int32_t i = 0; i <= 5; i++) {
|
|
for (int32_t j = 300; j < 4300; j++) {
|
|
if (j == 2300) {
|
|
ASSERT_OK(Flush());
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
}
|
|
values[j] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(j), values[j]));
|
|
}
|
|
}
|
|
|
|
// Verify level sizes
|
|
uint64_t target_size = 4 * options.max_bytes_for_level_base;
|
|
for (int32_t i = 1; i < options.num_levels; i++) {
|
|
ASSERT_LE(SizeAtLevel(i), target_size);
|
|
target_size = static_cast<uint64_t>(target_size *
|
|
options.max_bytes_for_level_multiplier);
|
|
}
|
|
|
|
TEST_SYNC_POINT("DBCompaction::PartialFill:2");
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
threads.join();
|
|
|
|
for (int32_t i = 0; i < 4300; i++) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
}
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, DeleteFileRange) {
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 10 * 1024 * 1024;
|
|
options.max_bytes_for_level_multiplier = 2;
|
|
options.num_levels = 4;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
options.max_background_compactions = 3;
|
|
|
|
DestroyAndReopen(options);
|
|
int32_t value_size = 10 * 1024; // 10 KB
|
|
|
|
// Add 2 non-overlapping files
|
|
Random rnd(301);
|
|
std::map<int32_t, std::string> values;
|
|
|
|
// file 1 [0 => 100]
|
|
for (int32_t i = 0; i < 100; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// file 2 [100 => 300]
|
|
for (int32_t i = 100; i < 300; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// 2 files in L0
|
|
ASSERT_EQ("2", FilesPerLevel(0));
|
|
CompactRangeOptions compact_options;
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 2;
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
// 2 files in L2
|
|
ASSERT_EQ("0,0,2", FilesPerLevel(0));
|
|
|
|
// file 3 [ 0 => 200]
|
|
for (int32_t i = 0; i < 200; i++) {
|
|
values[i] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
// Many files 4 [300 => 4300)
|
|
for (int32_t i = 0; i <= 5; i++) {
|
|
for (int32_t j = 300; j < 4300; j++) {
|
|
if (j == 2300) {
|
|
ASSERT_OK(Flush());
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
}
|
|
values[j] = RandomString(&rnd, value_size);
|
|
ASSERT_OK(Put(Key(j), values[j]));
|
|
}
|
|
}
|
|
ASSERT_OK(Flush());
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
// Verify level sizes
|
|
uint64_t target_size = 4 * options.max_bytes_for_level_base;
|
|
for (int32_t i = 1; i < options.num_levels; i++) {
|
|
ASSERT_LE(SizeAtLevel(i), target_size);
|
|
target_size = static_cast<uint64_t>(target_size *
|
|
options.max_bytes_for_level_multiplier);
|
|
}
|
|
|
|
size_t old_num_files = CountFiles();
|
|
std::string begin_string = Key(1000);
|
|
std::string end_string = Key(2000);
|
|
Slice begin(begin_string);
|
|
Slice end(end_string);
|
|
ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));
|
|
|
|
int32_t deleted_count = 0;
|
|
for (int32_t i = 0; i < 4300; i++) {
|
|
if (i < 1000 || i > 2000) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
} else {
|
|
ReadOptions roptions;
|
|
std::string result;
|
|
Status s = db_->Get(roptions, Key(i), &result);
|
|
ASSERT_TRUE(s.IsNotFound() || s.ok());
|
|
if (s.IsNotFound()) {
|
|
deleted_count++;
|
|
}
|
|
}
|
|
}
|
|
ASSERT_GT(deleted_count, 0);
|
|
begin_string = Key(5000);
|
|
end_string = Key(6000);
|
|
Slice begin1(begin_string);
|
|
Slice end1(end_string);
|
|
// Try deleting files in range which contain no keys
|
|
ASSERT_OK(
|
|
DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin1, &end1));
|
|
|
|
// Push data from level 0 to level 1 to force all data to be deleted
|
|
// Note that we don't delete level 0 files
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 1;
|
|
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
|
|
|
|
ASSERT_OK(
|
|
DeleteFilesInRange(db_, db_->DefaultColumnFamily(), nullptr, nullptr));
|
|
|
|
int32_t deleted_count2 = 0;
|
|
for (int32_t i = 0; i < 4300; i++) {
|
|
ReadOptions roptions;
|
|
std::string result;
|
|
Status s = db_->Get(roptions, Key(i), &result);
|
|
ASSERT_TRUE(s.IsNotFound());
|
|
deleted_count2++;
|
|
}
|
|
ASSERT_GT(deleted_count2, deleted_count);
|
|
size_t new_num_files = CountFiles();
|
|
ASSERT_GT(old_num_files, new_num_files);
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) {
|
|
int32_t trivial_move = 0;
|
|
int32_t non_trivial_move = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial",
|
|
[&](void* arg) { non_trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 100000000;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
DestroyAndReopen(options);
|
|
|
|
int32_t value_size = 10 * 1024; // 10 KB
|
|
|
|
Random rnd(301);
|
|
std::vector<std::string> values;
|
|
// File with keys [ 0 => 99 ]
|
|
for (int i = 0; i < 100; i++) {
|
|
values.push_back(RandomString(&rnd, value_size));
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_EQ("1", FilesPerLevel(0));
|
|
// Compaction will do L0=>L1 (trivial move) then move L1 files to L3
|
|
CompactRangeOptions compact_options;
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 3;
|
|
compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
|
|
ASSERT_EQ(trivial_move, 1);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
// File with keys [ 100 => 199 ]
|
|
for (int i = 100; i < 200; i++) {
|
|
values.push_back(RandomString(&rnd, value_size));
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
|
|
CompactRangeOptions cro;
|
|
cro.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
// Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
|
|
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
|
|
ASSERT_EQ("0,0,0,2", FilesPerLevel(0));
|
|
ASSERT_EQ(trivial_move, 4);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
for (int i = 0; i < 200; i++) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
}
|
|
|
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, LevelCompactionThirdPath) {
|
|
Options options = CurrentOptions();
|
|
options.db_paths.emplace_back(dbname_, 500 * 1024);
|
|
options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024);
|
|
options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024);
|
|
options.memtable_factory.reset(
|
|
new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.write_buffer_size = 110 << 10; // 110KB
|
|
options.arena_block_size = 4 << 10;
|
|
options.level0_file_num_compaction_trigger = 2;
|
|
options.num_levels = 4;
|
|
options.max_bytes_for_level_base = 400 * 1024;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
// options = CurrentOptions(options);
|
|
|
|
std::vector<std::string> filenames;
|
|
env_->GetChildren(options.db_paths[1].path, &filenames);
|
|
// Delete archival files.
|
|
for (size_t i = 0; i < filenames.size(); ++i) {
|
|
env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]);
|
|
}
|
|
env_->DeleteDir(options.db_paths[1].path);
|
|
Reopen(options);
|
|
|
|
Random rnd(301);
|
|
int key_idx = 0;
|
|
|
|
// First three 110KB files are not going to second path.
|
|
// After that, (100K, 200K)
|
|
for (int num = 0; num < 3; num++) {
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
}
|
|
|
|
// Another 110KB triggers a compaction to 400K file to fill up first path
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ(3, GetSstFileCount(options.db_paths[1].path));
|
|
|
|
// (1, 4)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4", FilesPerLevel(0));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 1)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,1", FilesPerLevel(0));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 2)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,2", FilesPerLevel(0));
|
|
ASSERT_EQ(2, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 3)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,3", FilesPerLevel(0));
|
|
ASSERT_EQ(3, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 4)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,4", FilesPerLevel(0));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 5)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,5", FilesPerLevel(0));
|
|
ASSERT_EQ(5, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 6)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,6", FilesPerLevel(0));
|
|
ASSERT_EQ(6, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 7)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,7", FilesPerLevel(0));
|
|
ASSERT_EQ(7, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4, 8)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,8", FilesPerLevel(0));
|
|
ASSERT_EQ(8, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
for (int i = 0; i < key_idx; i++) {
|
|
auto v = Get(Key(i));
|
|
ASSERT_NE(v, "NOT_FOUND");
|
|
ASSERT_TRUE(v.size() == 1 || v.size() == 990);
|
|
}
|
|
|
|
Reopen(options);
|
|
|
|
for (int i = 0; i < key_idx; i++) {
|
|
auto v = Get(Key(i));
|
|
ASSERT_NE(v, "NOT_FOUND");
|
|
ASSERT_TRUE(v.size() == 1 || v.size() == 990);
|
|
}
|
|
|
|
Destroy(options);
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, LevelCompactionPathUse) {
|
|
Options options = CurrentOptions();
|
|
options.db_paths.emplace_back(dbname_, 500 * 1024);
|
|
options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024);
|
|
options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024);
|
|
options.memtable_factory.reset(
|
|
new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.write_buffer_size = 110 << 10; // 110KB
|
|
options.arena_block_size = 4 << 10;
|
|
options.level0_file_num_compaction_trigger = 2;
|
|
options.num_levels = 4;
|
|
options.max_bytes_for_level_base = 400 * 1024;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
// options = CurrentOptions(options);
|
|
|
|
std::vector<std::string> filenames;
|
|
env_->GetChildren(options.db_paths[1].path, &filenames);
|
|
// Delete archival files.
|
|
for (size_t i = 0; i < filenames.size(); ++i) {
|
|
env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]);
|
|
}
|
|
env_->DeleteDir(options.db_paths[1].path);
|
|
Reopen(options);
|
|
|
|
Random rnd(301);
|
|
int key_idx = 0;
|
|
|
|
// Always gets compacted into 1 Level1 file,
|
|
// 0/1 Level 0 file
|
|
for (int num = 0; num < 3; num++) {
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
}
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,1", FilesPerLevel(0));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("0,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("0,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("0,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("0,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
key_idx = 0;
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,1", FilesPerLevel(0));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(dbname_));
|
|
|
|
for (int i = 0; i < key_idx; i++) {
|
|
auto v = Get(Key(i));
|
|
ASSERT_NE(v, "NOT_FOUND");
|
|
ASSERT_TRUE(v.size() == 1 || v.size() == 990);
|
|
}
|
|
|
|
Reopen(options);
|
|
|
|
for (int i = 0; i < key_idx; i++) {
|
|
auto v = Get(Key(i));
|
|
ASSERT_NE(v, "NOT_FOUND");
|
|
ASSERT_TRUE(v.size() == 1 || v.size() == 990);
|
|
}
|
|
|
|
Destroy(options);
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, ConvertCompactionStyle) {
|
|
Random rnd(301);
|
|
int max_key_level_insert = 200;
|
|
int max_key_universal_insert = 600;
|
|
|
|
// Stage 1: generate a db with level compaction
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 110 << 10; // 110KB
|
|
options.arena_block_size = 4 << 10;
|
|
options.num_levels = 4;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
options.max_bytes_for_level_base = 500 << 10; // 500KB
|
|
options.max_bytes_for_level_multiplier = 1;
|
|
options.target_file_size_base = 200 << 10; // 200KB
|
|
options.target_file_size_multiplier = 1;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
for (int i = 0; i <= max_key_level_insert; i++) {
|
|
// each value is 10K
|
|
ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
|
|
}
|
|
ASSERT_OK(Flush(1));
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
ASSERT_GT(TotalTableFiles(1, 4), 1);
|
|
int non_level0_num_files = 0;
|
|
for (int i = 1; i < options.num_levels; i++) {
|
|
non_level0_num_files += NumTableFilesAtLevel(i, 1);
|
|
}
|
|
ASSERT_GT(non_level0_num_files, 0);
|
|
|
|
// Stage 2: reopen with universal compaction - should fail
|
|
options = CurrentOptions();
|
|
options.compaction_style = kCompactionStyleUniversal;
|
|
options.num_levels = 1;
|
|
options = CurrentOptions(options);
|
|
Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
ASSERT_TRUE(s.IsInvalidArgument());
|
|
|
|
// Stage 3: compact into a single file and move the file to level 0
|
|
options = CurrentOptions();
|
|
options.disable_auto_compactions = true;
|
|
options.target_file_size_base = INT_MAX;
|
|
options.target_file_size_multiplier = 1;
|
|
options.max_bytes_for_level_base = INT_MAX;
|
|
options.max_bytes_for_level_multiplier = 1;
|
|
options.num_levels = 4;
|
|
options = CurrentOptions(options);
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
|
|
CompactRangeOptions compact_options;
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 0;
|
|
compact_options.bottommost_level_compaction =
|
|
BottommostLevelCompaction::kForce;
|
|
compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr);
|
|
|
|
// Only 1 file in L0
|
|
ASSERT_EQ("1", FilesPerLevel(1));
|
|
|
|
// Stage 4: re-open in universal compaction style and do some db operations
|
|
options = CurrentOptions();
|
|
options.compaction_style = kCompactionStyleUniversal;
|
|
options.num_levels = 4;
|
|
options.write_buffer_size = 110 << 10; // 110KB
|
|
options.arena_block_size = 4 << 10;
|
|
options.level0_file_num_compaction_trigger = 3;
|
|
options = CurrentOptions(options);
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
|
|
options.num_levels = 1;
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, options);
|
|
|
|
for (int i = max_key_level_insert / 2; i <= max_key_universal_insert; i++) {
|
|
ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000)));
|
|
}
|
|
dbfull()->Flush(FlushOptions());
|
|
ASSERT_OK(Flush(1));
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
for (int i = 1; i < options.num_levels; i++) {
|
|
ASSERT_EQ(NumTableFilesAtLevel(i, 1), 0);
|
|
}
|
|
|
|
// verify keys inserted in both level compaction style and universal
|
|
// compaction style
|
|
std::string keys_in_db;
|
|
Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]);
|
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
keys_in_db.append(iter->key().ToString());
|
|
keys_in_db.push_back(',');
|
|
}
|
|
delete iter;
|
|
|
|
std::string expected_keys;
|
|
for (int i = 0; i <= max_key_universal_insert; i++) {
|
|
expected_keys.append(Key(i));
|
|
expected_keys.push_back(',');
|
|
}
|
|
|
|
ASSERT_EQ(keys_in_db, expected_keys);
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_a) {
|
|
do {
|
|
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
|
|
ASSERT_OK(Put(1, "b", "v"));
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
ASSERT_OK(Delete(1, "b"));
|
|
ASSERT_OK(Delete(1, "a"));
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
ASSERT_OK(Delete(1, "a"));
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
ASSERT_OK(Put(1, "a", "v"));
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
ASSERT_EQ("(a->v)", Contents(1));
|
|
env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
|
|
ASSERT_EQ("(a->v)", Contents(1));
|
|
} while (ChangeCompactOptions());
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_b) {
|
|
do {
|
|
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
|
|
Put(1, "", "");
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
Delete(1, "e");
|
|
Put(1, "", "");
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
Put(1, "c", "cv");
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
Put(1, "", "");
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
Put(1, "", "");
|
|
env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
Put(1, "d", "dv");
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
Put(1, "", "");
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
Delete(1, "d");
|
|
Delete(1, "b");
|
|
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
|
|
ASSERT_EQ("(->)(c->cv)", Contents(1));
|
|
env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
|
|
ASSERT_EQ("(->)(c->cv)", Contents(1));
|
|
} while (ChangeCompactOptions());
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, ManualCompaction) {
|
|
Options options = CurrentOptions();
|
|
options.max_subcompactions = max_subcompactions_;
|
|
options.statistics = rocksdb::CreateDBStatistics();
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
// iter - 0 with 7 levels
|
|
// iter - 1 with 3 levels
|
|
for (int iter = 0; iter < 2; ++iter) {
|
|
MakeTables(3, "p", "q", 1);
|
|
ASSERT_EQ("1,1,1", FilesPerLevel(1));
|
|
|
|
// Compaction range falls before files
|
|
Compact(1, "", "c");
|
|
ASSERT_EQ("1,1,1", FilesPerLevel(1));
|
|
|
|
// Compaction range falls after files
|
|
Compact(1, "r", "z");
|
|
ASSERT_EQ("1,1,1", FilesPerLevel(1));
|
|
|
|
// Compaction range overlaps files
|
|
Compact(1, "p1", "p9");
|
|
ASSERT_EQ("0,0,1", FilesPerLevel(1));
|
|
|
|
// Populate a different range
|
|
MakeTables(3, "c", "e", 1);
|
|
ASSERT_EQ("1,1,2", FilesPerLevel(1));
|
|
|
|
// Compact just the new range
|
|
Compact(1, "b", "f");
|
|
ASSERT_EQ("0,0,2", FilesPerLevel(1));
|
|
|
|
// Compact all
|
|
MakeTables(1, "a", "z", 1);
|
|
ASSERT_EQ("1,0,2", FilesPerLevel(1));
|
|
|
|
uint64_t prev_block_cache_add =
|
|
options.statistics->getTickerCount(BLOCK_CACHE_ADD);
|
|
CompactRangeOptions cro;
|
|
cro.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
db_->CompactRange(cro, handles_[1], nullptr, nullptr);
|
|
// Verify manual compaction doesn't fill block cache
|
|
ASSERT_EQ(prev_block_cache_add,
|
|
options.statistics->getTickerCount(BLOCK_CACHE_ADD));
|
|
|
|
ASSERT_EQ("0,0,1", FilesPerLevel(1));
|
|
|
|
if (iter == 0) {
|
|
options = CurrentOptions();
|
|
options.max_background_flushes = 0;
|
|
options.num_levels = 3;
|
|
options.create_if_missing = true;
|
|
options.statistics = rocksdb::CreateDBStatistics();
|
|
DestroyAndReopen(options);
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
TEST_P(DBCompactionTestWithParam, ManualLevelCompactionOutputPathId) {
|
|
Options options = CurrentOptions();
|
|
options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760);
|
|
options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760);
|
|
options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760);
|
|
options.max_subcompactions = max_subcompactions_;
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
// iter - 0 with 7 levels
|
|
// iter - 1 with 3 levels
|
|
for (int iter = 0; iter < 2; ++iter) {
|
|
for (int i = 0; i < 3; ++i) {
|
|
ASSERT_OK(Put(1, "p", "begin"));
|
|
ASSERT_OK(Put(1, "q", "end"));
|
|
ASSERT_OK(Flush(1));
|
|
}
|
|
ASSERT_EQ("3", FilesPerLevel(1));
|
|
ASSERT_EQ(3, GetSstFileCount(options.db_paths[0].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
// Compaction range falls before files
|
|
Compact(1, "", "c");
|
|
ASSERT_EQ("3", FilesPerLevel(1));
|
|
|
|
// Compaction range falls after files
|
|
Compact(1, "r", "z");
|
|
ASSERT_EQ("3", FilesPerLevel(1));
|
|
|
|
// Compaction range overlaps files
|
|
Compact(1, "p1", "p9", 1);
|
|
ASSERT_EQ("0,1", FilesPerLevel(1));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
// Populate a different range
|
|
for (int i = 0; i < 3; ++i) {
|
|
ASSERT_OK(Put(1, "c", "begin"));
|
|
ASSERT_OK(Put(1, "e", "end"));
|
|
ASSERT_OK(Flush(1));
|
|
}
|
|
ASSERT_EQ("3,1", FilesPerLevel(1));
|
|
|
|
// Compact just the new range
|
|
Compact(1, "b", "f", 1);
|
|
ASSERT_EQ("0,2", FilesPerLevel(1));
|
|
ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
// Compact all
|
|
ASSERT_OK(Put(1, "a", "begin"));
|
|
ASSERT_OK(Put(1, "z", "end"));
|
|
ASSERT_OK(Flush(1));
|
|
ASSERT_EQ("1,2", FilesPerLevel(1));
|
|
ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path));
|
|
CompactRangeOptions compact_options;
|
|
compact_options.target_path_id = 1;
|
|
compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
|
|
db_->CompactRange(compact_options, handles_[1], nullptr, nullptr);
|
|
|
|
ASSERT_EQ("0,1", FilesPerLevel(1));
|
|
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
|
ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
|
|
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
|
|
|
if (iter == 0) {
|
|
DestroyAndReopen(options);
|
|
options = CurrentOptions();
|
|
options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760);
|
|
options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760);
|
|
options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760);
|
|
options.max_background_flushes = 1;
|
|
options.num_levels = 3;
|
|
options.create_if_missing = true;
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, FilesDeletedAfterCompaction) {
|
|
do {
|
|
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
|
|
ASSERT_OK(Put(1, "foo", "v2"));
|
|
Compact(1, "a", "z");
|
|
const size_t num_files = CountLiveFiles();
|
|
for (int i = 0; i < 10; i++) {
|
|
ASSERT_OK(Put(1, "foo", "v2"));
|
|
Compact(1, "a", "z");
|
|
}
|
|
ASSERT_EQ(CountLiveFiles(), num_files);
|
|
} while (ChangeCompactOptions());
|
|
}
|
|
|
|
// Check level comapction with compact files
|
|
TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) {
|
|
const int kTestKeySize = 16;
|
|
const int kTestValueSize = 984;
|
|
const int kEntrySize = kTestKeySize + kTestValueSize;
|
|
const int kEntriesPerBuffer = 100;
|
|
Options options;
|
|
options.create_if_missing = true;
|
|
options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.target_file_size_base = options.write_buffer_size;
|
|
options.max_bytes_for_level_base = options.target_file_size_base * 2;
|
|
options.level0_stop_writes_trigger = 2;
|
|
options.max_bytes_for_level_multiplier = 2;
|
|
options.compression = kNoCompression;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
options = CurrentOptions(options);
|
|
CreateAndReopenWithCF({"pikachu"}, options);
|
|
|
|
Random rnd(301);
|
|
for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
|
|
ASSERT_OK(Put(1, ToString(key), RandomString(&rnd, kTestValueSize)));
|
|
}
|
|
dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
ColumnFamilyMetaData cf_meta;
|
|
dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
|
|
int output_level = static_cast<int>(cf_meta.levels.size()) - 1;
|
|
for (int file_picked = 5; file_picked > 0; --file_picked) {
|
|
std::set<std::string> overlapping_file_names;
|
|
std::vector<std::string> compaction_input_file_names;
|
|
for (int f = 0; f < file_picked; ++f) {
|
|
int level = 0;
|
|
auto file_meta = PickFileRandomly(cf_meta, &rnd, &level);
|
|
compaction_input_file_names.push_back(file_meta->name);
|
|
GetOverlappingFileNumbersForLevelCompaction(
|
|
cf_meta, options.comparator, level, output_level,
|
|
file_meta, &overlapping_file_names);
|
|
}
|
|
|
|
ASSERT_OK(dbfull()->CompactFiles(
|
|
CompactionOptions(), handles_[1],
|
|
compaction_input_file_names,
|
|
output_level));
|
|
|
|
// Make sure all overlapping files do not exist after compaction
|
|
dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
|
|
VerifyCompactionResult(cf_meta, overlapping_file_names);
|
|
}
|
|
|
|
// make sure all key-values are still there.
|
|
for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
|
|
ASSERT_NE(Get(1, ToString(key)), "NOT_FOUND");
|
|
}
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, PartialCompactionFailure) {
|
|
Options options;
|
|
const int kKeySize = 16;
|
|
const int kKvSize = 1000;
|
|
const int kKeysPerBuffer = 100;
|
|
const int kNumL1Files = 5;
|
|
options.create_if_missing = true;
|
|
options.write_buffer_size = kKeysPerBuffer * kKvSize;
|
|
options.max_write_buffer_number = 2;
|
|
options.target_file_size_base =
|
|
options.write_buffer_size *
|
|
(options.max_write_buffer_number - 1);
|
|
options.level0_file_num_compaction_trigger = kNumL1Files;
|
|
options.max_bytes_for_level_base =
|
|
options.level0_file_num_compaction_trigger *
|
|
options.target_file_size_base;
|
|
options.max_bytes_for_level_multiplier = 2;
|
|
options.compression = kNoCompression;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
|
|
env_->SetBackgroundThreads(1, Env::HIGH);
|
|
env_->SetBackgroundThreads(1, Env::LOW);
|
|
// stop the compaction thread until we simulate the file creation failure.
|
|
test::SleepingBackgroundTask sleeping_task_low;
|
|
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
|
|
Env::Priority::LOW);
|
|
|
|
options.env = env_;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
const int kNumInsertedKeys =
|
|
options.level0_file_num_compaction_trigger *
|
|
(options.max_write_buffer_number - 1) *
|
|
kKeysPerBuffer;
|
|
|
|
Random rnd(301);
|
|
std::vector<std::string> keys;
|
|
std::vector<std::string> values;
|
|
for (int k = 0; k < kNumInsertedKeys; ++k) {
|
|
keys.emplace_back(RandomString(&rnd, kKeySize));
|
|
values.emplace_back(RandomString(&rnd, kKvSize - kKeySize));
|
|
ASSERT_OK(Put(Slice(keys[k]), Slice(values[k])));
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
}
|
|
|
|
dbfull()->TEST_FlushMemTable(true);
|
|
// Make sure the number of L0 files can trigger compaction.
|
|
ASSERT_GE(NumTableFilesAtLevel(0),
|
|
options.level0_file_num_compaction_trigger);
|
|
|
|
auto previous_num_level0_files = NumTableFilesAtLevel(0);
|
|
|
|
// Fail the first file creation.
|
|
env_->non_writable_count_ = 1;
|
|
sleeping_task_low.WakeUp();
|
|
sleeping_task_low.WaitUntilDone();
|
|
|
|
// Expect compaction to fail here as one file will fail its
|
|
// creation.
|
|
ASSERT_TRUE(!dbfull()->TEST_WaitForCompact().ok());
|
|
|
|
// Verify L0 -> L1 compaction does fail.
|
|
ASSERT_EQ(NumTableFilesAtLevel(1), 0);
|
|
|
|
// Verify all L0 files are still there.
|
|
ASSERT_EQ(NumTableFilesAtLevel(0), previous_num_level0_files);
|
|
|
|
// All key-values must exist after compaction fails.
|
|
for (int k = 0; k < kNumInsertedKeys; ++k) {
|
|
ASSERT_EQ(values[k], Get(keys[k]));
|
|
}
|
|
|
|
env_->non_writable_count_ = 0;
|
|
|
|
// Make sure RocksDB will not get into corrupted state.
|
|
Reopen(options);
|
|
|
|
// Verify again after reopen.
|
|
for (int k = 0; k < kNumInsertedKeys; ++k) {
|
|
ASSERT_EQ(values[k], Get(keys[k]));
|
|
}
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, DeleteMovedFileAfterCompaction) {
|
|
// iter 1 -- delete_obsolete_files_period_micros == 0
|
|
for (int iter = 0; iter < 2; ++iter) {
|
|
// This test triggers move compaction and verifies that the file is not
|
|
// deleted when it's part of move compaction
|
|
Options options = CurrentOptions();
|
|
options.env = env_;
|
|
if (iter == 1) {
|
|
options.delete_obsolete_files_period_micros = 0;
|
|
}
|
|
options.create_if_missing = true;
|
|
options.level0_file_num_compaction_trigger =
|
|
2; // trigger compaction when we have 2 files
|
|
OnFileDeletionListener* listener = new OnFileDeletionListener();
|
|
options.listeners.emplace_back(listener);
|
|
options.max_subcompactions = max_subcompactions_;
|
|
DestroyAndReopen(options);
|
|
|
|
Random rnd(301);
|
|
// Create two 1MB sst files
|
|
for (int i = 0; i < 2; ++i) {
|
|
// Create 1MB sst file
|
|
for (int j = 0; j < 100; ++j) {
|
|
ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024)));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
}
|
|
// this should execute L0->L1
|
|
dbfull()->TEST_WaitForCompact();
|
|
ASSERT_EQ("0,1", FilesPerLevel(0));
|
|
|
|
// block compactions
|
|
test::SleepingBackgroundTask sleeping_task;
|
|
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
|
|
Env::Priority::LOW);
|
|
|
|
options.max_bytes_for_level_base = 1024 * 1024; // 1 MB
|
|
Reopen(options);
|
|
std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
|
|
ASSERT_EQ("0,1", FilesPerLevel(0));
|
|
// let compactions go
|
|
sleeping_task.WakeUp();
|
|
sleeping_task.WaitUntilDone();
|
|
|
|
// this should execute L1->L2 (move)
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
ASSERT_EQ("0,0,1", FilesPerLevel(0));
|
|
|
|
std::vector<LiveFileMetaData> metadata;
|
|
db_->GetLiveFilesMetaData(&metadata);
|
|
ASSERT_EQ(metadata.size(), 1U);
|
|
auto moved_file_name = metadata[0].name;
|
|
|
|
// Create two more 1MB sst files
|
|
for (int i = 0; i < 2; ++i) {
|
|
// Create 1MB sst file
|
|
for (int j = 0; j < 100; ++j) {
|
|
ASSERT_OK(Put(Key(i * 50 + j + 100), RandomString(&rnd, 10 * 1024)));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
}
|
|
// this should execute both L0->L1 and L1->L2 (merge with previous file)
|
|
dbfull()->TEST_WaitForCompact();
|
|
|
|
ASSERT_EQ("0,0,2", FilesPerLevel(0));
|
|
|
|
// iterator is holding the file
|
|
ASSERT_OK(env_->FileExists(dbname_ + moved_file_name));
|
|
|
|
listener->SetExpectedFileName(dbname_ + moved_file_name);
|
|
iterator.reset();
|
|
|
|
// this file should have been compacted away
|
|
ASSERT_NOK(env_->FileExists(dbname_ + moved_file_name));
|
|
listener->VerifyMatchedCount(1);
|
|
}
|
|
}
|
|
|
|
TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) {
|
|
if (!Zlib_Supported()) {
|
|
return;
|
|
}
|
|
Options options = CurrentOptions();
|
|
options.memtable_factory.reset(
|
|
new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.write_buffer_size = 110 << 10; // 110KB
|
|
options.arena_block_size = 4 << 10;
|
|
options.level0_file_num_compaction_trigger = 2;
|
|
options.num_levels = 4;
|
|
options.max_bytes_for_level_base = 400 * 1024;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
// First two levels have no compression, so that a trivial move between
|
|
// them will be allowed. Level 2 has Zlib compression so that a trivial
|
|
// move to level 3 will not be allowed
|
|
options.compression_per_level = {kNoCompression, kNoCompression,
|
|
kZlibCompression};
|
|
int matches = 0, didnt_match = 0, trivial_move = 0, non_trivial = 0;
|
|
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"Compaction::InputCompressionMatchesOutput:Matches",
|
|
[&](void* arg) { matches++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"Compaction::InputCompressionMatchesOutput:DidntMatch",
|
|
[&](void* arg) { didnt_match++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial",
|
|
[&](void* arg) { non_trivial++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Reopen(options);
|
|
|
|
Random rnd(301);
|
|
int key_idx = 0;
|
|
|
|
// First three 110KB files are going to level 0
|
|
// After that, (100K, 200K)
|
|
for (int num = 0; num < 3; num++) {
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
}
|
|
|
|
// Another 110KB triggers a compaction to 400K file to fill up level 0
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ(4, GetSstFileCount(dbname_));
|
|
|
|
// (1, 4)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4", FilesPerLevel(0));
|
|
|
|
// (1, 4, 1)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,1", FilesPerLevel(0));
|
|
|
|
// (1, 4, 2)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,2", FilesPerLevel(0));
|
|
|
|
// (1, 4, 3)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,3", FilesPerLevel(0));
|
|
|
|
// (1, 4, 4)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,4", FilesPerLevel(0));
|
|
|
|
// (1, 4, 5)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,5", FilesPerLevel(0));
|
|
|
|
// (1, 4, 6)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,6", FilesPerLevel(0));
|
|
|
|
// (1, 4, 7)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,7", FilesPerLevel(0));
|
|
|
|
// (1, 4, 8)
|
|
GenerateNewFile(&rnd, &key_idx);
|
|
ASSERT_EQ("1,4,8", FilesPerLevel(0));
|
|
|
|
ASSERT_EQ(matches, 12);
|
|
// Currently, the test relies on the number of calls to
|
|
// InputCompressionMatchesOutput() per compaction.
|
|
const int kCallsToInputCompressionMatch = 2;
|
|
ASSERT_EQ(didnt_match, 8 * kCallsToInputCompressionMatch);
|
|
ASSERT_EQ(trivial_move, 12);
|
|
ASSERT_EQ(non_trivial, 8);
|
|
|
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
|
|
|
for (int i = 0; i < key_idx; i++) {
|
|
auto v = Get(Key(i));
|
|
ASSERT_NE(v, "NOT_FOUND");
|
|
ASSERT_TRUE(v.size() == 1 || v.size() == 990);
|
|
}
|
|
|
|
Reopen(options);
|
|
|
|
for (int i = 0; i < key_idx; i++) {
|
|
auto v = Get(Key(i));
|
|
ASSERT_NE(v, "NOT_FOUND");
|
|
ASSERT_TRUE(v.size() == 1 || v.size() == 990);
|
|
}
|
|
|
|
Destroy(options);
|
|
}
|
|
|
|
TEST_F(DBCompactionTest, SanitizeCompactionOptionsTest) {
|
|
Options options = CurrentOptions();
|
|
options.max_background_compactions = 5;
|
|
options.soft_pending_compaction_bytes_limit = 0;
|
|
options.hard_pending_compaction_bytes_limit = 100;
|
|
options.create_if_missing = true;
|
|
DestroyAndReopen(options);
|
|
ASSERT_EQ(5, db_->GetOptions().base_background_compactions);
|
|
ASSERT_EQ(100, db_->GetOptions().soft_pending_compaction_bytes_limit);
|
|
|
|
options.base_background_compactions = 4;
|
|
options.max_background_compactions = 3;
|
|
options.soft_pending_compaction_bytes_limit = 200;
|
|
options.hard_pending_compaction_bytes_limit = 150;
|
|
DestroyAndReopen(options);
|
|
ASSERT_EQ(3, db_->GetOptions().base_background_compactions);
|
|
ASSERT_EQ(150, db_->GetOptions().soft_pending_compaction_bytes_limit);
|
|
}
|
|
|
|
// This tests for a bug that could cause two level0 compactions running
|
|
// concurrently
|
|
// TODO(aekmekji): Make sure that the reason this fails when run with
|
|
// max_subcompactions > 1 is not a correctness issue but just inherent to
|
|
// running parallel L0-L1 compactions
|
|
TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) {
|
|
Options options = CurrentOptions();
|
|
options.compaction_style = kCompactionStyleLevel;
|
|
options.write_buffer_size = 110 << 10;
|
|
options.arena_block_size = 4 << 10;
|
|
options.level0_file_num_compaction_trigger = 4;
|
|
options.num_levels = 4;
|
|
options.compression = kNoCompression;
|
|
options.max_bytes_for_level_base = 450 << 10;
|
|
options.target_file_size_base = 98 << 10;
|
|
options.max_write_buffer_number = 2;
|
|
options.max_background_compactions = 2;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
// fill up the DB
|
|
Random rnd(301);
|
|
for (int num = 0; num < 10; num++) {
|
|
GenerateNewRandomFile(&rnd);
|
|
}
|
|
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
|
|
|
rocksdb::SyncPoint::GetInstance()->LoadDependency(
|
|
{{"CompactionJob::Run():Start",
|
|
"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"},
|
|
{"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2",
|
|
"CompactionJob::Run():End"}});
|
|
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
// trigger L0 compaction
|
|
for (int num = 0; num < options.level0_file_num_compaction_trigger + 1;
|
|
num++) {
|
|
GenerateNewRandomFile(&rnd, /* nowait */ true);
|
|
ASSERT_OK(Flush());
|
|
}
|
|
|
|
TEST_SYNC_POINT(
|
|
"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1");
|
|
|
|
GenerateNewRandomFile(&rnd, /* nowait */ true);
|
|
dbfull()->TEST_WaitForFlushMemTable();
|
|
ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
|
|
for (int num = 0; num < options.level0_file_num_compaction_trigger + 1;
|
|
num++) {
|
|
GenerateNewRandomFile(&rnd, /* nowait */ true);
|
|
ASSERT_OK(Flush());
|
|
}
|
|
|
|
TEST_SYNC_POINT(
|
|
"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2");
|
|
dbfull()->TEST_WaitForCompact();
|
|
}
|
|
|
|
|
|
TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) {
|
|
int32_t trivial_move = 0;
|
|
int32_t non_trivial_move = 0;
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:TrivialMove",
|
|
[&](void* arg) { trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
|
"DBImpl::BackgroundCompaction:NonTrivial",
|
|
[&](void* arg) { non_trivial_move++; });
|
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
|
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 100000000;
|
|
options.max_subcompactions = max_subcompactions_;
|
|
DestroyAndReopen(options);
|
|
|
|
int32_t value_size = 10 * 1024; // 10 KB
|
|
|
|
Random rnd(301);
|
|
std::vector<std::string> values;
|
|
// File with keys [ 0 => 99 ]
|
|
for (int i = 0; i < 100; i++) {
|
|
values.push_back(RandomString(&rnd, value_size));
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_EQ("1", FilesPerLevel(0));
|
|
// Compaction will do L0=>L1 (trivial move) then move L1 files to L3
|
|
CompactRangeOptions compact_options;
|
|
compact_options.change_level = true;
|
|
compact_options.target_level = 3;
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
|
|
ASSERT_EQ(trivial_move, 1);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
// File with keys [ 100 => 199 ]
|
|
for (int i = 100; i < 200; i++) {
|
|
values.push_back(RandomString(&rnd, value_size));
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
|
|
// Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
|
|
// then compacte the bottommost level L3=>L3 (non trivial move)
|
|
compact_options = CompactRangeOptions();
|
|
compact_options.bottommost_level_compaction =
|
|
BottommostLevelCompaction::kForce;
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
|
|
ASSERT_EQ(trivial_move, 4);
|
|
ASSERT_EQ(non_trivial_move, 1);
|
|
|
|
// File with keys [ 200 => 299 ]
|
|
for (int i = 200; i < 300; i++) {
|
|
values.push_back(RandomString(&rnd, value_size));
|
|
ASSERT_OK(Put(Key(i), values[i]));
|
|
}
|
|
ASSERT_OK(Flush());
|
|
|
|
ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
|
|
trivial_move = 0;
|
|
non_trivial_move = 0;
|
|
compact_options = CompactRangeOptions();
|
|
compact_options.bottommost_level_compaction =
|
|
BottommostLevelCompaction::kSkip;
|
|
// Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
|
|
// and will skip bottommost level compaction
|
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
|
ASSERT_EQ("0,0,0,2", FilesPerLevel(0));
|
|
ASSERT_EQ(trivial_move, 3);
|
|
ASSERT_EQ(non_trivial_move, 0);
|
|
|
|
for (int i = 0; i < 300; i++) {
|
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
|
}
|
|
|
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
|
|
::testing::Values(std::make_tuple(1, true),
|
|
std::make_tuple(1, false),
|
|
std::make_tuple(4, true),
|
|
std::make_tuple(4, false)));
|
|
|
|
class CompactionPriTest : public DBTestBase,
|
|
public testing::WithParamInterface<uint32_t> {
|
|
public:
|
|
CompactionPriTest() : DBTestBase("/compaction_pri_test") {
|
|
compaction_pri_ = GetParam();
|
|
}
|
|
|
|
// Required if inheriting from testing::WithParamInterface<>
|
|
static void SetUpTestCase() {}
|
|
static void TearDownTestCase() {}
|
|
|
|
uint32_t compaction_pri_;
|
|
};
|
|
|
|
TEST_P(CompactionPriTest, Test) {
|
|
Options options = CurrentOptions();
|
|
options.write_buffer_size = 16 * 1024;
|
|
options.compaction_pri = static_cast<CompactionPri>(compaction_pri_);
|
|
options.hard_pending_compaction_bytes_limit = 256 * 1024;
|
|
options.max_bytes_for_level_base = 64 * 1024;
|
|
options.max_bytes_for_level_multiplier = 4;
|
|
options.compression = kNoCompression;
|
|
|
|
DestroyAndReopen(options);
|
|
|
|
Random rnd(301);
|
|
const int kNKeys = 5000;
|
|
int keys[kNKeys];
|
|
for (int i = 0; i < kNKeys; i++) {
|
|
keys[i] = i;
|
|
}
|
|
std::random_shuffle(std::begin(keys), std::end(keys));
|
|
|
|
for (int i = 0; i < kNKeys; i++) {
|
|
ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 102)));
|
|
}
|
|
|
|
dbfull()->TEST_WaitForCompact();
|
|
for (int i = 0; i < kNKeys; i++) {
|
|
ASSERT_NE("NOT_FOUND", Get(Key(i)));
|
|
}
|
|
}
|
|
|
|
INSTANTIATE_TEST_CASE_P(
|
|
CompactionPriTest, CompactionPriTest,
|
|
::testing::Values(CompactionPri::kByCompensatedSize,
|
|
CompactionPri::kOldestLargestSeqFirst,
|
|
CompactionPri::kOldestSmallestSeqFirst,
|
|
CompactionPri::kMinOverlappingRatio));
|
|
|
|
#endif // !defined(ROCKSDB_LITE)
|
|
} // namespace rocksdb
|
|
|
|
int main(int argc, char** argv) {
|
|
#if !defined(ROCKSDB_LITE)
|
|
rocksdb::port::InstallStackTraceHandler();
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|