rocksdb/db/db_compaction_test.cc

//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <tuple>

#include "compaction/compaction_picker_universal.h"
#include "db/blob/blob_index.h"
#include "db/db_test_util.h"
#include "db/dbformat.h"
#include "env/mock_env.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/concurrent_task_limiter.h"
#include "rocksdb/experimental.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/utilities/convenience.h"
#include "test_util/sync_point.h"
#include "test_util/testutil.h"
#include "util/concurrent_task_limiter_impl.h"
#include "util/random.h"
#include "utilities/fault_injection_env.h"
#include "utilities/fault_injection_fs.h"

namespace ROCKSDB_NAMESPACE {

// SYNC_POINT is not supported in released Windows mode.

class CompactionStatsCollector : public EventListener {
 public:
  CompactionStatsCollector()
      : compaction_completed_(
            static_cast<int>(CompactionReason::kNumOfReasons)) {
    for (auto& v : compaction_completed_) {
      v.store(0);
    }
  }

  ~CompactionStatsCollector() override {}

  void OnCompactionCompleted(DB* /* db */,
                             const CompactionJobInfo& info) override {
    int k = static_cast<int>(info.compaction_reason);
    int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
    assert(k >= 0 && k < num_of_reasons);
    compaction_completed_[k]++;
  }

  void OnExternalFileIngested(
      DB* /* db */, const ExternalFileIngestionInfo& /* info */) override {
    int k = static_cast<int>(CompactionReason::kExternalSstIngestion);
    compaction_completed_[k]++;
  }

  void OnFlushCompleted(DB* /* db */, const FlushJobInfo& /* info */) override {
    int k = static_cast<int>(CompactionReason::kFlush);
    compaction_completed_[k]++;
  }

  int NumberOfCompactions(CompactionReason reason) const {
    int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
    int k = static_cast<int>(reason);
    assert(k >= 0 && k < num_of_reasons);
    return compaction_completed_.at(k).load();
  }

 private:
  std::vector<std::atomic<int>> compaction_completed_;
};

class DBCompactionTest : public DBTestBase {
 public:
  DBCompactionTest()
      : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {}

 protected:
  /*
   * Verifies compaction stats of cfd are valid.
   *
   * For each level of cfd, its compaction stats are valid if
   * 1) sum(stat.counts) == stat.count, and
   * 2) stat.counts[i] == collector.NumberOfCompactions(i)
   */
  void VerifyCompactionStats(ColumnFamilyData& cfd,
                             const CompactionStatsCollector& collector) {
#ifndef NDEBUG
    InternalStats* internal_stats_ptr = cfd.internal_stats();
    ASSERT_NE(internal_stats_ptr, nullptr);
    const std::vector<InternalStats::CompactionStats>& comp_stats =
        internal_stats_ptr->TEST_GetCompactionStats();
    const int num_of_reasons =
        static_cast<int>(CompactionReason::kNumOfReasons);
    std::vector<int> counts(num_of_reasons, 0);
    // Count the number of compactions caused by each CompactionReason across
    // all levels.
    for (const auto& stat : comp_stats) {
      int sum = 0;
      for (int i = 0; i < num_of_reasons; i++) {
        counts[i] += stat.counts[i];
        sum += stat.counts[i];
      }
      ASSERT_EQ(sum, stat.count);
    }
    // Verify InternalStats bookkeeping matches that of
    // CompactionStatsCollector, assuming that all compactions complete.
    for (int i = 0; i < num_of_reasons; i++) {
      ASSERT_EQ(collector.NumberOfCompactions(static_cast<CompactionReason>(i)),
                counts[i]);
    }
#endif /* NDEBUG */
  }
};

class DBCompactionTestWithParam
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
 public:
  DBCompactionTestWithParam()
      : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {
    max_subcompactions_ = std::get<0>(GetParam());
    exclusive_manual_compaction_ = std::get<1>(GetParam());
  }

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  uint32_t max_subcompactions_;
  bool exclusive_manual_compaction_;
};

class DBCompactionTestWithBottommostParam
    : public DBTestBase,
      public testing::WithParamInterface<
          std::tuple<BottommostLevelCompaction, bool>> {
 public:
  DBCompactionTestWithBottommostParam()
      : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {
    bottommost_level_compaction_ = std::get<0>(GetParam());
  }

  BottommostLevelCompaction bottommost_level_compaction_;
};

class DBCompactionDirectIOTest : public DBCompactionTest,
                                 public ::testing::WithParamInterface<bool> {
 public:
  DBCompactionDirectIOTest() : DBCompactionTest() {}
};

class DBCompactionWaitForCompactTest
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<bool, bool>> {
 public:
  DBCompactionWaitForCompactTest()
      : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {
    abort_on_pause_ = std::get<0>(GetParam());
    flush_ = std::get<1>(GetParam());
  }
  bool abort_on_pause_;
  bool flush_;
  Options options_;
  WaitForCompactOptions wait_for_compact_options_;

  void SetUp() override {
    // This test sets up a scenario that one more L0 file will trigger a
    // compaction
    const int kNumKeysPerFile = 4;
    const int kNumFiles = 2;

    options_ = CurrentOptions();
    options_.level0_file_num_compaction_trigger = kNumFiles + 1;

    wait_for_compact_options_ = WaitForCompactOptions();
    wait_for_compact_options_.abort_on_pause = abort_on_pause_;
    wait_for_compact_options_.flush = flush_;

    DestroyAndReopen(options_);

    Random rnd(301);
    for (int i = 0; i < kNumFiles; ++i) {
      for (int j = 0; j < kNumKeysPerFile; ++j) {
        ASSERT_OK(
            Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(100 /* len */)));
      }
      ASSERT_OK(Flush());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ("2", FilesPerLevel());
  }
};

// Param = true : target level is non-empty
// Param = false: level between target level and source level
//  is not empty.
class ChangeLevelConflictsWithAuto
    : public DBCompactionTest,
      public ::testing::WithParamInterface<bool> {
 public:
  ChangeLevelConflictsWithAuto() : DBCompactionTest() {}
};

// Param = true: grab the compaction pressure token (enable
// parallel compactions)
// Param = false: Not grab the token (no parallel compactions)
class RoundRobinSubcompactionsAgainstPressureToken
    : public DBCompactionTest,
      public ::testing::WithParamInterface<bool> {
 public:
  RoundRobinSubcompactionsAgainstPressureToken() {
    grab_pressure_token_ = GetParam();
  }
  bool grab_pressure_token_;
};

class RoundRobinSubcompactionsAgainstResources
    : public DBCompactionTest,
      public ::testing::WithParamInterface<std::tuple<int, int>> {
 public:
  RoundRobinSubcompactionsAgainstResources() {
    total_low_pri_threads_ = std::get<0>(GetParam());
    max_compaction_limits_ = std::get<1>(GetParam());
  }
  int total_low_pri_threads_;
  int max_compaction_limits_;
};

namespace {
class FlushedFileCollector : public EventListener {
 public:
  FlushedFileCollector() {}
  ~FlushedFileCollector() override {}

  void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
    std::lock_guard<std::mutex> lock(mutex_);
    flushed_files_.push_back(info.file_path);
  }

  std::vector<std::string> GetFlushedFiles() {
    std::lock_guard<std::mutex> lock(mutex_);
    std::vector<std::string> result;
    for (auto fname : flushed_files_) {
      result.push_back(fname);
    }
    return result;
  }

  void ClearFlushedFiles() { flushed_files_.clear(); }

 private:
  std::vector<std::string> flushed_files_;
  std::mutex mutex_;
};

class SstStatsCollector : public EventListener {
 public:
  SstStatsCollector() : num_ssts_creation_started_(0) {}

  void OnTableFileCreationStarted(
      const TableFileCreationBriefInfo& /* info */) override {
    ++num_ssts_creation_started_;
  }

  int num_ssts_creation_started() { return num_ssts_creation_started_; }

 private:
  std::atomic<int> num_ssts_creation_started_;
};

static const int kCDTValueSize = 1000;
static const int kCDTKeysPerBuffer = 4;
static const int kCDTNumLevels = 8;
Options DeletionTriggerOptions(Options options) {
  options.compression = kNoCompression;
  options.write_buffer_size = kCDTKeysPerBuffer * (kCDTValueSize + 24);
  options.min_write_buffer_number_to_merge = 1;
  options.max_write_buffer_size_to_maintain = 0;
  options.num_levels = kCDTNumLevels;
  options.level0_file_num_compaction_trigger = 1;
  options.target_file_size_base = options.write_buffer_size * 2;
  options.target_file_size_multiplier = 2;
  options.max_bytes_for_level_base =
      options.target_file_size_base * options.target_file_size_multiplier;
  options.max_bytes_for_level_multiplier = 2;
  options.disable_auto_compactions = false;
  options.compaction_options_universal.max_size_amplification_percent = 100;
  return options;
}

bool HaveOverlappingKeyRanges(const Comparator* c, const SstFileMetaData& a,
                              const SstFileMetaData& b) {
  if (c->CompareWithoutTimestamp(a.smallestkey, b.smallestkey) >= 0) {
    if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) {
      // b.smallestkey <= a.smallestkey <= b.largestkey
      return true;
    }
  } else if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) {
    // a.smallestkey < b.smallestkey <= a.largestkey
    return true;
  }
  if (c->CompareWithoutTimestamp(a.largestkey, b.largestkey) <= 0) {
    if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) {
      // b.smallestkey <= a.largestkey <= b.largestkey
      return true;
    }
  } else if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) {
    // a.smallestkey <= b.largestkey < a.largestkey
    return true;
  }
  return false;
}

// Identifies all files between level "min_level" and "max_level"
// which has overlapping key range with "input_file_meta".
void GetOverlappingFileNumbersForLevelCompaction(
    const ColumnFamilyMetaData& cf_meta, const Comparator* comparator,
    int min_level, int max_level, const SstFileMetaData* input_file_meta,
    std::set<std::string>* overlapping_file_names) {
  std::set<const SstFileMetaData*> overlapping_files;
  overlapping_files.insert(input_file_meta);
  for (int m = min_level; m <= max_level; ++m) {
    for (auto& file : cf_meta.levels[m].files) {
      for (auto* included_file : overlapping_files) {
        if (HaveOverlappingKeyRanges(comparator, *included_file, file)) {
          overlapping_files.insert(&file);
          overlapping_file_names->insert(file.name);
          break;
        }
      }
    }
  }
}

void VerifyCompactionResult(
    const ColumnFamilyMetaData& cf_meta,
    const std::set<std::string>& overlapping_file_numbers) {
#ifndef NDEBUG
  for (auto& level : cf_meta.levels) {
    for (auto& file : level.files) {
      assert(overlapping_file_numbers.find(file.name) ==
             overlapping_file_numbers.end());
    }
  }
#endif
}

const SstFileMetaData* PickFileRandomly(const ColumnFamilyMetaData& cf_meta,
                                        Random* rand, int* level = nullptr) {
  auto file_id = rand->Uniform(static_cast<int>(cf_meta.file_count)) + 1;
  for (auto& level_meta : cf_meta.levels) {
    if (file_id <= level_meta.files.size()) {
      if (level != nullptr) {
        *level = level_meta.level;
      }
      auto result = rand->Uniform(file_id);
      return &(level_meta.files[result]);
    }
    file_id -= static_cast<uint32_t>(level_meta.files.size());
  }
  assert(false);
  return nullptr;
}
}  // anonymous namespace

#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
// All the TEST_P tests run once with sub_compactions disabled (i.e.
// options.max_subcompactions = 1) and once with it enabled
TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) {
  for (int tid = 0; tid < 3; ++tid) {
    uint64_t db_size[2];
    Options options = DeletionTriggerOptions(CurrentOptions());
    options.max_subcompactions = max_subcompactions_;

    if (tid == 1) {
      // the following only disable stats update in DB::Open()
      // and should not affect the result of this test.
      options.skip_stats_update_on_db_open = true;
    } else if (tid == 2) {
      // third pass with universal compaction
      options.compaction_style = kCompactionStyleUniversal;
      options.num_levels = 1;
    }

    DestroyAndReopen(options);
    Random rnd(301);

    const int kTestSize = kCDTKeysPerBuffer * 1024;
    std::vector<std::string> values;
    for (int k = 0; k < kTestSize; ++k) {
      values.push_back(rnd.RandomString(kCDTValueSize));
      ASSERT_OK(Put(Key(k), values[k]));
    }
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0]));

    for (int k = 0; k < kTestSize; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1]));

    if (options.compaction_style == kCompactionStyleUniversal) {
      // Claim: in universal compaction none of the original data will remain
      // once compactions settle.
      //
      // Proof: The compensated size of the file containing the most tombstones
      // is enough on its own to trigger size amp compaction. Size amp
      // compaction is a full compaction, so all tombstones meet the obsolete
      // keys they cover.
      ASSERT_EQ(0, db_size[1]);
    } else {
      // Claim: in level compaction at most `db_size[0] / 2` of the original
      // data will remain once compactions settle.
      //
      // Proof: Assume the original data is all in the bottom level. If it were
      // not, it would meet its tombstone sooner. The original data size is
      // large enough to require fanout to bottom level to be greater than
      // `max_bytes_for_level_multiplier == 2`. In the level just above,
      // tombstones must cover less than `db_size[0] / 4` bytes since fanout >=
      // 2 and file size is compensated by doubling the size of values we expect
      // are covered (`kDeletionWeightOnCompaction == 2`). The tombstones in
      // levels above must cover less than `db_size[0] / 8` bytes of original
      // data, `db_size[0] / 16`, and so on.
      ASSERT_GT(db_size[0] / 2, db_size[1]);
    }
  }
}
#endif  // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)

TEST_F(DBCompactionTest, SkipStatsUpdateTest) {
  // This test verify UpdateAccumulatedStats is not on
  // if options.skip_stats_update_on_db_open = true
  // The test will need to be updated if the internal behavior changes.

  Options options = DeletionTriggerOptions(CurrentOptions());
  options.disable_auto_compactions = true;
  options.env = env_;
  DestroyAndReopen(options);
  Random rnd(301);

  const int kTestSize = kCDTKeysPerBuffer * 512;
  std::vector<std::string> values;
  for (int k = 0; k < kTestSize; ++k) {
    values.push_back(rnd.RandomString(kCDTValueSize));
    ASSERT_OK(Put(Key(k), values[k]));
  }

  ASSERT_OK(Flush());

  Close();

  int update_acc_stats_called = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "VersionStorageInfo::UpdateAccumulatedStats",
      [&](void* /* arg */) { ++update_acc_stats_called; });
  SyncPoint::GetInstance()->EnableProcessing();

  // Reopen the DB with stats-update disabled
  options.skip_stats_update_on_db_open = true;
  options.max_open_files = 20;
  Reopen(options);

  ASSERT_EQ(update_acc_stats_called, 0);

  // Repeat the reopen process, but this time we enable
  // stats-update.
  options.skip_stats_update_on_db_open = false;
  Reopen(options);

  ASSERT_GT(update_acc_stats_called, 0);

  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
  Options options = CurrentOptions();
  options.env = env_;
  options.max_open_files = 20;
  options.level0_file_num_compaction_trigger = 3;
  // Avoid many shards with small max_open_files, where as little as
  // two table insertions could lead to an LRU eviction, depending on
  // hash values.
  options.table_cache_numshardbits = 2;
  DestroyAndReopen(options);
  Random rnd(301);

  int num_table_cache_lookup = 0;
  int num_new_table_reader = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TableCache::FindTable:0", [&](void* arg) {
        assert(arg != nullptr);
        bool no_io = *(reinterpret_cast<bool*>(arg));
        if (!no_io) {
          // filter out cases for table properties queries.
          num_table_cache_lookup++;
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "TableCache::GetTableReader:0",
      [&](void* /*arg*/) { num_new_table_reader++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  for (int k = 0; k < options.level0_file_num_compaction_trigger; ++k) {
    ASSERT_OK(Put(Key(k), Key(k)));
    ASSERT_OK(Put(Key(10 - k), "bar"));
    if (k < options.level0_file_num_compaction_trigger - 1) {
      num_table_cache_lookup = 0;
      ASSERT_OK(Flush());
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      // preloading iterator issues one table cache lookup and create
      // a new table reader, if not preloaded.
      int old_num_table_cache_lookup = num_table_cache_lookup;
      ASSERT_GE(num_table_cache_lookup, 1);
      ASSERT_EQ(num_new_table_reader, 1);

      num_table_cache_lookup = 0;
      num_new_table_reader = 0;
      ASSERT_EQ(Key(k), Get(Key(k)));
      // lookup iterator from table cache and no need to create a new one.
      ASSERT_EQ(old_num_table_cache_lookup + num_table_cache_lookup, 2);
      ASSERT_EQ(num_new_table_reader, 0);
    }
  }

  num_table_cache_lookup = 0;
  num_new_table_reader = 0;
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // Preloading iterator issues one table cache lookup and creates
  // a new table reader. One file is created for flush and one for compaction.
  // Compaction inputs make no table cache look-up for data/range deletion
  // iterators
  // May preload table cache too.
  ASSERT_GE(num_table_cache_lookup, 2);
  int old_num_table_cache_lookup2 = num_table_cache_lookup;

  // Create new iterator for:
  // (1) 1 for verifying flush results
  // (2) 1 for verifying compaction results.
  // (3) New TableReaders will not be created for compaction inputs
  ASSERT_EQ(num_new_table_reader, 2);

  num_table_cache_lookup = 0;
  num_new_table_reader = 0;
  ASSERT_EQ(Key(1), Get(Key(1)));
  ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 5);
  ASSERT_EQ(num_new_table_reader, 0);

  num_table_cache_lookup = 0;
  num_new_table_reader = 0;
  CompactRangeOptions cro;
  cro.change_level = true;
  cro.target_level = 2;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  // Only verifying compaction outputs issues one table cache lookup
  // for both data block and range deletion block).
  // May preload table cache too.
  ASSERT_GE(num_table_cache_lookup, 1);
  old_num_table_cache_lookup2 = num_table_cache_lookup;
  // One for verifying compaction results.
  // No new iterator created for compaction.
  ASSERT_EQ(num_new_table_reader, 1);

  num_table_cache_lookup = 0;
  num_new_table_reader = 0;
  ASSERT_EQ(Key(1), Get(Key(1)));
  ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 3);
  ASSERT_EQ(num_new_table_reader, 0);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}

TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) {
  for (int tid = 0; tid < 2; ++tid) {
    uint64_t db_size[3];
    Options options = DeletionTriggerOptions(CurrentOptions());
    options.max_subcompactions = max_subcompactions_;

    if (tid == 1) {
      // second pass with universal compaction
      options.compaction_style = kCompactionStyleUniversal;
      options.num_levels = 1;
    }

    DestroyAndReopen(options);
    Random rnd(301);

    // round 1 --- insert key/value pairs.
    const int kTestSize = kCDTKeysPerBuffer * 512;
    std::vector<std::string> values;
    for (int k = 0; k < kTestSize; ++k) {
      values.push_back(rnd.RandomString(kCDTValueSize));
      ASSERT_OK(Put(Key(k), values[k]));
    }
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0]));
    Close();

    // round 2 --- disable auto-compactions and issue deletions.
    options.create_if_missing = false;
    options.disable_auto_compactions = true;
    Reopen(options);

    for (int k = 0; k < kTestSize; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1]));
    Close();
    // as auto_compaction is off, we shouldn't see any reduction in db size.
    ASSERT_LE(db_size[0], db_size[1]);

    // round 3 --- reopen db with auto_compaction on and see if
    // deletion compensation still work.
    options.disable_auto_compactions = false;
    Reopen(options);
    // insert relatively small amount of data to trigger auto compaction.
    for (int k = 0; k < kTestSize / 10; ++k) {
      ASSERT_OK(Put(Key(k), values[k]));
    }
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[2]));
    // this time we're expecting significant drop in size.
    //
    // See "CompactionDeletionTrigger" test for proof that at most
    // `db_size[0] / 2` of the original data remains. In addition to that, this
    // test inserts `db_size[0] / 10` to push the tombstones into SST files and
    // then through automatic compactions. So in total `3 * db_size[0] / 5` of
    // the original data may remain.
    ASSERT_GT(3 * db_size[0] / 5, db_size[2]);
  }
}

TEST_F(DBCompactionTest, CompactRangeBottomPri) {
  ASSERT_OK(Put(Key(50), ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(Key(100), ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(Key(200), ""));
  ASSERT_OK(Flush());

  {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 2;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  }
  ASSERT_EQ("0,0,3", FilesPerLevel(0));

  ASSERT_OK(Put(Key(1), ""));
  ASSERT_OK(Put(Key(199), ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(Key(2), ""));
  ASSERT_OK(Put(Key(199), ""));
  ASSERT_OK(Flush());
  ASSERT_EQ("2,0,3", FilesPerLevel(0));

  // Now we have 2 L0 files, and 3 L2 files, and a manual compaction will
  // be triggered.
  // Two compaction jobs will run. One compacts 2 L0 files in Low Pri Pool
  // and one compact to L2 in bottom pri pool.
  int low_pri_count = 0;
  int bottom_pri_count = 0;
  SyncPoint::GetInstance()->SetCallBack(
      "ThreadPoolImpl::Impl::BGThread:BeforeRun", [&](void* arg) {
        Env::Priority* pri = reinterpret_cast<Env::Priority*>(arg);
        // First time is low pri pool in the test case.
        if (low_pri_count == 0 && bottom_pri_count == 0) {
          ASSERT_EQ(Env::Priority::LOW, *pri);
        }
        if (*pri == Env::Priority::LOW) {
          low_pri_count++;
        } else {
          bottom_pri_count++;
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();
  env_->SetBackgroundThreads(1, Env::Priority::BOTTOM);
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(1, low_pri_count);
  ASSERT_EQ(1, bottom_pri_count);
  ASSERT_EQ("0,0,2", FilesPerLevel(0));

  // Recompact bottom most level uses bottom pool
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  ASSERT_EQ(1, low_pri_count);
  ASSERT_EQ(2, bottom_pri_count);

  env_->SetBackgroundThreads(0, Env::Priority::BOTTOM);
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  // Low pri pool is used if bottom pool has size 0.
  ASSERT_EQ(2, low_pri_count);
  ASSERT_EQ(2, bottom_pri_count);

  SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, DisableStatsUpdateReopen) {
  uint64_t db_size[3];
  for (int test = 0; test < 2; ++test) {
    Options options = DeletionTriggerOptions(CurrentOptions());
    options.skip_stats_update_on_db_open = (test == 0);

    env_->random_read_counter_.Reset();
    DestroyAndReopen(options);
    Random rnd(301);

    // round 1 --- insert key/value pairs.
    const int kTestSize = kCDTKeysPerBuffer * 512;
    std::vector<std::string> values;
    for (int k = 0; k < kTestSize; ++k) {
      values.push_back(rnd.RandomString(kCDTValueSize));
      ASSERT_OK(Put(Key(k), values[k]));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    // L1 and L2 can fit deletions iff size compensation does not take effect,
    // i.e., when `skip_stats_update_on_db_open == true`. Move any remaining
    // files at or above L2 down to L3 to ensure obsolete data does not
    // accidentally meet its tombstone above L3. This makes the final size more
    // deterministic and easy to see whether size compensation for deletions
    // took effect.
    MoveFilesToLevel(3 /* level */);
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0]));
    Close();

    // round 2 --- disable auto-compactions and issue deletions.
    options.create_if_missing = false;
    options.disable_auto_compactions = true;

    env_->random_read_counter_.Reset();
    Reopen(options);

    for (int k = 0; k < kTestSize; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1]));
    Close();
    // as auto_compaction is off, we shouldn't see any reduction in db size.
    ASSERT_LE(db_size[0], db_size[1]);

    // round 3 --- reopen db with auto_compaction on and see if
    // deletion compensation still work.
    options.disable_auto_compactions = false;
    Reopen(options);
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[2]));

    if (options.skip_stats_update_on_db_open) {
      // If update stats on DB::Open is disable, we don't expect
      // deletion entries taking effect.
      //
      // The deletions are small enough to fit in L1 and L2, and obsolete keys
      // were moved to L3+, so none of the original data should have been
      // dropped.
      ASSERT_LE(db_size[0], db_size[2]);
    } else {
      // Otherwise, we should see a significant drop in db size.
      //
      // See "CompactionDeletionTrigger" test for proof that at most
      // `db_size[0] / 2` of the original data remains.
      ASSERT_GT(db_size[0] / 2, db_size[2]);
    }
  }
}

TEST_P(DBCompactionTestWithParam, CompactionTrigger) {
  const int kNumKeysPerFile = 100;

  Options options = CurrentOptions();
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.num_levels = 3;
  options.level0_file_num_compaction_trigger = 3;
  options.max_subcompactions = max_subcompactions_;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  CreateAndReopenWithCF({"pikachu"}, options);

  Random rnd(301);

  for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
       num++) {
    std::vector<std::string> values;
    // Write 100KB (100 values, each 1K)
    for (int i = 0; i < kNumKeysPerFile; i++) {
      values.push_back(rnd.RandomString(990));
      ASSERT_OK(Put(1, Key(i), values[i]));
    }
    // put extra key to trigger flush
    ASSERT_OK(Put(1, "", ""));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), num + 1);
  }

  // generate one more file in level-0, and should trigger level-0 compaction
  std::vector<std::string> values;
  for (int i = 0; i < kNumKeysPerFile; i++) {
    values.push_back(rnd.RandomString(990));
    ASSERT_OK(Put(1, Key(i), values[i]));
  }
  // put extra key to trigger flush
  ASSERT_OK(Put(1, "", ""));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1);
}

TEST_F(DBCompactionTest, BGCompactionsAllowed) {
  // Create several column families. Make compaction triggers in all of them
  // and see number of compactions scheduled to be less than allowed.
  const int kNumKeysPerFile = 100;

  Options options = CurrentOptions();
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.num_levels = 3;
  // Should speed up compaction when there are 4 files.
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 20;
  options.soft_pending_compaction_bytes_limit = 1 << 30;  // Infinitely large
  options.max_background_compactions = 3;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));

  // Block all threads in thread pool.
  const size_t kTotalTasks = 4;
  env_->SetBackgroundThreads(4, Env::LOW);
  test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];
  for (size_t i = 0; i < kTotalTasks; i++) {
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                   &sleeping_tasks[i], Env::Priority::LOW);
    sleeping_tasks[i].WaitUntilSleeping();
  }

  CreateAndReopenWithCF({"one", "two", "three"}, options);

  Random rnd(301);
  for (int cf = 0; cf < 4; cf++) {
    for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
      for (int i = 0; i < kNumKeysPerFile; i++) {
        ASSERT_OK(Put(cf, Key(i), ""));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put(cf, "", ""));
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf]));
      ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
    }
  }

  // Now all column families qualify compaction but only one should be
  // scheduled, because no column family hits speed up condition.
  ASSERT_EQ(1u, env_->GetThreadPoolQueueLen(Env::Priority::LOW));

  // Create two more files for one column family, which triggers speed up
  // condition, three compactions will be scheduled.
  for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
    for (int i = 0; i < kNumKeysPerFile; i++) {
      ASSERT_OK(Put(2, Key(i), ""));
    }
    // put extra key to trigger flush
    ASSERT_OK(Put(2, "", ""));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
    ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1,
              NumTableFilesAtLevel(0, 2));
  }
  ASSERT_EQ(3U, env_->GetThreadPoolQueueLen(Env::Priority::LOW));

  // Unblock all threads to unblock all compactions.
  for (size_t i = 0; i < kTotalTasks; i++) {
    sleeping_tasks[i].WakeUp();
    sleeping_tasks[i].WaitUntilDone();
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Verify number of compactions allowed will come back to 1.

  for (size_t i = 0; i < kTotalTasks; i++) {
    sleeping_tasks[i].Reset();
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                   &sleeping_tasks[i], Env::Priority::LOW);
    sleeping_tasks[i].WaitUntilSleeping();
  }
  for (int cf = 0; cf < 4; cf++) {
    for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
      for (int i = 0; i < kNumKeysPerFile; i++) {
        ASSERT_OK(Put(cf, Key(i), ""));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put(cf, "", ""));
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf]));
      ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1);
    }
  }

  // Now all column families qualify compaction but only one should be
  // scheduled, because no column family hits speed up condition.
  ASSERT_EQ(1U, env_->GetThreadPoolQueueLen(Env::Priority::LOW));

  for (size_t i = 0; i < kTotalTasks; i++) {
    sleeping_tasks[i].WakeUp();
    sleeping_tasks[i].WaitUntilDone();
  }
}

TEST_P(DBCompactionTestWithParam, CompactionsGenerateMultipleFiles) {
  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;  // Large write buffer
  options.max_subcompactions = max_subcompactions_;
  CreateAndReopenWithCF({"pikachu"}, options);

  Random rnd(301);

  // Write 8MB (80 values, each 100K)
  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  std::vector<std::string> values;
  for (int i = 0; i < 80; i++) {
    values.push_back(rnd.RandomString(100000));
    ASSERT_OK(Put(1, Key(i), values[i]));
  }

  // Reopening moves updates to level-0
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
                                        true /* disallow trivial move */));

  ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
  ASSERT_GT(NumTableFilesAtLevel(1, 1), 1);
  for (int i = 0; i < 80; i++) {
    ASSERT_EQ(Get(1, Key(i)), values[i]);
  }
}

TEST_F(DBCompactionTest, MinorCompactionsHappen) {
  do {
    Options options = CurrentOptions();
    options.write_buffer_size = 10000;
    CreateAndReopenWithCF({"pikachu"}, options);

    const int N = 500;

    int starting_num_tables = TotalTableFiles(1);
    for (int i = 0; i < N; i++) {
      ASSERT_OK(Put(1, Key(i), Key(i) + std::string(1000, 'v')));
    }
    int ending_num_tables = TotalTableFiles(1);
    ASSERT_GT(ending_num_tables, starting_num_tables);

    for (int i = 0; i < N; i++) {
      ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i)));
    }

    ReopenWithColumnFamilies({"default", "pikachu"}, options);

    for (int i = 0; i < N; i++) {
      ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(1, Key(i)));
    }
  } while (ChangeCompactOptions());
}

TEST_F(DBCompactionTest, UserKeyCrossFile1) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;

  DestroyAndReopen(options);

  // create first file and flush to l0
  ASSERT_OK(Put("4", "A"));
  ASSERT_OK(Put("3", "A"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  ASSERT_OK(Put("2", "A"));
  ASSERT_OK(Delete("3"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_EQ("NOT_FOUND", Get("3"));

  // move both files down to l1
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ("NOT_FOUND", Get("3"));

  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put("2", "B"));
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ("NOT_FOUND", Get("3"));
}

TEST_F(DBCompactionTest, UserKeyCrossFile2) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;

  DestroyAndReopen(options);

  // create first file and flush to l0
  ASSERT_OK(Put("4", "A"));
  ASSERT_OK(Put("3", "A"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  ASSERT_OK(Put("2", "A"));
  ASSERT_OK(SingleDelete("3"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_EQ("NOT_FOUND", Get("3"));

  // move both files down to l1
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ("NOT_FOUND", Get("3"));

  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put("2", "B"));
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ("NOT_FOUND", Get("3"));
}

TEST_F(DBCompactionTest, CompactionSstPartitioner) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;
  std::shared_ptr<SstPartitionerFactory> factory(
      NewSstPartitionerFixedPrefixFactory(4));
  options.sst_partitioner_factory = factory;

  DestroyAndReopen(options);

  // create first file and flush to l0
  ASSERT_OK(Put("aaaa1", "A"));
  ASSERT_OK(Put("bbbb1", "B"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  ASSERT_OK(Put("aaaa1", "A2"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // move both files down to l1
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  std::vector<LiveFileMetaData> files;
  dbfull()->GetLiveFilesMetaData(&files);
  ASSERT_EQ(2, files.size());
  ASSERT_EQ("A2", Get("aaaa1"));
  ASSERT_EQ("B", Get("bbbb1"));
}

TEST_F(DBCompactionTest, CompactionSstPartitionWithManualCompaction) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;

  DestroyAndReopen(options);

  // create first file and flush to l0
  ASSERT_OK(Put("000015", "A"));
  ASSERT_OK(Put("000025", "B"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // create second file and flush to l0
  ASSERT_OK(Put("000015", "A2"));
  ASSERT_OK(Put("000025", "B2"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // CONTROL 1: compact without partitioner
  CompactRangeOptions compact_options;
  compact_options.bottommost_level_compaction =
      BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Check (compacted but no partitioning yet)
  std::vector<LiveFileMetaData> files;
  dbfull()->GetLiveFilesMetaData(&files);
  ASSERT_EQ(1, files.size());

  // Install partitioner
  std::shared_ptr<SstPartitionerFactory> factory(
      NewSstPartitionerFixedPrefixFactory(5));
  options.sst_partitioner_factory = factory;
  Reopen(options);

  // CONTROL 2: request compaction on range with no partition boundary and no
  // overlap with actual entries
  Slice from("000017");
  Slice to("000019");
  ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));

  // Check (no partitioning yet)
  files.clear();
  dbfull()->GetLiveFilesMetaData(&files);
  ASSERT_EQ(1, files.size());
  ASSERT_EQ("A2", Get("000015"));
  ASSERT_EQ("B2", Get("000025"));

  // TEST: request compaction overlapping with partition boundary but no
  // actual entries
  // NOTE: `to` is INCLUSIVE
  from = Slice("000019");
  to = Slice("000020");
  ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));

  // Check (must be partitioned)
  files.clear();
  dbfull()->GetLiveFilesMetaData(&files);
  ASSERT_EQ(2, files.size());
  ASSERT_EQ("A2", Get("000015"));
  ASSERT_EQ("B2", Get("000025"));
}

TEST_F(DBCompactionTest, CompactionSstPartitionerNonTrivial) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 1;
  std::shared_ptr<SstPartitionerFactory> factory(
      NewSstPartitionerFixedPrefixFactory(4));
  options.sst_partitioner_factory = factory;

  DestroyAndReopen(options);

  // create first file and flush to l0
  ASSERT_OK(Put("aaaa1", "A"));
  ASSERT_OK(Put("bbbb1", "B"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  std::vector<LiveFileMetaData> files;
  dbfull()->GetLiveFilesMetaData(&files);
  ASSERT_EQ(2, files.size());
  ASSERT_EQ("A", Get("aaaa1"));
  ASSERT_EQ("B", Get("bbbb1"));
}

TEST_F(DBCompactionTest, ZeroSeqIdCompaction) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;

  FlushedFileCollector* collector = new FlushedFileCollector();
  options.listeners.emplace_back(collector);

  // compaction options
  CompactionOptions compact_opt;
  compact_opt.compression = kNoCompression;
  compact_opt.output_file_size_limit = 4096;
  const size_t key_len =
      static_cast<size_t>(compact_opt.output_file_size_limit) / 5;

  DestroyAndReopen(options);

  std::vector<const Snapshot*> snaps;

  // create first file and flush to l0
  for (auto& key : {"1", "2", "3", "3", "3", "3"}) {
    ASSERT_OK(Put(key, std::string(key_len, 'A')));
    snaps.push_back(dbfull()->GetSnapshot());
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // create second file and flush to l0
  for (auto& key : {"3", "4", "5", "6", "7", "8"}) {
    ASSERT_OK(Put(key, std::string(key_len, 'A')));
    snaps.push_back(dbfull()->GetSnapshot());
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  // move both files down to l1
  ASSERT_OK(
      dbfull()->CompactFiles(compact_opt, collector->GetFlushedFiles(), 1));

  // release snap so that first instance of key(3) can have seqId=0
  for (auto snap : snaps) {
    dbfull()->ReleaseSnapshot(snap);
  }

  // create 3 files in l0 so to trigger compaction
  for (int i = 0; i < options.level0_file_num_compaction_trigger; i++) {
    ASSERT_OK(Put("2", std::string(1, 'A')));
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_OK(Put("", ""));
}

TEST_F(DBCompactionTest, ManualCompactionUnknownOutputSize) {
  // github issue #2249
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.level0_file_num_compaction_trigger = 3;
  DestroyAndReopen(options);

  // create two files in l1 that we can compact
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < options.level0_file_num_compaction_trigger; j++) {
      ASSERT_OK(Put(std::to_string(2 * i), std::string(1, 'A')));
      ASSERT_OK(Put(std::to_string(2 * i + 1), std::string(1, 'A')));
      ASSERT_OK(Flush());
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  ASSERT_OK(
      dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 2);
  ASSERT_OK(
      dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "3"}}));

  ColumnFamilyMetaData cf_meta;
  dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta);
  ASSERT_EQ(2, cf_meta.levels[1].files.size());
  std::vector<std::string> input_filenames;
  for (const auto& sst_file : cf_meta.levels[1].files) {
    input_filenames.push_back(sst_file.name);
  }

  // note CompactionOptions::output_file_size_limit is unset.
  CompactionOptions compact_opt;
  compact_opt.compression = kNoCompression;
  ASSERT_OK(dbfull()->CompactFiles(compact_opt, input_filenames, 1));
}

// Check that writes done during a memtable compaction are recovered
// if the database is shutdown during the memtable compaction.
TEST_F(DBCompactionTest, RecoverDuringMemtableCompaction) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    CreateAndReopenWithCF({"pikachu"}, options);

    // Trigger a long memtable compaction and reopen the database during it
    ASSERT_OK(Put(1, "foo", "v1"));  // Goes to 1st log file
    ASSERT_OK(Put(1, "big1", std::string(10000000, 'x')));  // Fills memtable
    ASSERT_OK(Put(1, "big2", std::string(1000, 'y')));  // Triggers compaction
    ASSERT_OK(Put(1, "bar", "v2"));                     // Goes to new log file

    ReopenWithColumnFamilies({"default", "pikachu"}, options);
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v2", Get(1, "bar"));
    ASSERT_EQ(std::string(10000000, 'x'), Get(1, "big1"));
    ASSERT_EQ(std::string(1000, 'y'), Get(1, "big2"));
  } while (ChangeOptions());
}

TEST_P(DBCompactionTestWithParam, TrivialMoveOneFile) {
  int32_t trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;
  options.max_subcompactions = max_subcompactions_;
  DestroyAndReopen(options);

  int32_t num_keys = 80;
  int32_t value_size = 100 * 1024;  // 100 KB

  Random rnd(301);
  std::vector<std::string> values;
  for (int i = 0; i < num_keys; i++) {
    values.push_back(rnd.RandomString(value_size));
    ASSERT_OK(Put(Key(i), values[i]));
  }

  // Reopening moves updates to L0
  Reopen(options);
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 1);  // 1 file in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // 0 files in L1

  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 1U);
  LiveFileMetaData level0_file = metadata[0];  // L0 file meta

  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = exclusive_manual_compaction_;

  // Compaction will initiate a trivial move from L0 to L1
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));

  // File moved From L0 to L1
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);  // 0 files in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1);  // 1 file in L1

  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(metadata.size(), 1U);
  ASSERT_EQ(metadata[0].name /* level1_file.name */, level0_file.name);
  ASSERT_EQ(metadata[0].size /* level1_file.size */, level0_file.size);

  for (int i = 0; i < num_keys; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }

  ASSERT_EQ(trivial_move, 1);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionTestWithParam, TrivialMoveNonOverlappingFiles) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);
  // non overlapping ranges
  std::vector<std::pair<int32_t, int32_t>> ranges = {
      {100, 199}, {300, 399}, {0, 99},    {200, 299},
      {600, 699}, {400, 499}, {500, 550}, {551, 599},
  };
  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::map<int32_t, std::string> values;
  for (size_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      values[j] = rnd.RandomString(value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
    ASSERT_OK(Flush());
  }

  int32_t level0_files = NumTableFilesAtLevel(0, 0);
  ASSERT_EQ(level0_files, ranges.size());    // Multiple files in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // No files in L1

  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = exclusive_manual_compaction_;

  // Since data is non-overlapping we expect compaction to initiate
  // a trivial move
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  // We expect that all the files were trivially moved from L0 to L1
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 0) /* level1_files */, level0_files);

  for (size_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      ASSERT_EQ(Get(Key(j)), values[j]);
    }
  }

  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  trivial_move = 0;
  non_trivial_move = 0;
  values.clear();
  DestroyAndReopen(options);
  // Same ranges as above but overlapping
  ranges = {
      {100, 199},
      {300, 399},
      {0, 99},
      {200, 299},
      {600, 699},
      {400, 499},
      {500, 560},  // this range overlap with the next
                   // one
      {551, 599},
  };
  for (size_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      values[j] = rnd.RandomString(value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
    ASSERT_OK(Flush());
  }

  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  for (size_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      ASSERT_EQ(Get(Key(j)), values[j]);
    }
  }
  ASSERT_EQ(trivial_move, 0);
  ASSERT_EQ(non_trivial_move, 1);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionTestWithParam, TrivialMoveTargetLevel) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  options.num_levels = 7;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);
  int32_t value_size = 10 * 1024;  // 10 KB

  // Add 2 non-overlapping files
  Random rnd(301);
  std::map<int32_t, std::string> values;

  // file 1 [0 => 300]
  for (int32_t i = 0; i <= 300; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // file 2 [600 => 700]
  for (int32_t i = 600; i <= 700; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // 2 files in L0
  ASSERT_EQ("2", FilesPerLevel(0));
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 6;
  compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  // 2 files in L6
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0));

  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  for (int32_t i = 0; i <= 300; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }
  for (int32_t i = 600; i <= 700; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }
}

TEST_P(DBCompactionTestWithParam, PartialOverlappingL0) {
  class SubCompactionEventListener : public EventListener {
   public:
    void OnSubcompactionCompleted(const SubcompactionJobInfo&) override {
      sub_compaction_finished_++;
    }
    std::atomic<int> sub_compaction_finished_{0};
  };

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  options.max_subcompactions = max_subcompactions_;
  SubCompactionEventListener* listener = new SubCompactionEventListener();
  options.listeners.emplace_back(listener);

  DestroyAndReopen(options);

  // For subcompactino to trigger, output level needs to be non-empty.
  ASSERT_OK(Put("key", ""));
  ASSERT_OK(Put("kez", ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key", ""));
  ASSERT_OK(Put("kez", ""));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  // Ranges that are only briefly overlapping so that they won't be trivially
  // moved but subcompaction ranges would only contain a subset of files.
  std::vector<std::pair<int32_t, int32_t>> ranges = {
      {100, 199}, {198, 399}, {397, 600}, {598, 800}, {799, 900}, {895, 999},
  };
  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::map<int32_t, std::string> values;
  for (size_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      values[j] = rnd.RandomString(value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
    ASSERT_OK(Flush());
  }

  int32_t level0_files = NumTableFilesAtLevel(0, 0);
  ASSERT_EQ(level0_files, ranges.size());    // Multiple files in L0
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 1);  // One file in L1

  listener->sub_compaction_finished_ = 0;
  ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  if (max_subcompactions_ > 3) {
    // RocksDB might not generate the exact number of sub compactions.
    // Here we validate that at least subcompaction happened.
    ASSERT_GT(listener->sub_compaction_finished_.load(), 2);
  }

  // We expect that all the files were compacted to L1
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_GT(NumTableFilesAtLevel(1, 0), 1);

  for (size_t i = 0; i < ranges.size(); i++) {
    for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) {
      ASSERT_EQ(Get(Key(j)), values[j]);
    }
  }
}

TEST_P(DBCompactionTestWithParam, ManualCompactionPartial) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  bool first = true;
  // Purpose of dependencies:
  // 4 -> 1: ensure the order of two non-trivial compactions
  // 5 -> 2 and 5 -> 3: ensure we do a check before two non-trivial compactions
  // are installed
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBCompaction::ManualPartial:4", "DBCompaction::ManualPartial:1"},
       {"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:2"},
       {"DBCompaction::ManualPartial:5", "DBCompaction::ManualPartial:3"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
        if (first) {
          first = false;
          TEST_SYNC_POINT("DBCompaction::ManualPartial:4");
          TEST_SYNC_POINT("DBCompaction::ManualPartial:3");
        } else {  // second non-trivial compaction
          TEST_SYNC_POINT("DBCompaction::ManualPartial:2");
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.write_buffer_size = 10 * 1024 * 1024;
  options.num_levels = 7;
  options.max_subcompactions = max_subcompactions_;
  options.level0_file_num_compaction_trigger = 3;
  options.max_background_compactions = 3;
  options.target_file_size_base = 1 << 23;  // 8 MB

  DestroyAndReopen(options);
  int32_t value_size = 10 * 1024;  // 10 KB

  // Add 2 non-overlapping files
  Random rnd(301);
  std::map<int32_t, std::string> values;

  // file 1 [0 => 100]
  for (int32_t i = 0; i < 100; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // file 2 [100 => 300]
  for (int32_t i = 100; i < 300; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // 2 files in L0
  ASSERT_EQ("2", FilesPerLevel(0));
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 6;
  compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
  // Trivial move the two non-overlapping files to level 6
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  // 2 files in L6
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel(0));

  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  // file 3 [ 0 => 200]
  for (int32_t i = 0; i < 200; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // 1 files in L0
  ASSERT_EQ("1,0,0,0,0,0,2", FilesPerLevel(0));
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false));
  ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr, false));
  ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, nullptr, false));
  ASSERT_OK(dbfull()->TEST_CompactRange(3, nullptr, nullptr, nullptr, false));
  ASSERT_OK(dbfull()->TEST_CompactRange(4, nullptr, nullptr, nullptr, false));
  // 2 files in L6, 1 file in L5
  ASSERT_EQ("0,0,0,0,0,1,2", FilesPerLevel(0));

  ASSERT_EQ(trivial_move, 6);
  ASSERT_EQ(non_trivial_move, 0);

  ROCKSDB_NAMESPACE::port::Thread threads([&] {
    compact_options.change_level = false;
    compact_options.exclusive_manual_compaction = false;
    std::string begin_string = Key(0);
    std::string end_string = Key(199);
    Slice begin(begin_string);
    Slice end(end_string);
    // First non-trivial compaction is triggered
    ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
  });

  TEST_SYNC_POINT("DBCompaction::ManualPartial:1");
  // file 4 [300 => 400)
  for (int32_t i = 300; i <= 400; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // file 5 [400 => 500)
  for (int32_t i = 400; i <= 500; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // file 6 [500 => 600)
  for (int32_t i = 500; i <= 600; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  // Second non-trivial compaction is triggered
  ASSERT_OK(Flush());

  // Before two non-trivial compactions are installed, there are 3 files in L0
  ASSERT_EQ("3,0,0,0,0,1,2", FilesPerLevel(0));
  TEST_SYNC_POINT("DBCompaction::ManualPartial:5");

  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // After two non-trivial compactions are installed, there is 1 file in L6, and
  // 1 file in L1
  ASSERT_EQ("0,1,0,0,0,0,1", FilesPerLevel(0));
  threads.join();

  for (int32_t i = 0; i < 600; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }
}

// Disable as the test is flaky.
TEST_F(DBCompactionTest, DISABLED_ManualPartialFill) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  bool first = true;
  bool second = true;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBCompaction::PartialFill:4", "DBCompaction::PartialFill:1"},
       {"DBCompaction::PartialFill:2", "DBCompaction::PartialFill:3"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:AfterRun", [&](void* /*arg*/) {
        if (first) {
          TEST_SYNC_POINT("DBCompaction::PartialFill:4");
          first = false;
          TEST_SYNC_POINT("DBCompaction::PartialFill:3");
        } else if (second) {
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.write_buffer_size = 10 * 1024 * 1024;
  options.max_bytes_for_level_multiplier = 2;
  options.num_levels = 4;
  options.level0_file_num_compaction_trigger = 3;
  options.max_background_compactions = 3;

  DestroyAndReopen(options);
  // make sure all background compaction jobs can be scheduled
  auto stop_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();
  int32_t value_size = 10 * 1024;  // 10 KB

  // Add 2 non-overlapping files
  Random rnd(301);
  std::map<int32_t, std::string> values;

  // file 1 [0 => 100]
  for (int32_t i = 0; i < 100; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // file 2 [100 => 300]
  for (int32_t i = 100; i < 300; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // 2 files in L0
  ASSERT_EQ("2", FilesPerLevel(0));
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 2;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  // 2 files in L2
  ASSERT_EQ("0,0,2", FilesPerLevel(0));

  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  // file 3 [ 0 => 200]
  for (int32_t i = 0; i < 200; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // 2 files in L2, 1 in L0
  ASSERT_EQ("1,0,2", FilesPerLevel(0));
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, false));
  // 2 files in L2, 1 in L1
  ASSERT_EQ("0,1,2", FilesPerLevel(0));

  ASSERT_EQ(trivial_move, 2);
  ASSERT_EQ(non_trivial_move, 0);

  ROCKSDB_NAMESPACE::port::Thread threads([&] {
    compact_options.change_level = false;
    compact_options.exclusive_manual_compaction = false;
    std::string begin_string = Key(0);
    std::string end_string = Key(199);
    Slice begin(begin_string);
    Slice end(end_string);
    ASSERT_OK(db_->CompactRange(compact_options, &begin, &end));
  });

  TEST_SYNC_POINT("DBCompaction::PartialFill:1");
  // Many files 4 [300 => 4300)
  for (int32_t i = 0; i <= 5; i++) {
    for (int32_t j = 300; j < 4300; j++) {
      if (j == 2300) {
        ASSERT_OK(Flush());
        ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      }
      values[j] = rnd.RandomString(value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
  }

  // Verify level sizes
  uint64_t target_size = 4 * options.max_bytes_for_level_base;
  for (int32_t i = 1; i < options.num_levels; i++) {
    ASSERT_LE(SizeAtLevel(i), target_size);
    target_size = static_cast<uint64_t>(target_size *
                                        options.max_bytes_for_level_multiplier);
  }

  TEST_SYNC_POINT("DBCompaction::PartialFill:2");
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  threads.join();

  for (int32_t i = 0; i < 4300; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }
}

TEST_F(DBCompactionTest, ManualCompactionWithUnorderedWrite) {
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::WriteImpl:UnorderedWriteAfterWriteWAL",
        "DBCompactionTest::ManualCompactionWithUnorderedWrite:WaitWriteWAL"},
       {"DBImpl::WaitForPendingWrites:BeforeBlock",
        "DBImpl::WriteImpl:BeforeUnorderedWriteMemtable"}});

  Options options = CurrentOptions();
  options.unordered_write = true;
  DestroyAndReopen(options);
  ASSERT_OK(Put("foo", "v1"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put("bar", "v1"));
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  port::Thread writer([&]() { ASSERT_OK(Put("foo", "v2")); });

  TEST_SYNC_POINT(
      "DBCompactionTest::ManualCompactionWithUnorderedWrite:WaitWriteWAL");
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  writer.join();
  ASSERT_EQ(Get("foo"), "v2");

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();

  Reopen(options);
  ASSERT_EQ(Get("foo"), "v2");
}

TEST_F(DBCompactionTest, DeleteFileRange) {
  Options options = CurrentOptions();
  options.write_buffer_size = 10 * 1024 * 1024;
  options.max_bytes_for_level_multiplier = 2;
  options.num_levels = 4;
  options.level0_file_num_compaction_trigger = 3;
  options.max_background_compactions = 3;

  DestroyAndReopen(options);
  int32_t value_size = 10 * 1024;  // 10 KB

  // Add 2 non-overlapping files
  Random rnd(301);
  std::map<int32_t, std::string> values;

  // file 1 [0 => 100]
  for (int32_t i = 0; i < 100; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // file 2 [100 => 300]
  for (int32_t i = 100; i < 300; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // 2 files in L0
  ASSERT_EQ("2", FilesPerLevel(0));
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 2;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  // 2 files in L2
  ASSERT_EQ("0,0,2", FilesPerLevel(0));

  // file 3 [ 0 => 200]
  for (int32_t i = 0; i < 200; i++) {
    values[i] = rnd.RandomString(value_size);
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  // Many files 4 [300 => 4300)
  for (int32_t i = 0; i <= 5; i++) {
    for (int32_t j = 300; j < 4300; j++) {
      if (j == 2300) {
        ASSERT_OK(Flush());
        ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      }
      values[j] = rnd.RandomString(value_size);
      ASSERT_OK(Put(Key(j), values[j]));
    }
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Verify level sizes
  uint64_t target_size = 4 * options.max_bytes_for_level_base;
  for (int32_t i = 1; i < options.num_levels; i++) {
    ASSERT_LE(SizeAtLevel(i), target_size);
    target_size = static_cast<uint64_t>(target_size *
                                        options.max_bytes_for_level_multiplier);
  }

  const size_t old_num_files = CountFiles();
  std::string begin_string = Key(1000);
  std::string end_string = Key(2000);
  Slice begin(begin_string);
  Slice end(end_string);
  ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));

  int32_t deleted_count = 0;
  for (int32_t i = 0; i < 4300; i++) {
    if (i < 1000 || i > 2000) {
      ASSERT_EQ(Get(Key(i)), values[i]);
    } else {
      ReadOptions roptions;
      std::string result;
      Status s = db_->Get(roptions, Key(i), &result);
      ASSERT_TRUE(s.IsNotFound() || s.ok());
      if (s.IsNotFound()) {
        deleted_count++;
      }
    }
  }
  ASSERT_GT(deleted_count, 0);
  begin_string = Key(5000);
  end_string = Key(6000);
  Slice begin1(begin_string);
  Slice end1(end_string);
  // Try deleting files in range which contain no keys
  ASSERT_OK(
      DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin1, &end1));

  // Push data from level 0 to level 1 to force all data to be deleted
  // Note that we don't delete level 0 files
  compact_options.change_level = true;
  compact_options.target_level = 1;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_OK(
      DeleteFilesInRange(db_, db_->DefaultColumnFamily(), nullptr, nullptr));

  int32_t deleted_count2 = 0;
  for (int32_t i = 0; i < 4300; i++) {
    ReadOptions roptions;
    std::string result;
    ASSERT_TRUE(db_->Get(roptions, Key(i), &result).IsNotFound());
    deleted_count2++;
  }
  ASSERT_GT(deleted_count2, deleted_count);
  const size_t new_num_files = CountFiles();
  ASSERT_GT(old_num_files, new_num_files);
}

TEST_F(DBCompactionTest, DeleteFilesInRanges) {
  Options options = CurrentOptions();
  options.write_buffer_size = 10 * 1024 * 1024;
  options.max_bytes_for_level_multiplier = 2;
  options.num_levels = 4;
  options.max_background_compactions = 3;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);
  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::map<int32_t, std::string> values;

  // file [0 => 100), [100 => 200), ... [900, 1000)
  for (auto i = 0; i < 10; i++) {
    for (auto j = 0; j < 100; j++) {
      auto k = i * 100 + j;
      values[k] = rnd.RandomString(value_size);
      ASSERT_OK(Put(Key(k), values[k]));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_EQ("10", FilesPerLevel(0));
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 2;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,0,10", FilesPerLevel(0));

  // file [0 => 100), [200 => 300), ... [800, 900)
  for (auto i = 0; i < 10; i += 2) {
    for (auto j = 0; j < 100; j++) {
      auto k = i * 100 + j;
      ASSERT_OK(Put(Key(k), values[k]));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_EQ("5,0,10", FilesPerLevel(0));
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
  ASSERT_EQ("0,5,10", FilesPerLevel(0));

  // Delete files in range [0, 299] (inclusive)
  {
    auto begin_str1 = Key(0), end_str1 = Key(100);
    auto begin_str2 = Key(100), end_str2 = Key(200);
    auto begin_str3 = Key(200), end_str3 = Key(299);
    Slice begin1(begin_str1), end1(end_str1);
    Slice begin2(begin_str2), end2(end_str2);
    Slice begin3(begin_str3), end3(end_str3);
    std::vector<RangePtr> ranges;
    ranges.push_back(RangePtr(&begin1, &end1));
    ranges.push_back(RangePtr(&begin2, &end2));
    ranges.push_back(RangePtr(&begin3, &end3));
    ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
                                  ranges.data(), ranges.size()));
    ASSERT_EQ("0,3,7", FilesPerLevel(0));

    // Keys [0, 300) should not exist.
    for (auto i = 0; i < 300; i++) {
      ReadOptions ropts;
      std::string result;
      auto s = db_->Get(ropts, Key(i), &result);
      ASSERT_TRUE(s.IsNotFound());
    }
    for (auto i = 300; i < 1000; i++) {
      ASSERT_EQ(Get(Key(i)), values[i]);
    }
  }

  // Delete files in range [600, 999) (exclusive)
  {
    auto begin_str1 = Key(600), end_str1 = Key(800);
    auto begin_str2 = Key(700), end_str2 = Key(900);
    auto begin_str3 = Key(800), end_str3 = Key(999);
    Slice begin1(begin_str1), end1(end_str1);
    Slice begin2(begin_str2), end2(end_str2);
    Slice begin3(begin_str3), end3(end_str3);
    std::vector<RangePtr> ranges;
    ranges.push_back(RangePtr(&begin1, &end1));
    ranges.push_back(RangePtr(&begin2, &end2));
    ranges.push_back(RangePtr(&begin3, &end3));
    ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
                                  ranges.data(), ranges.size(), false));
    ASSERT_EQ("0,1,4", FilesPerLevel(0));

    // Keys [600, 900) should not exist.
    for (auto i = 600; i < 900; i++) {
      ReadOptions ropts;
      std::string result;
      auto s = db_->Get(ropts, Key(i), &result);
      ASSERT_TRUE(s.IsNotFound());
    }
    for (auto i = 300; i < 600; i++) {
      ASSERT_EQ(Get(Key(i)), values[i]);
    }
    for (auto i = 900; i < 1000; i++) {
      ASSERT_EQ(Get(Key(i)), values[i]);
    }
  }

  // Delete all files.
  {
    RangePtr range;
    ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), &range, 1));
    ASSERT_EQ("", FilesPerLevel(0));

    for (auto i = 0; i < 1000; i++) {
      ReadOptions ropts;
      std::string result;
      auto s = db_->Get(ropts, Key(i), &result);
      ASSERT_TRUE(s.IsNotFound());
    }
  }
}

TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
  // regression test for #2833: groups of files whose user-keys overlap at the
  // endpoints could be split by `DeleteFilesInRange`. This caused old data to
  // reappear, either because a new version of the key was removed, or a range
  // deletion was partially dropped. It could also cause non-overlapping
  // invariant to be violated if the files dropped by DeleteFilesInRange were
  // a subset of files that a range deletion spans.
  const int kNumL0Files = 2;
  const int kValSize = 8 << 10;  // 8KB
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.target_file_size_base = 1 << 10;  // 1KB
  DestroyAndReopen(options);

  // The snapshot prevents key 1 from having its old version dropped. The low
  // `target_file_size_base` ensures two keys will be in each output file.
  const Snapshot* snapshot = nullptr;
  Random rnd(301);
  // The value indicates which flush the key belonged to, which is enough
  // for us to determine the keys' relative ages. After L0 flushes finish,
  // files look like:
  //
  // File 0: 0 -> vals[0], 1 -> vals[0]
  // File 1:               1 -> vals[1], 2 -> vals[1]
  //
  // Then L0->L1 compaction happens, which outputs keys as follows:
  //
  // File 0: 0 -> vals[0], 1 -> vals[1]
  // File 1:               1 -> vals[0], 2 -> vals[1]
  //
  // DeleteFilesInRange shouldn't be allowed to drop just file 0, as that
  // would cause `1 -> vals[0]` (an older key) to reappear.
  std::string vals[kNumL0Files];
  for (int i = 0; i < kNumL0Files; ++i) {
    vals[i] = rnd.RandomString(kValSize);
    ASSERT_OK(Put(Key(i), vals[i]));
    ASSERT_OK(Put(Key(i + 1), vals[i]));
    ASSERT_OK(Flush());
    if (i == 0) {
      snapshot = db_->GetSnapshot();
    }
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Verify `DeleteFilesInRange` can't drop only file 0 which would cause
  // "1 -> vals[0]" to reappear.
  std::string begin_str = Key(0), end_str = Key(1);
  Slice begin = begin_str, end = end_str;
  ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));
  ASSERT_EQ(vals[1], Get(Key(1)));

  db_->ReleaseSnapshot(snapshot);
}

TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.write_buffer_size = 100000000;
  options.max_subcompactions = max_subcompactions_;
  DestroyAndReopen(options);

  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::vector<std::string> values;
  // File with keys [ 0 => 99 ]
  for (int i = 0; i < 100; i++) {
    values.push_back(rnd.RandomString(value_size));
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  ASSERT_EQ("1", FilesPerLevel(0));
  // Compaction will do L0=>L1 (trivial move) then move L1 files to L3
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 3;
  compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  // File with keys [ 100 => 199 ]
  for (int i = 100; i < 200; i++) {
    values.push_back(rnd.RandomString(value_size));
    ASSERT_OK(Put(Key(i), values[i]));
  }
  ASSERT_OK(Flush());

  ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = exclusive_manual_compaction_;
  // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_EQ("0,0,0,2", FilesPerLevel(0));
  ASSERT_EQ(trivial_move, 4);
  ASSERT_EQ(non_trivial_move, 0);

  for (int i = 0; i < 200; i++) {
    ASSERT_EQ(Get(Key(i)), values[i]);
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionTestWithParam, LevelCompactionThirdPath) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_, 500 * 1024);
  options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024);
  options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024);
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);

  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are not going to second path.
  // After that, (100K, 200K)
  for (int num = 0; num < 3; num++) {
    GenerateNewFile(&rnd, &key_idx);
  }

  // Another 110KB triggers a compaction to 400K file to fill up first path
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(3, GetSstFileCount(options.db_paths[1].path));

  // (1, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4", FilesPerLevel(0));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 1)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,1", FilesPerLevel(0));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 2)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,2", FilesPerLevel(0));
  ASSERT_EQ(2, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 3)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,3", FilesPerLevel(0));
  ASSERT_EQ(3, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,4", FilesPerLevel(0));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 5)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,5", FilesPerLevel(0));
  ASSERT_EQ(5, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 6)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,6", FilesPerLevel(0));
  ASSERT_EQ(6, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 7)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,7", FilesPerLevel(0));
  ASSERT_EQ(7, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  // (1, 4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,8", FilesPerLevel(0));
  ASSERT_EQ(8, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(4, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Reopen(options);

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Destroy(options);
}

TEST_P(DBCompactionTestWithParam, LevelCompactionPathUse) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_, 500 * 1024);
  options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024);
  options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024);
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);

  Random rnd(301);
  int key_idx = 0;

  // Always gets compacted into 1 Level1 file,
  // 0/1 Level 0 file
  for (int num = 0; num < 3; num++) {
    key_idx = 0;
    GenerateNewFile(&rnd, &key_idx);
  }

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1", FilesPerLevel(0));
  ASSERT_EQ(0, GetSstFileCount(options.db_paths[2].path));
  ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
  ASSERT_EQ(1, GetSstFileCount(dbname_));

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Reopen(options);

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Destroy(options);
}

TEST_P(DBCompactionTestWithParam, LevelCompactionCFPathUse) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_, 500 * 1024);
  options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024);
  options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024);
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  options.max_subcompactions = max_subcompactions_;

  std::vector<Options> option_vector;
  option_vector.emplace_back(options);
  ColumnFamilyOptions cf_opt1(options), cf_opt2(options);
  // Configure CF1 specific paths.
  cf_opt1.cf_paths.emplace_back(dbname_ + "cf1", 500 * 1024);
  cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_2", 4 * 1024 * 1024);
  cf_opt1.cf_paths.emplace_back(dbname_ + "cf1_3", 1024 * 1024 * 1024);
  option_vector.emplace_back(DBOptions(options), cf_opt1);
  CreateColumnFamilies({"one"}, option_vector[1]);

  // Configure CF2 specific paths.
  cf_opt2.cf_paths.emplace_back(dbname_ + "cf2", 500 * 1024);
  cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_2", 4 * 1024 * 1024);
  cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_3", 1024 * 1024 * 1024);
  option_vector.emplace_back(DBOptions(options), cf_opt2);
  CreateColumnFamilies({"two"}, option_vector[2]);

  ReopenWithColumnFamilies({"default", "one", "two"}, option_vector);

  Random rnd(301);
  int key_idx = 0;
  int key_idx1 = 0;
  int key_idx2 = 0;

  auto generate_file = [&]() {
    GenerateNewFile(0, &rnd, &key_idx);
    GenerateNewFile(1, &rnd, &key_idx1);
    GenerateNewFile(2, &rnd, &key_idx2);
  };

  auto check_sstfilecount = [&](int path_id, int expected) {
    ASSERT_EQ(expected, GetSstFileCount(options.db_paths[path_id].path));
    ASSERT_EQ(expected, GetSstFileCount(cf_opt1.cf_paths[path_id].path));
    ASSERT_EQ(expected, GetSstFileCount(cf_opt2.cf_paths[path_id].path));
  };

  auto check_filesperlevel = [&](const std::string& expected) {
    ASSERT_EQ(expected, FilesPerLevel(0));
    ASSERT_EQ(expected, FilesPerLevel(1));
    ASSERT_EQ(expected, FilesPerLevel(2));
  };

  auto check_getvalues = [&]() {
    for (int i = 0; i < key_idx; i++) {
      auto v = Get(0, Key(i));
      ASSERT_NE(v, "NOT_FOUND");
      ASSERT_TRUE(v.size() == 1 || v.size() == 990);
    }

    for (int i = 0; i < key_idx1; i++) {
      auto v = Get(1, Key(i));
      ASSERT_NE(v, "NOT_FOUND");
      ASSERT_TRUE(v.size() == 1 || v.size() == 990);
    }

    for (int i = 0; i < key_idx2; i++) {
      auto v = Get(2, Key(i));
      ASSERT_NE(v, "NOT_FOUND");
      ASSERT_TRUE(v.size() == 1 || v.size() == 990);
    }
  };

  // Check that default column family uses db_paths.
  // And Column family "one" uses cf_paths.

  // The compaction in level0 outputs the sst files in level1.
  // The first path cannot hold level1's data(400KB+400KB > 500KB),
  // so every compaction move a sst file to second path. Please
  // refer to LevelCompactionBuilder::GetPathId.
  for (int num = 0; num < 3; num++) {
    generate_file();
  }
  check_sstfilecount(0, 1);
  check_sstfilecount(1, 2);

  generate_file();
  check_sstfilecount(1, 3);

  // (1, 4)
  generate_file();
  check_filesperlevel("1,4");
  check_sstfilecount(1, 4);
  check_sstfilecount(0, 1);

  // (1, 4, 1)
  generate_file();
  check_filesperlevel("1,4,1");
  check_sstfilecount(2, 1);
  check_sstfilecount(1, 4);
  check_sstfilecount(0, 1);

  // (1, 4, 2)
  generate_file();
  check_filesperlevel("1,4,2");
  check_sstfilecount(2, 2);
  check_sstfilecount(1, 4);
  check_sstfilecount(0, 1);

  check_getvalues();

  {  // Also verify GetLiveFilesStorageInfo with db_paths / cf_paths
    std::vector<LiveFileStorageInfo> new_infos;
    LiveFilesStorageInfoOptions lfsio;
    lfsio.wal_size_for_flush = UINT64_MAX;  // no flush
    ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsio, &new_infos));
    std::unordered_map<std::string, int> live_sst_by_dir;
    for (auto& info : new_infos) {
      if (info.file_type == kTableFile) {
        live_sst_by_dir[info.directory]++;
        // Verify file on disk (no directory confusion)
        uint64_t size;
        ASSERT_OK(env_->GetFileSize(
            info.directory + "/" + info.relative_filename, &size));
        ASSERT_EQ(info.size, size);
      }
    }
    ASSERT_EQ(3U * 3U, live_sst_by_dir.size());
    for (auto& paths : {options.db_paths, cf_opt1.cf_paths, cf_opt2.cf_paths}) {
      ASSERT_EQ(1, live_sst_by_dir[paths[0].path]);
      ASSERT_EQ(4, live_sst_by_dir[paths[1].path]);
      ASSERT_EQ(2, live_sst_by_dir[paths[2].path]);
    }
  }

  ReopenWithColumnFamilies({"default", "one", "two"}, option_vector);

  check_getvalues();

  Destroy(options, true);
}

TEST_P(DBCompactionTestWithParam, ConvertCompactionStyle) {
  Random rnd(301);
  int max_key_level_insert = 200;
  int max_key_universal_insert = 600;

  // Stage 1: generate a db with level compaction
  Options options = CurrentOptions();
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.num_levels = 4;
  options.level0_file_num_compaction_trigger = 3;
  options.max_bytes_for_level_base = 500 << 10;  // 500KB
  options.max_bytes_for_level_multiplier = 1;
  options.target_file_size_base = 200 << 10;  // 200KB
  options.target_file_size_multiplier = 1;
  options.max_subcompactions = max_subcompactions_;
  CreateAndReopenWithCF({"pikachu"}, options);

  for (int i = 0; i <= max_key_level_insert; i++) {
    // each value is 10K
    ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000)));
  }
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_GT(TotalTableFiles(1, 4), 1);
  int non_level0_num_files = 0;
  for (int i = 1; i < options.num_levels; i++) {
    non_level0_num_files += NumTableFilesAtLevel(i, 1);
  }
  ASSERT_GT(non_level0_num_files, 0);

  // Stage 2: reopen with universal compaction - should fail
  options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = 1;
  options = CurrentOptions(options);
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_TRUE(s.IsInvalidArgument());

  // Stage 3: compact into a single file and move the file to level 0
  options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.target_file_size_base = INT_MAX;
  options.target_file_size_multiplier = 1;
  options.max_bytes_for_level_base = INT_MAX;
  options.max_bytes_for_level_multiplier = 1;
  options.num_levels = 4;
  options = CurrentOptions(options);
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 0;
  // cannot use kForceOptimized here because the compaction here is expected
  // to generate one output file
  compact_options.bottommost_level_compaction =
      BottommostLevelCompaction::kForce;
  compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
  ASSERT_OK(
      dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr));

  // Only 1 file in L0
  ASSERT_EQ("1", FilesPerLevel(1));

  // Stage 4: re-open in universal compaction style and do some db operations
  options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.num_levels = 4;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 3;
  options = CurrentOptions(options);
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  options.num_levels = 1;
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  for (int i = max_key_level_insert / 2; i <= max_key_universal_insert; i++) {
    ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000)));
  }
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  for (int i = 1; i < options.num_levels; i++) {
    ASSERT_EQ(NumTableFilesAtLevel(i, 1), 0);
  }

  // verify keys inserted in both level compaction style and universal
  // compaction style
  std::string keys_in_db;
  Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]);
  ASSERT_OK(iter->status());
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    keys_in_db.append(iter->key().ToString());
    keys_in_db.push_back(',');
  }
  delete iter;

  std::string expected_keys;
  for (int i = 0; i <= max_key_universal_insert; i++) {
    expected_keys.append(Key(i));
    expected_keys.push_back(',');
  }

  ASSERT_EQ(keys_in_db, expected_keys);
}

TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_a) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "b", "v"));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Delete(1, "b"));
    ASSERT_OK(Delete(1, "a"));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Delete(1, "a"));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "a", "v"));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_EQ("(a->v)", Contents(1));
    env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
    ASSERT_EQ("(a->v)", Contents(1));
  } while (ChangeCompactOptions());
}

TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_b) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "", ""));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Delete(1, "e"));
    ASSERT_OK(Put(1, "", ""));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "c", "cv"));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "", ""));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "", ""));
    env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "d", "dv"));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "", ""));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_OK(Delete(1, "d"));
    ASSERT_OK(Delete(1, "b"));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_EQ("(->)(c->cv)", Contents(1));
    env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
    ASSERT_EQ("(->)(c->cv)", Contents(1));
  } while (ChangeCompactOptions());
}

TEST_F(DBCompactionTest, ManualAutoRace) {
  CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BGWorkCompaction", "DBCompactionTest::ManualAutoRace:1"},
       {"DBImpl::RunManualCompaction:WaitScheduled",
        "BackgroundCallCompaction:0"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put(1, "foo", ""));
  ASSERT_OK(Put(1, "bar", ""));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "foo", ""));
  ASSERT_OK(Put(1, "bar", ""));
  // Generate four files in CF 0, which should trigger an auto compaction
  ASSERT_OK(Put("foo", ""));
  ASSERT_OK(Put("bar", ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", ""));
  ASSERT_OK(Put("bar", ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", ""));
  ASSERT_OK(Put("bar", ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo", ""));
  ASSERT_OK(Put("bar", ""));
  ASSERT_OK(Flush());

  // The auto compaction is scheduled but waited until here
  TEST_SYNC_POINT("DBCompactionTest::ManualAutoRace:1");
  // The auto compaction will wait until the manual compaction is registerd
  // before processing so that it will be cancelled.
  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = true;
  ASSERT_OK(dbfull()->CompactRange(cro, handles_[1], nullptr, nullptr));
  ASSERT_EQ("0,1", FilesPerLevel(1));

  // Eventually the cancelled compaction will be rescheduled and executed.
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,1", FilesPerLevel(0));
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionTestWithParam, ManualCompaction) {
  Options options = CurrentOptions();
  options.max_subcompactions = max_subcompactions_;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  CreateAndReopenWithCF({"pikachu"}, options);

  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
    MakeTables(3, "p", "q", 1);
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls before files
    Compact(1, "", "c");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range falls after files
    Compact(1, "r", "z");
    ASSERT_EQ("1,1,1", FilesPerLevel(1));

    // Compaction range overlaps files
    Compact(1, "p", "q");
    ASSERT_EQ("0,0,1", FilesPerLevel(1));

    // Populate a different range
    MakeTables(3, "c", "e", 1);
    ASSERT_EQ("1,1,2", FilesPerLevel(1));

    // Compact just the new range
    Compact(1, "b", "f");
    ASSERT_EQ("0,0,2", FilesPerLevel(1));

    // Compact all
    MakeTables(1, "a", "z", 1);
    ASSERT_EQ("1,0,2", FilesPerLevel(1));

    uint64_t prev_block_cache_add =
        options.statistics->getTickerCount(BLOCK_CACHE_ADD);
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = exclusive_manual_compaction_;
    ASSERT_OK(db_->CompactRange(cro, handles_[1], nullptr, nullptr));
    // Verify manual compaction doesn't fill block cache
    ASSERT_EQ(prev_block_cache_add,
              options.statistics->getTickerCount(BLOCK_CACHE_ADD));

    ASSERT_EQ("0,0,1", FilesPerLevel(1));

    if (iter == 0) {
      options = CurrentOptions();
      options.num_levels = 3;
      options.create_if_missing = true;
      options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
      DestroyAndReopen(options);
      CreateAndReopenWithCF({"pikachu"}, options);
    }
  }
}

TEST_P(DBCompactionTestWithParam, ManualLevelCompactionOutputPathId) {
  Options options = CurrentOptions();
  options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760);
  options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760);
  options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760);
  options.max_subcompactions = max_subcompactions_;
  CreateAndReopenWithCF({"pikachu"}, options);

  // iter - 0 with 7 levels
  // iter - 1 with 3 levels
  for (int iter = 0; iter < 2; ++iter) {
    for (int i = 0; i < 3; ++i) {
      ASSERT_OK(Put(1, "p", "begin"));
      ASSERT_OK(Put(1, "q", "end"));
      ASSERT_OK(Flush(1));
    }
    ASSERT_EQ("3", FilesPerLevel(1));
    ASSERT_EQ(3, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    // Compaction range falls before files
    Compact(1, "", "c");
    ASSERT_EQ("3", FilesPerLevel(1));

    // Compaction range falls after files
    Compact(1, "r", "z");
    ASSERT_EQ("3", FilesPerLevel(1));

    // Compaction range overlaps files
    Compact(1, "p", "q", 1);
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ("0,1", FilesPerLevel(1));
    ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    // Populate a different range
    for (int i = 0; i < 3; ++i) {
      ASSERT_OK(Put(1, "c", "begin"));
      ASSERT_OK(Put(1, "e", "end"));
      ASSERT_OK(Flush(1));
    }
    ASSERT_EQ("3,1", FilesPerLevel(1));

    // Compact just the new range
    Compact(1, "b", "f", 1);
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ("0,2", FilesPerLevel(1));
    ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    // Compact all
    ASSERT_OK(Put(1, "a", "begin"));
    ASSERT_OK(Put(1, "z", "end"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("1,2", FilesPerLevel(1));
    ASSERT_EQ(2, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(1, GetSstFileCount(options.db_paths[0].path));
    CompactRangeOptions compact_options;
    compact_options.target_path_id = 1;
    compact_options.exclusive_manual_compaction = exclusive_manual_compaction_;
    ASSERT_OK(
        db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ("0,1", FilesPerLevel(1));
    ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
    ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
    ASSERT_EQ(0, GetSstFileCount(dbname_));

    if (iter == 0) {
      DestroyAndReopen(options);
      options = CurrentOptions();
      options.db_paths.emplace_back(dbname_ + "_2", 2 * 10485760);
      options.db_paths.emplace_back(dbname_ + "_3", 100 * 10485760);
      options.db_paths.emplace_back(dbname_ + "_4", 120 * 10485760);
      options.max_background_flushes = 1;
      options.num_levels = 3;
      options.create_if_missing = true;
      CreateAndReopenWithCF({"pikachu"}, options);
    }
  }
}

TEST_F(DBCompactionTest, FilesDeletedAfterCompaction) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v2"));
    Compact(1, "a", "z");
    const size_t num_files = CountLiveFiles();
    for (int i = 0; i < 10; i++) {
      ASSERT_OK(Put(1, "foo", "v2"));
      Compact(1, "a", "z");
    }
    ASSERT_EQ(CountLiveFiles(), num_files);
  } while (ChangeCompactOptions());
}

// Check level comapction with compact files
TEST_P(DBCompactionTestWithParam, DISABLED_CompactFilesOnLevelCompaction) {
  const int kTestKeySize = 16;
  const int kTestValueSize = 984;
  const int kEntrySize = kTestKeySize + kTestValueSize;
  const int kEntriesPerBuffer = 100;
  Options options;
  options.create_if_missing = true;
  options.write_buffer_size = kEntrySize * kEntriesPerBuffer;
  options.compaction_style = kCompactionStyleLevel;
  options.target_file_size_base = options.write_buffer_size;
  options.max_bytes_for_level_base = options.target_file_size_base * 2;
  options.level0_stop_writes_trigger = 2;
  options.max_bytes_for_level_multiplier = 2;
  options.compression = kNoCompression;
  options.max_subcompactions = max_subcompactions_;
  options = CurrentOptions(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  Random rnd(301);
  for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
    ASSERT_OK(Put(1, std::to_string(key), rnd.RandomString(kTestValueSize)));
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ColumnFamilyMetaData cf_meta;
  dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
  int output_level = static_cast<int>(cf_meta.levels.size()) - 1;
  for (int file_picked = 5; file_picked > 0; --file_picked) {
    std::set<std::string> overlapping_file_names;
    std::vector<std::string> compaction_input_file_names;
    for (int f = 0; f < file_picked; ++f) {
      int level = 0;
      auto file_meta = PickFileRandomly(cf_meta, &rnd, &level);
      compaction_input_file_names.push_back(file_meta->name);
      GetOverlappingFileNumbersForLevelCompaction(
          cf_meta, options.comparator, level, output_level, file_meta,
          &overlapping_file_names);
    }

    ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), handles_[1],
                                     compaction_input_file_names,
                                     output_level));

    // Make sure all overlapping files do not exist after compaction
    dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta);
    VerifyCompactionResult(cf_meta, overlapping_file_names);
  }

  // make sure all key-values are still there.
  for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
    ASSERT_NE(Get(1, std::to_string(key)), "NOT_FOUND");
  }
}

TEST_P(DBCompactionTestWithParam, PartialCompactionFailure) {
  Options options;
  const int kKeySize = 16;
  const int kKvSize = 1000;
  const int kKeysPerBuffer = 100;
  const int kNumL1Files = 5;
  options.create_if_missing = true;
  options.write_buffer_size = kKeysPerBuffer * kKvSize;
  options.max_write_buffer_number = 2;
  options.target_file_size_base =
      options.write_buffer_size * (options.max_write_buffer_number - 1);
  options.level0_file_num_compaction_trigger = kNumL1Files;
  options.max_bytes_for_level_base =
      options.level0_file_num_compaction_trigger *
      options.target_file_size_base;
  options.max_bytes_for_level_multiplier = 2;
  options.compression = kNoCompression;
  options.max_subcompactions = max_subcompactions_;

  env_->SetBackgroundThreads(1, Env::HIGH);
  env_->SetBackgroundThreads(1, Env::LOW);
  // stop the compaction thread until we simulate the file creation failure.
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  options.env = env_;

  DestroyAndReopen(options);

  const int kNumInsertedKeys = options.level0_file_num_compaction_trigger *
                               (options.max_write_buffer_number - 1) *
                               kKeysPerBuffer;

  Random rnd(301);
  std::vector<std::string> keys;
  std::vector<std::string> values;
  for (int k = 0; k < kNumInsertedKeys; ++k) {
    keys.emplace_back(rnd.RandomString(kKeySize));
    values.emplace_back(rnd.RandomString(kKvSize - kKeySize));
    ASSERT_OK(Put(Slice(keys[k]), Slice(values[k])));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }

  ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
  // Make sure the number of L0 files can trigger compaction.
  ASSERT_GE(NumTableFilesAtLevel(0),
            options.level0_file_num_compaction_trigger);

  auto previous_num_level0_files = NumTableFilesAtLevel(0);

  // Fail the first file creation.
  env_->non_writable_count_ = 1;
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();

  // Expect compaction to fail here as one file will fail its
  // creation.
  ASSERT_TRUE(!dbfull()->TEST_WaitForCompact().ok());

  // Verify L0 -> L1 compaction does fail.
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);

  // Verify all L0 files are still there.
  ASSERT_EQ(NumTableFilesAtLevel(0), previous_num_level0_files);

  // All key-values must exist after compaction fails.
  for (int k = 0; k < kNumInsertedKeys; ++k) {
    ASSERT_EQ(values[k], Get(keys[k]));
  }

  env_->non_writable_count_ = 0;

  // Make sure RocksDB will not get into corrupted state.
  Reopen(options);

  // Verify again after reopen.
  for (int k = 0; k < kNumInsertedKeys; ++k) {
    ASSERT_EQ(values[k], Get(keys[k]));
  }
}

TEST_P(DBCompactionTestWithParam, DeleteMovedFileAfterCompaction) {
  // iter 1 -- delete_obsolete_files_period_micros == 0
  for (int iter = 0; iter < 2; ++iter) {
    // This test triggers move compaction and verifies that the file is not
    // deleted when it's part of move compaction
    Options options = CurrentOptions();
    options.env = env_;
    if (iter == 1) {
      options.delete_obsolete_files_period_micros = 0;
    }
    options.create_if_missing = true;
    options.level0_file_num_compaction_trigger =
        2;  // trigger compaction when we have 2 files
    OnFileDeletionListener* listener = new OnFileDeletionListener();
    options.listeners.emplace_back(listener);
    options.max_subcompactions = max_subcompactions_;
    DestroyAndReopen(options);

    Random rnd(301);
    // Create two 1MB sst files
    for (int i = 0; i < 2; ++i) {
      // Create 1MB sst file
      for (int j = 0; j < 100; ++j) {
        ASSERT_OK(Put(Key(i * 50 + j), rnd.RandomString(10 * 1024)));
      }
      ASSERT_OK(Flush());
    }
    // this should execute L0->L1
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ("0,1", FilesPerLevel(0));

    // block compactions
    test::SleepingBackgroundTask sleeping_task;
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
                   Env::Priority::LOW);

    options.max_bytes_for_level_base = 1024 * 1024;  // 1 MB
    Reopen(options);
    std::unique_ptr<Iterator> iterator(db_->NewIterator(ReadOptions()));
    ASSERT_EQ("0,1", FilesPerLevel(0));
    // let compactions go
    sleeping_task.WakeUp();
    sleeping_task.WaitUntilDone();

    // this should execute L1->L2 (move)
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ("0,0,1", FilesPerLevel(0));

    std::vector<LiveFileMetaData> metadata;
    db_->GetLiveFilesMetaData(&metadata);
    ASSERT_EQ(metadata.size(), 1U);
    auto moved_file_name = metadata[0].name;

    // Create two more 1MB sst files
    for (int i = 0; i < 2; ++i) {
      // Create 1MB sst file
      for (int j = 0; j < 100; ++j) {
        ASSERT_OK(Put(Key(i * 50 + j + 100), rnd.RandomString(10 * 1024)));
      }
      ASSERT_OK(Flush());
    }
    // this should execute both L0->L1 and L1->L2 (merge with previous file)
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ("0,0,2", FilesPerLevel(0));

    // iterator is holding the file
    ASSERT_OK(env_->FileExists(dbname_ + moved_file_name));

    listener->SetExpectedFileName(dbname_ + moved_file_name);
    ASSERT_OK(iterator->status());
    iterator.reset();

    // this file should have been compacted away
    ASSERT_NOK(env_->FileExists(dbname_ + moved_file_name));
    listener->VerifyMatchedCount(1);
  }
}

TEST_P(DBCompactionTestWithParam, CompressLevelCompaction) {
  if (!Zlib_Supported()) {
    return;
  }
  Options options = CurrentOptions();
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  options.max_subcompactions = max_subcompactions_;
  // First two levels have no compression, so that a trivial move between
  // them will be allowed. Level 2 has Zlib compression so that a trivial
  // move to level 3 will not be allowed
  options.compression_per_level = {kNoCompression, kNoCompression,
                                   kZlibCompression};
  int matches = 0, didnt_match = 0, trivial_move = 0, non_trivial = 0;

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Compaction::InputCompressionMatchesOutput:Matches",
      [&](void* /*arg*/) { matches++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "Compaction::InputCompressionMatchesOutput:DidntMatch",
      [&](void* /*arg*/) { didnt_match++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Reopen(options);

  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are going to level 0
  // After that, (100K, 200K)
  for (int num = 0; num < 3; num++) {
    GenerateNewFile(&rnd, &key_idx);
  }

  // Another 110KB triggers a compaction to 400K file to fill up level 0
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ(4, GetSstFileCount(dbname_));

  // (1, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4", FilesPerLevel(0));

  // (1, 4, 1)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,1", FilesPerLevel(0));

  // (1, 4, 2)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,2", FilesPerLevel(0));

  // (1, 4, 3)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,3", FilesPerLevel(0));

  // (1, 4, 4)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,4", FilesPerLevel(0));

  // (1, 4, 5)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,5", FilesPerLevel(0));

  // (1, 4, 6)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,6", FilesPerLevel(0));

  // (1, 4, 7)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,7", FilesPerLevel(0));

  // (1, 4, 8)
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,4,8", FilesPerLevel(0));

  ASSERT_EQ(matches, 12);
  // Currently, the test relies on the number of calls to
  // InputCompressionMatchesOutput() per compaction.
  const int kCallsToInputCompressionMatch = 2;
  ASSERT_EQ(didnt_match, 8 * kCallsToInputCompressionMatch);
  ASSERT_EQ(trivial_move, 12);
  ASSERT_EQ(non_trivial, 8);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Reopen(options);

  for (int i = 0; i < key_idx; i++) {
    auto v = Get(Key(i));
    ASSERT_NE(v, "NOT_FOUND");
    ASSERT_TRUE(v.size() == 1 || v.size() == 990);
  }

  Destroy(options);
}

TEST_F(DBCompactionTest, SanitizeCompactionOptionsTest) {
  Options options = CurrentOptions();
  options.max_background_compactions = 5;
  options.soft_pending_compaction_bytes_limit = 0;
  options.hard_pending_compaction_bytes_limit = 100;
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_EQ(100, db_->GetOptions().soft_pending_compaction_bytes_limit);

  options.max_background_compactions = 3;
  options.soft_pending_compaction_bytes_limit = 200;
  options.hard_pending_compaction_bytes_limit = 150;
  DestroyAndReopen(options);
  ASSERT_EQ(150, db_->GetOptions().soft_pending_compaction_bytes_limit);
}

// This tests for a bug that could cause two level0 compactions running
// concurrently
// TODO(aekmekji): Make sure that the reason this fails when run with
// max_subcompactions > 1 is not a correctness issue but just inherent to
// running parallel L0-L1 compactions
TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 4;
  options.num_levels = 4;
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 450 << 10;
  options.target_file_size_base = 98 << 10;
  options.max_write_buffer_number = 2;
  options.max_background_compactions = 2;

  DestroyAndReopen(options);

  // fill up the DB
  Random rnd(301);
  for (int num = 0; num < 10; num++) {
    GenerateNewRandomFile(&rnd);
  }
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"CompactionJob::Run():Start",
        "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"},
       {"DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2",
        "CompactionJob::Run():End"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // trigger L0 compaction
  for (int num = 0; num < options.level0_file_num_compaction_trigger + 1;
       num++) {
    GenerateNewRandomFile(&rnd, /* nowait */ true);
    ASSERT_OK(Flush());
  }

  TEST_SYNC_POINT(
      "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1");

  GenerateNewRandomFile(&rnd, /* nowait */ true);
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
  for (int num = 0; num < options.level0_file_num_compaction_trigger + 1;
       num++) {
    GenerateNewRandomFile(&rnd, /* nowait */ true);
    ASSERT_OK(Flush());
  }

  TEST_SYNC_POINT(
      "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2");
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

INSTANTIATE_TEST_CASE_P(DBCompactionWaitForCompactTest,
                        DBCompactionWaitForCompactTest,
                        ::testing::Values(std::make_tuple(false, false),
                                          std::make_tuple(false, true),
                                          std::make_tuple(true, false),
                                          std::make_tuple(true, true)));

TEST_P(DBCompactionWaitForCompactTest,
       WaitForCompactWaitsOnCompactionToFinish) {
  // Triggers a compaction. Before the compaction finishes, test
  // closes the DB Upon reopen, wait for the compaction to finish and checks for
  // the number of compaction finished

  int compaction_finished = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():EndStatusSet", [&](void* arg) {
        auto status = static_cast<Status*>(arg);
        if (status->ok()) {
          compaction_finished++;
        }
      });
  // To make sure there's a flush/compaction debt
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::MaybeScheduleFlushOrCompaction:BeforeSchedule", [&](void* arg) {
        auto unscheduled_flushes = *static_cast<int*>(arg);
        ASSERT_GT(unscheduled_flushes, 0);
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBCompactionTest::WaitForCompactWaitsOnCompactionToFinish",
        "DBImpl::MaybeScheduleFlushOrCompaction:BeforeSchedule"}});

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // create compaction debt by adding one more L0 file then closing
  Random rnd(123);
  GenerateNewRandomFile(&rnd, /* nowait */ true);
  ASSERT_EQ(0, compaction_finished);
  Close();
  TEST_SYNC_POINT("DBCompactionTest::WaitForCompactWaitsOnCompactionToFinish");
  ASSERT_EQ(0, compaction_finished);

  // Reopen the db and we expect the compaction to be triggered.
  Reopen(options_);

  // Wait for compaction to finish
  ASSERT_OK(dbfull()->WaitForCompact(wait_for_compact_options_));
  ASSERT_GT(compaction_finished, 0);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}

TEST_P(DBCompactionWaitForCompactTest, WaitForCompactAbortOnPause) {
  // Triggers a compaction. Before the compaction finishes, test
  // pauses the compaction. Calling WaitForCompact() with option
  // abort_on_pause=true should return Status::Aborted Or
  // ContinueBackgroundWork() must be called

  // Now trigger L0 compaction by adding a file
  Random rnd(123);
  GenerateNewRandomFile(&rnd, /* nowait */ true);
  ASSERT_OK(Flush());

  // Pause the background jobs.
  ASSERT_OK(dbfull()->PauseBackgroundWork());

  // If not abort_on_pause_ continue the background jobs.
  if (!abort_on_pause_) {
    ASSERT_OK(dbfull()->ContinueBackgroundWork());
  }

  Status s = dbfull()->WaitForCompact(wait_for_compact_options_);
  if (abort_on_pause_) {
    ASSERT_NOK(s);
    ASSERT_TRUE(s.IsAborted());
  } else {
    ASSERT_OK(s);
  }
}

TEST_P(DBCompactionWaitForCompactTest, WaitForCompactShutdownWhileWaiting) {
  // Triggers a compaction. Before the compaction finishes, db
  // shuts down (by calling CancelAllBackgroundWork()). Calling WaitForCompact()
  // should return Status::IsShutdownInProgress()

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      {"CompactionJob::Run():Start",
       "DBCompactionTest::WaitForCompactShutdownWhileWaiting:0"},
      {"DBImpl::WaitForCompact:StartWaiting",
       "DBCompactionTest::WaitForCompactShutdownWhileWaiting:1"},
      {"DBImpl::~DBImpl:WaitJob", "CompactionJob::Run():End"},
  });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Now trigger L0 compaction by adding a file
  Random rnd(123);
  GenerateNewRandomFile(&rnd, /* nowait */ true);
  ASSERT_OK(Flush());
  // Wait for compaction to start
  TEST_SYNC_POINT("DBCompactionTest::WaitForCompactShutdownWhileWaiting:0");

  // Wait for Compaction in another thread
  auto waiting_for_compaction_thread = port::Thread([this]() {
    Status s = dbfull()->WaitForCompact(wait_for_compact_options_);
    ASSERT_NOK(s);
    ASSERT_TRUE(s.IsShutdownInProgress());
  });
  TEST_SYNC_POINT("DBCompactionTest::WaitForCompactShutdownWhileWaiting:1");
  // Shutdown after wait started, but before the compaction finishes
  auto closing_thread = port::Thread([this]() { ASSERT_OK(db_->Close()); });

  waiting_for_compaction_thread.join();
  closing_thread.join();

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionWaitForCompactTest, WaitForCompactWithOptionToFlush) {
  // After creating enough L0 files that one more file will trigger the
  // compaction, write some data in memtable. Calls WaitForCompact with option
  // to flush. This will flush the memtable to a new L0 file which will trigger
  // compaction. Lastly check for expected number of files, closing + reopening
  // DB won't trigger any flush or compaction

  int compaction_finished = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:AfterCompaction",
      [&](void*) { compaction_finished++; });

  int flush_finished = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "FlushJob::End", [&](void*) { flush_finished++; });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // write to memtable (overlapping key with first L0 file), but no flush is
  // needed at this point.
  ASSERT_OK(Put(Key(0), "some random string"));
  ASSERT_EQ(0, compaction_finished);
  ASSERT_EQ(0, flush_finished);
  ASSERT_EQ("2", FilesPerLevel());

  ASSERT_OK(dbfull()->WaitForCompact(wait_for_compact_options_));
  if (flush_) {
    ASSERT_EQ("1,2", FilesPerLevel());
    ASSERT_EQ(1, compaction_finished);
    ASSERT_EQ(1, flush_finished);
  } else {
    ASSERT_EQ(0, compaction_finished);
    ASSERT_EQ(0, flush_finished);
    ASSERT_EQ("2", FilesPerLevel());
  }

  compaction_finished = 0;
  flush_finished = 0;
  Close();
  Reopen(options_);

  ASSERT_EQ(0, flush_finished);
  if (flush_) {
    // if flushed already prior to close and reopen, expect there's no
    // additional compaction needed
    ASSERT_EQ(0, compaction_finished);
  } else {
    // if not flushed prior to close and reopen, expect L0 file creation from
    // WAL when reopening which will trigger the compaction.
    ASSERT_OK(dbfull()->WaitForCompact(wait_for_compact_options_));
    ASSERT_EQ(1, compaction_finished);
  }

  ASSERT_EQ("1,2", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
}

static std::string ShortKey(int i) {
  assert(i < 10000);
  char buf[100];
  snprintf(buf, sizeof(buf), "key%04d", i);
  return std::string(buf);
}

TEST_P(DBCompactionTestWithParam, ForceBottommostLevelCompaction) {
  int32_t trivial_move = 0;
  int32_t non_trivial_move = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial",
      [&](void* /*arg*/) { non_trivial_move++; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // The key size is guaranteed to be <= 8
  class ShortKeyComparator : public Comparator {
    int Compare(const ROCKSDB_NAMESPACE::Slice& a,
                const ROCKSDB_NAMESPACE::Slice& b) const override {
      assert(a.size() <= 8);
      assert(b.size() <= 8);
      return BytewiseComparator()->Compare(a, b);
    }
    const char* Name() const override { return "ShortKeyComparator"; }
    void FindShortestSeparator(
        std::string* start,
        const ROCKSDB_NAMESPACE::Slice& limit) const override {
      return BytewiseComparator()->FindShortestSeparator(start, limit);
    }
    void FindShortSuccessor(std::string* key) const override {
      return BytewiseComparator()->FindShortSuccessor(key);
    }
  } short_key_cmp;
  Options options = CurrentOptions();
  options.target_file_size_base = 100000000;
  options.write_buffer_size = 100000000;
  options.max_subcompactions = max_subcompactions_;
  options.comparator = &short_key_cmp;
  DestroyAndReopen(options);

  int32_t value_size = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::vector<std::string> values;
  // File with keys [ 0 => 99 ]
  for (int i = 0; i < 100; i++) {
    values.push_back(rnd.RandomString(value_size));
    ASSERT_OK(Put(ShortKey(i), values[i]));
  }
  ASSERT_OK(Flush());

  ASSERT_EQ("1", FilesPerLevel(0));
  // Compaction will do L0=>L1 (trivial move) then move L1 files to L3
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 3;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
  ASSERT_EQ(trivial_move, 1);
  ASSERT_EQ(non_trivial_move, 0);

  // File with keys [ 100 => 199 ]
  for (int i = 100; i < 200; i++) {
    values.push_back(rnd.RandomString(value_size));
    ASSERT_OK(Put(ShortKey(i), values[i]));
  }
  ASSERT_OK(Flush());

  ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
  // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
  // then compacte the bottommost level L3=>L3 (non trivial move)
  compact_options = CompactRangeOptions();
  compact_options.bottommost_level_compaction =
      BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,0,0,1", FilesPerLevel(0));
  ASSERT_EQ(trivial_move, 4);
  ASSERT_EQ(non_trivial_move, 1);

  // File with keys [ 200 => 299 ]
  for (int i = 200; i < 300; i++) {
    values.push_back(rnd.RandomString(value_size));
    ASSERT_OK(Put(ShortKey(i), values[i]));
  }
  ASSERT_OK(Flush());

  ASSERT_EQ("1,0,0,1", FilesPerLevel(0));
  trivial_move = 0;
  non_trivial_move = 0;
  compact_options = CompactRangeOptions();
  compact_options.bottommost_level_compaction =
      BottommostLevelCompaction::kSkip;
  // Compaction will do L0=>L1 L1=>L2 L2=>L3 (3 trivial moves)
  // and will skip bottommost level compaction
  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
  ASSERT_EQ("0,0,0,2", FilesPerLevel(0));
  ASSERT_EQ(trivial_move, 3);
  ASSERT_EQ(non_trivial_move, 0);

  for (int i = 0; i < 300; i++) {
    ASSERT_EQ(Get(ShortKey(i)), values[i]);
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 5;
  options.max_background_compactions = 2;
  options.max_subcompactions = max_subcompactions_;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.write_buffer_size = 2 << 20;  // 2MB

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(64 << 20);  // 64MB
  table_options.cache_index_and_filter_blocks = true;
  table_options.pin_l0_filter_and_index_blocks_in_cache = true;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  DestroyAndReopen(options);

  const size_t kValueSize = 1 << 20;
  Random rnd(301);
  std::string value(rnd.RandomString(kValueSize));

  // The L0->L1 must be picked before we begin flushing files to trigger
  // intra-L0 compaction, and must not finish until after an intra-L0
  // compaction has been picked.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"LevelCompactionPicker::PickCompaction:Return",
        "DBCompactionTest::IntraL0Compaction:L0ToL1Ready"},
       {"LevelCompactionPicker::PickCompactionBySize:0",
        "CompactionJob::Run():Start"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // index:   0   1   2   3   4   5   6   7   8   9
  // size:  1MB 1MB 1MB 1MB 1MB 2MB 1MB 1MB 1MB 1MB
  // score:                     1.5 1.3 1.5 2.0 inf
  //
  // Files 0-4 will be included in an L0->L1 compaction.
  //
  // L0->L0 will be triggered since the sync points guarantee compaction to base
  // level is still blocked when files 5-9 trigger another compaction.
  //
  // Files 6-9 are the longest span of available files for which
  // work-per-deleted-file decreases (see "score" row above).
  for (int i = 0; i < 10; ++i) {
    ASSERT_OK(Put(Key(0), ""));  // prevents trivial move
    if (i == 5) {
      TEST_SYNC_POINT("DBCompactionTest::IntraL0Compaction:L0ToL1Ready");
      ASSERT_OK(Put(Key(i + 1), value + value));
    } else {
      ASSERT_OK(Put(Key(i + 1), value));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  std::vector<std::vector<FileMetaData>> level_to_files;
  dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
                                  &level_to_files);
  ASSERT_GE(level_to_files.size(), 2);  // at least L0 and L1
  // L0 has the 2MB file (not compacted) and 4MB file (output of L0->L0)
  ASSERT_EQ(2, level_to_files[0].size());
  ASSERT_GT(level_to_files[1].size(), 0);
  for (int i = 0; i < 2; ++i) {
    ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21);
  }

  // The index/filter in the file produced by intra-L0 should not be pinned.
  // That means clearing unref'd entries in block cache and re-accessing the
  // file produced by intra-L0 should bump the index block miss count.
  uint64_t prev_index_misses =
      TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
  table_options.block_cache->EraseUnRefEntries();
  ASSERT_EQ("", Get(Key(0)));
  ASSERT_EQ(prev_index_misses + 1,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
}

TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) {
  // regression test for issue #2722: L0->L0 compaction can resurrect deleted
  // keys from older L0 files if L1+ files' key-ranges do not include the key.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 5;
  options.max_background_compactions = 2;
  options.max_subcompactions = max_subcompactions_;
  DestroyAndReopen(options);

  const size_t kValueSize = 1 << 20;
  Random rnd(301);
  std::string value(rnd.RandomString(kValueSize));

  // The L0->L1 must be picked before we begin flushing files to trigger
  // intra-L0 compaction, and must not finish until after an intra-L0
  // compaction has been picked.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"LevelCompactionPicker::PickCompaction:Return",
        "DBCompactionTest::IntraL0CompactionDoesNotObsoleteDeletions:"
        "L0ToL1Ready"},
       {"LevelCompactionPicker::PickCompactionBySize:0",
        "CompactionJob::Run():Start"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // index:   0   1   2   3   4    5    6   7   8   9
  // size:  1MB 1MB 1MB 1MB 1MB  1MB  1MB 1MB 1MB 1MB
  // score:                     1.25 1.33 1.5 2.0 inf
  //
  // Files 0-4 will be included in an L0->L1 compaction.
  //
  // L0->L0 will be triggered since the sync points guarantee compaction to base
  // level is still blocked when files 5-9 trigger another compaction. All files
  // 5-9 are included in the L0->L0 due to work-per-deleted file decreasing.
  //
  // Put a key-value in files 0-4. Delete that key in files 5-9. Verify the
  // L0->L0 preserves the deletion such that the key remains deleted.
  for (int i = 0; i < 10; ++i) {
    // key 0 serves both to prevent trivial move and as the key we want to
    // verify is not resurrected by L0->L0 compaction.
    if (i < 5) {
      ASSERT_OK(Put(Key(0), ""));
    } else {
      ASSERT_OK(Delete(Key(0)));
    }
    if (i == 5) {
      TEST_SYNC_POINT(
          "DBCompactionTest::IntraL0CompactionDoesNotObsoleteDeletions:"
          "L0ToL1Ready");
    }
    ASSERT_OK(Put(Key(i + 1), value));
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  std::vector<std::vector<FileMetaData>> level_to_files;
  dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
                                  &level_to_files);
  ASSERT_GE(level_to_files.size(), 2);  // at least L0 and L1
  // L0 has a single output file from L0->L0
  ASSERT_EQ(1, level_to_files[0].size());
  ASSERT_GT(level_to_files[1].size(), 0);
  ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 22);

  ReadOptions roptions;
  std::string result;
  ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound());
}

TEST_P(DBCompactionTestWithParam, FullCompactionInBottomPriThreadPool) {
  const int kNumFilesTrigger = 3;
  Env::Default()->SetBackgroundThreads(1, Env::Priority::BOTTOM);
  for (bool use_universal_compaction : {false, true}) {
    Options options = CurrentOptions();
    if (use_universal_compaction) {
      options.compaction_style = kCompactionStyleUniversal;
    } else {
      options.compaction_style = kCompactionStyleLevel;
      options.level_compaction_dynamic_level_bytes = true;
    }
    options.num_levels = 4;
    options.write_buffer_size = 100 << 10;     // 100KB
    options.target_file_size_base = 32 << 10;  // 32KB
    options.level0_file_num_compaction_trigger = kNumFilesTrigger;
    // Trigger compaction if size amplification exceeds 110%
    options.compaction_options_universal.max_size_amplification_percent = 110;
    DestroyAndReopen(options);

    int num_bottom_pri_compactions = 0;
    SyncPoint::GetInstance()->SetCallBack(
        "DBImpl::BGWorkBottomCompaction",
        [&](void* /*arg*/) { ++num_bottom_pri_compactions; });
    SyncPoint::GetInstance()->EnableProcessing();

    Random rnd(301);
    for (int num = 0; num < kNumFilesTrigger; num++) {
      ASSERT_EQ(NumSortedRuns(), num);
      int key_idx = 0;
      GenerateNewFile(&rnd, &key_idx);
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ(1, num_bottom_pri_compactions);

    // Verify that size amplification did occur
    ASSERT_EQ(NumSortedRuns(), 1);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
  Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM);
}

TEST_F(DBCompactionTest, CancelCompactionWaitingOnConflict) {
  // This test verifies cancellation of a compaction waiting to be scheduled due
  // to conflict with a running compaction.
  //
  // A `CompactRange()` in universal compacts all files, waiting for files to
  // become available if they are locked for another compaction. This test
  // triggers an automatic compaction that blocks a `CompactRange()`, and
  // verifies that `DisableManualCompaction()` can successfully cancel the
  // `CompactRange()` without waiting for the automatic compaction to finish.
  const int kNumSortedRuns = 4;

  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.level0_file_num_compaction_trigger = kNumSortedRuns;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
  Reopen(options);

  test::SleepingBackgroundTask auto_compaction_sleeping_task;
  // Block automatic compaction when it runs in the callback
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::Run():Start",
      [&](void* /*arg*/) { auto_compaction_sleeping_task.DoSleep(); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Fill overlapping files in L0 to trigger an automatic compaction
  Random rnd(301);
  for (int i = 0; i < kNumSortedRuns; ++i) {
    int key_idx = 0;
    // We hold the compaction from happening, so when generating the last SST
    // file, we cannot wait. Otherwise, we'll hit a deadlock.
    GenerateNewFile(&rnd, &key_idx,
                    (i == kNumSortedRuns - 1) ? true : false /* nowait */);
  }
  auto_compaction_sleeping_task.WaitUntilSleeping();

  // Make sure the manual compaction has seen the conflict before being canceled
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"ColumnFamilyData::CompactRange:Return",
        "DBCompactionTest::CancelCompactionWaitingOnConflict:"
        "PreDisableManualCompaction"}});
  auto manual_compaction_thread = port::Thread([this]() {
    ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)
                    .IsIncomplete());
  });

  // Cancel it. Thread should be joinable, i.e., manual compaction was unblocked
  // despite finding a conflict with an automatic compaction that is still
  // running
  TEST_SYNC_POINT(
      "DBCompactionTest::CancelCompactionWaitingOnConflict:"
      "PreDisableManualCompaction");
  db_->DisableManualCompaction();
  manual_compaction_thread.join();
}

TEST_F(DBCompactionTest, OptimizedDeletionObsoleting) {
  // Deletions can be dropped when compacted to non-last level if they fall
  // outside the lower-level files' key-ranges.
  const int kNumL0Files = 4;
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  DestroyAndReopen(options);

  // put key 1 and 3 in separate L1, L2 files.
  // So key 0, 2, and 4+ fall outside these levels' key-ranges.
  for (int level = 2; level >= 1; --level) {
    for (int i = 0; i < 2; ++i) {
      ASSERT_OK(Put(Key(2 * i + 1), "val"));
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(level);
    ASSERT_EQ(2, NumTableFilesAtLevel(level));
  }

  // Delete keys in range [1, 4]. These L0 files will be compacted with L1:
  // - Tombstones for keys 2 and 4 can be dropped early.
  // - Tombstones for keys 1 and 3 must be kept due to L2 files' key-ranges.
  for (int i = 0; i < kNumL0Files; ++i) {
    ASSERT_OK(Put(Key(0), "val"));  // sentinel to prevent trivial move
    ASSERT_OK(Delete(Key(i + 1)));
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  for (int i = 0; i < kNumL0Files; ++i) {
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), Key(i + 1), &value).IsNotFound());
  }
  ASSERT_EQ(2, options.statistics->getTickerCount(
                   COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE));
  ASSERT_EQ(2,
            options.statistics->getTickerCount(COMPACTION_KEY_DROP_OBSOLETE));
}

TEST_F(DBCompactionTest, CompactFilesPendingL0Bug) {
  // https://www.facebook.com/groups/rocksdb.dev/permalink/1389452781153232/
  // CompactFiles() had a bug where it failed to pick a compaction when an L0
  // compaction existed, but marked it as scheduled anyways. It'd never be
  // unmarked as scheduled, so future compactions or DB close could hang.
  const int kNumL0Files = 5;
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files - 1;
  options.max_background_compactions = 2;
  DestroyAndReopen(options);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"LevelCompactionPicker::PickCompaction:Return",
        "DBCompactionTest::CompactFilesPendingL0Bug:Picked"},
       {"DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted",
        "DBImpl::BackgroundCompaction:NonTrivial:AfterRun"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  auto schedule_multi_compaction_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();

  // Files 0-3 will be included in an L0->L1 compaction.
  //
  // File 4 will be included in a call to CompactFiles() while the first
  // compaction is running.
  for (int i = 0; i < kNumL0Files - 1; ++i) {
    ASSERT_OK(Put(Key(0), "val"));  // sentinel to prevent trivial move
    ASSERT_OK(Put(Key(i + 1), "val"));
    ASSERT_OK(Flush());
  }
  TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:Picked");
  // file 4 flushed after 0-3 picked
  ASSERT_OK(Put(Key(kNumL0Files), "val"));
  ASSERT_OK(Flush());

  // previously DB close would hang forever as this situation caused scheduled
  // compactions count to never decrement to zero.
  ColumnFamilyMetaData cf_meta;
  dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta);
  ASSERT_EQ(kNumL0Files, cf_meta.levels[0].files.size());
  std::vector<std::string> input_filenames;
  input_filenames.push_back(cf_meta.levels[0].files.front().name);
  ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), input_filenames,
                                   0 /* output_level */));
  TEST_SYNC_POINT("DBCompactionTest::CompactFilesPendingL0Bug:ManualCompacted");
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, CompactFilesOverlapInL0Bug) {
  // Regression test for bug of not pulling in L0 files that overlap the user-
  // specified input files in time- and key-ranges.
  ASSERT_OK(Put(Key(0), "old_val"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(Key(0), "new_val"));
  ASSERT_OK(Flush());

  ColumnFamilyMetaData cf_meta;
  dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta);
  ASSERT_GE(cf_meta.levels.size(), 2);
  ASSERT_EQ(2, cf_meta.levels[0].files.size());

  // Compacting {new L0 file, L1 file} should pull in the old L0 file since it
  // overlaps in key-range and time-range.
  std::vector<std::string> input_filenames;
  input_filenames.push_back(cf_meta.levels[0].files.front().name);
  ASSERT_OK(dbfull()->CompactFiles(CompactionOptions(), input_filenames,
                                   1 /* output_level */));
  ASSERT_EQ("new_val", Get(Key(0)));
}

TEST_F(DBCompactionTest, DeleteFilesInRangeConflictWithCompaction) {
  Options options = CurrentOptions();
  DestroyAndReopen(options);
  const Snapshot* snapshot = nullptr;
  const int kMaxKey = 10;

  for (int i = 0; i < kMaxKey; i++) {
    ASSERT_OK(Put(Key(i), Key(i)));
    ASSERT_OK(Delete(Key(i)));
    if (!snapshot) {
      snapshot = db_->GetSnapshot();
    }
  }
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);
  ASSERT_OK(Put(Key(kMaxKey), Key(kMaxKey)));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // test DeleteFilesInRange() deletes the files already picked for compaction
  SyncPoint::GetInstance()->LoadDependency(
      {{"VersionSet::LogAndApply:WriteManifestStart",
        "BackgroundCallCompaction:0"},
       {"DBImpl::BackgroundCompaction:Finish",
        "VersionSet::LogAndApply:WriteManifestDone"}});
  SyncPoint::GetInstance()->EnableProcessing();

  // release snapshot which mark bottommost file for compaction
  db_->ReleaseSnapshot(snapshot);
  std::string begin_string = Key(0);
  std::string end_string = Key(kMaxKey + 1);
  Slice begin(begin_string);
  Slice end(end_string);
  ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));
  SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) {
  // bottom-level files may contain deletions due to snapshots protecting the
  // deleted keys. Once the snapshot is released, we should see files with many
  // such deletions undergo single-file compactions.
  const int kNumKeysPerFile = 1024;
  const int kNumLevelFiles = 4;
  const int kValueSize = 128;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumLevelFiles;
  // inflate it a bit to account for key/metadata overhead
  options.target_file_size_base = 120 * kNumKeysPerFile * kValueSize / 100;
  CreateAndReopenWithCF({"one"}, options);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    if (i == kNumLevelFiles - 1) {
      snapshot = db_->GetSnapshot();
      // delete every other key after grabbing a snapshot, so these deletions
      // and the keys they cover can't be dropped until after the snapshot is
      // released.
      for (int j = 0; j < kNumLevelFiles * kNumKeysPerFile; j += 2) {
        ASSERT_OK(Delete(Key(j)));
      }
    }
    ASSERT_OK(Flush());
    if (i < kNumLevelFiles - 1) {
      ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
    }
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1));

  std::vector<LiveFileMetaData> pre_release_metadata, post_release_metadata;
  db_->GetLiveFilesMetaData(&pre_release_metadata);
  // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST
  // files does not need to be preserved in case of a future snapshot.
  ASSERT_OK(Put(Key(0), "val"));
  ASSERT_NE(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_);
  // release snapshot and wait for compactions to finish. Single-file
  // compactions should be triggered, which reduce the size of each bottom-level
  // file without changing file count.
  db_->ReleaseSnapshot(snapshot);
  ASSERT_EQ(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        ASSERT_TRUE(compaction->compaction_reason() ==
                    CompactionReason::kBottommostFiles);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  db_->GetLiveFilesMetaData(&post_release_metadata);
  ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size());

  for (size_t i = 0; i < pre_release_metadata.size(); ++i) {
    const auto& pre_file = pre_release_metadata[i];
    const auto& post_file = post_release_metadata[i];
    ASSERT_EQ(1, pre_file.level);
    ASSERT_EQ(1, post_file.level);
    // each file is smaller than it was before as it was rewritten without
    // deletion markers/deleted keys.
    ASSERT_LT(post_file.size, pre_file.size);
  }
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, NoCompactBottomLevelFilesWithDeletions) {
  // bottom-level files may contain deletions due to snapshots protecting the
  // deleted keys. Once the snapshot is released, we should see files with many
  // such deletions undergo single-file compactions. But when disabling auto
  // compactions, it shouldn't be triggered which may causing too many
  // background jobs.
  const int kNumKeysPerFile = 1024;
  const int kNumLevelFiles = 4;
  const int kValueSize = 128;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumLevelFiles;
  // inflate it a bit to account for key/metadata overhead
  options.target_file_size_base = 120 * kNumKeysPerFile * kValueSize / 100;
  Reopen(options);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    if (i == kNumLevelFiles - 1) {
      snapshot = db_->GetSnapshot();
      // delete every other key after grabbing a snapshot, so these deletions
      // and the keys they cover can't be dropped until after the snapshot is
      // released.
      for (int j = 0; j < kNumLevelFiles * kNumKeysPerFile; j += 2) {
        ASSERT_OK(Delete(Key(j)));
      }
    }
    ASSERT_OK(Flush());
    if (i < kNumLevelFiles - 1) {
      ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
    }
  }
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr));
  ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1));

  std::vector<LiveFileMetaData> pre_release_metadata, post_release_metadata;
  db_->GetLiveFilesMetaData(&pre_release_metadata);
  // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST
  // files does not need to be preserved in case of a future snapshot.
  ASSERT_OK(Put(Key(0), "val"));

  // release snapshot and no compaction should be triggered.
  std::atomic<int> num_compactions{0};
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:Start",
      [&](void* /*arg*/) { num_compactions.fetch_add(1); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  db_->ReleaseSnapshot(snapshot);
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(0, num_compactions);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  db_->GetLiveFilesMetaData(&post_release_metadata);
  ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size());
  for (size_t i = 0; i < pre_release_metadata.size(); ++i) {
    const auto& pre_file = pre_release_metadata[i];
    const auto& post_file = post_release_metadata[i];
    ASSERT_EQ(1, pre_file.level);
    ASSERT_EQ(1, post_file.level);
    // each file is same as before with deletion markers/deleted keys.
    ASSERT_EQ(post_file.size, pre_file.size);
  }
}

TEST_F(DBCompactionTest, RoundRobinTtlCompactionNormal) {
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 20;
  options.ttl = 24 * 60 * 60;  // 24 hours
  options.compaction_pri = kRoundRobin;
  env_->now_cpu_count_.store(0);
  env_->SetMockSleep();
  options.env = env_;

  // add a small second for each wait time, to make sure the file is expired
  int small_seconds = 1;

  std::atomic_int ttl_compactions{0};
  std::atomic_int round_robin_ttl_compactions{0};
  std::atomic_int other_compactions{0};

  SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        auto compaction_reason = compaction->compaction_reason();
        if (compaction_reason == CompactionReason::kTtl) {
          ttl_compactions++;
        } else if (compaction_reason == CompactionReason::kRoundRobinTtl) {
          round_robin_ttl_compactions++;
        } else {
          other_compactions++;
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();

  DestroyAndReopen(options);

  // Setup the files from lower level to up level, each file is 1 hour's older
  // than the next one.
  // create 10 files on the last level (L6)
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(Key(i * 100 + j), "value" + std::to_string(i * 100 + j)));
    }
    ASSERT_OK(Flush());
    env_->MockSleepForSeconds(60 * 60);  // generate 1 file per hour
  }
  MoveFilesToLevel(6);

  // create 5 files on L5
  for (int i = 0; i < 5; i++) {
    for (int j = 0; j < 200; j++) {
      ASSERT_OK(Put(Key(i * 200 + j), "value" + std::to_string(i * 200 + j)));
    }
    ASSERT_OK(Flush());
    env_->MockSleepForSeconds(60 * 60);
  }
  MoveFilesToLevel(5);

  // create 3 files on L4
  for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 300; j++) {
      ASSERT_OK(Put(Key(i * 300 + j), "value" + std::to_string(i * 300 + j)));
    }
    ASSERT_OK(Flush());
    env_->MockSleepForSeconds(60 * 60);
  }
  MoveFilesToLevel(4);

  // The LSM tree should be like:
  // L4: [0,            299], [300,      599], [600,     899]
  // L5: [0,  199]      [200,         399]...............[800, 999]
  // L6: [0,99][100,199][200,299][300,399]...............[800,899][900,999]
  ASSERT_EQ("0,0,0,0,3,5,10", FilesPerLevel());

  // make sure the first L5 file is expired
  env_->MockSleepForSeconds(16 * 60 * 60 + small_seconds++);

  // trigger TTL compaction
  ASSERT_OK(Put(Key(4), "value" + std::to_string(1)));
  ASSERT_OK(Put(Key(5), "value" + std::to_string(1)));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // verify there's a RoundRobin TTL compaction
  ASSERT_EQ(1, round_robin_ttl_compactions);
  round_robin_ttl_compactions = 0;

  // expire 2 more files
  env_->MockSleepForSeconds(2 * 60 * 60 + small_seconds++);
  // trigger TTL compaction
  ASSERT_OK(Put(Key(4), "value" + std::to_string(2)));
  ASSERT_OK(Put(Key(5), "value" + std::to_string(2)));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ(2, round_robin_ttl_compactions);
  round_robin_ttl_compactions = 0;

  // expire 4 more files, 2 out of 3 files on L4 are expired
  env_->MockSleepForSeconds(4 * 60 * 60 + small_seconds++);
  // trigger TTL compaction
  ASSERT_OK(Put(Key(6), "value" + std::to_string(3)));
  ASSERT_OK(Put(Key(7), "value" + std::to_string(3)));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ(1, NumTableFilesAtLevel(4));
  ASSERT_EQ(0, NumTableFilesAtLevel(5));

  ASSERT_GT(round_robin_ttl_compactions, 0);
  round_robin_ttl_compactions = 0;

  // make the first L0 file expired, which triggers a normal TTL compaction
  // instead of roundrobin TTL compaction, it will also include an extra file
  // from L0 because of overlap
  ASSERT_EQ(0, ttl_compactions);
  env_->MockSleepForSeconds(19 * 60 * 60 + small_seconds++);

  // trigger TTL compaction
  ASSERT_OK(Put(Key(6), "value" + std::to_string(4)));
  ASSERT_OK(Put(Key(7), "value" + std::to_string(4)));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // L0 -> L1 compaction is normal TTL compaction, L1 -> next levels compactions
  // are RoundRobin TTL compaction.
  ASSERT_GT(ttl_compactions, 0);
  ttl_compactions = 0;
  ASSERT_GT(round_robin_ttl_compactions, 0);
  round_robin_ttl_compactions = 0;

  // All files are expired, so only the last level has data
  env_->MockSleepForSeconds(24 * 60 * 60);
  // trigger TTL compaction
  ASSERT_OK(Put(Key(6), "value" + std::to_string(4)));
  ASSERT_OK(Put(Key(7), "value" + std::to_string(4)));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());

  ASSERT_GT(ttl_compactions, 0);
  ttl_compactions = 0;
  ASSERT_GT(round_robin_ttl_compactions, 0);
  round_robin_ttl_compactions = 0;

  ASSERT_EQ(0, other_compactions);
}

TEST_F(DBCompactionTest, RoundRobinTtlCompactionUnsortedTime) {
  // This is to test the case that the RoundRobin compaction cursor not pointing
  // to the oldest file, RoundRobin compaction should still compact the file
  // after cursor until all expired files are compacted.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 20;
  options.ttl = 24 * 60 * 60;  // 24 hours
  options.compaction_pri = kRoundRobin;
  env_->now_cpu_count_.store(0);
  env_->SetMockSleep();
  options.env = env_;

  std::atomic_int ttl_compactions{0};
  std::atomic_int round_robin_ttl_compactions{0};
  std::atomic_int other_compactions{0};

  SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        auto compaction_reason = compaction->compaction_reason();
        if (compaction_reason == CompactionReason::kTtl) {
          ttl_compactions++;
        } else if (compaction_reason == CompactionReason::kRoundRobinTtl) {
          round_robin_ttl_compactions++;
        } else {
          other_compactions++;
        }
      });
  SyncPoint::GetInstance()->EnableProcessing();

  DestroyAndReopen(options);

  // create 10 files on the last level (L6)
  for (int i = 0; i < 10; i++) {
    for (int j = 0; j < 100; j++) {
      ASSERT_OK(Put(Key(i * 100 + j), "value" + std::to_string(i * 100 + j)));
    }
    ASSERT_OK(Flush());
    env_->MockSleepForSeconds(60 * 60);  // generate 1 file per hour
  }
  MoveFilesToLevel(6);

  // create 5 files on L5
  for (int i = 0; i < 5; i++) {
    for (int j = 0; j < 200; j++) {
      ASSERT_OK(Put(Key(i * 200 + j), "value" + std::to_string(i * 200 + j)));
    }
    ASSERT_OK(Flush());
    env_->MockSleepForSeconds(60 * 60);  // 1 hour
  }
  MoveFilesToLevel(5);

  // The LSM tree should be like:
  // L5: [0,  199]      [200,         399] [400,599] [600,799] [800, 999]
  // L6: [0,99][100,199][200,299][300,399]....................[800,899][900,999]
  ASSERT_EQ("0,0,0,0,0,5,10", FilesPerLevel());

  // point the compaction cursor to the 4th file on L5
  VersionSet* const versions = dbfull()->GetVersionSet();
  assert(versions);
  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  ASSERT_NE(cfd, nullptr);
  Version* const current = cfd->current();
  ASSERT_NE(current, nullptr);
  VersionStorageInfo* storage_info = current->storage_info();
  ASSERT_NE(storage_info, nullptr);
  const InternalKey split_cursor = InternalKey(Key(600), 100000, kTypeValue);
  storage_info->AddCursorForOneLevel(5, split_cursor);

  // make the first file on L5 expired, there should be 3 TTL compactions:
  // 4th one, 5th one, then 1st one.
  env_->MockSleepForSeconds(19 * 60 * 60 + 1);
  // trigger TTL compaction
  ASSERT_OK(Put(Key(6), "value" + std::to_string(4)));
  ASSERT_OK(Put(Key(7), "value" + std::to_string(4)));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(2, NumTableFilesAtLevel(5));

  ASSERT_EQ(3, round_robin_ttl_compactions);
  ASSERT_EQ(0, ttl_compactions);
  ASSERT_EQ(0, other_compactions);
}

TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) {
  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 1024;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.ttl = 24 * 60 * 60;  // 24 hours
  options.max_open_files = -1;
  env_->SetMockSleep();
  options.env = env_;

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  MoveFilesToLevel(3);
  ASSERT_EQ("0,0,0,2", FilesPerLevel());

  // Delete previously written keys.
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("2,0,0,2", FilesPerLevel());
  MoveFilesToLevel(1);
  ASSERT_EQ("0,2,0,2", FilesPerLevel());

  env_->MockSleepForSeconds(36 * 60 * 60);  // 36 hours
  ASSERT_EQ("0,2,0,2", FilesPerLevel());

  // Just do a simple write + flush so that the Ttl expired files get
  // compacted.
  ASSERT_OK(Put("a", "1"));
  ASSERT_OK(Flush());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // All non-L0 files are deleted, as they contained only deleted data.
  ASSERT_EQ("1", FilesPerLevel());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  // Test dynamically changing ttl.

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  MoveFilesToLevel(3);
  ASSERT_EQ("0,0,0,2", FilesPerLevel());

  // Delete previously written keys.
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("2,0,0,2", FilesPerLevel());
  MoveFilesToLevel(1);
  ASSERT_EQ("0,2,0,2", FilesPerLevel());

  // Move time forward by 12 hours, and make sure that compaction still doesn't
  // trigger as ttl is set to 24 hours.
  env_->MockSleepForSeconds(12 * 60 * 60);
  ASSERT_OK(Put("a", "1"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("1,2,0,2", FilesPerLevel());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Dynamically change ttl to 10 hours.
  // This should trigger a ttl compaction, as 12 hours have already passed.
  ASSERT_OK(dbfull()->SetOptions({{"ttl", "36000"}}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // All non-L0 files are deleted, as they contained only deleted data.
  ASSERT_EQ("1", FilesPerLevel());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, LevelTtlCompactionOutputCuttingIteractingWithOther) {
  // This test is for a bug fix in CompactionOutputs::ShouldStopBefore() where
  // TTL states were not being updated for keys that ShouldStopBefore() would
  // return true for reasons other than TTL.
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.ttl = 24 * 60 * 60;  // 24 hours
  options.max_open_files = -1;
  options.compaction_pri = kMinOverlappingRatio;
  env_->SetMockSleep();
  options.env = env_;
  options.target_file_size_base = 4 << 10;
  options.disable_auto_compactions = true;
  options.level_compaction_dynamic_file_size = false;

  DestroyAndReopen(options);
  Random rnd(301);

  // This makes sure the manual compaction below
  // is not a bottommost compaction as TTL is only
  // for non-bottommost compactions.
  ASSERT_OK(Put(Key(3), rnd.RandomString(1 << 10)));
  ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(6);

  // L2:
  ASSERT_OK(Put(Key(2), rnd.RandomString(4 << 10)));
  ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(2);

  // L1, overlaps in range with the file in L2 so
  // that they compact together.
  ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10)));
  ASSERT_OK(Put(Key(1), rnd.RandomString(4 << 10)));
  ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);

  ASSERT_EQ("0,1,1,0,0,0,1", FilesPerLevel());
  // 36 hours so that the file in L2 is eligible for TTL
  env_->MockSleepForSeconds(36 * 60 * 60);

  CompactRangeOptions compact_range_opts;

  ASSERT_OK(dbfull()->RunManualCompaction(
      static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
          ->cfd(),
      1 /* input_level */, 2 /* output_level */, compact_range_opts,
      nullptr /* begin */, nullptr /* end */, true /* exclusive */,
      true /* disallow_trivial_move */,
      std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
      "" /*trim_ts*/));

  // L2 should have 2 files:
  // file 1: Key(0), Key(1)
  // ShouldStopBefore(Key(2)) return true due to TTL or output file size
  // file 2: Key(2), Key(3)
  //
  // Before the fix in this PR, L2 would have 3 files:
  // file 1: Key(0), Key(1)
  // CompactionOutputs::ShouldStopBefore(Key(2)) returns true due to output file
  // size.
  // file 2: Key(2)
  // CompactionOutput::ShouldStopBefore(Key(3)) returns true
  // due to TTL cutting and that TTL states were not updated
  // for Key(2).
  // file 3: Key(3)
  ASSERT_EQ("0,0,2,0,0,0,1", FilesPerLevel());
}

TEST_F(DBCompactionTest, LevelTtlCascadingCompactions) {
  env_->SetMockSleep();
  const int kValueSize = 100;

  for (bool if_restart : {false, true}) {
    for (bool if_open_all_files : {false, true}) {
      Options options = CurrentOptions();
      options.compression = kNoCompression;
      options.ttl = 24 * 60 * 60;  // 24 hours
      if (if_open_all_files) {
        options.max_open_files = -1;
      } else {
        options.max_open_files = 20;
      }
      // RocksDB sanitize max open files to at least 20. Modify it back.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
            int* max_open_files = static_cast<int*>(arg);
            *max_open_files = 2;
          });
      // In the case where all files are opened and doing DB restart
      // forcing the oldest ancester time in manifest file to be 0 to
      // simulate the case of reading from an old version.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "VersionEdit::EncodeTo:VarintOldestAncesterTime", [&](void* arg) {
            if (if_restart && if_open_all_files) {
              std::string* encoded_fieled = static_cast<std::string*>(arg);
              *encoded_fieled = "";
              PutVarint64(encoded_fieled, 0);
            }
          });

      options.env = env_;

      // NOTE: Presumed unnecessary and removed: resetting mock time in env

      DestroyAndReopen(options);

      int ttl_compactions = 0;
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
            Compaction* compaction = reinterpret_cast<Compaction*>(arg);
            auto compaction_reason = compaction->compaction_reason();
            if (compaction_reason == CompactionReason::kTtl) {
              ttl_compactions++;
            }
          });
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

      // Add two L6 files with key ranges: [1 .. 100], [101 .. 200].
      Random rnd(301);
      for (int i = 1; i <= 100; ++i) {
        ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize)));
      }
      ASSERT_OK(Flush());
      // Get the first file's creation time. This will be the oldest file in the
      // DB. Compactions inolving this file's descendents should keep getting
      // this time.
      std::vector<std::vector<FileMetaData>> level_to_files;
      dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
                                      &level_to_files);
      uint64_t oldest_time = level_to_files[0][0].oldest_ancester_time;
      // Add 1 hour and do another flush.
      env_->MockSleepForSeconds(1 * 60 * 60);
      for (int i = 101; i <= 200; ++i) {
        ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize)));
      }
      ASSERT_OK(Flush());
      MoveFilesToLevel(6);
      ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel());

      env_->MockSleepForSeconds(1 * 60 * 60);
      // Add two L4 files with key ranges: [1 .. 50], [51 .. 150].
      for (int i = 1; i <= 50; ++i) {
        ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize)));
      }
      ASSERT_OK(Flush());
      env_->MockSleepForSeconds(1 * 60 * 60);
      for (int i = 51; i <= 150; ++i) {
        ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize)));
      }
      ASSERT_OK(Flush());
      MoveFilesToLevel(4);
      ASSERT_EQ("0,0,0,0,2,0,2", FilesPerLevel());

      env_->MockSleepForSeconds(1 * 60 * 60);
      // Add one L1 file with key range: [26, 75].
      for (int i = 26; i <= 75; ++i) {
        ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize)));
      }
      ASSERT_OK(Flush());
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      MoveFilesToLevel(1);
      ASSERT_EQ("0,1,0,0,2,0,2", FilesPerLevel());

      // LSM tree:
      // L1:         [26 .. 75]
      // L4:     [1 .. 50][51 ..... 150]
      // L6:     [1 ........ 100][101 .... 200]
      //
      // On TTL expiry, TTL compaction should be initiated on L1 file, and the
      // compactions should keep going on until the key range hits bottom level.
      // In other words: the compaction on this data range "cascasdes" until
      // reaching the bottom level.
      //
      // Order of events on TTL expiry:
      // 1. L1 file falls to L3 via 2 trivial moves which are initiated by the
      // ttl
      //    compaction.
      // 2. A TTL compaction happens between L3 and L4 files. Output file in L4.
      // 3. The new output file from L4 falls to L5 via 1 trival move initiated
      //    by the ttl compaction.
      // 4. A TTL compaction happens between L5 and L6 files. Ouptut in L6.

      // Add 25 hours and do a write
      env_->MockSleepForSeconds(25 * 60 * 60);

      ASSERT_OK(Put(Key(1), "1"));
      if (if_restart) {
        Reopen(options);
      } else {
        ASSERT_OK(Flush());
      }
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel());
      ASSERT_EQ(5, ttl_compactions);

      dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
                                      &level_to_files);
      ASSERT_EQ(oldest_time, level_to_files[6][0].oldest_ancester_time);

      env_->MockSleepForSeconds(25 * 60 * 60);
      ASSERT_OK(Put(Key(2), "1"));
      if (if_restart) {
        Reopen(options);
      } else {
        ASSERT_OK(Flush());
      }
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel());
      ASSERT_GE(ttl_compactions, 6);

      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
    }
  }
}

TEST_F(DBCompactionTest, LevelPeriodicCompaction) {
  env_->SetMockSleep();
  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 100;

  for (bool if_restart : {false, true}) {
    for (bool if_open_all_files : {false, true}) {
      Options options = CurrentOptions();
      options.periodic_compaction_seconds = 48 * 60 * 60;  // 2 days
      if (if_open_all_files) {
        options.max_open_files = -1;  // needed for ttl compaction
      } else {
        options.max_open_files = 20;
      }
      // RocksDB sanitize max open files to at least 20. Modify it back.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
            int* max_open_files = static_cast<int*>(arg);
            *max_open_files = 0;
          });
      // In the case where all files are opened and doing DB restart
      // forcing the file creation time in manifest file to be 0 to
      // simulate the case of reading from an old version.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "VersionEdit::EncodeTo:VarintFileCreationTime", [&](void* arg) {
            if (if_restart && if_open_all_files) {
              std::string* encoded_fieled = static_cast<std::string*>(arg);
              *encoded_fieled = "";
              PutVarint64(encoded_fieled, 0);
            }
          });

      options.env = env_;

      // NOTE: Presumed unnecessary and removed: resetting mock time in env

      DestroyAndReopen(options);

      int periodic_compactions = 0;
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
            Compaction* compaction = reinterpret_cast<Compaction*>(arg);
            auto compaction_reason = compaction->compaction_reason();
            if (compaction_reason == CompactionReason::kPeriodicCompaction) {
              periodic_compactions++;
            }
          });
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

      Random rnd(301);
      for (int i = 0; i < kNumLevelFiles; ++i) {
        for (int j = 0; j < kNumKeysPerFile; ++j) {
          ASSERT_OK(
              Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
        }
        ASSERT_OK(Flush());
      }
      ASSERT_OK(dbfull()->TEST_WaitForCompact());

      ASSERT_EQ("2", FilesPerLevel());
      ASSERT_EQ(0, periodic_compactions);

      // Add 50 hours and do a write
      env_->MockSleepForSeconds(50 * 60 * 60);
      ASSERT_OK(Put("a", "1"));
      ASSERT_OK(Flush());
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      // Assert that the files stay in the same level
      ASSERT_EQ("3", FilesPerLevel());
      // The two old files go through the periodic compaction process
      ASSERT_EQ(2, periodic_compactions);

      MoveFilesToLevel(1);
      ASSERT_EQ("0,3", FilesPerLevel());

      // Add another 50 hours and do another write
      env_->MockSleepForSeconds(50 * 60 * 60);
      ASSERT_OK(Put("b", "2"));
      if (if_restart) {
        Reopen(options);
      } else {
        ASSERT_OK(Flush());
      }
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ("1,3", FilesPerLevel());
      // The three old files now go through the periodic compaction process. 2
      // + 3.
      ASSERT_EQ(5, periodic_compactions);

      // Add another 50 hours and do another write
      env_->MockSleepForSeconds(50 * 60 * 60);
      ASSERT_OK(Put("c", "3"));
      ASSERT_OK(Flush());
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ("2,3", FilesPerLevel());
      // The four old files now go through the periodic compaction process. 5
      // + 4.
      ASSERT_EQ(9, periodic_compactions);

      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
    }
  }
}

TEST_F(DBCompactionTest, LevelPeriodicCompactionWithOldDB) {
  // This test makes sure that periodic compactions are working with a DB
  // where file_creation_time of some files is 0.
  // After compactions the new files are created with a valid file_creation_time

  const int kNumKeysPerFile = 32;
  const int kNumFiles = 4;
  const int kValueSize = 100;

  Options options = CurrentOptions();
  env_->SetMockSleep();
  options.env = env_;

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  int periodic_compactions = 0;
  bool set_file_creation_time_to_zero = true;
  bool set_creation_time_to_zero = true;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        auto compaction_reason = compaction->compaction_reason();
        if (compaction_reason == CompactionReason::kPeriodicCompaction) {
          periodic_compactions++;
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
        TableProperties* props = reinterpret_cast<TableProperties*>(arg);
        if (set_file_creation_time_to_zero) {
          props->file_creation_time = 0;
        }
        if (set_creation_time_to_zero) {
          props->creation_time = 0;
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
    // Move the first two files to L2.
    if (i == 1) {
      MoveFilesToLevel(2);
      set_creation_time_to_zero = false;
    }
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ("2,0,2", FilesPerLevel());
  ASSERT_EQ(0, periodic_compactions);

  Close();

  set_file_creation_time_to_zero = false;
  // Forward the clock by 2 days.
  env_->MockSleepForSeconds(2 * 24 * 60 * 60);
  options.periodic_compaction_seconds = 1 * 24 * 60 * 60;  // 1 day

  Reopen(options);
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("2,0,2", FilesPerLevel());
  // Make sure that all files go through periodic compaction.
  ASSERT_EQ(kNumFiles, periodic_compactions);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, LevelPeriodicAndTtlCompaction) {
  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 100;

  Options options = CurrentOptions();
  options.ttl = 10 * 60 * 60;                          // 10 hours
  options.periodic_compaction_seconds = 48 * 60 * 60;  // 2 days
  options.max_open_files = -1;  // needed for both periodic and ttl compactions
  env_->SetMockSleep();
  options.env = env_;

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  int periodic_compactions = 0;
  int ttl_compactions = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        auto compaction_reason = compaction->compaction_reason();
        if (compaction_reason == CompactionReason::kPeriodicCompaction) {
          periodic_compactions++;
        } else if (compaction_reason == CompactionReason::kTtl) {
          ttl_compactions++;
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  MoveFilesToLevel(3);

  ASSERT_EQ("0,0,0,2", FilesPerLevel());
  ASSERT_EQ(0, periodic_compactions);
  ASSERT_EQ(0, ttl_compactions);

  // Add some time greater than periodic_compaction_time.
  env_->MockSleepForSeconds(50 * 60 * 60);
  ASSERT_OK(Put("a", "1"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // Files in the bottom level go through periodic compactions.
  ASSERT_EQ("1,0,0,2", FilesPerLevel());
  ASSERT_EQ(2, periodic_compactions);
  ASSERT_EQ(0, ttl_compactions);

  // Add a little more time than ttl
  env_->MockSleepForSeconds(11 * 60 * 60);
  ASSERT_OK(Put("b", "1"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // Notice that the previous file in level 1 falls down to the bottom level
  // due to ttl compactions, one level at a time.
  // And bottom level files don't get picked up for ttl compactions.
  ASSERT_EQ("1,0,0,3", FilesPerLevel());
  ASSERT_EQ(2, periodic_compactions);
  ASSERT_EQ(3, ttl_compactions);

  // Add some time greater than periodic_compaction_time.
  env_->MockSleepForSeconds(50 * 60 * 60);
  ASSERT_OK(Put("c", "1"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // Previous L0 file falls one level at a time to bottom level due to ttl.
  // And all 4 bottom files go through periodic compactions.
  ASSERT_EQ("1,0,0,4", FilesPerLevel());
  ASSERT_EQ(6, periodic_compactions);
  ASSERT_EQ(6, ttl_compactions);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, LevelTtlBooster) {
  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 3;
  const int kValueSize = 1000;

  Options options = CurrentOptions();
  options.ttl = 10 * 60 * 60;                           // 10 hours
  options.periodic_compaction_seconds = 480 * 60 * 60;  // very long
  options.level0_file_num_compaction_trigger = 2;
  options.max_bytes_for_level_base = 5 * uint64_t{kNumKeysPerFile * kValueSize};
  options.max_open_files = -1;  // needed for both periodic and ttl compactions
  options.compaction_pri = CompactionPri::kMinOverlappingRatio;
  env_->SetMockSleep();
  options.env = env_;

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumLevelFiles; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(
          Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  MoveFilesToLevel(2);

  ASSERT_EQ("0,0,3", FilesPerLevel());

  // Create some files for L1
  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(Put(Key(2 * j + i), rnd.RandomString(kValueSize)));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }

  ASSERT_EQ("0,1,3", FilesPerLevel());

  // Make the new L0 files qualify TTL boosting and generate one more to trigger
  // L1 -> L2 compaction. Old files will be picked even if their priority is
  // lower without boosting.
  env_->MockSleepForSeconds(8 * 60 * 60);
  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(Put(Key(kNumKeysPerFile * 2 + 2 * j + i),
                    rnd.RandomString(kValueSize * 2)));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }
  // Force files to be compacted to L1
  ASSERT_OK(
      dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "1"}}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,1,2", FilesPerLevel());
  ASSERT_OK(
      dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}}));

  ASSERT_GT(SizeAtLevel(1), kNumKeysPerFile * 4 * kValueSize);
}

TEST_F(DBCompactionTest, LevelPeriodicCompactionWithCompactionFilters) {
  class TestCompactionFilter : public CompactionFilter {
    const char* Name() const override { return "TestCompactionFilter"; }
  };
  class TestCompactionFilterFactory : public CompactionFilterFactory {
    const char* Name() const override { return "TestCompactionFilterFactory"; }
    std::unique_ptr<CompactionFilter> CreateCompactionFilter(
        const CompactionFilter::Context& /*context*/) override {
      return std::unique_ptr<CompactionFilter>(new TestCompactionFilter());
    }
  };

  const int kNumKeysPerFile = 32;
  const int kNumLevelFiles = 2;
  const int kValueSize = 100;

  Random rnd(301);

  Options options = CurrentOptions();
  TestCompactionFilter test_compaction_filter;
  env_->SetMockSleep();
  options.env = env_;

  // NOTE: Presumed unnecessary and removed: resetting mock time in env

  enum CompactionFilterType {
    kUseCompactionFilter,
    kUseCompactionFilterFactory
  };

  for (CompactionFilterType comp_filter_type :
       {kUseCompactionFilter, kUseCompactionFilterFactory}) {
    // Assert that periodic compactions are not enabled.
    ASSERT_EQ(std::numeric_limits<uint64_t>::max() - 1,
              options.periodic_compaction_seconds);

    if (comp_filter_type == kUseCompactionFilter) {
      options.compaction_filter = &test_compaction_filter;
      options.compaction_filter_factory.reset();
    } else if (comp_filter_type == kUseCompactionFilterFactory) {
      options.compaction_filter = nullptr;
      options.compaction_filter_factory.reset(
          new TestCompactionFilterFactory());
    }
    DestroyAndReopen(options);

    // periodic_compaction_seconds should be set to the sanitized value when
    // a compaction filter or a compaction filter factory is used.
    ASSERT_EQ(30 * 24 * 60 * 60,
              dbfull()->GetOptions().periodic_compaction_seconds);

    int periodic_compactions = 0;
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
          Compaction* compaction = reinterpret_cast<Compaction*>(arg);
          auto compaction_reason = compaction->compaction_reason();
          if (compaction_reason == CompactionReason::kPeriodicCompaction) {
            periodic_compactions++;
          }
        });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    for (int i = 0; i < kNumLevelFiles; ++i) {
      for (int j = 0; j < kNumKeysPerFile; ++j) {
        ASSERT_OK(
            Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize)));
      }
      ASSERT_OK(Flush());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ("2", FilesPerLevel());
    ASSERT_EQ(0, periodic_compactions);

    // Add 31 days and do a write
    env_->MockSleepForSeconds(31 * 24 * 60 * 60);
    ASSERT_OK(Put("a", "1"));
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    // Assert that the files stay in the same level
    ASSERT_EQ("3", FilesPerLevel());
    // The two old files go through the periodic compaction process
    ASSERT_EQ(2, periodic_compactions);

    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}

TEST_F(DBCompactionTest, CompactRangeDelayedByL0FileCount) {
  // Verify that, when `CompactRangeOptions::allow_write_stall == false`, manual
  // compaction only triggers flush after it's sure stall won't be triggered for
  // L0 file count going too high.
  const int kNumL0FilesTrigger = 4;
  const int kNumL0FilesLimit = 8;
  // i == 0: verifies normal case where stall is avoided by delay
  // i == 1: verifies no delay in edge case where stall trigger is same as
  //         compaction trigger, so stall can't be avoided
  for (int i = 0; i < 2; ++i) {
    Options options = CurrentOptions();
    options.level0_slowdown_writes_trigger = kNumL0FilesLimit;
    if (i == 0) {
      options.level0_file_num_compaction_trigger = kNumL0FilesTrigger;
    } else {
      options.level0_file_num_compaction_trigger = kNumL0FilesLimit;
    }
    Reopen(options);

    if (i == 0) {
      // ensure the auto compaction doesn't finish until manual compaction has
      // had a chance to be delayed.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
          {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait",
            "CompactionJob::Run():End"}});
    } else {
      // ensure the auto-compaction doesn't finish until manual compaction has
      // continued without delay.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
          {{"DBImpl::FlushMemTable:StallWaitDone",
            "CompactionJob::Run():End"}});
    }
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    Random rnd(301);
    for (int j = 0; j < kNumL0FilesLimit - 1; ++j) {
      for (int k = 0; k < 2; ++k) {
        ASSERT_OK(Put(Key(k), rnd.RandomString(1024)));
      }
      ASSERT_OK(Flush());
    }
    auto manual_compaction_thread = port::Thread([this]() {
      CompactRangeOptions cro;
      cro.allow_write_stall = false;
      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    });

    manual_compaction_thread.join();
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_GT(NumTableFilesAtLevel(1), 0);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}

TEST_F(DBCompactionTest, CompactRangeDelayedByImmMemTableCount) {
  // Verify that, when `CompactRangeOptions::allow_write_stall == false`, manual
  // compaction only triggers flush after it's sure stall won't be triggered for
  // immutable memtable count going too high.
  const int kNumImmMemTableLimit = 8;
  // i == 0: verifies normal case where stall is avoided by delay
  // i == 1: verifies no delay in edge case where stall trigger is same as flush
  //         trigger, so stall can't be avoided
  for (int i = 0; i < 2; ++i) {
    Options options = CurrentOptions();
    options.disable_auto_compactions = true;
    // the delay limit is one less than the stop limit. This test focuses on
    // avoiding delay limit, but this option sets stop limit, so add one.
    options.max_write_buffer_number = kNumImmMemTableLimit + 1;
    if (i == 1) {
      options.min_write_buffer_number_to_merge = kNumImmMemTableLimit;
    }
    Reopen(options);

    if (i == 0) {
      // ensure the flush doesn't finish until manual compaction has had a
      // chance to be delayed.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
          {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait",
            "FlushJob::WriteLevel0Table"}});
    } else {
      // ensure the flush doesn't finish until manual compaction has continued
      // without delay.
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
          {{"DBImpl::FlushMemTable:StallWaitDone",
            "FlushJob::WriteLevel0Table"}});
    }
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    Random rnd(301);
    for (int j = 0; j < kNumImmMemTableLimit - 1; ++j) {
      ASSERT_OK(Put(Key(0), rnd.RandomString(1024)));
      FlushOptions flush_opts;
      flush_opts.wait = false;
      flush_opts.allow_write_stall = true;
      ASSERT_OK(dbfull()->Flush(flush_opts));
    }

    auto manual_compaction_thread = port::Thread([this]() {
      CompactRangeOptions cro;
      cro.allow_write_stall = false;
      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    });

    manual_compaction_thread.join();
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_GT(NumTableFilesAtLevel(1), 0);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}

TEST_F(DBCompactionTest, CompactRangeShutdownWhileDelayed) {
  // Verify that, when `CompactRangeOptions::allow_write_stall == false`, delay
  // does not hang if CF is dropped or DB is closed
  const int kNumL0FilesTrigger = 4;
  const int kNumL0FilesLimit = 8;
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0FilesTrigger;
  options.level0_slowdown_writes_trigger = kNumL0FilesLimit;
  // i == 0: DB::DropColumnFamily() on CompactRange's target CF unblocks it
  // i == 1: DB::CancelAllBackgroundWork() unblocks CompactRange. This is to
  //         simulate what happens during Close as we can't call Close (it
  //         blocks on the auto-compaction, making a cycle).
  for (int i = 0; i < 2; ++i) {
    CreateAndReopenWithCF({"one"}, options);
    // The calls to close CF/DB wait until the manual compaction stalls.
    // The auto-compaction waits until the manual compaction finishes to ensure
    // the signal comes from closing CF/DB, not from compaction making progress.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
        {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait",
          "DBCompactionTest::CompactRangeShutdownWhileDelayed:PreShutdown"},
         {"DBCompactionTest::CompactRangeShutdownWhileDelayed:PostManual",
          "CompactionJob::Run():End"}});
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    Random rnd(301);
    for (int j = 0; j < kNumL0FilesLimit - 1; ++j) {
      for (int k = 0; k < 2; ++k) {
        ASSERT_OK(Put(1, Key(k), rnd.RandomString(1024)));
      }
      ASSERT_OK(Flush(1));
    }
    auto manual_compaction_thread = port::Thread([this, i]() {
      CompactRangeOptions cro;
      cro.allow_write_stall = false;
      if (i == 0) {
        ASSERT_TRUE(db_->CompactRange(cro, handles_[1], nullptr, nullptr)
                        .IsColumnFamilyDropped());
      } else {
        ASSERT_TRUE(db_->CompactRange(cro, handles_[1], nullptr, nullptr)
                        .IsShutdownInProgress());
      }
    });

    TEST_SYNC_POINT(
        "DBCompactionTest::CompactRangeShutdownWhileDelayed:PreShutdown");
    if (i == 0) {
      ASSERT_OK(db_->DropColumnFamily(handles_[1]));
    } else {
      dbfull()->CancelAllBackgroundWork(false /* wait */);
    }
    manual_compaction_thread.join();
    TEST_SYNC_POINT(
        "DBCompactionTest::CompactRangeShutdownWhileDelayed:PostManual");
    if (i == 0) {
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    } else {
      ASSERT_NOK(dbfull()->TEST_WaitForCompact());
    }
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
}

TEST_F(DBCompactionTest, CompactRangeSkipFlushAfterDelay) {
  // Verify that, when `CompactRangeOptions::allow_write_stall == false`,
  // CompactRange skips its flush if the delay is long enough that the memtables
  // existing at the beginning of the call have already been flushed.
  const int kNumL0FilesTrigger = 4;
  const int kNumL0FilesLimit = 8;
  Options options = CurrentOptions();
  options.level0_slowdown_writes_trigger = kNumL0FilesLimit;
  options.level0_file_num_compaction_trigger = kNumL0FilesTrigger;
  Reopen(options);

  Random rnd(301);
  // The manual flush includes the memtable that was active when CompactRange
  // began. So it unblocks CompactRange and precludes its flush. Throughout the
  // test, stall conditions are upheld via high L0 file count.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait",
        "DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush"},
       {"DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush",
        "DBImpl::FlushMemTable:StallWaitDone"},
       {"DBImpl::FlushMemTable:StallWaitDone", "CompactionJob::Run():End"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // used for the delayable flushes
  FlushOptions flush_opts;
  flush_opts.allow_write_stall = true;
  for (int i = 0; i < kNumL0FilesLimit - 1; ++i) {
    for (int j = 0; j < 2; ++j) {
      ASSERT_OK(Put(Key(j), rnd.RandomString(1024)));
    }
    ASSERT_OK(dbfull()->Flush(flush_opts));
  }
  auto manual_compaction_thread = port::Thread([this]() {
    CompactRangeOptions cro;
    cro.allow_write_stall = false;
    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  });

  TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush");
  ASSERT_OK(Put(std::to_string(0), rnd.RandomString(1024)));
  ASSERT_OK(dbfull()->Flush(flush_opts));
  ASSERT_OK(Put(std::to_string(0), rnd.RandomString(1024)));
  TEST_SYNC_POINT(
      "DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush");
  manual_compaction_thread.join();

  // If CompactRange's flush was skipped, the final Put above will still be
  // in the active memtable.
  std::string num_keys_in_memtable;
  ASSERT_TRUE(db_->GetProperty(DB::Properties::kNumEntriesActiveMemTable,
                               &num_keys_in_memtable));
  ASSERT_EQ(std::to_string(1), num_keys_in_memtable);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBCompactionTest, CompactRangeFlushOverlappingMemtable) {
  // Verify memtable only gets flushed if it contains data overlapping the range
  // provided to `CompactRange`. Tests all kinds of overlap/non-overlap.
  const int kNumEndpointKeys = 5;
  std::string keys[kNumEndpointKeys] = {"a", "b", "c", "d", "e"};
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  Reopen(options);

  // One extra iteration for nullptr, which means left side of interval is
  // unbounded.
  for (int i = 0; i <= kNumEndpointKeys; ++i) {
    Slice begin;
    Slice* begin_ptr;
    if (i == 0) {
      begin_ptr = nullptr;
    } else {
      begin = keys[i - 1];
      begin_ptr = &begin;
    }
    // Start at `i` so right endpoint comes after left endpoint. One extra
    // iteration for nullptr, which means right side of interval is unbounded.
    for (int j = std::max(0, i - 1); j <= kNumEndpointKeys; ++j) {
      Slice end;
      Slice* end_ptr;
      if (j == kNumEndpointKeys) {
        end_ptr = nullptr;
      } else {
        end = keys[j];
        end_ptr = &end;
      }
      ASSERT_OK(Put("b", "val"));
      ASSERT_OK(Put("d", "val"));
      CompactRangeOptions compact_range_opts;
      ASSERT_OK(db_->CompactRange(compact_range_opts, begin_ptr, end_ptr));

      uint64_t get_prop_tmp, num_memtable_entries = 0;
      ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesImmMemTables,
                                      &get_prop_tmp));
      num_memtable_entries += get_prop_tmp;
      ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
                                      &get_prop_tmp));
      num_memtable_entries += get_prop_tmp;
      if (begin_ptr == nullptr || end_ptr == nullptr ||
          (i <= 4 && j >= 1 && (begin != "c" || end != "c"))) {
        // In this case `CompactRange`'s range overlapped in some way with the
        // memtable's range, so flush should've happened. Then "b" and "d" won't
        // be in the memtable.
        ASSERT_EQ(0, num_memtable_entries);
      } else {
        ASSERT_EQ(2, num_memtable_entries);
        // flush anyways to prepare for next iteration
        ASSERT_OK(db_->Flush(FlushOptions()));
      }
    }
  }
}

TEST_F(DBCompactionTest, CompactionStatsTest) {
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 2;
  CompactionStatsCollector* collector = new CompactionStatsCollector();
  options.listeners.emplace_back(collector);
  DestroyAndReopen(options);

  for (int i = 0; i < 32; i++) {
    for (int j = 0; j < 5000; j++) {
      ASSERT_OK(Put(std::to_string(j), std::string(1, 'A')));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ColumnFamilyHandleImpl* cfh =
      static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily());
  ColumnFamilyData* cfd = cfh->cfd();

  VerifyCompactionStats(*cfd, *collector);
}

TEST_F(DBCompactionTest, SubcompactionEvent) {
  class SubCompactionEventListener : public EventListener {
   public:
    void OnCompactionBegin(DB* /*db*/, const CompactionJobInfo& ci) override {
      InstrumentedMutexLock l(&mutex_);
      ASSERT_EQ(running_compactions_.find(ci.job_id),
                running_compactions_.end());
      running_compactions_.emplace(ci.job_id, std::unordered_set<int>());
    }

    void OnCompactionCompleted(DB* /*db*/,
                               const CompactionJobInfo& ci) override {
      InstrumentedMutexLock l(&mutex_);
      auto it = running_compactions_.find(ci.job_id);
      ASSERT_NE(it, running_compactions_.end());
      ASSERT_EQ(it->second.size(), 0);
      running_compactions_.erase(it);
    }

    void OnSubcompactionBegin(const SubcompactionJobInfo& si) override {
      InstrumentedMutexLock l(&mutex_);
      auto it = running_compactions_.find(si.job_id);
      ASSERT_NE(it, running_compactions_.end());
      auto r = it->second.insert(si.subcompaction_job_id);
      ASSERT_TRUE(r.second);  // each subcompaction_job_id should be different
      total_subcompaction_cnt_++;
    }

    void OnSubcompactionCompleted(const SubcompactionJobInfo& si) override {
      InstrumentedMutexLock l(&mutex_);
      auto it = running_compactions_.find(si.job_id);
      ASSERT_NE(it, running_compactions_.end());
      auto r = it->second.erase(si.subcompaction_job_id);
      ASSERT_EQ(r, 1);
    }

    size_t GetRunningCompactionCount() {
      InstrumentedMutexLock l(&mutex_);
      return running_compactions_.size();
    }

    size_t GetTotalSubcompactionCount() {
      InstrumentedMutexLock l(&mutex_);
      return total_subcompaction_cnt_;
    }

   private:
    InstrumentedMutex mutex_;
    std::unordered_map<int, std::unordered_set<int>> running_compactions_;
    size_t total_subcompaction_cnt_ = 0;
  };

  Options options = CurrentOptions();
  options.target_file_size_base = 1024;
  options.level0_file_num_compaction_trigger = 10;
  auto* listener = new SubCompactionEventListener();
  options.listeners.emplace_back(listener);

  DestroyAndReopen(options);

  // generate 4 files @ L2
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 10; j++) {
      int key_id = i * 10 + j;
      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
    }
    ASSERT_OK(Flush());
  }
  MoveFilesToLevel(2);

  // generate 2 files @ L1 which overlaps with L2 files
  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < 10; j++) {
      int key_id = i * 20 + j * 2;
      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
    }
    ASSERT_OK(Flush());
  }
  MoveFilesToLevel(1);
  ASSERT_EQ(FilesPerLevel(), "0,2,4");

  CompactRangeOptions comp_opts;
  comp_opts.max_subcompactions = 4;
  Status s = dbfull()->CompactRange(comp_opts, nullptr, nullptr);
  ASSERT_OK(s);
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // make sure there's no running compaction
  ASSERT_EQ(listener->GetRunningCompactionCount(), 0);
  // and sub compaction is triggered
  ASSERT_GT(listener->GetTotalSubcompactionCount(), 0);
}

TEST_F(DBCompactionTest, CompactFilesOutputRangeConflict) {
  // LSM setup:
  // L1:      [ba bz]
  // L2: [a b]       [c d]
  // L3: [a b]       [c d]
  //
  // Thread 1:                        Thread 2:
  // Begin compacting all L2->L3
  //                                  Compact [ba bz] L1->L3
  // End compacting all L2->L3
  //
  // The compaction operation in thread 2 should be disallowed because the range
  // overlaps with the compaction in thread 1, which also covers that range in
  // L3.
  Options options = CurrentOptions();
  FlushedFileCollector* collector = new FlushedFileCollector();
  options.listeners.emplace_back(collector);
  Reopen(options);

  for (int level = 3; level >= 2; --level) {
    ASSERT_OK(Put("a", "val"));
    ASSERT_OK(Put("b", "val"));
    ASSERT_OK(Flush());
    ASSERT_OK(Put("c", "val"));
    ASSERT_OK(Put("d", "val"));
    ASSERT_OK(Flush());
    MoveFilesToLevel(level);
  }
  ASSERT_OK(Put("ba", "val"));
  ASSERT_OK(Put("bz", "val"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);

  SyncPoint::GetInstance()->LoadDependency({
      {"CompactFilesImpl:0",
       "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2Begin"},
      {"DBCompactionTest::CompactFilesOutputRangeConflict:Thread2End",
       "CompactFilesImpl:1"},
  });
  SyncPoint::GetInstance()->EnableProcessing();

  auto bg_thread = port::Thread([&]() {
    // Thread 1
    std::vector<std::string> filenames = collector->GetFlushedFiles();
    filenames.pop_back();
    ASSERT_OK(db_->CompactFiles(CompactionOptions(), filenames,
                                3 /* output_level */));
  });

  // Thread 2
  TEST_SYNC_POINT(
      "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2Begin");
  std::string filename = collector->GetFlushedFiles().back();
  ASSERT_FALSE(
      db_->CompactFiles(CompactionOptions(), {filename}, 3 /* output_level */)
          .ok());
  TEST_SYNC_POINT(
      "DBCompactionTest::CompactFilesOutputRangeConflict:Thread2End");

  bg_thread.join();
}

TEST_F(DBCompactionTest, CompactionHasEmptyOutput) {
  Options options = CurrentOptions();
  SstStatsCollector* collector = new SstStatsCollector();
  options.level0_file_num_compaction_trigger = 2;
  options.listeners.emplace_back(collector);
  Reopen(options);

  // Make sure the L0 files overlap to prevent trivial move.
  ASSERT_OK(Put("a", "val"));
  ASSERT_OK(Put("b", "val"));
  ASSERT_OK(Flush());
  ASSERT_OK(Delete("a"));
  ASSERT_OK(Delete("b"));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);

  // Expect one file creation to start for each flush, and zero for compaction
  // since no keys are written.
  ASSERT_EQ(2, collector->num_ssts_creation_started());
}

TEST_F(DBCompactionTest, CompactionLimiter) {
  const int kNumKeysPerFile = 10;
  const int kMaxBackgroundThreads = 64;

  struct CompactionLimiter {
    std::string name;
    int limit_tasks;
    int max_tasks;
    int tasks;
    std::shared_ptr<ConcurrentTaskLimiter> limiter;
  };

  std::vector<CompactionLimiter> limiter_settings;
  limiter_settings.push_back({"limiter_1", 1, 0, 0, nullptr});
  limiter_settings.push_back({"limiter_2", 2, 0, 0, nullptr});
  limiter_settings.push_back({"limiter_3", 3, 0, 0, nullptr});

  for (auto& ls : limiter_settings) {
    ls.limiter.reset(NewConcurrentTaskLimiter(ls.name, ls.limit_tasks));
  }

  std::shared_ptr<ConcurrentTaskLimiter> unique_limiter(
      NewConcurrentTaskLimiter("unique_limiter", -1));

  const char* cf_names[] = {"default", "0", "1", "2", "3", "4", "5", "6", "7",
                            "8",       "9", "a", "b", "c", "d", "e", "f"};
  const unsigned int cf_count = sizeof cf_names / sizeof cf_names[0];

  std::unordered_map<std::string, CompactionLimiter*> cf_to_limiter;

  Options options = CurrentOptions();
  options.write_buffer_size = 110 * 1024;  // 110KB
  options.arena_block_size = 4096;
  options.num_levels = 3;
  options.level0_file_num_compaction_trigger = 4;
  options.level0_slowdown_writes_trigger = 64;
  options.level0_stop_writes_trigger = 64;
  options.max_background_jobs = kMaxBackgroundThreads;  // Enough threads
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  options.max_write_buffer_number = 10;  // Enough memtables
  DestroyAndReopen(options);

  std::vector<Options> option_vector;
  option_vector.reserve(cf_count);

  for (unsigned int cf = 0; cf < cf_count; cf++) {
    ColumnFamilyOptions cf_opt(options);
    if (cf == 0) {
      // "Default" CF does't use compaction limiter
      cf_opt.compaction_thread_limiter = nullptr;
    } else if (cf == 1) {
      // "1" CF uses bypass compaction limiter
      unique_limiter->SetMaxOutstandingTask(-1);
      cf_opt.compaction_thread_limiter = unique_limiter;
    } else {
      // Assign limiter by mod
      auto& ls = limiter_settings[cf % 3];
      cf_opt.compaction_thread_limiter = ls.limiter;
      cf_to_limiter[cf_names[cf]] = &ls;
    }
    option_vector.emplace_back(DBOptions(options), cf_opt);
  }

  for (unsigned int cf = 1; cf < cf_count; cf++) {
    CreateColumnFamilies({cf_names[cf]}, option_vector[cf]);
  }

  ReopenWithColumnFamilies(
      std::vector<std::string>(cf_names, cf_names + cf_count), option_vector);

  port::Mutex mutex;

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:BeforeCompaction", [&](void* arg) {
        const auto& cf_name = static_cast<ColumnFamilyData*>(arg)->GetName();
        auto iter = cf_to_limiter.find(cf_name);
        if (iter != cf_to_limiter.end()) {
          MutexLock l(&mutex);
          ASSERT_GE(iter->second->limit_tasks, ++iter->second->tasks);
          iter->second->max_tasks =
              std::max(iter->second->max_tasks, iter->second->limit_tasks);
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:AfterCompaction", [&](void* arg) {
        const auto& cf_name = static_cast<ColumnFamilyData*>(arg)->GetName();
        auto iter = cf_to_limiter.find(cf_name);
        if (iter != cf_to_limiter.end()) {
          MutexLock l(&mutex);
          ASSERT_GE(--iter->second->tasks, 0);
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Block all compact threads in thread pool.
  const size_t kTotalFlushTasks = kMaxBackgroundThreads / 4;
  const size_t kTotalCompactTasks = kMaxBackgroundThreads - kTotalFlushTasks;
  env_->SetBackgroundThreads((int)kTotalFlushTasks, Env::HIGH);
  env_->SetBackgroundThreads((int)kTotalCompactTasks, Env::LOW);

  test::SleepingBackgroundTask sleeping_compact_tasks[kTotalCompactTasks];

  // Block all compaction threads in thread pool.
  for (size_t i = 0; i < kTotalCompactTasks; i++) {
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                   &sleeping_compact_tasks[i], Env::LOW);
    sleeping_compact_tasks[i].WaitUntilSleeping();
  }

  int keyIndex = 0;

  for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) {
    for (unsigned int cf = 0; cf < cf_count; cf++) {
      // All L0s should overlap with each other
      for (int i = 0; i < kNumKeysPerFile; i++) {
        ASSERT_OK(Put(cf, Key(i), ""));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put(cf, "", ""));
    }

    for (unsigned int cf = 0; cf < cf_count; cf++) {
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf]));
    }
  }

  // Enough L0 files to trigger compaction
  for (unsigned int cf = 0; cf < cf_count; cf++) {
    ASSERT_EQ(NumTableFilesAtLevel(0, cf),
              options.level0_file_num_compaction_trigger);
  }

  // Create more files for one column family, which triggers speed up
  // condition, all compactions will be scheduled.
  for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) {
    for (int i = 0; i < kNumKeysPerFile; i++) {
      ASSERT_OK(Put(0, Key(i), ""));
    }
    // put extra key to trigger flush
    ASSERT_OK(Put(0, "", ""));
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
    ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1,
              NumTableFilesAtLevel(0, 0));
  }

  // All CFs are pending compaction
  ASSERT_EQ(cf_count, env_->GetThreadPoolQueueLen(Env::LOW));

  // Unblock all compaction threads
  for (size_t i = 0; i < kTotalCompactTasks; i++) {
    sleeping_compact_tasks[i].WakeUp();
    sleeping_compact_tasks[i].WaitUntilDone();
  }

  for (unsigned int cf = 0; cf < cf_count; cf++) {
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf]));
  }

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Max outstanding compact tasks reached limit
  for (auto& ls : limiter_settings) {
    ASSERT_EQ(ls.limit_tasks, ls.max_tasks);
    ASSERT_EQ(0, ls.limiter->GetOutstandingTask());
  }

  // test manual compaction under a fully throttled limiter
  int cf_test = 1;
  unique_limiter->SetMaxOutstandingTask(0);

  // flush one more file to cf 1
  for (int i = 0; i < kNumKeysPerFile; i++) {
    ASSERT_OK(Put(cf_test, Key(keyIndex++), ""));
  }
  // put extra key to trigger flush
  ASSERT_OK(Put(cf_test, "", ""));

  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf_test]));
  ASSERT_EQ(1, NumTableFilesAtLevel(0, cf_test));

  Compact(cf_test, Key(0), Key(keyIndex));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
}

INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
                        ::testing::Values(std::make_tuple(1, true),
                                          std::make_tuple(1, false),
                                          std::make_tuple(4, true),
                                          std::make_tuple(4, false)));

TEST_P(DBCompactionDirectIOTest, DirectIO) {
  Options options = CurrentOptions();
  Destroy(options);
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.use_direct_io_for_flush_and_compaction = GetParam();
  options.env = MockEnv::Create(Env::Default());
  Reopen(options);
  bool readahead = false;
  SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::OpenCompactionOutputFile", [&](void* arg) {
        bool* use_direct_writes = static_cast<bool*>(arg);
        ASSERT_EQ(*use_direct_writes,
                  options.use_direct_io_for_flush_and_compaction);
      });
  if (options.use_direct_io_for_flush_and_compaction) {
    SyncPoint::GetInstance()->SetCallBack(
        "SanitizeOptions:direct_io", [&](void* /*arg*/) { readahead = true; });
  }
  SyncPoint::GetInstance()->EnableProcessing();
  CreateAndReopenWithCF({"pikachu"}, options);
  MakeTables(3, "p", "q", 1);
  ASSERT_EQ("1,1,1", FilesPerLevel(1));
  Compact(1, "p", "q");
  ASSERT_EQ(readahead, options.use_direct_reads);
  ASSERT_EQ("0,0,1", FilesPerLevel(1));
  Destroy(options);
  delete options.env;
}

INSTANTIATE_TEST_CASE_P(DBCompactionDirectIOTest, DBCompactionDirectIOTest,
                        testing::Bool());

class CompactionPriTest : public DBTestBase,
                          public testing::WithParamInterface<uint32_t> {
 public:
  CompactionPriTest()
      : DBTestBase("compaction_pri_test", /*env_do_fsync=*/true) {
    compaction_pri_ = GetParam();
  }

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  uint32_t compaction_pri_;
};

TEST_P(CompactionPriTest, Test) {
  Options options = CurrentOptions();
  options.write_buffer_size = 16 * 1024;
  options.compaction_pri = static_cast<CompactionPri>(compaction_pri_);
  options.hard_pending_compaction_bytes_limit = 256 * 1024;
  options.max_bytes_for_level_base = 64 * 1024;
  options.max_bytes_for_level_multiplier = 4;
  options.compression = kNoCompression;

  DestroyAndReopen(options);

  Random rnd(301);
  const int kNKeys = 5000;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  RandomShuffle(std::begin(keys), std::end(keys), rnd.Next());

  for (int i = 0; i < kNKeys; i++) {
    ASSERT_OK(Put(Key(keys[i]), rnd.RandomString(102)));
  }

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  for (int i = 0; i < kNKeys; i++) {
    ASSERT_NE("NOT_FOUND", Get(Key(i)));
  }
}

INSTANTIATE_TEST_CASE_P(
    CompactionPriTest, CompactionPriTest,
    ::testing::Values(CompactionPri::kByCompensatedSize,
                      CompactionPri::kOldestLargestSeqFirst,
                      CompactionPri::kOldestSmallestSeqFirst,
                      CompactionPri::kMinOverlappingRatio,
                      CompactionPri::kRoundRobin));

TEST_F(DBCompactionTest, PersistRoundRobinCompactCursor) {
  Options options = CurrentOptions();
  options.write_buffer_size = 16 * 1024;
  options.max_bytes_for_level_base = 128 * 1024;
  options.target_file_size_base = 64 * 1024;
  options.level0_file_num_compaction_trigger = 4;
  options.compaction_pri = CompactionPri::kRoundRobin;
  options.max_bytes_for_level_multiplier = 4;
  options.num_levels = 3;
  options.compression = kNoCompression;

  DestroyAndReopen(options);

  Random rnd(301);

  // 30 Files in L0 to trigger compactions between L1 and L2
  for (int i = 0; i < 30; i++) {
    for (int j = 0; j < 16; j++) {
      ASSERT_OK(Put(rnd.RandomString(24), rnd.RandomString(1000)));
    }
    ASSERT_OK(Flush());
  }

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  VersionSet* const versions = dbfull()->GetVersionSet();
  assert(versions);

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  ASSERT_NE(cfd, nullptr);

  Version* const current = cfd->current();
  ASSERT_NE(current, nullptr);

  const VersionStorageInfo* const storage_info = current->storage_info();
  ASSERT_NE(storage_info, nullptr);

  const std::vector<InternalKey> compact_cursors =
      storage_info->GetCompactCursors();

  Reopen(options);

  VersionSet* const reopened_versions = dbfull()->GetVersionSet();
  assert(reopened_versions);

  ColumnFamilyData* const reopened_cfd =
      reopened_versions->GetColumnFamilySet()->GetDefault();
  ASSERT_NE(reopened_cfd, nullptr);

  Version* const reopened_current = reopened_cfd->current();
  ASSERT_NE(reopened_current, nullptr);

  const VersionStorageInfo* const reopened_storage_info =
      reopened_current->storage_info();
  ASSERT_NE(reopened_storage_info, nullptr);

  const std::vector<InternalKey> reopened_compact_cursors =
      reopened_storage_info->GetCompactCursors();
  const auto icmp = reopened_storage_info->InternalComparator();
  ASSERT_EQ(compact_cursors.size(), reopened_compact_cursors.size());
  for (size_t i = 0; i < compact_cursors.size(); i++) {
    if (compact_cursors[i].Valid()) {
      ASSERT_EQ(0,
                icmp->Compare(compact_cursors[i], reopened_compact_cursors[i]));
    } else {
      ASSERT_TRUE(!reopened_compact_cursors[i].Valid());
    }
  }
}

TEST_P(RoundRobinSubcompactionsAgainstPressureToken, PressureTokenTest) {
  const int kKeysPerBuffer = 100;
  Options options = CurrentOptions();
  options.num_levels = 4;
  options.max_bytes_for_level_multiplier = 2;
  options.level0_file_num_compaction_trigger = 4;
  options.target_file_size_base = kKeysPerBuffer * 1024;
  options.compaction_pri = CompactionPri::kRoundRobin;
  options.max_bytes_for_level_base = 8 * kKeysPerBuffer * 1024;
  options.disable_auto_compactions = true;
  // Setup 7 threads but limited subcompactions so that
  // RoundRobin requires extra compactions from reserved threads
  options.max_subcompactions = 1;
  options.max_background_compactions = 7;
  options.max_compaction_bytes = 100000000;
  DestroyAndReopen(options);
  env_->SetBackgroundThreads(7, Env::LOW);

  Random rnd(301);
  const std::vector<int> files_per_level = {0, 15, 25};
  for (int lvl = 2; lvl > 0; lvl--) {
    for (int i = 0; i < files_per_level[lvl]; i++) {
      for (int j = 0; j < kKeysPerBuffer; j++) {
        // Add (lvl-1) to ensure nearly equivallent number of files
        // in L2 are overlapped with fils selected to compact from
        // L1
        ASSERT_OK(Put(Key(2 * i * kKeysPerBuffer + 2 * j + (lvl - 1)),
                      rnd.RandomString(1010)));
      }
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(lvl);
    ASSERT_EQ(files_per_level[lvl], NumTableFilesAtLevel(lvl, 0));
  }
  // 15 files in L1; 25 files in L2

  // This is a variable for making sure the following callback is called
  // and the assertions in it are indeed excuted.
  bool num_planned_subcompactions_verified = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::GenSubcompactionBoundaries:0", [&](void* arg) {
        uint64_t num_planned_subcompactions = *(static_cast<uint64_t*>(arg));
        if (grab_pressure_token_) {
          // 7 files are selected for round-robin under auto
          // compaction. The number of planned subcompaction is restricted by
          // the limited number of max_background_compactions
          ASSERT_EQ(num_planned_subcompactions, 7);
        } else {
          ASSERT_EQ(num_planned_subcompactions, 1);
        }
        num_planned_subcompactions_verified = true;
      });

  // The following 3 dependencies have to be added to ensure the auto
  // compaction and the pressure token is correctly enabled. Same for
  // RoundRobinSubcompactionsUsingResources and
  // DBCompactionTest.RoundRobinSubcompactionsShrinkResources
  SyncPoint::GetInstance()->LoadDependency(
      {{"RoundRobinSubcompactionsAgainstPressureToken:0",
        "BackgroundCallCompaction:0"},
       {"CompactionJob::AcquireSubcompactionResources:0",
        "RoundRobinSubcompactionsAgainstPressureToken:1"},
       {"RoundRobinSubcompactionsAgainstPressureToken:2",
        "CompactionJob::AcquireSubcompactionResources:1"}});
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(dbfull()->EnableAutoCompaction({dbfull()->DefaultColumnFamily()}));
  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstPressureToken:0");
  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstPressureToken:1");
  std::unique_ptr<WriteControllerToken> pressure_token;
  if (grab_pressure_token_) {
    pressure_token =
        dbfull()->TEST_write_controler().GetCompactionPressureToken();
  }
  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstPressureToken:2");

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(num_planned_subcompactions_verified);
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

INSTANTIATE_TEST_CASE_P(RoundRobinSubcompactionsAgainstPressureToken,
                        RoundRobinSubcompactionsAgainstPressureToken,
                        testing::Bool());

TEST_P(RoundRobinSubcompactionsAgainstResources, SubcompactionsUsingResources) {
  const int kKeysPerBuffer = 200;
  Options options = CurrentOptions();
  options.num_levels = 4;
  options.level0_file_num_compaction_trigger = 3;
  options.target_file_size_base = kKeysPerBuffer * 1024;
  options.compaction_pri = CompactionPri::kRoundRobin;
  options.max_bytes_for_level_base = 30 * kKeysPerBuffer * 1024;
  options.disable_auto_compactions = true;
  options.max_subcompactions = 1;
  options.max_background_compactions = max_compaction_limits_;
  // Set a large number for max_compaction_bytes so that one round-robin
  // compaction is enough to make post-compaction L1 size less than
  // the maximum size (this test assumes only one round-robin compaction
  // is triggered by kLevelMaxLevelSize)
  options.max_compaction_bytes = 100000000;

  DestroyAndReopen(options);
  env_->SetBackgroundThreads(total_low_pri_threads_, Env::LOW);

  Random rnd(301);
  const std::vector<int> files_per_level = {0, 40, 100};
  for (int lvl = 2; lvl > 0; lvl--) {
    for (int i = 0; i < files_per_level[lvl]; i++) {
      for (int j = 0; j < kKeysPerBuffer; j++) {
        // Add (lvl-1) to ensure nearly equivallent number of files
        // in L2 are overlapped with fils selected to compact from
        // L1
        ASSERT_OK(Put(Key(2 * i * kKeysPerBuffer + 2 * j + (lvl - 1)),
                      rnd.RandomString(1010)));
      }
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(lvl);
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(files_per_level[lvl], NumTableFilesAtLevel(lvl, 0));
  }

  // 40 files in L1; 100 files in L2
  // This is a variable for making sure the following callback is called
  // and the assertions in it are indeed excuted.
  bool num_planned_subcompactions_verified = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::GenSubcompactionBoundaries:0", [&](void* arg) {
        uint64_t num_planned_subcompactions = *(static_cast<uint64_t*>(arg));
        // More than 10 files are selected for round-robin under auto
        // compaction. The number of planned subcompaction is restricted by
        // the minimum number between available threads and compaction limits
        ASSERT_EQ(num_planned_subcompactions - options.max_subcompactions,
                  std::min(total_low_pri_threads_, max_compaction_limits_) - 1);
        num_planned_subcompactions_verified = true;
      });
  SyncPoint::GetInstance()->LoadDependency(
      {{"RoundRobinSubcompactionsAgainstResources:0",
        "BackgroundCallCompaction:0"},
       {"CompactionJob::AcquireSubcompactionResources:0",
        "RoundRobinSubcompactionsAgainstResources:1"},
       {"RoundRobinSubcompactionsAgainstResources:2",
        "CompactionJob::AcquireSubcompactionResources:1"},
       {"CompactionJob::ReleaseSubcompactionResources:0",
        "RoundRobinSubcompactionsAgainstResources:3"},
       {"RoundRobinSubcompactionsAgainstResources:4",
        "CompactionJob::ReleaseSubcompactionResources:1"}});
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_OK(dbfull()->EnableAutoCompaction({dbfull()->DefaultColumnFamily()}));
  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:0");
  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:1");
  auto pressure_token =
      dbfull()->TEST_write_controler().GetCompactionPressureToken();

  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:2");
  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:3");
  // We can reserve more threads now except one is being used
  ASSERT_EQ(total_low_pri_threads_ - 1,
            env_->ReserveThreads(total_low_pri_threads_, Env::Priority::LOW));
  ASSERT_EQ(
      total_low_pri_threads_ - 1,
      env_->ReleaseThreads(total_low_pri_threads_ - 1, Env::Priority::LOW));
  TEST_SYNC_POINT("RoundRobinSubcompactionsAgainstResources:4");
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(num_planned_subcompactions_verified);
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

INSTANTIATE_TEST_CASE_P(RoundRobinSubcompactionsAgainstResources,
                        RoundRobinSubcompactionsAgainstResources,
                        ::testing::Values(std::make_tuple(1, 5),
                                          std::make_tuple(5, 1),
                                          std::make_tuple(10, 5),
                                          std::make_tuple(5, 10),
                                          std::make_tuple(10, 10)));

TEST_P(DBCompactionTestWithParam, RoundRobinWithoutAdditionalResources) {
  const int kKeysPerBuffer = 200;
  Options options = CurrentOptions();
  options.num_levels = 4;
  options.level0_file_num_compaction_trigger = 3;
  options.target_file_size_base = kKeysPerBuffer * 1024;
  options.compaction_pri = CompactionPri::kRoundRobin;
  options.max_bytes_for_level_base = 30 * kKeysPerBuffer * 1024;
  options.disable_auto_compactions = true;
  options.max_subcompactions = max_subcompactions_;
  options.max_background_compactions = 1;
  options.max_compaction_bytes = 100000000;
  // Similar experiment setting as above except the max_subcompactions
  // is given by max_subcompactions_ (1 or 4), and we fix the
  // additional resources as (1, 1) and thus no more extra resources
  // can be used
  DestroyAndReopen(options);
  env_->SetBackgroundThreads(1, Env::LOW);

  Random rnd(301);
  const std::vector<int> files_per_level = {0, 33, 100};
  for (int lvl = 2; lvl > 0; lvl--) {
    for (int i = 0; i < files_per_level[lvl]; i++) {
      for (int j = 0; j < kKeysPerBuffer; j++) {
        // Add (lvl-1) to ensure nearly equivallent number of files
        // in L2 are overlapped with fils selected to compact from
        // L1
        ASSERT_OK(Put(Key(2 * i * kKeysPerBuffer + 2 * j + (lvl - 1)),
                      rnd.RandomString(1010)));
      }
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(lvl);
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(files_per_level[lvl], NumTableFilesAtLevel(lvl, 0));
  }

  // 33 files in L1; 100 files in L2
  // This is a variable for making sure the following callback is called
  // and the assertions in it are indeed excuted.
  bool num_planned_subcompactions_verified = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "CompactionJob::GenSubcompactionBoundaries:0", [&](void* arg) {
        uint64_t num_planned_subcompactions = *(static_cast<uint64_t*>(arg));
        // At most 4 files are selected for round-robin under auto
        // compaction. The number of planned subcompaction is restricted by
        // the max_subcompactions since no extra resources can be used
        ASSERT_EQ(num_planned_subcompactions, options.max_subcompactions);
        num_planned_subcompactions_verified = true;
      });
  // No need to setup dependency for pressure token since
  // AcquireSubcompactionResources may not be called and it anyway cannot
  // reserve any additional resources
  SyncPoint::GetInstance()->LoadDependency(
      {{"DBCompactionTest::RoundRobinWithoutAdditionalResources:0",
        "BackgroundCallCompaction:0"}});
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_OK(dbfull()->EnableAutoCompaction({dbfull()->DefaultColumnFamily()}));
  TEST_SYNC_POINT("DBCompactionTest::RoundRobinWithoutAdditionalResources:0");

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(num_planned_subcompactions_verified);
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

TEST_F(DBCompactionTest, RoundRobinCutOutputAtCompactCursor) {
  Options options = CurrentOptions();
  options.num_levels = 3;
  options.compression = kNoCompression;
  options.write_buffer_size = 4 * 1024;
  options.max_bytes_for_level_base = 64 * 1024;
  options.max_bytes_for_level_multiplier = 4;
  options.level0_file_num_compaction_trigger = 4;
  options.compaction_pri = CompactionPri::kRoundRobin;

  DestroyAndReopen(options);

  VersionSet* const versions = dbfull()->GetVersionSet();
  assert(versions);

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  ASSERT_NE(cfd, nullptr);

  Version* const current = cfd->current();
  ASSERT_NE(current, nullptr);

  VersionStorageInfo* storage_info = current->storage_info();
  ASSERT_NE(storage_info, nullptr);

  const InternalKey split_cursor = InternalKey(Key(600), 100, kTypeValue);
  storage_info->AddCursorForOneLevel(2, split_cursor);

  Random rnd(301);

  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      ASSERT_OK(Put(Key(j * 2 + i * 100), rnd.RandomString(102)));
    }
  }
  // Add more overlapping files (avoid trivial move) to trigger compaction that
  // output files in L2. Note that trivial move does not trigger compaction and
  // in that case the cursor is not necessarily the boundary of file.
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      ASSERT_OK(Put(Key(j * 2 + 1 + i * 100), rnd.RandomString(1014)));
    }
  }

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  std::vector<std::vector<FileMetaData>> level_to_files;
  dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
                                  &level_to_files);
  const auto icmp = cfd->current()->storage_info()->InternalComparator();
  // Files in level 2 should be split by the cursor
  for (const auto& file : level_to_files[2]) {
    ASSERT_TRUE(
        icmp->Compare(file.smallest.Encode(), split_cursor.Encode()) >= 0 ||
        icmp->Compare(file.largest.Encode(), split_cursor.Encode()) < 0);
  }
}

class NoopMergeOperator : public MergeOperator {
 public:
  NoopMergeOperator() {}

  bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
                   MergeOperationOutput* merge_out) const override {
    std::string val("bar");
    merge_out->new_value = val;
    return true;
  }

  const char* Name() const override { return "Noop"; }
};

TEST_F(DBCompactionTest, PartialManualCompaction) {
  Options opts = CurrentOptions();
  opts.num_levels = 3;
  opts.level0_file_num_compaction_trigger = 10;
  opts.compression = kNoCompression;
  opts.merge_operator.reset(new NoopMergeOperator());
  opts.target_file_size_base = 10240;
  DestroyAndReopen(opts);

  Random rnd(301);
  for (auto i = 0; i < 8; ++i) {
    for (auto j = 0; j < 10; ++j) {
      ASSERT_OK(Merge("foo", rnd.RandomString(1024)));
    }
    ASSERT_OK(Flush());
  }

  MoveFilesToLevel(2);

  std::string prop;
  EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop));
  uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2;
  ASSERT_OK(dbfull()->SetOptions(
      {{"max_compaction_bytes", std::to_string(max_compaction_bytes)}}));

  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
}

TEST_F(DBCompactionTest, ManualCompactionFailsInReadOnlyMode) {
  // Regression test for bug where manual compaction hangs forever when the DB
  // is in read-only mode. Verify it now at least returns, despite failing.
  const int kNumL0Files = 4;
  std::unique_ptr<FaultInjectionTestEnv> mock_env(
      new FaultInjectionTestEnv(env_));
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  opts.env = mock_env.get();
  DestroyAndReopen(opts);

  Random rnd(301);
  for (int i = 0; i < kNumL0Files; ++i) {
    // Make sure files are overlapping in key-range to prevent trivial move.
    ASSERT_OK(Put("key1", rnd.RandomString(1024)));
    ASSERT_OK(Put("key2", rnd.RandomString(1024)));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(kNumL0Files, NumTableFilesAtLevel(0));

  // Enter read-only mode by failing a write.
  mock_env->SetFilesystemActive(false);
  // Make sure this is outside `CompactRange`'s range so that it doesn't fail
  // early trying to flush memtable.
  ASSERT_NOK(Put("key3", rnd.RandomString(1024)));

  // In the bug scenario, the first manual compaction would fail and forget to
  // unregister itself, causing the second one to hang forever due to conflict
  // with a non-running compaction.
  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = false;
  Slice begin_key("key1");
  Slice end_key("key2");
  ASSERT_NOK(dbfull()->CompactRange(cro, &begin_key, &end_key));
  ASSERT_NOK(dbfull()->CompactRange(cro, &begin_key, &end_key));

  // Close before mock_env destruct.
  Close();
}

// ManualCompactionBottomLevelOptimization tests the bottom level manual
// compaction optimization to skip recompacting files created by Ln-1 to Ln
// compaction
TEST_F(DBCompactionTest, ManualCompactionBottomLevelOptimized) {
  Options opts = CurrentOptions();
  opts.num_levels = 3;
  opts.level0_file_num_compaction_trigger = 5;
  opts.compression = kNoCompression;
  opts.merge_operator.reset(new NoopMergeOperator());
  opts.target_file_size_base = 1024;
  opts.max_bytes_for_level_multiplier = 2;
  opts.disable_auto_compactions = true;
  DestroyAndReopen(opts);
  ColumnFamilyHandleImpl* cfh =
      static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily());
  ColumnFamilyData* cfd = cfh->cfd();
  InternalStats* internal_stats_ptr = cfd->internal_stats();
  ASSERT_NE(internal_stats_ptr, nullptr);

  Random rnd(301);
  for (auto i = 0; i < 8; ++i) {
    for (auto j = 0; j < 10; ++j) {
      ASSERT_OK(
          Put("foo" + std::to_string(i * 10 + j), rnd.RandomString(1024)));
    }
    ASSERT_OK(Flush());
  }

  MoveFilesToLevel(2);

  for (auto i = 0; i < 8; ++i) {
    for (auto j = 0; j < 10; ++j) {
      ASSERT_OK(
          Put("bar" + std::to_string(i * 10 + j), rnd.RandomString(1024)));
    }
    ASSERT_OK(Flush());
  }
  const std::vector<InternalStats::CompactionStats>& comp_stats =
      internal_stats_ptr->TEST_GetCompactionStats();
  int num = comp_stats[2].num_input_files_in_output_level;
  ASSERT_EQ(num, 0);

  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));

  const std::vector<InternalStats::CompactionStats>& comp_stats2 =
      internal_stats_ptr->TEST_GetCompactionStats();
  num = comp_stats2[2].num_input_files_in_output_level;
  ASSERT_EQ(num, 0);
}

TEST_F(DBCompactionTest, ManualCompactionMax) {
  uint64_t l1_avg_size = 0, l2_avg_size = 0;
  auto generate_sst_func = [&]() {
    Random rnd(301);
    for (auto i = 0; i < 100; i++) {
      for (auto j = 0; j < 10; j++) {
        ASSERT_OK(Put(Key(i * 10 + j), rnd.RandomString(1024)));
      }
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(2);

    for (auto i = 0; i < 10; i++) {
      for (auto j = 0; j < 10; j++) {
        ASSERT_OK(Put(Key(i * 100 + j * 10), rnd.RandomString(1024)));
      }
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(1);

    std::vector<std::vector<FileMetaData>> level_to_files;
    dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
                                    &level_to_files);

    uint64_t total = 0;
    for (const auto& file : level_to_files[1]) {
      total += file.compensated_file_size;
    }
    l1_avg_size = total / level_to_files[1].size();

    total = 0;
    for (const auto& file : level_to_files[2]) {
      total += file.compensated_file_size;
    }
    l2_avg_size = total / level_to_files[2].size();
  };

  std::atomic_int num_compactions(0);
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BGWorkCompaction", [&](void* /*arg*/) { ++num_compactions; });
  SyncPoint::GetInstance()->EnableProcessing();

  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;

  // with default setting (1.6G by default), it should cover all files in 1
  // compaction
  DestroyAndReopen(opts);
  generate_sst_func();
  num_compactions.store(0);
  CompactRangeOptions cro;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_TRUE(num_compactions.load() == 1);

  // split the compaction to 5
  int num_split = 5;
  DestroyAndReopen(opts);
  generate_sst_func();
  uint64_t total_size = (l1_avg_size * 10) + (l2_avg_size * 100);
  opts.max_compaction_bytes = total_size / num_split;
  opts.target_file_size_base = total_size / num_split;
  Reopen(opts);
  num_compactions.store(0);
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_TRUE(num_compactions.load() == num_split);

  // very small max_compaction_bytes, it should still move forward
  opts.max_compaction_bytes = l1_avg_size / 2;
  opts.target_file_size_base = l1_avg_size / 2;
  DestroyAndReopen(opts);
  generate_sst_func();
  num_compactions.store(0);
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_TRUE(num_compactions.load() > 10);

  // dynamically set the option
  num_split = 2;
  opts.max_compaction_bytes = 0;
  DestroyAndReopen(opts);
  generate_sst_func();
  total_size = (l1_avg_size * 10) + (l2_avg_size * 100);
  Status s = db_->SetOptions(
      {{"max_compaction_bytes", std::to_string(total_size / num_split)},
       {"target_file_size_base", std::to_string(total_size / num_split)}});
  ASSERT_OK(s);

  num_compactions.store(0);
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_TRUE(num_compactions.load() == num_split);
}

TEST_F(DBCompactionTest, CompactionDuringShutdown) {
  Options opts = CurrentOptions();
  opts.level0_file_num_compaction_trigger = 2;
  opts.disable_auto_compactions = true;
  DestroyAndReopen(opts);
  ColumnFamilyHandleImpl* cfh =
      static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily());
  ColumnFamilyData* cfd = cfh->cfd();
  InternalStats* internal_stats_ptr = cfd->internal_stats();
  ASSERT_NE(internal_stats_ptr, nullptr);

  Random rnd(301);
  for (auto i = 0; i < 2; ++i) {
    for (auto j = 0; j < 10; ++j) {
      ASSERT_OK(
          Put("foo" + std::to_string(i * 10 + j), rnd.RandomString(1024)));
    }
    ASSERT_OK(Flush());
  }

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun",
      [&](void* /*arg*/) { dbfull()->shutting_down_.store(true); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  Status s = dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_TRUE(s.ok() || s.IsShutdownInProgress());
  ASSERT_OK(dbfull()->error_handler_.GetBGError());
}

// FixFileIngestionCompactionDeadlock tests and verifies that compaction and
// file ingestion do not cause deadlock in the event of write stall triggered
// by number of L0 files reaching level0_stop_writes_trigger.
TEST_P(DBCompactionTestWithParam, FixFileIngestionCompactionDeadlock) {
  const int kNumKeysPerFile = 100;
  // Generate SST files.
  Options options = CurrentOptions();

  // Generate an external SST file containing a single key, i.e. 99
  std::string sst_files_dir = dbname_ + "/sst_files/";
  ASSERT_OK(DestroyDir(env_, sst_files_dir));
  ASSERT_OK(env_->CreateDir(sst_files_dir));
  SstFileWriter sst_writer(EnvOptions(), options);
  const std::string sst_file_path = sst_files_dir + "test.sst";
  ASSERT_OK(sst_writer.Open(sst_file_path));
  ASSERT_OK(sst_writer.Put(Key(kNumKeysPerFile - 1), "value"));
  ASSERT_OK(sst_writer.Finish());

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
  SyncPoint::GetInstance()->LoadDependency({
      {"DBImpl::IngestExternalFile:AfterIncIngestFileCounter",
       "BackgroundCallCompaction:0"},
  });
  SyncPoint::GetInstance()->EnableProcessing();

  options.write_buffer_size = 110 << 10;  // 110KB
  options.level0_file_num_compaction_trigger =
      options.level0_stop_writes_trigger;
  options.max_subcompactions = max_subcompactions_;
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(kNumKeysPerFile));
  DestroyAndReopen(options);
  Random rnd(301);

  // Generate level0_stop_writes_trigger L0 files to trigger write stop
  for (int i = 0; i != options.level0_file_num_compaction_trigger; ++i) {
    for (int j = 0; j != kNumKeysPerFile; ++j) {
      ASSERT_OK(Put(Key(j), rnd.RandomString(990)));
    }
    if (i > 0) {
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      ASSERT_EQ(NumTableFilesAtLevel(0 /*level*/, 0 /*cf*/), i);
    }
  }
  // When we reach this point, there will be level0_stop_writes_trigger L0
  // files and one extra key (99) in memory, which overlaps with the external
  // SST file. Write stall triggers, and can be cleared only after compaction
  // reduces the number of L0 files.

  // Compaction will also be triggered since we have reached the threshold for
  // auto compaction. Note that compaction may begin after the following file
  // ingestion thread and waits for ingestion to finish.

  // Thread to ingest file with overlapping key range with the current
  // memtable. Consequently ingestion will trigger a flush. The flush MUST
  // proceed without waiting for the write stall condition to clear, otherwise
  // deadlock can happen.
  port::Thread ingestion_thr([&]() {
    IngestExternalFileOptions ifo;
    Status s = db_->IngestExternalFile({sst_file_path}, ifo);
    ASSERT_OK(s);
  });

  // More write to trigger write stop
  ingestion_thr.join();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  Close();
}

class DBCompactionTestWithOngoingFileIngestionParam
    : public DBCompactionTest,
      public testing::WithParamInterface<std::string> {
 public:
  DBCompactionTestWithOngoingFileIngestionParam() : DBCompactionTest() {
    compaction_path_to_test_ = GetParam();
  }
  void SetupOptions() {
    options_ = CurrentOptions();
    options_.create_if_missing = true;

    if (compaction_path_to_test_ == "RefitLevelCompactRange") {
      options_.num_levels = 7;
    } else {
      options_.num_levels = 3;
    }
    options_.compaction_style = CompactionStyle::kCompactionStyleLevel;
    if (compaction_path_to_test_ == "AutoCompaction") {
      options_.disable_auto_compactions = false;
      options_.level0_file_num_compaction_trigger = 1;
    } else {
      options_.disable_auto_compactions = true;
    }
  }

  void PauseCompactionThread() {
    sleeping_task_.reset(new test::SleepingBackgroundTask());
    env_->SetBackgroundThreads(1, Env::LOW);
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                   sleeping_task_.get(), Env::Priority::LOW);
    sleeping_task_->WaitUntilSleeping();
  }

  void ResumeCompactionThread() {
    if (sleeping_task_) {
      sleeping_task_->WakeUp();
      sleeping_task_->WaitUntilDone();
    }
  }

  void SetupFilesToForceFutureFilesIngestedToCertainLevel() {
    SstFileWriter sst_file_writer(EnvOptions(), options_);
    std::string dummy = dbname_ + "/dummy.sst";
    ASSERT_OK(sst_file_writer.Open(dummy));
    ASSERT_OK(sst_file_writer.Put("k2", "dummy"));
    ASSERT_OK(sst_file_writer.Finish());
    ASSERT_OK(db_->IngestExternalFile({dummy}, IngestExternalFileOptions()));
    // L2 is made to contain a file overlapped with files to be ingested in
    // later steps on key "k2". This will force future files ingested to L1 or
    // above.
    ASSERT_EQ("0,0,1", FilesPerLevel(0));
  }

  void SetupSyncPoints() {
    if (compaction_path_to_test_ == "AutoCompaction") {
      SyncPoint::GetInstance()->SetCallBack(
          "ExternalSstFileIngestionJob::Run", [&](void*) {
            SyncPoint::GetInstance()->LoadDependency(
                {{"DBImpl::BackgroundCompaction():AfterPickCompaction",
                  "VersionSet::LogAndApply:WriteManifest"}});
          });
    } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") {
      SyncPoint::GetInstance()->SetCallBack(
          "ExternalSstFileIngestionJob::Run", [&](void*) {
            SyncPoint::GetInstance()->LoadDependency(
                {{"ColumnFamilyData::CompactRange:Return",
                  "VersionSet::LogAndApply:WriteManifest"}});
          });
    } else if (compaction_path_to_test_ == "RefitLevelCompactRange") {
      SyncPoint::GetInstance()->SetCallBack(
          "ExternalSstFileIngestionJob::Run", [&](void*) {
            SyncPoint::GetInstance()->LoadDependency(
                {{"DBImpl::CompactRange:PostRefitLevel",
                  "VersionSet::LogAndApply:WriteManifest"}});
          });
    } else if (compaction_path_to_test_ == "CompactFiles") {
      SyncPoint::GetInstance()->SetCallBack(
          "ExternalSstFileIngestionJob::Run", [&](void*) {
            SyncPoint::GetInstance()->LoadDependency(
                {{"DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles",
                  "VersionSet::LogAndApply:WriteManifest"}});
          });
    } else {
      assert(false);
    }
    SyncPoint::GetInstance()->LoadDependency(
        {{"ExternalSstFileIngestionJob::Run", "PreCompaction"}});
    SyncPoint::GetInstance()->EnableProcessing();
  }

  void RunCompactionOverlappedWithFileIngestion() {
    if (compaction_path_to_test_ == "AutoCompaction") {
      TEST_SYNC_POINT("PreCompaction");
      ResumeCompactionThread();
      // Without proper range conflict check,
      // this would have been `Status::Corruption` about overlapping ranges
      Status s = dbfull()->TEST_WaitForCompact();
      EXPECT_OK(s);
    } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") {
      CompactRangeOptions cro;
      cro.change_level = false;
      std::string start_key = "k1";
      Slice start(start_key);
      std::string end_key = "k4";
      Slice end(end_key);
      TEST_SYNC_POINT("PreCompaction");
      // Without proper range conflict check,
      // this would have been `Status::Corruption` about overlapping ranges
      Status s = dbfull()->CompactRange(cro, &start, &end);
      EXPECT_OK(s);
    } else if (compaction_path_to_test_ == "RefitLevelCompactRange") {
      CompactRangeOptions cro;
      cro.change_level = true;
      cro.target_level = 5;
      std::string start_key = "k1";
      Slice start(start_key);
      std::string end_key = "k4";
      Slice end(end_key);
      TEST_SYNC_POINT("PreCompaction");
      Status s = dbfull()->CompactRange(cro, &start, &end);
      // Without proper range conflict check,
      // this would have been `Status::Corruption` about overlapping ranges
      // To see this, remove the fix AND replace
      // `DBImpl::CompactRange:PostRefitLevel` in sync point dependency with
      // `DBImpl::ReFitLevel:PostRegisterCompaction`
      EXPECT_TRUE(s.IsNotSupported());
      EXPECT_TRUE(s.ToString().find("some ongoing compaction's output") !=
                  std::string::npos);
    } else if (compaction_path_to_test_ == "CompactFiles") {
      ColumnFamilyMetaData cf_meta_data;
      db_->GetColumnFamilyMetaData(&cf_meta_data);
      ASSERT_EQ(cf_meta_data.levels[0].files.size(), 1);
      std::vector<std::string> input_files;
      for (const auto& file : cf_meta_data.levels[0].files) {
        input_files.push_back(file.name);
      }
      TEST_SYNC_POINT("PreCompaction");
      Status s = db_->CompactFiles(CompactionOptions(), input_files, 1);
      // Without proper range conflict check,
      // this would have been `Status::Corruption` about overlapping ranges
      EXPECT_TRUE(s.IsAborted());
      EXPECT_TRUE(
          s.ToString().find(
              "A running compaction is writing to the same output level") !=
          std::string::npos);
    } else {
      assert(false);
    }
  }

  void DisableSyncPoints() {
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }

 protected:
  std::string compaction_path_to_test_;
  Options options_;
  std::shared_ptr<test::SleepingBackgroundTask> sleeping_task_;
};

INSTANTIATE_TEST_CASE_P(DBCompactionTestWithOngoingFileIngestionParam,
                        DBCompactionTestWithOngoingFileIngestionParam,
                        ::testing::Values("AutoCompaction",
                                          "NonRefitLevelCompactRange",
                                          "RefitLevelCompactRange",
                                          "CompactFiles"));

TEST_P(DBCompactionTestWithOngoingFileIngestionParam, RangeConflictCheck) {
  SetupOptions();
  DestroyAndReopen(options_);

  if (compaction_path_to_test_ == "AutoCompaction") {
    PauseCompactionThread();
  }

  if (compaction_path_to_test_ != "RefitLevelCompactRange") {
    SetupFilesToForceFutureFilesIngestedToCertainLevel();
  }

  // Create s1
  ASSERT_OK(Put("k1", "v"));
  ASSERT_OK(Put("k4", "v"));
  ASSERT_OK(Flush());
  if (compaction_path_to_test_ == "RefitLevelCompactRange") {
    MoveFilesToLevel(6 /* level */);
    ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel(0));
  } else {
    ASSERT_EQ("1,0,1", FilesPerLevel(0));
  }

  // To coerce following sequence of events
  // Timeline   Thread 1 (Ingest s2)          Thread 2 (Compact s1)
  // t0   |     Decide to output to Lk
  // t1   |     Release lock in LogAndApply()
  // t2   |                                    Acquire lock
  // t3   |                                    Decides to compact to Lk
  //      |                                    Expected to fail due to range
  //      |                                    conflict check with file
  //      |                                    ingestion
  // t4   |                                    Release lock in LogAndApply()
  // t5   |    Acquire lock again and finish
  // t6   |                                    Acquire lock again and finish
  SetupSyncPoints();

  // Ingest s2
  port::Thread thread1([&] {
    SstFileWriter sst_file_writer(EnvOptions(), options_);
    std::string s2 = dbname_ + "/ingested_s2.sst";
    ASSERT_OK(sst_file_writer.Open(s2));
    ASSERT_OK(sst_file_writer.Put("k2", "v2"));
    ASSERT_OK(sst_file_writer.Put("k3", "v2"));
    ASSERT_OK(sst_file_writer.Finish());
    ASSERT_OK(db_->IngestExternalFile({s2}, IngestExternalFileOptions()));
  });

  // Compact s1. Without proper range conflict check,
  // this will encounter overlapping file corruption.
  port::Thread thread2([&] { RunCompactionOverlappedWithFileIngestion(); });

  thread1.join();
  thread2.join();
  DisableSyncPoints();
}

TEST_F(DBCompactionTest, ConsistencyFailTest) {
  Options options = CurrentOptions();
  options.force_consistency_checks = true;
  DestroyAndReopen(options);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "VersionBuilder::CheckConsistency0", [&](void* arg) {
        auto p =
            reinterpret_cast<std::pair<FileMetaData**, FileMetaData**>*>(arg);
        // just swap the two FileMetaData so that we hit error
        // in CheckConsistency funcion
        FileMetaData* temp = *(p->first);
        *(p->first) = *(p->second);
        *(p->second) = temp;
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  for (int k = 0; k < 2; ++k) {
    ASSERT_OK(Put("foo", "bar"));
    Status s = Flush();
    if (k < 1) {
      ASSERT_OK(s);
    } else {
      ASSERT_TRUE(s.IsCorruption());
    }
  }

  ASSERT_NOK(Put("foo", "bar"));
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

TEST_F(DBCompactionTest, ConsistencyFailTest2) {
  Options options = CurrentOptions();
  options.force_consistency_checks = true;
  options.target_file_size_base = 1000;
  options.level0_file_num_compaction_trigger = 2;
  BlockBasedTableOptions bbto;
  bbto.block_size = 400;  // small block size
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  DestroyAndReopen(options);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "VersionBuilder::CheckConsistency1", [&](void* arg) {
        auto p =
            reinterpret_cast<std::pair<FileMetaData**, FileMetaData**>*>(arg);
        // just swap the two FileMetaData so that we hit error
        // in CheckConsistency funcion
        FileMetaData* temp = *(p->first);
        *(p->first) = *(p->second);
        *(p->second) = temp;
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  std::string value = rnd.RandomString(1000);

  ASSERT_OK(Put("foo1", value));
  ASSERT_OK(Put("z", ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("foo2", value));
  ASSERT_OK(Put("z", ""));
  Status s = Flush();
  ASSERT_TRUE(s.ok() || s.IsCorruption());

  // This probably returns non-OK, but we rely on the next Put()
  // to determine the DB is frozen.
  ASSERT_NOK(dbfull()->TEST_WaitForCompact());
  ASSERT_NOK(Put("foo", "bar"));
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

void IngestOneKeyValue(DBImpl* db, const std::string& key,
                       const std::string& value, const Options& options) {
  ExternalSstFileInfo info;
  std::string f = test::PerThreadDBPath("sst_file" + key);
  EnvOptions env;
  ROCKSDB_NAMESPACE::SstFileWriter writer(env, options);
  auto s = writer.Open(f);
  ASSERT_OK(s);
  // ASSERT_OK(writer.Put(Key(), ""));
  ASSERT_OK(writer.Put(key, value));

  ASSERT_OK(writer.Finish(&info));
  IngestExternalFileOptions ingest_opt;

  ASSERT_OK(db->IngestExternalFile({info.file_path}, ingest_opt));
}

class DBCompactionTestL0FilesMisorderCorruption : public DBCompactionTest {
 public:
  DBCompactionTestL0FilesMisorderCorruption() : DBCompactionTest() {}
  void SetupOptions(const CompactionStyle compaciton_style,
                    const std::string& compaction_path_to_test = "") {
    options_ = CurrentOptions();
    options_.create_if_missing = true;
    options_.compression = kNoCompression;

    options_.force_consistency_checks = true;
    options_.compaction_style = compaciton_style;

    if (compaciton_style == CompactionStyle::kCompactionStyleLevel) {
      options_.num_levels = 7;
      // Level compaction's PickIntraL0Compaction() impl detail requires
      // `options.level0_file_num_compaction_trigger` to be
      // at least 2 files less than the actual number of level 0 files
      // (i.e, 7 by design in this test)
      options_.level0_file_num_compaction_trigger = 5;
      options_.max_background_compactions = 2;
      options_.write_buffer_size = 2 << 20;
      options_.max_write_buffer_number = 6;
    } else if (compaciton_style == CompactionStyle::kCompactionStyleUniversal) {
      // TODO: expand test coverage to num_lvels > 1 for universal compacion,
      // which requires careful unit test design to compact to level 0 despite
      // num_levels > 1
      options_.num_levels = 1;
      options_.level0_file_num_compaction_trigger = 5;

      CompactionOptionsUniversal universal_options;
      if (compaction_path_to_test == "PickCompactionToReduceSizeAmp") {
        universal_options.max_size_amplification_percent = 50;
      } else if (compaction_path_to_test ==
                 "PickCompactionToReduceSortedRuns") {
        universal_options.max_size_amplification_percent = 400;
      } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") {
        universal_options.max_size_amplification_percent = 400;
        universal_options.min_merge_width = 6;
      }
      options_.compaction_options_universal = universal_options;
    } else if (compaciton_style == CompactionStyle::kCompactionStyleFIFO) {
      options_.max_open_files = -1;
      options_.num_levels = 1;
      options_.level0_file_num_compaction_trigger = 3;

      CompactionOptionsFIFO fifo_options;
      if (compaction_path_to_test == "FindIntraL0Compaction" ||
          compaction_path_to_test == "CompactRange") {
        fifo_options.allow_compaction = true;
      } else if (compaction_path_to_test == "CompactFile") {
        fifo_options.allow_compaction = false;
      }
      options_.compaction_options_fifo = fifo_options;
    }

    if (compaction_path_to_test == "CompactFile" ||
        compaction_path_to_test == "CompactRange") {
      options_.disable_auto_compactions = true;
    } else {
      options_.disable_auto_compactions = false;
    }
  }

  void Destroy(const Options& options) {
    if (snapshot_) {
      assert(db_);
      db_->ReleaseSnapshot(snapshot_);
      snapshot_ = nullptr;
    }
    DBTestBase::Destroy(options);
  }

  void Reopen(const Options& options) {
    DBTestBase::Reopen(options);
    if (options.compaction_style != CompactionStyle::kCompactionStyleLevel) {
      // To force assigning the global seqno to ingested file
      // for our test purpose.
      assert(snapshot_ == nullptr);
      snapshot_ = db_->GetSnapshot();
    }
  }

  void DestroyAndReopen(Options& options) {
    Destroy(options);
    Reopen(options);
  }

  void PauseCompactionThread() {
    sleeping_task_.reset(new test::SleepingBackgroundTask());
    env_->SetBackgroundThreads(1, Env::LOW);
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                   sleeping_task_.get(), Env::Priority::LOW);
    sleeping_task_->WaitUntilSleeping();
  }

  void ResumeCompactionThread() {
    if (sleeping_task_) {
      sleeping_task_->WakeUp();
      sleeping_task_->WaitUntilDone();
    }
  }

  void AddFilesMarkedForPeriodicCompaction(const size_t num_files) {
    assert(options_.compaction_style ==
           CompactionStyle::kCompactionStyleUniversal);
    VersionSet* const versions = dbfull()->GetVersionSet();
    assert(versions);
    ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
    assert(cfd);
    Version* const current = cfd->current();
    assert(current);

    VersionStorageInfo* const storage_info = current->storage_info();
    assert(storage_info);

    const std::vector<FileMetaData*> level0_files = storage_info->LevelFiles(0);
    assert(level0_files.size() == num_files);

    for (FileMetaData* f : level0_files) {
      storage_info->TEST_AddFileMarkedForPeriodicCompaction(0, f);
    }
  }

  void AddFilesMarkedForCompaction(const size_t num_files) {
    assert(options_.compaction_style ==
           CompactionStyle::kCompactionStyleUniversal);
    VersionSet* const versions = dbfull()->GetVersionSet();
    assert(versions);
    ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
    assert(cfd);
    Version* const current = cfd->current();
    assert(current);

    VersionStorageInfo* const storage_info = current->storage_info();
    assert(storage_info);

    const std::vector<FileMetaData*> level0_files = storage_info->LevelFiles(0);
    assert(level0_files.size() == num_files);

    for (FileMetaData* f : level0_files) {
      storage_info->TEST_AddFileMarkedForCompaction(0, f);
    }
  }

  void SetupSyncPoints(const std::string& compaction_path_to_test) {
    compaction_path_sync_point_called_.store(false);
    if (compaction_path_to_test == "FindIntraL0Compaction" &&
        options_.compaction_style == CompactionStyle::kCompactionStyleLevel) {
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "PostPickFileToCompact", [&](void* arg) {
            bool* picked_file_to_compact = (bool*)arg;
            // To trigger intra-L0 compaction specifically,
            // we mock PickFileToCompact()'s result to be false
            *picked_file_to_compact = false;
          });
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "FindIntraL0Compaction", [&](void* /*arg*/) {
            compaction_path_sync_point_called_.store(true);
          });

    } else if (compaction_path_to_test == "PickPeriodicCompaction") {
      assert(options_.compaction_style ==
             CompactionStyle::kCompactionStyleUniversal);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "PostPickPeriodicCompaction", [&](void* compaction_arg) {
            Compaction* compaction = (Compaction*)compaction_arg;
            if (compaction != nullptr) {
              compaction_path_sync_point_called_.store(true);
            }
          });
    } else if (compaction_path_to_test == "PickCompactionToReduceSizeAmp") {
      assert(options_.compaction_style ==
             CompactionStyle::kCompactionStyleUniversal);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "PickCompactionToReduceSizeAmpReturnNonnullptr", [&](void* /*arg*/) {
            compaction_path_sync_point_called_.store(true);
          });
    } else if (compaction_path_to_test == "PickCompactionToReduceSortedRuns") {
      assert(options_.compaction_style ==
             CompactionStyle::kCompactionStyleUniversal);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "PickCompactionToReduceSortedRunsReturnNonnullptr",
          [&](void* /*arg*/) {
            compaction_path_sync_point_called_.store(true);
          });
    } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") {
      assert(options_.compaction_style ==
             CompactionStyle::kCompactionStyleUniversal);
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "PickDeleteTriggeredCompactionReturnNonnullptr", [&](void* /*arg*/) {
            compaction_path_sync_point_called_.store(true);
          });
    } else if ((compaction_path_to_test == "FindIntraL0Compaction" ||
                compaction_path_to_test == "CompactRange") &&
               options_.compaction_style ==
                   CompactionStyle::kCompactionStyleFIFO) {
      ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
          "FindIntraL0Compaction", [&](void* /*arg*/) {
            compaction_path_sync_point_called_.store(true);
          });
    }

    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  }

  bool SyncPointsCalled() { return compaction_path_sync_point_called_.load(); }

  void DisableSyncPoints() {
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }

  // Return the largest seqno of the latest L0 file based on file number
  SequenceNumber GetLatestL0FileLargestSeqnoHelper() {
    VersionSet* const versions = dbfull()->GetVersionSet();
    assert(versions);
    ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
    assert(cfd);
    Version* const current = cfd->current();
    assert(current);
    VersionStorageInfo* const storage_info = current->storage_info();
    assert(storage_info);
    const std::vector<FileMetaData*> level0_files = storage_info->LevelFiles(0);
    assert(level0_files.size() >= 1);

    uint64_t latest_file_num = 0;
    uint64_t latest_file_largest_seqno = 0;
    for (FileMetaData* f : level0_files) {
      if (f->fd.GetNumber() > latest_file_num) {
        latest_file_num = f->fd.GetNumber();
        latest_file_largest_seqno = f->fd.largest_seqno;
      }
    }

    return latest_file_largest_seqno;
  }

 protected:
  Options options_;

 private:
  const Snapshot* snapshot_ = nullptr;
  std::atomic<bool> compaction_path_sync_point_called_;
  std::shared_ptr<test::SleepingBackgroundTask> sleeping_task_;
};

TEST_F(DBCompactionTestL0FilesMisorderCorruption,
       FlushAfterIntraL0LevelCompactionWithIngestedFile) {
  SetupOptions(CompactionStyle::kCompactionStyleLevel, "");
  DestroyAndReopen(options_);
  // Prevents trivial move
  for (int i = 0; i < 10; ++i) {
    ASSERT_OK(Put(Key(i), ""));  // Prevents trivial move
  }
  ASSERT_OK(Flush());
  Compact("", Key(99));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));

  // To get accurate NumTableFilesAtLevel(0) when the number reaches
  // options_.level0_file_num_compaction_trigger
  PauseCompactionThread();

  // To create below LSM tree
  // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
  //
  // memtable: m1[              5:new@12 ..      1:new@8,    0:new@7]
  //       L0: s6[6:new@13], s5[5:old@6] ...  s1[1:old@2],s0[0:old@1]
  //
  // (1) Make 6 L0 sst (i.e, s0 - s5)
  for (int i = 0; i < 6; ++i) {
    if (i % 2 == 0) {
      IngestOneKeyValue(dbfull(), Key(i), "old", options_);
    } else {
      ASSERT_OK(Put(Key(i), "old"));
      ASSERT_OK(Flush());
    }
  }
  ASSERT_EQ(6, NumTableFilesAtLevel(0));

  // (2) Create m1
  for (int i = 0; i < 6; ++i) {
    ASSERT_OK(Put(Key(i), "new"));
  }
  ASSERT_EQ(6, NumTableFilesAtLevel(0));

  // (3) Ingest file (i.e, s6) to trigger IntraL0Compaction()
  for (int i = 6; i < 7; ++i) {
    ASSERT_EQ(i, NumTableFilesAtLevel(0));
    IngestOneKeyValue(dbfull(), Key(i), "new", options_);
  }

  SetupSyncPoints("FindIntraL0Compaction");
  ResumeCompactionThread();

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_TRUE(SyncPointsCalled());
  DisableSyncPoints();

  // After compaction, we have LSM tree:
  //
  // memtable: m1[          5:new@12 ..     1:new@8,  0:new@7]
  //       L0: s7[6:new@13, 5:old@6 ..                0:old@1]
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  SequenceNumber compact_output_file_largest_seqno =
      GetLatestL0FileLargestSeqnoHelper();

  ASSERT_OK(Flush());
  // After flush, we have LSM tree:
  //
  // L0: s8[5:new@12 .. 0:new@7],s7[6:new@13, 5:old@5 .. 0:old@1]
  ASSERT_EQ(2, NumTableFilesAtLevel(0));
  SequenceNumber flushed_file_largest_seqno =
      GetLatestL0FileLargestSeqnoHelper();

  // To verify there isn't any file misorder leading to returning a old value
  // of Key(0) - Key(5) , which is caused by flushed table s8 has a
  // smaller largest seqno than the compaction output file s7's largest seqno
  // while the flushed table has the newer version of the values than the
  // compaction output file's.
  ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno);
  for (int i = 0; i < 6; ++i) {
    ASSERT_EQ("new", Get(Key(i)));
  }
  for (int i = 6; i < 7; ++i) {
    ASSERT_EQ("new", Get(Key(i)));
  }
}

TEST_F(DBCompactionTestL0FilesMisorderCorruption,
       FlushAfterIntraL0UniversalCompactionWithIngestedFile) {
  for (const std::string compaction_path_to_test :
       {"PickPeriodicCompaction", "PickCompactionToReduceSizeAmp",
        "PickCompactionToReduceSortedRuns", "PickDeleteTriggeredCompaction"}) {
    SetupOptions(CompactionStyle::kCompactionStyleUniversal,
                 compaction_path_to_test);
    DestroyAndReopen(options_);

    // To get accurate NumTableFilesAtLevel(0) when the number reaches
    // options_.level0_file_num_compaction_trigger
    PauseCompactionThread();

    // To create below LSM tree
    // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
    //
    // memtable: m1 [                  k2:new@8, k1:new@7]
    // L0: s4[k9:dummy@10], s3[k8:dummy@9],
    //     s2[k7:old@6, k6:old@5].. s0[k3:old@2, k1:old@1]
    //
    // (1) Create 3 existing SST file (i.e, s0 - s2)
    ASSERT_OK(Put("k1", "old"));
    ASSERT_OK(Put("k3", "old"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));
    ASSERT_OK(Put("k4", "old"));
    ASSERT_OK(Put("k5", "old"));
    ASSERT_OK(Flush());
    ASSERT_EQ(2, NumTableFilesAtLevel(0));
    ASSERT_OK(Put("k6", "old"));
    ASSERT_OK(Put("k7", "old"));
    ASSERT_OK(Flush());
    ASSERT_EQ(3, NumTableFilesAtLevel(0));

    // (2) Create m1. Noted that it contains a overlaped key with s0
    ASSERT_OK(Put("k1", "new"));  // overlapped key
    ASSERT_OK(Put("k2", "new"));

    // (3) Ingest two SST files s3, s4
    IngestOneKeyValue(dbfull(), "k8", "dummy", options_);
    IngestOneKeyValue(dbfull(), "k9", "dummy", options_);
    // Up to now, L0 contains s0 - s4
    ASSERT_EQ(5, NumTableFilesAtLevel(0));

    if (compaction_path_to_test == "PickPeriodicCompaction") {
      AddFilesMarkedForPeriodicCompaction(5);
    } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") {
      AddFilesMarkedForCompaction(5);
    }

    SetupSyncPoints(compaction_path_to_test);
    ResumeCompactionThread();

    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_TRUE(SyncPointsCalled())
        << "failed for compaction path to test: " << compaction_path_to_test;
    DisableSyncPoints();

    // After compaction, we have LSM tree:
    //
    // memtable: m1[                                     k2:new@8, k1:new@7]
    //       L0: s5[k9:dummy@10, k8@dummy@9, k7:old@6 .. k3:old@2, k1:old@1]
    ASSERT_EQ(1, NumTableFilesAtLevel(0))
        << "failed for compaction path to test: " << compaction_path_to_test;
    SequenceNumber compact_output_file_largest_seqno =
        GetLatestL0FileLargestSeqnoHelper();

    ASSERT_OK(Flush()) << "failed for compaction path to test: "
                       << compaction_path_to_test;
    // After flush, we have LSM tree:
    //
    // L0: s6[k2:new@8, k1:new@7],
    //     s5[k9:dummy@10, k8@dummy@9, k7:old@6 .. k3:old@2, k1:old@1]
    ASSERT_EQ(2, NumTableFilesAtLevel(0))
        << "failed for compaction path to test: " << compaction_path_to_test;
    SequenceNumber flushed_file_largest_seqno =
        GetLatestL0FileLargestSeqnoHelper();

    // To verify there isn't any file misorder leading to returning a old
    // value of "k1" , which is caused by flushed table s6 has a
    // smaller largest seqno than the compaction output file s5's largest seqno
    // while the flushed table has the newer version of the value
    // than the compaction output file's.
    ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno)
        << "failed for compaction path to test: " << compaction_path_to_test;
    EXPECT_EQ(Get("k1"), "new")
        << "failed for compaction path to test: " << compaction_path_to_test;
  }

  Destroy(options_);
}

TEST_F(DBCompactionTestL0FilesMisorderCorruption,
       FlushAfterIntraL0FIFOCompactionWithIngestedFile) {
  for (const std::string compaction_path_to_test : {"FindIntraL0Compaction"}) {
    SetupOptions(CompactionStyle::kCompactionStyleFIFO,
                 compaction_path_to_test);
    DestroyAndReopen(options_);

    // To create below LSM tree
    // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
    //
    // memtable: m1 [                         k2:new@4, k1:new@3]
    // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
    //
    // (1) Create an existing SST file s0
    ASSERT_OK(Put("k1", "old"));
    ASSERT_OK(Put("k3", "old"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // (2) Create memtable m1. Noted that it contains a overlaped key with s0
    ASSERT_OK(Put("k1", "new"));  // overlapped key
    ASSERT_OK(Put("k2", "new"));

    // To get accurate NumTableFilesAtLevel(0) when the number reaches
    // options_.level0_file_num_compaction_trigger
    PauseCompactionThread();

    // (3) Ingest two SST files s1, s2
    IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
    IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
    // Up to now, L0 contains s0, s1, s2
    ASSERT_EQ(3, NumTableFilesAtLevel(0));

    SetupSyncPoints(compaction_path_to_test);
    ResumeCompactionThread();

    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_TRUE(SyncPointsCalled())
        << "failed for compaction path to test: " << compaction_path_to_test;
    DisableSyncPoints();
    // After compaction, we have LSM tree:
    //
    // memtable: m1 [                 k2:new@4, k1:new@3]
    // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
    ASSERT_EQ(1, NumTableFilesAtLevel(0))
        << "failed for compaction path to test: " << compaction_path_to_test;
    SequenceNumber compact_output_file_largest_seqno =
        GetLatestL0FileLargestSeqnoHelper();

    ASSERT_OK(Flush()) << "failed for compaction path to test: "
                       << compaction_path_to_test;
    // After flush, we have LSM tree:
    //
    // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
    // k1:old@1]
    ASSERT_EQ(2, NumTableFilesAtLevel(0))
        << "failed for compaction path to test: " << compaction_path_to_test;
    SequenceNumber flushed_file_largest_seqno =
        GetLatestL0FileLargestSeqnoHelper();

    // To verify there isn't any file misorder leading to returning a old
    // value of "k1" , which is caused by flushed table s4 has a
    // smaller largest seqno than the compaction output file s3's largest seqno
    // while the flushed table has the newer version of the value
    // than the compaction output file's.
    ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno)
        << "failed for compaction path to test: " << compaction_path_to_test;
    EXPECT_EQ(Get("k1"), "new")
        << "failed for compaction path to test: " << compaction_path_to_test;
  }

  Destroy(options_);
}

class DBCompactionTestL0FilesMisorderCorruptionWithParam
    : public DBCompactionTestL0FilesMisorderCorruption,
      public testing::WithParamInterface<CompactionStyle> {
 public:
  DBCompactionTestL0FilesMisorderCorruptionWithParam()
      : DBCompactionTestL0FilesMisorderCorruption() {}
};

// TODO: add `CompactionStyle::kCompactionStyleLevel` to testing parameter,
// which requires careful unit test
// design for ingesting file to L0 and CompactRange()/CompactFile() to L0
INSTANTIATE_TEST_CASE_P(
    DBCompactionTestL0FilesMisorderCorruptionWithParam,
    DBCompactionTestL0FilesMisorderCorruptionWithParam,
    ::testing::Values(CompactionStyle::kCompactionStyleUniversal,
                      CompactionStyle::kCompactionStyleFIFO));

TEST_P(DBCompactionTestL0FilesMisorderCorruptionWithParam,
       FlushAfterIntraL0CompactFileWithIngestedFile) {
  SetupOptions(GetParam(), "CompactFile");
  DestroyAndReopen(options_);

  // To create below LSM tree
  // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
  //
  // memtable: m1 [                         k2:new@4, k1:new@3]
  // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
  //
  // (1) Create an existing SST file s0
  ASSERT_OK(Put("k1", "old"));
  ASSERT_OK(Put("k3", "old"));
  ASSERT_OK(Flush());
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // (2) Create memtable m1. Noted that it contains a overlaped key with s0
  ASSERT_OK(Put("k1", "new"));  // overlapped key
  ASSERT_OK(Put("k2", "new"));

  // (3) Ingest two SST files s1, s2
  IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
  IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
  // Up to now, L0 contains s0, s1, s2
  ASSERT_EQ(3, NumTableFilesAtLevel(0));

  ColumnFamilyMetaData cf_meta_data;
  db_->GetColumnFamilyMetaData(&cf_meta_data);
  ASSERT_EQ(cf_meta_data.levels[0].files.size(), 3);
  std::vector<std::string> input_files;
  for (const auto& file : cf_meta_data.levels[0].files) {
    input_files.push_back(file.name);
  }
  ASSERT_EQ(input_files.size(), 3);

  Status s = db_->CompactFiles(CompactionOptions(), input_files, 0);
  // After compaction, we have LSM tree:
  //
  // memtable: m1 [                 k2:new@4, k1:new@3]
  // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
  ASSERT_OK(s);
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  SequenceNumber compact_output_file_largest_seqno =
      GetLatestL0FileLargestSeqnoHelper();

  ASSERT_OK(Flush());
  // After flush, we have LSM tree:
  //
  // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
  // k1:old@1]
  ASSERT_EQ(2, NumTableFilesAtLevel(0));
  SequenceNumber flushed_file_largest_seqno =
      GetLatestL0FileLargestSeqnoHelper();

  // To verify there isn't any file misorder leading to returning a old value
  // of "1" , which is caused by flushed table s4 has a smaller
  // largest seqno than the compaction output file s3's largest seqno while the
  // flushed table has the newer version of the value than the
  // compaction output file's.
  ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno);
  EXPECT_EQ(Get("k1"), "new");

  Destroy(options_);
}

TEST_P(DBCompactionTestL0FilesMisorderCorruptionWithParam,
       FlushAfterIntraL0CompactRangeWithIngestedFile) {
  SetupOptions(GetParam(), "CompactRange");
  DestroyAndReopen(options_);

  // To create below LSM tree
  // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
  //
  // memtable: m1 [                         k2:new@4, k1:new@3]
  // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
  //
  // (1) Create an existing SST file s0
  ASSERT_OK(Put("k1", "old"));
  ASSERT_OK(Put("k3", "old"));
  ASSERT_OK(Flush());
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // (2) Create memtable m1. Noted that it contains a overlaped key with s0
  ASSERT_OK(Put("k1", "new"));  // overlapped key
  ASSERT_OK(Put("k2", "new"));

  // (3) Ingest two SST files s1, s2
  IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
  IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
  // Up to now, L0 contains s0, s1, s2
  ASSERT_EQ(3, NumTableFilesAtLevel(0));

  if (options_.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
    SetupSyncPoints("CompactRange");
  }
  // `start` and `end` is carefully chosen so that compact range:
  // (1) doesn't overlap with memtable therefore the memtable won't be flushed
  // (2) should target at compacting s0 with s1 and s2
  Slice start("k3"), end("k5");
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end));
  // After compaction, we have LSM tree:
  //
  // memtable: m1 [                 k2:new@4, k1:new@3]
  // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
  if (options_.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
    ASSERT_TRUE(SyncPointsCalled());
    DisableSyncPoints();
  }
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  SequenceNumber compact_output_file_largest_seqno =
      GetLatestL0FileLargestSeqnoHelper();

  ASSERT_OK(Flush());
  // After flush, we have LSM tree:
  //
  // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
  // k1:old@1]
  ASSERT_EQ(2, NumTableFilesAtLevel(0));
  SequenceNumber flushed_file_largest_seqno =
      GetLatestL0FileLargestSeqnoHelper();

  // To verify there isn't any file misorder leading to returning a old value
  // of "k1" , which is caused by flushed table s4 has a smaller
  // largest seqno than the compaction output file s3's largest seqno while the
  // flushed table has the newer version of the value than the
  // compaction output file's.
  ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno);
  EXPECT_EQ(Get("k1"), "new");

  Destroy(options_);
}

TEST_F(DBCompactionTest, SingleLevelUniveresal) {
  // Tests that manual compaction works with single level universal compaction.
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleUniversal;
  options.disable_auto_compactions = true;
  options.num_levels = 1;
  DestroyAndReopen(options);

  Random rnd(31);
  for (int i = 0; i < 10; ++i) {
    for (int j = 0; j < 50; ++j) {
      ASSERT_OK(Put(Key(i * 100 + j), rnd.RandomString(50)));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(NumTableFilesAtLevel(0), 10);
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
}

TEST_F(DBCompactionTest, SingleOverlappingNonL0BottommostManualCompaction) {
  // Tests that manual compact will rewrite bottommost level
  // when there is only a single non-L0 level that overlaps with
  // manual compaction range.
  constexpr int kSstNum = 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 7;
  for (auto b : {BottommostLevelCompaction::kForce,
                 BottommostLevelCompaction::kForceOptimized}) {
    DestroyAndReopen(options);

    // Generate some sst files on level 0 with sequence keys (no overlap)
    for (int i = 0; i < kSstNum; i++) {
      for (int j = 1; j < UCHAR_MAX; j++) {
        auto key = std::string(kSstNum, '\0');
        key[kSstNum - i] += static_cast<char>(j);
        ASSERT_OK(Put(key, std::string(i % 1000, 'A')));
      }
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(4);
    ASSERT_EQ(NumTableFilesAtLevel(4), kSstNum);
    CompactRangeOptions cro;
    cro.bottommost_level_compaction = b;
    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    ASSERT_EQ(NumTableFilesAtLevel(4), 1);
  }
}

TEST_P(DBCompactionTestWithBottommostParam, SequenceKeysManualCompaction) {
  constexpr int kSstNum = 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.num_levels = 7;
  const bool dynamic_level = std::get<1>(GetParam());
  options.level_compaction_dynamic_level_bytes = dynamic_level;
  DestroyAndReopen(options);

  // Generate some sst files on level 0 with sequence keys (no overlap)
  for (int i = 0; i < kSstNum; i++) {
    for (int j = 1; j < UCHAR_MAX; j++) {
      auto key = std::string(kSstNum, '\0');
      key[kSstNum - i] += static_cast<char>(j);
      ASSERT_OK(Put(key, std::string(i % 1000, 'A')));
    }
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  ASSERT_EQ(std::to_string(kSstNum), FilesPerLevel(0));

  auto cro = CompactRangeOptions();
  cro.bottommost_level_compaction = bottommost_level_compaction_;
  bool trivial_moved = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::BackgroundCompaction:TrivialMove",
      [&](void* /*arg*/) { trivial_moved = true; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  // All bottommost_level_compaction options should allow l0 -> l1 trivial move.
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  ASSERT_TRUE(trivial_moved);
  if (bottommost_level_compaction_ == BottommostLevelCompaction::kForce ||
      bottommost_level_compaction_ ==
          BottommostLevelCompaction::kForceOptimized) {
    // bottommost level should go through intra-level compaction
    // and has only 1 file
    if (dynamic_level) {
      ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel(0));
    } else {
      ASSERT_EQ("0,1", FilesPerLevel(0));
    }
  } else {
    // Just trivial move from level 0 -> 1/base
    if (dynamic_level) {
      ASSERT_EQ("0,0,0,0,0,0," + std::to_string(kSstNum), FilesPerLevel(0));
    } else {
      ASSERT_EQ("0," + std::to_string(kSstNum), FilesPerLevel(0));
    }
  }
}

INSTANTIATE_TEST_CASE_P(
    DBCompactionTestWithBottommostParam, DBCompactionTestWithBottommostParam,
    ::testing::Combine(
        ::testing::Values(BottommostLevelCompaction::kSkip,
                          BottommostLevelCompaction::kIfHaveCompactionFilter,
                          BottommostLevelCompaction::kForce,
                          BottommostLevelCompaction::kForceOptimized),
        ::testing::Bool()));

TEST_F(DBCompactionTest, UpdateLevelSubCompactionTest) {
  Options options = CurrentOptions();
  options.max_subcompactions = 10;
  options.target_file_size_base = 1 << 10;  // 1KB
  DestroyAndReopen(options);

  bool has_compaction = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        ASSERT_TRUE(compaction->max_subcompactions() == 10);
        has_compaction = true;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 10);
  // Trigger compaction
  for (int i = 0; i < 32; i++) {
    for (int j = 0; j < 5000; j++) {
      ASSERT_OK(Put(std::to_string(j), std::string(1, 'A')));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(has_compaction);

  has_compaction = false;
  ASSERT_OK(dbfull()->SetDBOptions({{"max_subcompactions", "2"}}));
  ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 2);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        ASSERT_TRUE(compaction->max_subcompactions() == 2);
        has_compaction = true;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Trigger compaction
  for (int i = 0; i < 32; i++) {
    for (int j = 0; j < 5000; j++) {
      ASSERT_OK(Put(std::to_string(j), std::string(1, 'A')));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(has_compaction);
}

TEST_F(DBCompactionTest, UpdateUniversalSubCompactionTest) {
  Options options = CurrentOptions();
  options.max_subcompactions = 10;
  options.compaction_style = kCompactionStyleUniversal;
  options.target_file_size_base = 1 << 10;  // 1KB
  DestroyAndReopen(options);

  bool has_compaction = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        ASSERT_TRUE(compaction->max_subcompactions() == 10);
        has_compaction = true;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Trigger compaction
  for (int i = 0; i < 32; i++) {
    for (int j = 0; j < 5000; j++) {
      ASSERT_OK(Put(std::to_string(j), std::string(1, 'A')));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(has_compaction);
  has_compaction = false;

  ASSERT_OK(dbfull()->SetDBOptions({{"max_subcompactions", "2"}}));
  ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 2);

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        ASSERT_TRUE(compaction->max_subcompactions() == 2);
        has_compaction = true;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // Trigger compaction
  for (int i = 0; i < 32; i++) {
    for (int j = 0; j < 5000; j++) {
      ASSERT_OK(Put(std::to_string(j), std::string(1, 'A')));
    }
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(has_compaction);
}

TEST_P(ChangeLevelConflictsWithAuto, TestConflict) {
  // A `CompactRange()` may race with an automatic compaction, we'll need
  // to make sure it doesn't corrupte the data.
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 2;
  Reopen(options);

  ASSERT_OK(Put("foo", "v1"));
  ASSERT_OK(Put("bar", "v1"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());

  {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 2;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  }
  ASSERT_EQ("0,0,1", FilesPerLevel(0));

  // Run a qury to refitting to level 1 while another thread writing to
  // the same level.
  SyncPoint::GetInstance()->LoadDependency({
      // The first two dependencies ensure the foreground creates an L0 file
      // between the background compaction's L0->L1 and its L1->L2.
      {
          "DBImpl::CompactRange:BeforeRefit:1",
          "AutoCompactionFinished1",
      },
      {
          "AutoCompactionFinished2",
          "DBImpl::CompactRange:BeforeRefit:2",
      },
  });
  SyncPoint::GetInstance()->EnableProcessing();

  std::thread auto_comp([&] {
    TEST_SYNC_POINT("AutoCompactionFinished1");
    ASSERT_OK(Put("bar", "v2"));
    ASSERT_OK(Put("foo", "v2"));
    ASSERT_OK(Flush());
    ASSERT_OK(Put("bar", "v3"));
    ASSERT_OK(Put("foo", "v3"));
    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    TEST_SYNC_POINT("AutoCompactionFinished2");
  });

  {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = GetParam() ? 1 : 0;
    // This should return non-OK, but it's more important for the test to
    // make sure that the DB is not corrupted.
    ASSERT_NOK(dbfull()->CompactRange(cro, nullptr, nullptr));
  }
  auto_comp.join();
  // Refitting didn't happen.
  SyncPoint::GetInstance()->DisableProcessing();

  // Write something to DB just make sure that consistency check didn't
  // fail and make the DB readable.
}

INSTANTIATE_TEST_CASE_P(ChangeLevelConflictsWithAuto,
                        ChangeLevelConflictsWithAuto, testing::Bool());

TEST_F(DBCompactionTest, ChangeLevelCompactRangeConflictsWithManual) {
  // A `CompactRange()` with `change_level == true` needs to execute its final
  // step, `ReFitLevel()`, in isolation. Previously there was a bug where
  // refitting could target the same level as an ongoing manual compaction,
  // leading to overlapping files in that level.
  //
  // This test ensures that case is not possible by verifying any manual
  // compaction issued during the `ReFitLevel()` phase fails with
  // `Status::Incomplete`.
  Options options = CurrentOptions();
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 3;
  Reopen(options);

  // Setup an LSM with three levels populated.
  Random rnd(301);
  int key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 2;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  }
  ASSERT_EQ("0,0,2", FilesPerLevel(0));

  GenerateNewFile(&rnd, &key_idx);
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1,2", FilesPerLevel(0));

  // The background thread will refit L2->L1 while the
  // foreground thread will try to simultaneously compact L0->L1.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      // The first two dependencies ensure the foreground creates an L0 file
      // between the background compaction's L0->L1 and its L1->L2.
      {
          "DBImpl::RunManualCompaction()::1",
          "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:"
          "PutFG",
      },
      {
          "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:"
          "FlushedFG",
          "DBImpl::RunManualCompaction()::2",
      },
      // The next two dependencies ensure the foreground invokes
      // `CompactRange()` while the background is refitting. The
      // foreground's `CompactRange()` is guaranteed to attempt an L0->L1
      // as we set it up with an empty memtable and a new L0 file.
      {
          "DBImpl::CompactRange:PreRefitLevel",
          "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:"
          "CompactFG",
      },
      {
          "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:"
          "CompactedFG",
          "DBImpl::CompactRange:PostRefitLevel",
      },
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread refit_level_thread([&] {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 1;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  });

  TEST_SYNC_POINT(
      "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:PutFG");
  // Make sure we have something new to compact in the foreground.
  // Note key 1 is carefully chosen as it ensures the file we create here
  // overlaps with one of the files being refitted L2->L1 in the background.
  // If we chose key 0, the file created here would not overlap.
  ASSERT_OK(Put(Key(1), "val"));
  ASSERT_OK(Flush());
  TEST_SYNC_POINT(
      "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:FlushedFG");

  TEST_SYNC_POINT(
      "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:CompactFG");
  ASSERT_TRUE(dbfull()
                  ->CompactRange(CompactRangeOptions(), nullptr, nullptr)
                  .IsIncomplete());
  TEST_SYNC_POINT(
      "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:"
      "CompactedFG");
  refit_level_thread.join();
}

TEST_F(DBCompactionTest, ChangeLevelErrorPathTest) {
  // This test is added to ensure that RefitLevel() error paths are clearing
  // internal flags and to test that subsequent valid RefitLevel() calls
  // succeeds
  Options options = CurrentOptions();
  options.memtable_factory.reset(
      test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 3;
  Reopen(options);

  ASSERT_EQ("", FilesPerLevel(0));

  // Setup an LSM with three levels populated.
  Random rnd(301);
  int key_idx = 0;
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1", FilesPerLevel(0));
  {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 2;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  }
  ASSERT_EQ("0,0,2", FilesPerLevel(0));

  auto start_idx = key_idx;
  GenerateNewFile(&rnd, &key_idx);
  GenerateNewFile(&rnd, &key_idx);
  ASSERT_EQ("1,1,2", FilesPerLevel(0));

  MoveFilesToLevel(1);
  ASSERT_EQ("0,2,2", FilesPerLevel(0));

  // The next CompactRange() call is used to test exercise error paths within
  // RefitLevel() before triggering a valid RefitLevel() call
  //
  // Try a refit from L2->L1 - this should fail and exercise error paths in
  // RefitLevel()
  {
    // Select key range that matches the bottom most level (L2)
    std::string begin_string = Key(0);
    std::string end_string = Key(start_idx - 1);
    Slice begin(begin_string);
    Slice end(end_string);

    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 1;
    ASSERT_NOK(dbfull()->CompactRange(cro, &begin, &end));
  }
  ASSERT_EQ("0,2,2", FilesPerLevel(0));

  // Try a valid Refit request to ensure, the path is still working
  {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 1;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  }
  ASSERT_EQ("0,5", FilesPerLevel(0));
}

TEST_F(DBCompactionTest, CompactionWithBlob) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  Reopen(options);

  constexpr char first_key[] = "first_key";
  constexpr char second_key[] = "second_key";
  constexpr char first_value[] = "first_value";
  constexpr char second_value[] = "second_value";
  constexpr char third_value[] = "third_value";

  ASSERT_OK(Put(first_key, first_value));
  ASSERT_OK(Put(second_key, first_value));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(first_key, second_value));
  ASSERT_OK(Put(second_key, second_value));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(first_key, third_value));
  ASSERT_OK(Put(second_key, third_value));
  ASSERT_OK(Flush());

  options.enable_blob_files = true;

  Reopen(options);

  constexpr Slice* begin = nullptr;
  constexpr Slice* end = nullptr;

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));

  ASSERT_EQ(Get(first_key), third_value);
  ASSERT_EQ(Get(second_key), third_value);

  VersionSet* const versions = dbfull()->GetVersionSet();
  assert(versions);

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  ASSERT_NE(cfd, nullptr);

  Version* const current = cfd->current();
  ASSERT_NE(current, nullptr);

  const VersionStorageInfo* const storage_info = current->storage_info();
  ASSERT_NE(storage_info, nullptr);

  const auto& l1_files = storage_info->LevelFiles(1);
  ASSERT_EQ(l1_files.size(), 1);

  const FileMetaData* const table_file = l1_files[0];
  ASSERT_NE(table_file, nullptr);

  const auto& blob_files = storage_info->GetBlobFiles();
  ASSERT_EQ(blob_files.size(), 1);

  const auto& blob_file = blob_files.front();
  ASSERT_NE(blob_file, nullptr);

  ASSERT_EQ(table_file->smallest.user_key(), first_key);
  ASSERT_EQ(table_file->largest.user_key(), second_key);
  ASSERT_EQ(table_file->oldest_blob_file_number,
            blob_file->GetBlobFileNumber());

  ASSERT_EQ(blob_file->GetTotalBlobCount(), 2);

  const InternalStats* const internal_stats = cfd->internal_stats();
  ASSERT_NE(internal_stats, nullptr);

  const auto& compaction_stats = internal_stats->TEST_GetCompactionStats();
  ASSERT_GE(compaction_stats.size(), 2);
  ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
  ASSERT_EQ(compaction_stats[1].bytes_written, table_file->fd.GetFileSize());
  ASSERT_EQ(compaction_stats[1].bytes_written_blob,
            blob_file->GetTotalBlobBytes());
  ASSERT_EQ(compaction_stats[1].num_output_files, 1);
  ASSERT_EQ(compaction_stats[1].num_output_files_blob, 1);
}

class DBCompactionTestBlobError
    : public DBCompactionTest,
      public testing::WithParamInterface<std::string> {
 public:
  DBCompactionTestBlobError() : sync_point_(GetParam()) {}

  std::string sync_point_;
};

INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobError, DBCompactionTestBlobError,
                        ::testing::ValuesIn(std::vector<std::string>{
                            "BlobFileBuilder::WriteBlobToFile:AddRecord",
                            "BlobFileBuilder::WriteBlobToFile:AppendFooter"}));

TEST_P(DBCompactionTestBlobError, CompactionError) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  Reopen(options);

  constexpr char first_key[] = "first_key";
  constexpr char second_key[] = "second_key";
  constexpr char first_value[] = "first_value";
  constexpr char second_value[] = "second_value";
  constexpr char third_value[] = "third_value";

  ASSERT_OK(Put(first_key, first_value));
  ASSERT_OK(Put(second_key, first_value));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(first_key, second_value));
  ASSERT_OK(Put(second_key, second_value));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(first_key, third_value));
  ASSERT_OK(Put(second_key, third_value));
  ASSERT_OK(Flush());

  options.enable_blob_files = true;

  Reopen(options);

  SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) {
    Status* const s = static_cast<Status*>(arg);
    assert(s);

    (*s) = Status::IOError(sync_point_);
  });
  SyncPoint::GetInstance()->EnableProcessing();

  constexpr Slice* begin = nullptr;
  constexpr Slice* end = nullptr;

  ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), begin, end).IsIOError());

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();

  VersionSet* const versions = dbfull()->GetVersionSet();
  assert(versions);

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  ASSERT_NE(cfd, nullptr);

  Version* const current = cfd->current();
  ASSERT_NE(current, nullptr);

  const VersionStorageInfo* const storage_info = current->storage_info();
  ASSERT_NE(storage_info, nullptr);

  const auto& l1_files = storage_info->LevelFiles(1);
  ASSERT_TRUE(l1_files.empty());

  const auto& blob_files = storage_info->GetBlobFiles();
  ASSERT_TRUE(blob_files.empty());

  const InternalStats* const internal_stats = cfd->internal_stats();
  ASSERT_NE(internal_stats, nullptr);

  const auto& compaction_stats = internal_stats->TEST_GetCompactionStats();
  ASSERT_GE(compaction_stats.size(), 2);

  if (sync_point_ == "BlobFileBuilder::WriteBlobToFile:AddRecord") {
    ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
    ASSERT_EQ(compaction_stats[1].bytes_written, 0);
    ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
    ASSERT_EQ(compaction_stats[1].num_output_files, 0);
    ASSERT_EQ(compaction_stats[1].num_output_files_blob, 0);
  } else {
    // SST file writing succeeded; blob file writing failed (during Finish)
    ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
    ASSERT_GT(compaction_stats[1].bytes_written, 0);
    ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
    ASSERT_EQ(compaction_stats[1].num_output_files, 1);
    ASSERT_EQ(compaction_stats[1].num_output_files_blob, 0);
  }
}

class DBCompactionTestBlobGC
    : public DBCompactionTest,
      public testing::WithParamInterface<std::tuple<double, bool>> {
 public:
  DBCompactionTestBlobGC()
      : blob_gc_age_cutoff_(std::get<0>(GetParam())),
        updated_enable_blob_files_(std::get<1>(GetParam())) {}

  double blob_gc_age_cutoff_;
  bool updated_enable_blob_files_;
};

INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobGC, DBCompactionTestBlobGC,
                        ::testing::Combine(::testing::Values(0.0, 0.5, 1.0),
                                           ::testing::Bool()));

TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGCOverrides) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.enable_blob_files = true;
  options.blob_file_size = 32;  // one blob per file
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = 0;

  DestroyAndReopen(options);

  for (int i = 0; i < 128; i += 2) {
    ASSERT_OK(Put("key" + std::to_string(i), "value" + std::to_string(i)));
    ASSERT_OK(
        Put("key" + std::to_string(i + 1), "value" + std::to_string(i + 1)));
    ASSERT_OK(Flush());
  }

  std::vector<uint64_t> original_blob_files = GetBlobFileNumbers();
  ASSERT_EQ(original_blob_files.size(), 128);

  // Note: turning off enable_blob_files before the compaction results in
  // garbage collected values getting inlined.
  ASSERT_OK(db_->SetOptions({{"enable_blob_files", "false"}}));

  CompactRangeOptions cro;
  cro.blob_garbage_collection_policy = BlobGarbageCollectionPolicy::kForce;
  cro.blob_garbage_collection_age_cutoff = blob_gc_age_cutoff_;

  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));

  // Check that the GC stats are correct
  {
    VersionSet* const versions = dbfull()->GetVersionSet();
    assert(versions);
    assert(versions->GetColumnFamilySet());

    ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
    assert(cfd);

    const InternalStats* const internal_stats = cfd->internal_stats();
    assert(internal_stats);

    const auto& compaction_stats = internal_stats->TEST_GetCompactionStats();
    ASSERT_GE(compaction_stats.size(), 2);

    ASSERT_GE(compaction_stats[1].bytes_read_blob, 0);
    ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
  }

  const size_t cutoff_index = static_cast<size_t>(
      cro.blob_garbage_collection_age_cutoff * original_blob_files.size());
  const size_t expected_num_files = original_blob_files.size() - cutoff_index;

  const std::vector<uint64_t> new_blob_files = GetBlobFileNumbers();

  ASSERT_EQ(new_blob_files.size(), expected_num_files);

  // Original blob files below the cutoff should be gone, original blob files
  // at or above the cutoff should be still there
  for (size_t i = cutoff_index; i < original_blob_files.size(); ++i) {
    ASSERT_EQ(new_blob_files[i - cutoff_index], original_blob_files[i]);
  }

  for (size_t i = 0; i < 128; ++i) {
    ASSERT_EQ(Get("key" + std::to_string(i)), "value" + std::to_string(i));
  }
}

TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGC) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.enable_blob_files = true;
  options.blob_file_size = 32;  // one blob per file
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = blob_gc_age_cutoff_;

  Reopen(options);

  constexpr char first_key[] = "first_key";
  constexpr char first_value[] = "first_value";
  constexpr char second_key[] = "second_key";
  constexpr char second_value[] = "second_value";

  ASSERT_OK(Put(first_key, first_value));
  ASSERT_OK(Put(second_key, second_value));
  ASSERT_OK(Flush());

  constexpr char third_key[] = "third_key";
  constexpr char third_value[] = "third_value";
  constexpr char fourth_key[] = "fourth_key";
  constexpr char fourth_value[] = "fourth_value";

  ASSERT_OK(Put(third_key, third_value));
  ASSERT_OK(Put(fourth_key, fourth_value));
  ASSERT_OK(Flush());

  const std::vector<uint64_t> original_blob_files = GetBlobFileNumbers();

  ASSERT_EQ(original_blob_files.size(), 4);

  const size_t cutoff_index = static_cast<size_t>(
      options.blob_garbage_collection_age_cutoff * original_blob_files.size());

  // Note: turning off enable_blob_files before the compaction results in
  // garbage collected values getting inlined.
  size_t expected_number_of_files = original_blob_files.size();

  if (!updated_enable_blob_files_) {
    ASSERT_OK(db_->SetOptions({{"enable_blob_files", "false"}}));

    expected_number_of_files -= cutoff_index;
  }

  constexpr Slice* begin = nullptr;
  constexpr Slice* end = nullptr;

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));

  ASSERT_EQ(Get(first_key), first_value);
  ASSERT_EQ(Get(second_key), second_value);
  ASSERT_EQ(Get(third_key), third_value);
  ASSERT_EQ(Get(fourth_key), fourth_value);

  const std::vector<uint64_t> new_blob_files = GetBlobFileNumbers();

  ASSERT_EQ(new_blob_files.size(), expected_number_of_files);

  // Original blob files below the cutoff should be gone, original blob files at
  // or above the cutoff should be still there
  for (size_t i = cutoff_index; i < original_blob_files.size(); ++i) {
    ASSERT_EQ(new_blob_files[i - cutoff_index], original_blob_files[i]);
  }

  VersionSet* const versions = dbfull()->GetVersionSet();
  assert(versions);
  assert(versions->GetColumnFamilySet());

  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
  assert(cfd);

  const InternalStats* const internal_stats = cfd->internal_stats();
  assert(internal_stats);

  const auto& compaction_stats = internal_stats->TEST_GetCompactionStats();
  ASSERT_GE(compaction_stats.size(), 2);

  if (blob_gc_age_cutoff_ > 0.0) {
    ASSERT_GT(compaction_stats[1].bytes_read_blob, 0);

    if (updated_enable_blob_files_) {
      // GC relocated some blobs to new blob files
      ASSERT_GT(compaction_stats[1].bytes_written_blob, 0);
      ASSERT_EQ(compaction_stats[1].bytes_read_blob,
                compaction_stats[1].bytes_written_blob);
    } else {
      // GC moved some blobs back to the LSM, no new blob files
      ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
    }
  } else {
    ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
    ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
  }
}

TEST_F(DBCompactionTest, CompactionWithBlobGCError_CorruptIndex) {
  Options options;
  options.env = env_;
  options.disable_auto_compactions = true;
  options.enable_blob_files = true;
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = 1.0;

  Reopen(options);

  constexpr char first_key[] = "first_key";
  constexpr char first_value[] = "first_value";
  ASSERT_OK(Put(first_key, first_value));

  constexpr char second_key[] = "second_key";
  constexpr char second_value[] = "second_value";
  ASSERT_OK(Put(second_key, second_value));

  ASSERT_OK(Flush());

  constexpr char third_key[] = "third_key";
  constexpr char third_value[] = "third_value";
  ASSERT_OK(Put(third_key, third_value));

  constexpr char fourth_key[] = "fourth_key";
  constexpr char fourth_value[] = "fourth_value";
  ASSERT_OK(Put(fourth_key, fourth_value));

  ASSERT_OK(Flush());

  SyncPoint::GetInstance()->SetCallBack(
      "CompactionIterator::GarbageCollectBlobIfNeeded::TamperWithBlobIndex",
      [](void* arg) {
        Slice* const blob_index = static_cast<Slice*>(arg);
        assert(blob_index);
        assert(!blob_index->empty());
        blob_index->remove_prefix(1);
      });
  SyncPoint::GetInstance()->EnableProcessing();

  constexpr Slice* begin = nullptr;
  constexpr Slice* end = nullptr;

  ASSERT_TRUE(
      db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption());

  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->ClearAllCallBacks();
}

TEST_F(DBCompactionTest, CompactionWithBlobGCError_InlinedTTLIndex) {
  constexpr uint64_t min_blob_size = 10;

  Options options;
  options.env = env_;
  options.disable_auto_compactions = true;
  options.enable_blob_files = true;
  options.min_blob_size = min_blob_size;
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = 1.0;

  Reopen(options);

  constexpr char first_key[] = "first_key";
  constexpr char first_value[] = "first_value";
  ASSERT_OK(Put(first_key, first_value));

  constexpr char second_key[] = "second_key";
  constexpr char second_value[] = "second_value";
  ASSERT_OK(Put(second_key, second_value));

  ASSERT_OK(Flush());

  constexpr char third_key[] = "third_key";
  constexpr char third_value[] = "third_value";
  ASSERT_OK(Put(third_key, third_value));

  constexpr char fourth_key[] = "fourth_key";
  constexpr char blob[] = "short";
  static_assert(sizeof(short) - 1 < min_blob_size,
                "Blob too long to be inlined");

  // Fake an inlined TTL blob index.
  std::string blob_index;

  constexpr uint64_t expiration = 1234567890;

  BlobIndex::EncodeInlinedTTL(&blob_index, expiration, blob);

  WriteBatch batch;
  ASSERT_OK(
      WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key, blob_index));
  ASSERT_OK(db_->Write(WriteOptions(), &batch));

  ASSERT_OK(Flush());

  constexpr Slice* begin = nullptr;
  constexpr Slice* end = nullptr;

  ASSERT_TRUE(
      db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption());
}

TEST_F(DBCompactionTest, CompactionWithBlobGCError_IndexWithInvalidFileNumber) {
  Options options;
  options.env = env_;
  options.disable_auto_compactions = true;
  options.enable_blob_files = true;
  options.enable_blob_garbage_collection = true;
  options.blob_garbage_collection_age_cutoff = 1.0;

  Reopen(options);

  constexpr char first_key[] = "first_key";
  constexpr char first_value[] = "first_value";
  ASSERT_OK(Put(first_key, first_value));

  constexpr char second_key[] = "second_key";
  constexpr char second_value[] = "second_value";
  ASSERT_OK(Put(second_key, second_value));

  ASSERT_OK(Flush());

  constexpr char third_key[] = "third_key";
  constexpr char third_value[] = "third_value";
  ASSERT_OK(Put(third_key, third_value));

  constexpr char fourth_key[] = "fourth_key";

  // Fake a blob index referencing a non-existent blob file.
  std::string blob_index;

  constexpr uint64_t blob_file_number = 1000;
  constexpr uint64_t offset = 1234;
  constexpr uint64_t size = 5678;

  BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size,
                        kNoCompression);

  WriteBatch batch;
  ASSERT_OK(
      WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key, blob_index));
  ASSERT_OK(db_->Write(WriteOptions(), &batch));

  ASSERT_OK(Flush());

  constexpr Slice* begin = nullptr;
  constexpr Slice* end = nullptr;

  ASSERT_TRUE(
      db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption());
}

TEST_F(DBCompactionTest, CompactionWithChecksumHandoff1) {
  if (mem_env_ || encrypted_env_) {
    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
    return;
  }
  std::shared_ptr<FaultInjectionTestFS> fault_fs(
      new FaultInjectionTestFS(FileSystem::Default()));
  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 3;
  options.env = fault_fs_env.get();
  options.create_if_missing = true;
  options.checksum_handoff_file_types.Add(FileType::kTableFile);
  Status s;
  Reopen(options);

  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());
  Destroy(options);
  Reopen(options);

  // The hash does not match, compaction write fails
  // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
  // Since the file system returns IOStatus::Corruption, it is an
  // unrecoverable error.
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
        "BackgroundCallCompaction:0"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0", [&](void*) {
        fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s.severity(),
            ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError);
  SyncPoint::GetInstance()->DisableProcessing();
  Destroy(options);
  Reopen(options);

  // The file system does not support checksum handoff. The check
  // will be ignored.
  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum);
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());

  // Each write will be similated as corrupted.
  // Since the file system returns IOStatus::Corruption, it is an
  // unrecoverable error.
  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
        "BackgroundCallCompaction:0"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0",
      [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s.severity(),
            ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError);
  SyncPoint::GetInstance()->DisableProcessing();

  Destroy(options);
}

TEST_F(DBCompactionTest, CompactionWithChecksumHandoff2) {
  if (mem_env_ || encrypted_env_) {
    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
    return;
  }
  std::shared_ptr<FaultInjectionTestFS> fault_fs(
      new FaultInjectionTestFS(FileSystem::Default()));
  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 3;
  options.env = fault_fs_env.get();
  options.create_if_missing = true;
  Status s;
  Reopen(options);

  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());
  Destroy(options);
  Reopen(options);

  // options is not set, the checksum handoff will not be triggered
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
        "BackgroundCallCompaction:0"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0", [&](void*) {
        fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());
  SyncPoint::GetInstance()->DisableProcessing();
  Destroy(options);
  Reopen(options);

  // The file system does not support checksum handoff. The check
  // will be ignored.
  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum);
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());

  // options is not set, the checksum handoff will not be triggered
  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
        "BackgroundCallCompaction:0"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0",
      [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());

  Destroy(options);
}

TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest1) {
  if (mem_env_ || encrypted_env_) {
    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
    return;
  }
  std::shared_ptr<FaultInjectionTestFS> fault_fs(
      new FaultInjectionTestFS(FileSystem::Default()));
  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 3;
  options.env = fault_fs_env.get();
  options.create_if_missing = true;
  options.checksum_handoff_file_types.Add(FileType::kDescriptorFile);
  Status s;
  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
  Reopen(options);

  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());
  Destroy(options);
  Reopen(options);

  // The hash does not match, compaction write fails
  // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
  // Since the file system returns IOStatus::Corruption, it is mapped to
  // kFatalError error.
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
        "BackgroundCallCompaction:0"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0", [&](void*) {
        fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError);
  SyncPoint::GetInstance()->DisableProcessing();
  Destroy(options);
}

TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest2) {
  if (mem_env_ || encrypted_env_) {
    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
    return;
  }
  std::shared_ptr<FaultInjectionTestFS> fault_fs(
      new FaultInjectionTestFS(FileSystem::Default()));
  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 3;
  options.env = fault_fs_env.get();
  options.create_if_missing = true;
  options.checksum_handoff_file_types.Add(FileType::kDescriptorFile);
  Status s;
  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum);
  Reopen(options);

  // The file system does not support checksum handoff. The check
  // will be ignored.
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s, Status::OK());

  // Each write will be similated as corrupted.
  // Since the file system returns IOStatus::Corruption, it is mapped to
  // kFatalError error.
  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
  ASSERT_OK(Put(Key(0), "value1"));
  ASSERT_OK(Put(Key(2), "value2"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::FlushMemTable:FlushMemTableFinished",
        "BackgroundCallCompaction:0"}});
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0",
      [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Put(Key(1), "value3"));
  s = Flush();
  ASSERT_EQ(s, Status::OK());
  s = dbfull()->TEST_WaitForCompact();
  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError);
  SyncPoint::GetInstance()->DisableProcessing();

  Destroy(options);
}

TEST_F(DBCompactionTest, FIFOChangeTemperature) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleFIFO;
  options.num_levels = 1;
  options.max_open_files = -1;
  options.level0_file_num_compaction_trigger = 2;
  options.create_if_missing = true;
  CompactionOptionsFIFO fifo_options;
  fifo_options.file_temperature_age_thresholds = {{Temperature::kCold, 1000}};
  fifo_options.max_table_files_size = 100000000;
  options.compaction_options_fifo = fifo_options;
  env_->SetMockSleep();
  Reopen(options);

  int total_cold = 0;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "NewWritableFile::FileOptions.temperature", [&](void* arg) {
        Temperature temperature = *(static_cast<Temperature*>(arg));
        if (temperature == Temperature::kCold) {
          total_cold++;
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // The file system does not support checksum handoff. The check
  // will be ignored.
  ASSERT_OK(Put(Key(0), "value1"));
  env_->MockSleepForSeconds(800);
  ASSERT_OK(Put(Key(2), "value2"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(0), "value1"));
  env_->MockSleepForSeconds(800);
  ASSERT_OK(Put(Key(2), "value2"));
  ASSERT_OK(Flush());

  ASSERT_OK(Put(Key(0), "value1"));
  env_->MockSleepForSeconds(800);
  ASSERT_OK(Put(Key(2), "value2"));
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_OK(Put(Key(0), "value1"));
  env_->MockSleepForSeconds(800);
  ASSERT_OK(Put(Key(2), "value2"));
  ASSERT_OK(Flush());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  ColumnFamilyMetaData metadata;
  db_->GetColumnFamilyMetaData(&metadata);
  ASSERT_EQ(4, metadata.file_count);
  ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature);
  ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[1].temperature);
  ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[2].temperature);
  ASSERT_EQ(Temperature::kCold, metadata.levels[0].files[3].temperature);
  ASSERT_EQ(2, total_cold);

  Destroy(options);
}

TEST_F(DBCompactionTest, DisableMultiManualCompaction) {
  const int kNumL0Files = 10;

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  Reopen(options);

  // Generate 2 levels of file to make sure the manual compaction is not skipped
  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Put(Key(i), "value"));
    if (i % 2) {
      ASSERT_OK(Flush());
    }
  }
  MoveFilesToLevel(2);

  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Put(Key(i), "value"));
    if (i % 2) {
      ASSERT_OK(Flush());
    }
  }
  MoveFilesToLevel(1);

  // Block compaction queue
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  port::Thread compact_thread1([&]() {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = false;
    std::string begin_str = Key(0);
    std::string end_str = Key(3);
    Slice b = begin_str;
    Slice e = end_str;
    auto s = db_->CompactRange(cro, &b, &e);
    ASSERT_TRUE(s.IsIncomplete());
  });

  port::Thread compact_thread2([&]() {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = false;
    std::string begin_str = Key(4);
    std::string end_str = Key(7);
    Slice b = begin_str;
    Slice e = end_str;
    auto s = db_->CompactRange(cro, &b, &e);
    ASSERT_TRUE(s.IsIncomplete());
  });

  // Disable manual compaction should cancel both manual compactions and both
  // compaction should return incomplete.
  db_->DisableManualCompaction();

  compact_thread1.join();
  compact_thread2.join();

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
}

TEST_F(DBCompactionTest, DisableJustStartedManualCompaction) {
  const int kNumL0Files = 4;

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  Reopen(options);

  // generate files, but avoid trigger auto compaction
  for (int i = 0; i < kNumL0Files / 2; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }

  // make sure the manual compaction background is started but not yet set the
  // status to in_progress, then cancel the manual compaction, which should not
  // result in segfault
  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BGWorkCompaction",
        "DBCompactionTest::DisableJustStartedManualCompaction:"
        "PreDisableManualCompaction"},
       {"DBImpl::RunManualCompaction:Unscheduled",
        "BackgroundCallCompaction:0"}});
  SyncPoint::GetInstance()->EnableProcessing();

  port::Thread compact_thread([&]() {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = true;
    auto s = db_->CompactRange(cro, nullptr, nullptr);
    ASSERT_TRUE(s.IsIncomplete());
  });
  TEST_SYNC_POINT(
      "DBCompactionTest::DisableJustStartedManualCompaction:"
      "PreDisableManualCompaction");
  db_->DisableManualCompaction();

  compact_thread.join();
}

TEST_F(DBCompactionTest, DisableInProgressManualCompaction) {
  const int kNumL0Files = 4;

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  Reopen(options);

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BackgroundCompaction:InProgress",
        "DBCompactionTest::DisableInProgressManualCompaction:"
        "PreDisableManualCompaction"},
       {"DBImpl::RunManualCompaction:Unscheduled",
        "CompactionJob::Run():Start"}});
  SyncPoint::GetInstance()->EnableProcessing();

  // generate files, but avoid trigger auto compaction
  for (int i = 0; i < kNumL0Files / 2; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }

  port::Thread compact_thread([&]() {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = true;
    auto s = db_->CompactRange(cro, nullptr, nullptr);
    ASSERT_TRUE(s.IsIncomplete());
  });

  TEST_SYNC_POINT(
      "DBCompactionTest::DisableInProgressManualCompaction:"
      "PreDisableManualCompaction");
  db_->DisableManualCompaction();

  compact_thread.join();
}

TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFull) {
  const int kNumL0Files = 4;

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::RunManualCompaction:Scheduled",
        "DBCompactionTest::DisableManualCompactionThreadQueueFull:"
        "PreDisableManualCompaction"}});
  SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  Reopen(options);

  // Block compaction queue
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  // generate files, but avoid trigger auto compaction
  for (int i = 0; i < kNumL0Files / 2; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }

  port::Thread compact_thread([&]() {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = true;
    auto s = db_->CompactRange(cro, nullptr, nullptr);
    ASSERT_TRUE(s.IsIncomplete());
  });

  TEST_SYNC_POINT(
      "DBCompactionTest::DisableManualCompactionThreadQueueFull:"
      "PreDisableManualCompaction");

  // Generate more files to trigger auto compaction which is scheduled after
  // manual compaction. Has to generate 4 more files because existing files are
  // pending compaction
  for (int i = 0; i < kNumL0Files; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));

  db_->DisableManualCompaction();

  // CompactRange should return before the compaction has the chance to run
  compact_thread.join();

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,1", FilesPerLevel(0));
}

TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFullDBClose) {
  const int kNumL0Files = 4;

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::RunManualCompaction:Scheduled",
        "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:"
        "PreDisableManualCompaction"}});
  SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  Reopen(options);

  // Block compaction queue
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  // generate files, but avoid trigger auto compaction
  for (int i = 0; i < kNumL0Files / 2; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }

  port::Thread compact_thread([&]() {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = true;
    auto s = db_->CompactRange(cro, nullptr, nullptr);
    ASSERT_TRUE(s.IsIncomplete());
  });

  TEST_SYNC_POINT(
      "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:"
      "PreDisableManualCompaction");

  // Generate more files to trigger auto compaction which is scheduled after
  // manual compaction. Has to generate 4 more files because existing files are
  // pending compaction
  for (int i = 0; i < kNumL0Files; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));

  db_->DisableManualCompaction();

  // CompactRange should return before the compaction has the chance to run
  compact_thread.join();

  // Try close DB while manual compaction is canceled but still in the queue.
  // And an auto-triggered compaction is also in the queue.
  auto s = db_->Close();
  ASSERT_OK(s);

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}

TEST_F(DBCompactionTest, DBCloseWithManualCompaction) {
  const int kNumL0Files = 4;

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::RunManualCompaction:Scheduled",
        "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:"
        "PreDisableManualCompaction"}});
  SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  Reopen(options);

  // Block compaction queue
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  // generate files, but avoid trigger auto compaction
  for (int i = 0; i < kNumL0Files / 2; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }

  port::Thread compact_thread([&]() {
    CompactRangeOptions cro;
    cro.exclusive_manual_compaction = true;
    auto s = db_->CompactRange(cro, nullptr, nullptr);
    ASSERT_TRUE(s.IsIncomplete());
  });

  TEST_SYNC_POINT(
      "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:"
      "PreDisableManualCompaction");

  // Generate more files to trigger auto compaction which is scheduled after
  // manual compaction. Has to generate 4 more files because existing files are
  // pending compaction
  for (int i = 0; i < kNumL0Files; i++) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));

  // Close DB with manual compaction and auto triggered compaction in the queue.
  auto s = db_->Close();
  ASSERT_OK(s);

  // manual compaction thread should return with Incomplete().
  compact_thread.join();

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}

TEST_F(DBCompactionTest,
       DisableManualCompactionDoesNotWaitForDrainingAutomaticCompaction) {
  // When `CompactRangeOptions::exclusive_manual_compaction == true`, we wait
  // for automatic compactions to drain before starting the manual compaction.
  // This test verifies `DisableManualCompaction()` can cancel such a compaction
  // without waiting for the drain to complete.
  const int kNumL0Files = 4;

  // Enforces manual compaction enters wait loop due to pending automatic
  // compaction.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BGWorkCompaction", "DBImpl::RunManualCompaction:NotScheduled"},
       {"DBImpl::RunManualCompaction:WaitScheduled",
        "BackgroundCallCompaction:0"}});
  // The automatic compaction will cancel the waiting manual compaction.
  // Completing this implies the cancellation did not wait on automatic
  // compactions to finish.
  bool callback_completed = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "BackgroundCallCompaction:0", [&](void* /*arg*/) {
        db_->DisableManualCompaction();
        callback_completed = true;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  Reopen(options);

  for (int i = 0; i < kNumL0Files; ++i) {
    ASSERT_OK(Put(Key(1), "value1"));
    ASSERT_OK(Put(Key(2), "value2"));
    ASSERT_OK(Flush());
  }

  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = true;
  ASSERT_TRUE(db_->CompactRange(cro, nullptr, nullptr).IsIncomplete());

  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_TRUE(callback_completed);
}

TEST_F(DBCompactionTest, ChangeLevelConflictsWithManual) {
  Options options = CurrentOptions();
  options.num_levels = 3;
  Reopen(options);

  // Setup an LSM with L2 populated.
  Random rnd(301);
  ASSERT_OK(Put(Key(0), rnd.RandomString(990)));
  ASSERT_OK(Put(Key(1), rnd.RandomString(990)));
  {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 2;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  }
  ASSERT_EQ("0,0,1", FilesPerLevel(0));

  // The background thread will refit L2->L1 while the foreground thread will
  // attempt to run a compaction on new data. The following dependencies
  // ensure the background manual compaction's refitting phase disables manual
  // compaction immediately before the foreground manual compaction can register
  // itself. Manual compaction is kept disabled until the foreground manual
  // checks for the failure once.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({
      // Only do Put()s for foreground CompactRange() once the background
      // CompactRange() has reached the refitting phase.
      {
          "DBImpl::CompactRange:BeforeRefit:1",
          "DBCompactionTest::ChangeLevelConflictsWithManual:"
          "PreForegroundCompactRange",
      },
      // Right before we register the manual compaction, proceed with
      // the refitting phase so manual compactions are disabled. Stay in
      // the refitting phase with manual compactions disabled until it is
      // noticed.
      {
          "DBImpl::RunManualCompaction:0",
          "DBImpl::CompactRange:BeforeRefit:2",
      },
      {
          "DBImpl::CompactRange:PreRefitLevel",
          "DBImpl::RunManualCompaction:1",
      },
      {
          "DBImpl::RunManualCompaction:PausedAtStart",
          "DBImpl::CompactRange:PostRefitLevel",
      },
      // If compaction somehow were scheduled, let's let it run after reenabling
      // manual compactions. This dependency is not expected to be hit but is
      // here for speculatively coercing future bugs.
      {
          "DBImpl::CompactRange:PostRefitLevel:ManualCompactionEnabled",
          "BackgroundCallCompaction:0",
      },
  });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ROCKSDB_NAMESPACE::port::Thread refit_level_thread([&] {
    CompactRangeOptions cro;
    cro.change_level = true;
    cro.target_level = 1;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  });

  TEST_SYNC_POINT(
      "DBCompactionTest::ChangeLevelConflictsWithManual:"
      "PreForegroundCompactRange");
  ASSERT_OK(Put(Key(0), rnd.RandomString(990)));
  ASSERT_OK(Put(Key(1), rnd.RandomString(990)));
  ASSERT_TRUE(dbfull()
                  ->CompactRange(CompactRangeOptions(), nullptr, nullptr)
                  .IsIncomplete());

  refit_level_thread.join();
}

TEST_F(DBCompactionTest, BottomPriCompactionCountsTowardConcurrencyLimit) {
  // Flushes several files to trigger compaction while lock is released during
  // a bottom-pri compaction. Verifies it does not get scheduled to thread pool
  // because per-DB limit for compaction parallelism is one (default).
  const int kNumL0Files = 4;
  const int kNumLevels = 3;

  env_->SetBackgroundThreads(1, Env::Priority::BOTTOM);

  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.num_levels = kNumLevels;
  DestroyAndReopen(options);

  // Setup last level to be non-empty since it's a bit unclear whether
  // compaction to an empty level would be considered "bottommost".
  ASSERT_OK(Put(Key(0), "val"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(kNumLevels - 1);

  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BGWorkBottomCompaction",
        "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:"
        "PreTriggerCompaction"},
       {"DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:"
        "PostTriggerCompaction",
        "BackgroundCallCompaction:0"}});
  SyncPoint::GetInstance()->EnableProcessing();

  port::Thread compact_range_thread([&] {
    CompactRangeOptions cro;
    cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
    cro.exclusive_manual_compaction = false;
    ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr));
  });

  // Sleep in the low-pri thread so any newly scheduled compaction will be
  // queued. Otherwise it might finish before we check its existence.
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  TEST_SYNC_POINT(
      "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:"
      "PreTriggerCompaction");
  for (int i = 0; i < kNumL0Files; ++i) {
    ASSERT_OK(Put(Key(0), "val"));
    ASSERT_OK(Flush());
  }
  ASSERT_EQ(0u, env_->GetThreadPoolQueueLen(Env::Priority::LOW));
  TEST_SYNC_POINT(
      "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:"
      "PostTriggerCompaction");

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
  compact_range_thread.join();
}

TEST_F(DBCompactionTest, BottommostFileCompactionAllowIngestBehind) {
  // allow_ingest_behind prevents seqnum zeroing, and could cause
  // compaction loop with reason kBottommostFiles.
  Options options = CurrentOptions();
  options.env = env_;
  options.compaction_style = kCompactionStyleLevel;
  options.allow_ingest_behind = true;
  options.comparator = BytewiseComparator();
  DestroyAndReopen(options);

  WriteOptions write_opts;
  ASSERT_OK(db_->Put(write_opts, "infinite", "compaction loop"));
  ASSERT_OK(db_->Put(write_opts, "infinite", "loop"));

  ASSERT_OK(Flush());
  MoveFilesToLevel(1);
  ASSERT_OK(db_->Put(write_opts, "bumpseqnum", ""));
  ASSERT_OK(Flush());
  auto snapshot = db_->GetSnapshot();
  // Bump up oldest_snapshot_seqnum_ in VersionStorageInfo.
  db_->ReleaseSnapshot(snapshot);
  bool compacted = false;
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* /* arg */) {
        // There should not be a compaction.
        compacted = true;
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  // Wait for compaction to be scheduled.
  env_->SleepForMicroseconds(2000000);
  ASSERT_FALSE(compacted);
  // The following assert can be used to check for compaction loop:
  // it used to wait forever before the fix.
  // ASSERT_OK(dbfull()->TEST_WaitForCompact(true /* wait_unscheduled */));
}

TEST_F(DBCompactionTest, TurnOnLevelCompactionDynamicLevelBytes) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.allow_ingest_behind = false;
  options.level_compaction_dynamic_level_bytes = false;
  options.num_levels = 6;
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 1 << 20;
  options.max_bytes_for_level_multiplier = 10;
  DestroyAndReopen(options);

  // put files in L0, L1 and L2
  WriteOptions write_opts;
  ASSERT_OK(db_->Put(write_opts, Key(1), "val1"));
  Random rnd(33);
  // Fill L2 with size larger than max_bytes_for_level_base,
  // so the level above it won't be drained.
  for (int i = 2; i <= (1 << 10); ++i) {
    ASSERT_OK(db_->Put(write_opts, Key(i), rnd.RandomString(2 << 10)));
  }
  ASSERT_OK(Flush());
  MoveFilesToLevel(2);
  ASSERT_OK(db_->Put(write_opts, Key(2), "val2"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(2);
  ASSERT_OK(db_->Put(write_opts, Key(1), "new_val1"));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);
  ASSERT_OK(db_->Put(write_opts, Key(3), "val3"));
  ASSERT_OK(Flush());
  ASSERT_EQ("1,1,2", FilesPerLevel());
  auto verify_db = [&]() {
    ASSERT_EQ(Get(Key(1)), "new_val1");
    ASSERT_EQ(Get(Key(2)), "val2");
    ASSERT_EQ(Get(Key(3)), "val3");
  };
  verify_db();

  options.level_compaction_dynamic_level_bytes = true;
  Reopen(options);
  // except for L0, files should be pushed down as much as possible
  ASSERT_EQ("1,0,0,0,1,2", FilesPerLevel());
  verify_db();

  // turning the options on and off should be safe
  options.level_compaction_dynamic_level_bytes = false;
  Reopen(options);
  MoveFilesToLevel(1);
  ASSERT_EQ("0,1,0,0,1,2", FilesPerLevel());
  verify_db();

  // newly flushed file is also pushed down
  options.level_compaction_dynamic_level_bytes = true;
  Reopen(options);
  // Files in L1 should be trivially moved down during DB opening.
  // The file should be moved to L3, and then may be drained and compacted to
  // L4. So we just check L1 and L2 here.
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
  ASSERT_EQ(0, NumTableFilesAtLevel(2));
  verify_db();
}

TEST_F(DBCompactionTest, TurnOnLevelCompactionDynamicLevelBytesUCToLC) {
  // Basic test for migrating from UC to LC.
  // DB has non-empty L1 that should be pushed down to last level (L49).
  Options options = CurrentOptions();
  options.compaction_style = CompactionStyle::kCompactionStyleUniversal;
  options.allow_ingest_behind = false;
  options.level_compaction_dynamic_level_bytes = false;
  options.num_levels = 50;
  CreateAndReopenWithCF({"pikachu"}, options);

  Random rnd(33);
  for (int f = 0; f < 10; ++f) {
    ASSERT_OK(Put(1, Key(f), rnd.RandomString(1000)));
    ASSERT_OK(Flush(1));
  }
  CompactRangeOptions compact_options;
  compact_options.change_level = true;
  compact_options.target_level = 1;
  ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
  ASSERT_EQ("0,1", FilesPerLevel(1));

  options.compaction_style = CompactionStyle::kCompactionStyleLevel;
  options.level_compaction_dynamic_level_bytes = true;
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  std::string expected_lsm = "";
  for (int i = 0; i < 49; ++i) {
    expected_lsm += "0,";
  }
  expected_lsm += "1";
  ASSERT_EQ(expected_lsm, FilesPerLevel(1));

  // Tests that entries for trial move in MANIFEST should be valid
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ(expected_lsm, FilesPerLevel(1));
}

TEST_F(DBCompactionTest, DrainUnnecessaryLevelsAfterMultiplierChanged) {
  // When the level size multiplier increases such that fewer levels become
  // necessary, unnecessary levels should to be drained.
  const int kBaseLevelBytes = 256 << 10;  // 256KB
  const int kFileBytes = 64 << 10;        // 64KB
  const int kInitMultiplier = 2, kChangedMultiplier = 10;
  const int kNumFiles = 32;
  const int kNumLevels = 5;
  const int kValueBytes = 1 << 10;  // 1KB

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = kBaseLevelBytes;
  options.max_bytes_for_level_multiplier = kInitMultiplier;
  options.num_levels = kNumLevels;
  Reopen(options);

  // Initially we setup the LSM to look roughly as follows:
  //
  // L0: empty
  // L1: 256KB
  // ...
  // L4: 1MB
  Random rnd(301);
  for (int file = 0; file < kNumFiles; ++file) {
    for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
      ASSERT_OK(Put(Key(file * kFileBytes / kValueBytes + i),
                    rnd.RandomString(kValueBytes)));
    }
    ASSERT_OK(Flush());
  }

  int init_num_nonempty = 0;
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  for (int level = 1; level < kNumLevels; ++level) {
    if (NumTableFilesAtLevel(level) > 0) {
      ++init_num_nonempty;
    }
  }

  // After increasing the multiplier and running compaction fewer levels are
  // needed to hold all the data. Unnecessary levels should be drained.
  ASSERT_OK(db_->SetOptions({{"max_bytes_for_level_multiplier",
                              std::to_string(kChangedMultiplier)}}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  int final_num_nonempty = 0;
  for (int level = 1; level < kNumLevels; ++level) {
    if (NumTableFilesAtLevel(level) > 0) {
      ++final_num_nonempty;
    }
  }
  ASSERT_GT(init_num_nonempty, final_num_nonempty);
}

TEST_F(DBCompactionTest, DrainUnnecessaryLevelsAfterDBBecomesSmall) {
  // When the DB size is smaller, e.g., large chunk of data deleted by
  // DeleteRange(), unnecessary levels should to be drained.
  const int kBaseLevelBytes = 256 << 10;  // 256KB
  const int kFileBytes = 64 << 10;        // 64KB
  const int kMultiplier = 2;
  const int kNumFiles = 32;
  const int kNumLevels = 5;
  const int kValueBytes = 1 << 10;  // 1KB
  const int kDeleteFileNum = 8;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = kBaseLevelBytes;
  options.max_bytes_for_level_multiplier = kMultiplier;
  options.num_levels = kNumLevels;
  Reopen(options);

  // Initially we setup the LSM to look roughly as follows:
  //
  // L0: empty
  // L1: 256KB
  // ...
  // L4: 1MB
  Random rnd(301);
  for (int file = 0; file < kNumFiles; ++file) {
    for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
      ASSERT_OK(Put(Key(file * kFileBytes / kValueBytes + i),
                    rnd.RandomString(kValueBytes)));
    }
    ASSERT_OK(Flush());
    if (file == kDeleteFileNum) {
      // Ensure the DeleteRange() call below only delete data from last level
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      ASSERT_EQ(NumTableFilesAtLevel(kNumLevels - 1), kDeleteFileNum + 1);
    }
  }

  int init_num_nonempty = 0;
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  for (int level = 1; level < kNumLevels; ++level) {
    if (NumTableFilesAtLevel(level) > 0) {
      ++init_num_nonempty;
    }
  }

  // Disable auto compaction CompactRange() below
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "true"}}));
  // Delete keys within first (kDeleteFileNum + 1) files' key ranges.
  // This should reduce DB size enough such that there is now
  // an unneeded level.
  std::string begin = Key(0);
  std::string end = Key(kDeleteFileNum * kFileBytes / kValueBytes);
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), begin, end));
  Slice begin_slice = begin;
  Slice end_slice = end;
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_slice, &end_slice));
  int after_delete_range_nonempty = 0;
  for (int level = 1; level < kNumLevels; ++level) {
    if (NumTableFilesAtLevel(level) > 0) {
      ++after_delete_range_nonempty;
    }
  }
  ASSERT_OK(dbfull()->SetOptions({{"disable_auto_compactions", "false"}}));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  int final_num_nonempty = 0;
  for (int level = 1; level < kNumLevels; ++level) {
    if (NumTableFilesAtLevel(level) > 0) {
      ++final_num_nonempty;
    }
  }
  ASSERT_GE(init_num_nonempty, after_delete_range_nonempty);
  ASSERT_GT(after_delete_range_nonempty, final_num_nonempty);
}

TEST_F(DBCompactionTest, ManualCompactionCompactAllKeysInRange) {
  // CompactRange() used to pre-compute target level to compact to
  // before running compactions. However, the files at target level
  // could be trivially moved down by some background compaction. This means
  // some keys in the manual compaction key range may not be compacted
  // during the manual compaction. This unit test tests this scenario.
  // A fix has been applied for this scenario to always compact
  // to the bottommost level.
  const int kBaseLevelBytes = 8 << 20;  // 8MB
  const int kMultiplier = 2;
  Options options = CurrentOptions();
  options.num_levels = 7;
  options.level_compaction_dynamic_level_bytes = false;
  options.compaction_style = kCompactionStyleLevel;
  options.max_bytes_for_level_base = kBaseLevelBytes;
  options.max_bytes_for_level_multiplier = kMultiplier;
  options.compression = kNoCompression;
  options.target_file_size_base = 2 * kBaseLevelBytes;

  DestroyAndReopen(options);
  Random rnd(301);
  // Populate L2 so that manual compaction will compact to at least L2.
  // Otherwise, there is still a possibility of race condition where
  // the manual compaction thread believes that max non-empty level is L1
  // while there is some auto compaction that moves some files from L1 to L2.
  ASSERT_OK(db_->Put(WriteOptions(), Key(1000), rnd.RandomString(100)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // one file in L1: [Key(5), Key(6)]
  ASSERT_OK(
      db_->Put(WriteOptions(), Key(5), rnd.RandomString(kBaseLevelBytes / 3)));
  ASSERT_OK(
      db_->Put(WriteOptions(), Key(6), rnd.RandomString(kBaseLevelBytes / 3)));
  ASSERT_OK(Flush());
  MoveFilesToLevel(1);
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  ASSERT_OK(
      db_->Put(WriteOptions(), Key(1), rnd.RandomString(kBaseLevelBytes / 2)));
  // We now do manual compaction for key range [Key(1), Key(6)].
  // First it compacts file [Key(1)] to L1.
  // L1 will have two files [Key(1)], and [Key(5), Key(6)].
  // After L0 -> L1 manual compaction, an automatic compaction will trivially
  // move both files from L1 to L2. Here the dependency makes manual compaction
  // wait for auto-compaction to pick a compaction before proceeding. Manual
  // compaction should not stop at L1 and keep compacting L2. With kForce
  // specified, expected output is that manual compaction compacts to L2 and L2
  // will contain 2 files: one for Key(1000) and one for Key(1), Key(5) and
  // Key(6).
  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BackgroundCompaction():AfterPickCompaction",
        "DBImpl::RunManualCompaction()::1"}});
  SyncPoint::GetInstance()->EnableProcessing();
  std::string begin_str = Key(1);
  std::string end_str = Key(6);
  Slice begin_slice = begin_str;
  Slice end_slice = end_str;
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  ASSERT_OK(db_->CompactRange(cro, &begin_slice, &end_slice));

  ASSERT_EQ(NumTableFilesAtLevel(2), 2);
}

TEST_F(DBCompactionTest,
       ManualCompactionCompactAllKeysInRangeDynamicLevelBytes) {
  // Similar to the test above (ManualCompactionCompactAllKeysInRange), but with
  // level_compaction_dynamic_level_bytes = true.
  const int kBaseLevelBytes = 8 << 20;  // 8MB
  const int kMultiplier = 2;
  Options options = CurrentOptions();
  options.num_levels = 7;
  options.level_compaction_dynamic_level_bytes = true;
  options.compaction_style = kCompactionStyleLevel;
  options.max_bytes_for_level_base = kBaseLevelBytes;
  options.max_bytes_for_level_multiplier = kMultiplier;
  options.compression = kNoCompression;
  options.target_file_size_base = 2 * kBaseLevelBytes;
  DestroyAndReopen(options);

  Random rnd(301);
  ASSERT_OK(db_->Put(WriteOptions(), Key(5),
                     rnd.RandomString(3 * kBaseLevelBytes / 2)));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(1, NumTableFilesAtLevel(6));
  // L6 now has one file with size ~ 3/2 * kBaseLevelBytes.
  // L5 is the new base level, with target size ~ 3/4 * kBaseLevelBytes.

  ASSERT_OK(
      db_->Put(WriteOptions(), Key(3), rnd.RandomString(kBaseLevelBytes / 3)));
  ASSERT_OK(
      db_->Put(WriteOptions(), Key(4), rnd.RandomString(kBaseLevelBytes / 3)));
  ASSERT_OK(Flush());

  MoveFilesToLevel(5);
  ASSERT_EQ(1, NumTableFilesAtLevel(5));
  // L5 now has one file with size ~ 2/3 * kBaseLevelBytes, which is below its
  // target size.

  ASSERT_OK(
      db_->Put(WriteOptions(), Key(1), rnd.RandomString(kBaseLevelBytes / 3)));
  ASSERT_OK(
      db_->Put(WriteOptions(), Key(2), rnd.RandomString(kBaseLevelBytes / 3)));
  SyncPoint::GetInstance()->LoadDependency(
      {{"DBImpl::BackgroundCompaction():AfterPickCompaction",
        "DBImpl::RunManualCompaction()::1"}});
  SyncPoint::GetInstance()->EnableProcessing();
  // After compacting the file with [Key(1), Key(2)] to L5,
  // L5 has size ~ 4/3 * kBaseLevelBytes > its target size.
  // We let manual compaction wait for an auto-compaction to pick
  // a compaction before proceeding. The auto-compaction would
  // trivially move both files in L5 down to L6. If manual compaction
  // works correctly with kForce specified, it should rewrite the two files in
  // L6 into a single file.
  CompactRangeOptions cro;
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  std::string begin_str = Key(1);
  std::string end_str = Key(4);
  Slice begin_slice = begin_str;
  Slice end_slice = end_str;
  ASSERT_OK(db_->CompactRange(cro, &begin_slice, &end_slice));
  ASSERT_EQ(2, NumTableFilesAtLevel(6));
  ASSERT_EQ(0, NumTableFilesAtLevel(5));
}

TEST_F(DBCompactionTest, NumberOfSubcompactions) {
  // Tests that expected number of subcompactions are created.
  class SubCompactionEventListener : public EventListener {
   public:
    void OnSubcompactionCompleted(const SubcompactionJobInfo&) override {
      sub_compaction_finished_++;
    }
    void OnCompactionCompleted(DB*, const CompactionJobInfo&) override {
      compaction_finished_++;
    }
    std::atomic<int> sub_compaction_finished_{0};
    std::atomic<int> compaction_finished_{0};
  };
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.compression = kNoCompression;
  const int kFileSize = 100 << 10;  // 100KB
  options.target_file_size_base = kFileSize;
  const int kLevel0CompactTrigger = 2;
  options.level0_file_num_compaction_trigger = kLevel0CompactTrigger;
  Destroy(options);
  Random rnd(301);

  // Exposing internal implementation detail here where the
  // number of subcompactions depends on the size of data
  // being compacted. In particular, to enable x subcompactions,
  // we need to compact at least x * target file size amount
  // of data.
  //
  // Will write two files below to avoid trivial move.
  // Size written in total: 500 * 1000 * 2 ~ 10MB ~ 100 * target file size.
  const int kValueSize = 500;
  const int kNumKeyPerFile = 1000;
  for (int i = 1; i <= 8; ++i) {
    options.max_subcompactions = i;
    SubCompactionEventListener* listener = new SubCompactionEventListener();
    options.listeners.clear();
    options.listeners.emplace_back(listener);
    TryReopen(options);

    for (int file = 0; file < kLevel0CompactTrigger; ++file) {
      for (int key = file; key < 2 * kNumKeyPerFile; key += 2) {
        ASSERT_OK(Put(Key(key), rnd.RandomString(kValueSize)));
      }
      ASSERT_OK(Flush());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(listener->compaction_finished_, 1);
    EXPECT_EQ(listener->sub_compaction_finished_, i);
    Destroy(options);
  }
}

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}