Deflake tests of compaction based on compensated file size (#8036)

Summary: CompactionDeletionTriggerReopen was observed to be flaky recently: https://app.circleci.com/pipelines/github/facebook/rocksdb/6030/workflows/787af4f3-b9f7-4645-8e8d-1fb0ebf05539/jobs/101451. I went through it and the related tests and arrived at different conclusions on what constraints we can expect on DB size. Some constraints got looser and some got tighter. The particular constraint that flaked got a lot looser so at least the flake linked above would have been prevented. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8036 Reviewed By: riversand963 Differential Revision: D26862566 Pulled By: ajkr fbshipit-source-id: 3512b86b4fb41aeecae32e1c7382c03916d88d88
2021-03-14 20:24:04 -07:00 · 2021-03-14 20:24:04 -07:00 · b8f40f7f7b
parent b708b166dc
commit b8f40f7f7b
1 changed files with 54 additions and 13 deletions
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@ -206,6 +206,7 @@ Options DeletionTriggerOptions(Options options) {
      options.target_file_size_base * options.target_file_size_multiplier;
  options.max_bytes_for_level_multiplier = 2;
  options.disable_auto_compactions = false;
+  options.compaction_options_universal.max_size_amplification_percent = 100;
  return options;
 }

@ -360,12 +361,34 @@ TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) {
    for (int k = 0; k < kTestSize; ++k) {
      ASSERT_OK(Delete(Key(k)));
    }
-    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1]));

-    // must have much smaller db size.
-    ASSERT_GT(db_size[0] / 3, db_size[1]);
+    if (options.compaction_style == kCompactionStyleUniversal) {
+      // Claim: in universal compaction none of the original data will remain
+      // once compactions settle.
+      //
+      // Proof: The compensated size of the file containing the most tombstones
+      // is enough on its own to trigger size amp compaction. Size amp
+      // compaction is a full compaction, so all tombstones meet the obsolete
+      // keys they cover.
+      ASSERT_EQ(0, db_size[1]);
+    } else {
+      // Claim: in level compaction at most `db_size[0] / 2` of the original
+      // data will remain once compactions settle.
+      //
+      // Proof: Assume the original data is all in the bottom level. If it were
+      // not, it would meet its tombstone sooner. The original data size is
+      // large enough to require fanout to bottom level to be greater than
+      // `max_bytes_for_level_multiplier == 2`. In the level just above,
+      // tombstones must cover less than `db_size[0] / 4` bytes since fanout >=
+      // 2 and file size is compensated by doubling the size of values we expect
+      // are covered (`kDeletionWeightOnCompaction == 2`). The tombstones in
+      // levels above must cover less than `db_size[0] / 8` bytes of original
+      // data, `db_size[0] / 16`, and so on.
+      ASSERT_GT(db_size[0] / 2, db_size[1]);
+    }
  }
 }
 #endif  // ROCKSDB_VALGRIND_RUN
@ -632,9 +655,8 @@ TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) {
    }
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1]));
    Close();
-    // as auto_compaction is off, we shouldn't see too much reduce
-    // in db size.
-    ASSERT_LT(db_size[0] / 3, db_size[1]);
+    // as auto_compaction is off, we shouldn't see any reduction in db size.
+    ASSERT_LE(db_size[0], db_size[1]);

    // round 3 --- reopen db with auto_compaction on and see if
    // deletion compensation still work.
@ -648,7 +670,13 @@ TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) {
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[2]));
    // this time we're expecting significant drop in size.
-    ASSERT_GT(db_size[0] / 3, db_size[2]);
+    //
+    // See "CompactionDeletionTrigger" test for proof that at most
+    // `db_size[0] / 2` of the original data remains. In addition to that, this
+    // test inserts `db_size[0] / 10` to push the tombstones into SST files and
+    // then through automatic compactions. So in total `3 * db_size[0] / 5` of
+    // the original data may remain.
+    ASSERT_GT(3 * db_size[0] / 5, db_size[2]);
  }
 }

@ -735,8 +763,15 @@ TEST_F(DBCompactionTest, DisableStatsUpdateReopen) {
      values.push_back(rnd.RandomString(kCDTValueSize));
      ASSERT_OK(Put(Key(k), values[k]));
    }
-    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+    ASSERT_OK(Flush());
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
+    // L1 and L2 can fit deletions iff size compensation does not take effect,
+    // i.e., when `skip_stats_update_on_db_open == true`. Move any remaining
+    // files at or above L2 down to L3 to ensure obsolete data does not
+    // accidentally meet its tombstone above L3. This makes the final size more
+    // deterministic and easy to see whether size compensation for deletions
+    // took effect.
+    MoveFilesToLevel(3 /* level */);
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0]));
    Close();

@ -752,9 +787,8 @@ TEST_F(DBCompactionTest, DisableStatsUpdateReopen) {
    }
    ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1]));
    Close();
-    // as auto_compaction is off, we shouldn't see too much reduce
-    // in db size.
-    ASSERT_LT(db_size[0] / 3, db_size[1]);
+    // as auto_compaction is off, we shouldn't see any reduction in db size.
+    ASSERT_LE(db_size[0], db_size[1]);

    // round 3 --- reopen db with auto_compaction on and see if
    // deletion compensation still work.
@ -767,10 +801,17 @@ TEST_F(DBCompactionTest, DisableStatsUpdateReopen) {
    if (options.skip_stats_update_on_db_open) {
      // If update stats on DB::Open is disable, we don't expect
      // deletion entries taking effect.
-      ASSERT_LT(db_size[0] / 3, db_size[2]);
+      //
+      // The deletions are small enough to fit in L1 and L2, and obsolete keys
+      // were moved to L3+, so none of the original data should have been
+      // dropped.
+      ASSERT_LE(db_size[0], db_size[2]);
    } else {
      // Otherwise, we should see a significant drop in db size.
-      ASSERT_GT(db_size[0] / 3, db_size[2]);
+      //
+      // See "CompactionDeletionTrigger" test for proof that at most
+      // `db_size[0] / 2` of the original data remains.
+      ASSERT_GT(db_size[0] / 2, db_size[2]);
    }
  }
 }