From 5d5ff8240852c484c43580f017a831bbf306fb0d Mon Sep 17 00:00:00 2001
From: Andrew Kryczka <ajkr@users.noreply.github.com>
Date: Mon, 17 Aug 2020 18:19:45 -0700
Subject: [PATCH] Disable `recycle_log_file_num` with
 `kTolerateCorruptedTailRecords` (#7271)

Summary:
The two features are naturally incompatible. WAL recycling expects the recovery to succeed upon encountering a corrupt record at the point where new data ends and recycled data remains at the tail. However, `WALRecoveryMode::kTolerateCorruptedTailRecords` must fail upon encountering any such corrupt record, as it cannot differentiate between this and a real corruption, which would cause committed updates to be truncated.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7271

Reviewed By: riversand963

Differential Revision: D23169923

Pulled By: ajkr

fbshipit-source-id: 2cf8a3bcd2c9a0ecb0055a84725047a10fd4db50
---
 HISTORY.md                 |  1 +
 db/db_impl/db_impl_open.cc | 20 +++++++++++++++-----
 db/db_wal_test.cc          | 10 ++++++++--
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index f9d10e831e..2acc39a7af 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -3,6 +3,7 @@
 ### Bug fixes
 * Fix a performance regression introduced in 6.4 that makes a upper bound check for every Next() even if keys are within a data block that is within the upper bound.
 * Fix a possible corruption to the LSM state (overlapping files within a level) when a `CompactRange()` for refitting levels (`CompactRangeOptions::change_level == true`) and another manual compaction are executed in parallel.
+* Sanitize `recycle_log_file_num` to zero when the user attempts to enable it in combination with `WALRecoveryMode::kTolerateCorruptedTailRecords`. Previously the two features were allowed together, which compromised the user's configured crash-recovery guarantees.
 * Fix a bug where a level refitting in CompactRange() might race with an automatic compaction that puts the data to the target level of the refitting. The bug has been there for years.
 
 ### New Features
diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc
index 97618fac39..8514bb1c45 100644
--- a/db/db_impl/db_impl_open.cc
+++ b/db/db_impl/db_impl_open.cc
@@ -90,12 +90,22 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
   }
 
   if (result.recycle_log_file_num &&
-      (result.wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery ||
+      (result.wal_recovery_mode ==
+           WALRecoveryMode::kTolerateCorruptedTailRecords ||
+       result.wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery ||
        result.wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency)) {
-    // kPointInTimeRecovery is inconsistent with recycle log file feature since
-    // we define the "end" of the log as the first corrupt record we encounter.
-    // kAbsoluteConsistency doesn't make sense because even a clean
-    // shutdown leaves old junk at the end of the log file.
+    // - kTolerateCorruptedTailRecords is inconsistent with recycle log file
+    //   feature. WAL recycling expects recovery success upon encountering a
+    //   corrupt record at the point where new data ends and recycled data
+    //   remains at the tail. However, `kTolerateCorruptedTailRecords` must fail
+    //   upon encountering any such corrupt record, as it cannot differentiate
+    //   between this and a real corruption, which would cause committed updates
+    //   to be truncated -- a violation of the recovery guarantee.
+    // - kPointInTimeRecovery and kAbsoluteConsistency are incompatible with
+    //   recycle log file feature temporarily due to a bug found introducing a
+    //   hole in the recovered data
+    //   (https://github.com/facebook/rocksdb/pull/7252#issuecomment-673766236).
+    //   Besides this bug, we believe the features are fundamentally compatible.
     result.recycle_log_file_num = 0;
   }
 
diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc
index b2f8eacc83..c133d3b7a1 100644
--- a/db/db_wal_test.cc
+++ b/db/db_wal_test.cc
@@ -528,7 +528,10 @@ TEST_F(DBWALTest, PreallocateBlock) {
 #endif  // !(defined NDEBUG) || !defined(OS_WIN)
 
 #ifndef ROCKSDB_LITE
-TEST_F(DBWALTest, FullPurgePreservesRecycledLog) {
+TEST_F(DBWALTest, DISABLED_FullPurgePreservesRecycledLog) {
+  // TODO(ajkr): Disabled until WAL recycling is fixed for
+  // `kPointInTimeRecovery`.
+
   // For github issue #1303
   for (int i = 0; i < 2; ++i) {
     Options options = CurrentOptions();
@@ -564,7 +567,10 @@ TEST_F(DBWALTest, FullPurgePreservesRecycledLog) {
   }
 }
 
-TEST_F(DBWALTest, FullPurgePreservesLogPendingReuse) {
+TEST_F(DBWALTest, DISABLED_FullPurgePreservesLogPendingReuse) {
+  // TODO(ajkr): Disabled until WAL recycling is fixed for
+  // `kPointInTimeRecovery`.
+
   // Ensures full purge cannot delete a WAL while it's in the process of being
   // recycled. In particular, we force the full purge after a file has been
   // chosen for reuse, but before it has been renamed.