diff --git a/db/write_batch.cc b/db/write_batch.cc
index d58e1a4668..24880bd17a 100644
--- a/db/write_batch.cc
+++ b/db/write_batch.cc
@@ -928,15 +928,19 @@ Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::Put(this, cf_id, key, value);
+    s = WriteBatchInternal::Put(this, cf_id, key, value);
+  } else {
+    needs_in_place_update_ts_ = true;
+    has_key_with_ts_ = true;
+    std::string dummy_ts(ts_sz, '\0');
+    std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
+    s = WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2),
+                                SliceParts(&value, 1));
   }
-
-  needs_in_place_update_ts_ = true;
-  has_key_with_ts_ = true;
-  std::string dummy_ts(ts_sz, '\0');
-  std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
-  return WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2),
-                                 SliceParts(&value, 1));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts_sz);
+  }
+  return s;
 }
 
 Status WriteBatch::TimedPut(ColumnFamilyHandle* column_family, const Slice& key,
@@ -961,7 +965,7 @@ Status WriteBatch::TimedPut(ColumnFamilyHandle* column_family, const Slice& key,
 
 Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key,
                        const Slice& ts, const Slice& value) {
-  const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
+  Status s = CheckColumnFamilyTimestampSize(column_family, ts);
   if (!s.ok()) {
     return s;
   }
@@ -969,8 +973,12 @@ Status WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key,
   assert(column_family);
   uint32_t cf_id = column_family->GetID();
   std::array<Slice, 2> key_with_ts{{key, ts}};
-  return WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2),
-                                 SliceParts(&value, 1));
+  s = WriteBatchInternal::Put(this, cf_id, SliceParts(key_with_ts.data(), 2),
+                              SliceParts(&value, 1));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts.size());
+  }
+  return s;
 }
 
 Status WriteBatchInternal::CheckSlicePartsLength(const SliceParts& key,
@@ -1038,7 +1046,11 @@ Status WriteBatch::Put(ColumnFamilyHandle* column_family, const SliceParts& key,
   }
 
   if (ts_sz == 0) {
-    return WriteBatchInternal::Put(this, cf_id, key, value);
+    s = WriteBatchInternal::Put(this, cf_id, key, value);
+    if (s.ok()) {
+      MaybeTrackTimestampSize(cf_id, ts_sz);
+    }
+    return s;
   }
 
   return Status::InvalidArgument(
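The same refactor repeats across every write API in this file: instead of returning straight out of `WriteBatchInternal::*`, the status is captured so the column family's timestamp size can be recorded on success. A distilled, standalone sketch of the pattern — `DoInternalWrite` and the free-standing `MaybeTrackTimestampSize` are hypothetical stand-ins, not the real RocksDB internals:

```cpp
#include <cstddef>
#include <cstdint>
#include <string>

// Hypothetical stand-ins for rocksdb::Status and the WriteBatchInternal
// Put/Delete/SingleDelete/DeleteRange/Merge calls, for illustration only.
struct Status {
  bool ok() const { return true; }
};
Status DoInternalWrite(uint32_t /*cf_id*/, const std::string& /*key*/) {
  return Status{};
}
void MaybeTrackTimestampSize(uint32_t /*cf_id*/, size_t /*ts_sz*/) {}

Status WriteLike(uint32_t cf_id, const std::string& key, size_t ts_sz) {
  Status s;
  if (ts_sz == 0) {
    s = DoInternalWrite(cf_id, key);
  } else {
    // Pad the key with a zero-filled placeholder timestamp; the real code
    // avoids this copy with SliceParts, and UpdateTimestamps() overwrites
    // the placeholder at commit time.
    s = DoInternalWrite(cf_id, key + std::string(ts_sz, '\0'));
  }
  // Record the timestamp size only if the write itself succeeded, so a
  // failed write never leaves an entry in the cf_id -> ts_sz map.
  if (s.ok()) {
    MaybeTrackTimestampSize(cf_id, ts_sz);
  }
  return s;
}
```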
@@ -1245,20 +1257,24 @@ Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key) {
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::Delete(this, cf_id, key);
+    s = WriteBatchInternal::Delete(this, cf_id, key);
+  } else {
+    needs_in_place_update_ts_ = true;
+    has_key_with_ts_ = true;
+    std::string dummy_ts(ts_sz, '\0');
+    std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
+    s = WriteBatchInternal::Delete(this, cf_id,
+                                   SliceParts(key_with_ts.data(), 2));
   }
-
-  needs_in_place_update_ts_ = true;
-  has_key_with_ts_ = true;
-  std::string dummy_ts(ts_sz, '\0');
-  std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
-  return WriteBatchInternal::Delete(this, cf_id,
-                                    SliceParts(key_with_ts.data(), 2));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts_sz);
+  }
+  return s;
 }
 
 Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key,
                           const Slice& ts) {
-  const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
+  Status s = CheckColumnFamilyTimestampSize(column_family, ts);
   if (!s.ok()) {
     return s;
   }
@@ -1266,8 +1282,12 @@ Status WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key,
   has_key_with_ts_ = true;
   uint32_t cf_id = column_family->GetID();
   std::array<Slice, 2> key_with_ts{{key, ts}};
-  return WriteBatchInternal::Delete(this, cf_id,
-                                    SliceParts(key_with_ts.data(), 2));
+  s = WriteBatchInternal::Delete(this, cf_id,
+                                 SliceParts(key_with_ts.data(), 2));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts.size());
+  }
+  return s;
 }
 
 Status WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id,
@@ -1312,7 +1332,11 @@ Status WriteBatch::Delete(ColumnFamilyHandle* column_family,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::Delete(this, cf_id, key);
+    s = WriteBatchInternal::Delete(this, cf_id, key);
+    if (s.ok()) {
+      MaybeTrackTimestampSize(cf_id, ts_sz);
+    }
+    return s;
   }
 
   return Status::InvalidArgument(
@@ -1360,20 +1384,24 @@ Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::SingleDelete(this, cf_id, key);
+    s = WriteBatchInternal::SingleDelete(this, cf_id, key);
+  } else {
+    needs_in_place_update_ts_ = true;
+    has_key_with_ts_ = true;
+    std::string dummy_ts(ts_sz, '\0');
+    std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
+    s = WriteBatchInternal::SingleDelete(this, cf_id,
+                                         SliceParts(key_with_ts.data(), 2));
   }
-
-  needs_in_place_update_ts_ = true;
-  has_key_with_ts_ = true;
-  std::string dummy_ts(ts_sz, '\0');
-  std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
-  return WriteBatchInternal::SingleDelete(this, cf_id,
-                                          SliceParts(key_with_ts.data(), 2));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts_sz);
+  }
+  return s;
 }
 
 Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
                                 const Slice& key, const Slice& ts) {
-  const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
+  Status s = CheckColumnFamilyTimestampSize(column_family, ts);
   if (!s.ok()) {
     return s;
   }
@@ -1381,8 +1409,12 @@ Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
   assert(column_family);
   uint32_t cf_id = column_family->GetID();
   std::array<Slice, 2> key_with_ts{{key, ts}};
-  return WriteBatchInternal::SingleDelete(this, cf_id,
-                                          SliceParts(key_with_ts.data(), 2));
+  s = WriteBatchInternal::SingleDelete(this, cf_id,
+                                       SliceParts(key_with_ts.data(), 2));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts.size());
+  }
+  return s;
 }
 
 Status WriteBatchInternal::SingleDelete(WriteBatch* b,
@@ -1429,7 +1461,11 @@ Status WriteBatch::SingleDelete(ColumnFamilyHandle* column_family,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::SingleDelete(this, cf_id, key);
+    s = WriteBatchInternal::SingleDelete(this, cf_id, key);
+    if (s.ok()) {
+      MaybeTrackTimestampSize(cf_id, ts_sz);
+    }
+    return s;
   }
 
   return Status::InvalidArgument(
@@ -1479,23 +1515,27 @@ Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key);
+    s = WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key);
+  } else {
+    needs_in_place_update_ts_ = true;
+    has_key_with_ts_ = true;
+    std::string dummy_ts(ts_sz, '\0');
+    std::array<Slice, 2> begin_key_with_ts{{begin_key, dummy_ts}};
+    std::array<Slice, 2> end_key_with_ts{{end_key, dummy_ts}};
+    s = WriteBatchInternal::DeleteRange(this, cf_id,
+                                        SliceParts(begin_key_with_ts.data(), 2),
+                                        SliceParts(end_key_with_ts.data(), 2));
   }
-
-  needs_in_place_update_ts_ = true;
-  has_key_with_ts_ = true;
-  std::string dummy_ts(ts_sz, '\0');
-  std::array<Slice, 2> begin_key_with_ts{{begin_key, dummy_ts}};
-  std::array<Slice, 2> end_key_with_ts{{end_key, dummy_ts}};
-  return WriteBatchInternal::DeleteRange(
-      this, cf_id, SliceParts(begin_key_with_ts.data(), 2),
-      SliceParts(end_key_with_ts.data(), 2));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts_sz);
+  }
+  return s;
 }
 
 Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family,
                                const Slice& begin_key, const Slice& end_key,
                                const Slice& ts) {
-  const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
+  Status s = CheckColumnFamilyTimestampSize(column_family, ts);
   if (!s.ok()) {
     return s;
   }
@@ -1504,9 +1544,13 @@ Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family,
   uint32_t cf_id = column_family->GetID();
   std::array<Slice, 2> key_with_ts{{begin_key, ts}};
   std::array<Slice, 2> end_key_with_ts{{end_key, ts}};
-  return WriteBatchInternal::DeleteRange(this, cf_id,
-                                         SliceParts(key_with_ts.data(), 2),
-                                         SliceParts(end_key_with_ts.data(), 2));
+  s = WriteBatchInternal::DeleteRange(this, cf_id,
+                                      SliceParts(key_with_ts.data(), 2),
+                                      SliceParts(end_key_with_ts.data(), 2));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts.size());
+  }
+  return s;
 }
 
 Status WriteBatchInternal::DeleteRange(WriteBatch* b, uint32_t column_family_id,
@@ -1553,7 +1597,11 @@ Status WriteBatch::DeleteRange(ColumnFamilyHandle* column_family,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key);
+    s = WriteBatchInternal::DeleteRange(this, cf_id, begin_key, end_key);
+    if (s.ok()) {
+      MaybeTrackTimestampSize(cf_id, ts_sz);
+    }
+    return s;
   }
 
   return Status::InvalidArgument(
@@ -1607,21 +1655,25 @@ Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::Merge(this, cf_id, key, value);
+    s = WriteBatchInternal::Merge(this, cf_id, key, value);
+  } else {
+    needs_in_place_update_ts_ = true;
+    has_key_with_ts_ = true;
+    std::string dummy_ts(ts_sz, '\0');
+    std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
+
+    s = WriteBatchInternal::Merge(
+        this, cf_id, SliceParts(key_with_ts.data(), 2), SliceParts(&value, 1));
   }
-
-  needs_in_place_update_ts_ = true;
-  has_key_with_ts_ = true;
-  std::string dummy_ts(ts_sz, '\0');
-  std::array<Slice, 2> key_with_ts{{key, dummy_ts}};
-
-  return WriteBatchInternal::Merge(
-      this, cf_id, SliceParts(key_with_ts.data(), 2), SliceParts(&value, 1));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts_sz);
+  }
+  return s;
 }
 
 Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key,
                          const Slice& ts, const Slice& value) {
-  const Status s = CheckColumnFamilyTimestampSize(column_family, ts);
+  Status s = CheckColumnFamilyTimestampSize(column_family, ts);
   if (!s.ok()) {
     return s;
   }
@@ -1629,8 +1681,12 @@ Status WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key,
   assert(column_family);
   uint32_t cf_id = column_family->GetID();
   std::array<Slice, 2> key_with_ts{{key, ts}};
-  return WriteBatchInternal::Merge(
-      this, cf_id, SliceParts(key_with_ts.data(), 2), SliceParts(&value, 1));
+  s = WriteBatchInternal::Merge(this, cf_id, SliceParts(key_with_ts.data(), 2),
+                                SliceParts(&value, 1));
+  if (s.ok()) {
+    MaybeTrackTimestampSize(cf_id, ts.size());
+  }
+  return s;
 }
 
 Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
@@ -1679,7 +1735,11 @@ Status WriteBatch::Merge(ColumnFamilyHandle* column_family,
   }
 
   if (0 == ts_sz) {
-    return WriteBatchInternal::Merge(this, cf_id, key, value);
+    s = WriteBatchInternal::Merge(this, cf_id, key, value);
+    if (s.ok()) {
+      MaybeTrackTimestampSize(cf_id, ts_sz);
+    }
+    return s;
   }
 
   return Status::InvalidArgument(
diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h
index 62aa786270..7022666400 100644
--- a/include/rocksdb/utilities/transaction_db.h
+++ b/include/rocksdb/utilities/transaction_db.h
@@ -323,6 +323,22 @@ struct TransactionOptions {
   // description. If a negative value is specified, then the default value from
   // TransactionDBOptions is used.
   int64_t write_batch_flush_threshold = -1;
+
+  // DO NOT USE.
+  // This is a temporary option dedicated to MyRocks that will soon be
+  // removed.
+  // In normal use cases, meta info such as a column family's timestamp size
+  // is tracked at the transaction layer, so it is unnecessary and even
+  // detrimental to track it inside the internal WriteBatch, because doing so
+  // enables anti-patterns like bypassing the Transaction write APIs and
+  // writing directly to the internal `WriteBatch` retrieved like this:
+  // https://github.com/facebook/mysql-5.6/blob/fb-mysql-8.0.32/storage/rocksdb/ha_rocksdb.cc#L4949-L4950
+  // Setting this option to true keeps the aforementioned use case working
+  // until it is refactored away.
+  // When this flag is enabled, we intentionally track the timestamp size only
+  // in the APIs that MyRocks currently uses: Put, Merge, Delete, DeleteRange,
+  // SingleDelete.
+  bool write_batch_track_timestamp_size = false;
 };
 
 // The per-write optimizations that do not involve transactions. TransactionDB
diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h
index dfdd2834bf..a7fba3e853 100644
--- a/include/rocksdb/write_batch.h
+++ b/include/rocksdb/write_batch.h
@@ -437,6 +437,30 @@ class WriteBatch : public WriteBatchBase {
   Status UpdateTimestamps(const Slice& ts,
                           std::function<size_t(uint32_t /*cf*/)> ts_sz_func);
 
+  // TODO: remove these internal APIs after MyRocks is refactored to no longer
+  // write directly to a `WriteBatch` retrieved from `Transaction` via
+  // `Transaction::GetWriteBatch`.
+
+  void SetTrackTimestampSize(bool track_timestamp_size) {
+    track_timestamp_size_ = track_timestamp_size;
+  }
+
+  inline void MaybeTrackTimestampSize(uint32_t column_family_id, size_t ts_sz) {
+    if (!track_timestamp_size_) {
+      return;
+    }
+    auto iter = cf_id_to_ts_sz_.find(column_family_id);
+    if (iter == cf_id_to_ts_sz_.end()) {
+      cf_id_to_ts_sz_.emplace(column_family_id, ts_sz);
+    }
+  }
+
+  // Returns a mapping from column family id to timestamp size for all the
+  // column families involved in this WriteBatch.
+  const std::unordered_map<uint32_t, size_t>& GetColumnFamilyToTimestampSize() {
+    return cf_id_to_ts_sz_;
+  }
+
   // Verify the per-key-value checksums of this write batch.
   // Corruption status will be returned if the verification fails.
   // If this write batch does not have per-key-value checksum,
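Since `MaybeTrackTimestampSize` guards on `find` before inserting, only the first size seen per column family is kept, and nothing is recorded unless `SetTrackTimestampSize(true)` was called. A minimal standalone re-creation of that behavior (`TsSizeTracker` is a hypothetical name, not a RocksDB class):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <unordered_map>

// Mirrors the tracking members added to WriteBatch: track_ stands in for
// track_timestamp_size_ and map_ for cf_id_to_ts_sz_.
class TsSizeTracker {
 public:
  explicit TsSizeTracker(bool track) : track_(track) {}

  void MaybeTrack(uint32_t cf_id, size_t ts_sz) {
    if (!track_) {
      return;
    }
    // Equivalent to the find-then-emplace in the diff: an existing entry
    // wins, so the first recorded size per column family never changes.
    map_.emplace(cf_id, ts_sz);
  }

  const std::unordered_map<uint32_t, size_t>& Map() const { return map_; }

 private:
  bool track_;
  std::unordered_map<uint32_t, size_t> map_;
};

int main() {
  TsSizeTracker tracker(/*track=*/true);
  tracker.MaybeTrack(/*cf_id=*/1, /*ts_sz=*/8);
  tracker.MaybeTrack(/*cf_id=*/1, /*ts_sz=*/0);  // no-op: entry already exists
  assert(tracker.Map().at(1) == 8);
  return 0;
}
```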
@@ -511,6 +535,10 @@ class WriteBatch : public WriteBatchBase {
 
   size_t default_cf_ts_sz_ = 0;
 
+  bool track_timestamp_size_ = false;
+
+  std::unordered_map<uint32_t, size_t> cf_id_to_ts_sz_;
+
  protected:
   std::string rep_;  // See comment in write_batch.cc for the format of rep_
 };
diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc
index bda2b66550..cf039942a7 100644
--- a/utilities/transactions/pessimistic_transaction.cc
+++ b/utilities/transactions/pessimistic_transaction.cc
@@ -73,6 +73,8 @@ void PessimisticTransaction::Initialize(const TransactionOptions& txn_options) {
   deadlock_detect_ = txn_options.deadlock_detect;
   deadlock_detect_depth_ = txn_options.deadlock_detect_depth;
   write_batch_.SetMaxBytes(txn_options.max_write_batch_size);
+  write_batch_.GetWriteBatch()->SetTrackTimestampSize(
+      txn_options.write_batch_track_timestamp_size);
   skip_concurrency_control_ = txn_options.skip_concurrency_control;
 
   lock_timeout_ = txn_options.lock_timeout * 1000;
@@ -717,8 +719,16 @@ Status WriteCommittedTxn::CommitWithoutPrepareInternal() {
     EncodeFixed64(commit_ts_buf, commit_timestamp_);
     Slice commit_ts(commit_ts_buf, sizeof(commit_ts_buf));
 
-    Status s =
-        wb->UpdateTimestamps(commit_ts, [wbwi, this](uint32_t cf) -> size_t {
+    Status s = wb->UpdateTimestamps(
+        commit_ts, [wb, wbwi, this](uint32_t cf) -> size_t {
+          // First search through timestamp info kept inside the WriteBatch
+          // in case some writes bypassed the Transaction's write APIs.
+          auto cf_id_to_ts_sz = wb->GetColumnFamilyToTimestampSize();
+          auto iter = cf_id_to_ts_sz.find(cf);
+          if (iter != cf_id_to_ts_sz.end()) {
+            size_t ts_sz = iter->second;
+            return ts_sz;
+          }
           auto cf_iter = cfs_with_ts_tracked_when_indexing_disabled_.find(cf);
           if (cf_iter != cfs_with_ts_tracked_when_indexing_disabled_.end()) {
             return sizeof(kMaxTxnTimestamp);
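Both commit paths now resolve a column family's timestamp size with the same precedence: the map tracked inside the `WriteBatch` first (covering writes that bypassed the Transaction APIs), then the column families tracked while indexing was disabled, then the user comparator. A simplified sketch of that lookup order, with hypothetical parameters standing in for the state the real lambda captures:

```cpp
#include <cstddef>
#include <cstdint>
#include <limits>
#include <unordered_map>
#include <unordered_set>

// Simplified model of the ts_sz_func lambda passed to UpdateTimestamps().
// kU64TsSz stands in for sizeof(kMaxTxnTimestamp); has_comparator and
// comparator_ts_sz stand in for the WriteBatchWithIndex comparator lookup.
constexpr size_t kU64TsSz = sizeof(uint64_t);

size_t ResolveTsSize(uint32_t cf,
                     const std::unordered_map<uint32_t, size_t>& batch_tracked,
                     const std::unordered_set<uint32_t>& tracked_unindexed,
                     bool has_comparator, size_t comparator_ts_sz) {
  // 1) Sizes recorded inside the WriteBatch itself; present only when
  //    write_batch_track_timestamp_size was enabled.
  auto iter = batch_tracked.find(cf);
  if (iter != batch_tracked.end()) {
    return iter->second;
  }
  // 2) Column families whose timestamps were tracked while indexing was
  //    disabled.
  if (tracked_unindexed.count(cf) != 0) {
    return kU64TsSz;
  }
  // 3) Fall back to the user comparator; max() signals an unknown column
  //    family (treated as an error upstream).
  return has_comparator ? comparator_ts_sz
                        : std::numeric_limits<size_t>::max();
}
```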
@@ -793,16 +803,24 @@ Status WriteCommittedTxn::CommitInternal() {
     s = WriteBatchInternal::MarkCommitWithTimestamp(working_batch, name_,
                                                     commit_ts);
     if (s.ok()) {
-      s = wb->UpdateTimestamps(commit_ts, [wbwi, this](uint32_t cf) -> size_t {
-        if (cfs_with_ts_tracked_when_indexing_disabled_.find(cf) !=
-            cfs_with_ts_tracked_when_indexing_disabled_.end()) {
-          return sizeof(kMaxTxnTimestamp);
-        }
-        const Comparator* ucmp =
-            WriteBatchWithIndexInternal::GetUserComparator(*wbwi, cf);
-        return ucmp ? ucmp->timestamp_size()
-                    : std::numeric_limits<size_t>::max();
-      });
+      s = wb->UpdateTimestamps(
+          commit_ts, [wb, wbwi, this](uint32_t cf) -> size_t {
+            // First search through timestamp info kept inside the WriteBatch
+            // in case some writes bypassed the Transaction's write APIs.
+            auto cf_id_to_ts_sz = wb->GetColumnFamilyToTimestampSize();
+            auto iter = cf_id_to_ts_sz.find(cf);
+            if (iter != cf_id_to_ts_sz.end()) {
+              return iter->second;
+            }
+            if (cfs_with_ts_tracked_when_indexing_disabled_.find(cf) !=
+                cfs_with_ts_tracked_when_indexing_disabled_.end()) {
+              return sizeof(kMaxTxnTimestamp);
+            }
+            const Comparator* ucmp =
+                WriteBatchWithIndexInternal::GetUserComparator(*wbwi, cf);
+            return ucmp ? ucmp->timestamp_size()
+                        : std::numeric_limits<size_t>::max();
+          });
     }
   }
 
diff --git a/utilities/transactions/write_committed_transaction_ts_test.cc b/utilities/transactions/write_committed_transaction_ts_test.cc
index 8d4a604b80..d936b353ee 100644
--- a/utilities/transactions/write_committed_transaction_ts_test.cc
+++ b/utilities/transactions/write_committed_transaction_ts_test.cc
@@ -130,6 +130,129 @@ void CheckKeyValueTsWithIterator(
   }
 }
 
+// This exercises an incorrect usage of the API; support for it should be
+// removed once MyRocks drops this pattern in a refactor.
+TEST_P(WriteCommittedTxnWithTsTest, WritesBypassTransactionAPIs) {
+  options.comparator = test::BytewiseComparatorWithU64TsWrapper();
+  ASSERT_OK(ReOpen());
+
+  const std::string test_cf_name = "test_cf";
+  ColumnFamilyOptions cf_options;
+  ColumnFamilyHandle* cfh = nullptr;
+  assert(db);
+  ASSERT_OK(db->CreateColumnFamily(cf_options, test_cf_name, &cfh));
+  delete cfh;
+  cfh = nullptr;
+
+  std::vector<ColumnFamilyDescriptor> cf_descs;
+  cf_descs.emplace_back(kDefaultColumnFamilyName, options);
+  cf_descs.emplace_back(test_cf_name, Options(DBOptions(), cf_options));
+  options.avoid_flush_during_shutdown = true;
+  ASSERT_OK(ReOpenNoDelete(cf_descs, &handles_));
+
+  // In each transaction, write a mixture of column families that enable
+  // timestamps and column families that disable them.
+
+  TransactionOptions txn_opts;
+  txn_opts.write_batch_track_timestamp_size = true;
+  std::unique_ptr<Transaction> txn0(NewTxn(WriteOptions(), txn_opts));
+  assert(txn0);
+  ASSERT_OK(txn0->Put(handles_[0], "key1", "key1_val"));
+  // Timestamp size info for writes like this can only be correctly tracked if
+  // TransactionOptions.write_batch_track_timestamp_size is true.
+  ASSERT_OK(txn0->GetWriteBatch()->GetWriteBatch()->Put(handles_[1], "foo",
+                                                        "foo_val"));
+  ASSERT_OK(txn0->SetName("txn0"));
+  ASSERT_OK(txn0->SetCommitTimestamp(2));
+  ASSERT_OK(txn0->Prepare());
+  ASSERT_OK(txn0->Commit());
+  txn0.reset();
+
+  // Keys written by transactions that disable
+  // `write_batch_track_timestamp_size` behave incorrectly:
+  // * They cannot be found after commit, because the transaction's
+  //   UpdateTimestamps does not see the correct timestamp size when a write
+  //   bypasses the Transaction write APIs.
+  // * They can be found again after a DB restart recovers the write from the
+  //   WAL, because the recovered transaction's UpdateTimestamps gets the
+  //   correct timestamp size directly from the VersionSet.
+  // If a flush persisted this transaction into sst files after it committed,
+  // the key is corrupted forever.
+  std::unique_ptr<Transaction> txn1(
+      NewTxn(WriteOptions(), TransactionOptions()));
+  assert(txn1);
+  ASSERT_OK(txn1->Put(handles_[0], "key2", "key2_val"));
+  // Write a key with more than 8 bytes so that the error manifests as a
+  // NotFound instead of an issue during `WriteBatch::UpdateTimestamps`.
+  ASSERT_OK(txn1->GetWriteBatch()->GetWriteBatch()->Put(
+      handles_[1], "foobarbaz", "baz_val"));
+  ASSERT_OK(txn1->SetName("txn1"));
+  ASSERT_OK(txn1->SetCommitTimestamp(2));
+  ASSERT_OK(txn1->Prepare());
+  ASSERT_OK(txn1->Commit());
+  txn1.reset();
+
+  ASSERT_OK(db->Flush(FlushOptions(), handles_[1]));
+
+  WriteOptions wopts;
+  wopts.sync = true;
+  std::unique_ptr<Transaction> txn2(NewTxn(wopts, TransactionOptions()));
+  assert(txn2);
+  ASSERT_OK(txn2->Put(handles_[0], "key3", "key3_val"));
+  ASSERT_OK(txn2->GetWriteBatch()->GetWriteBatch()->Put(
+      handles_[1], "bazbazbaz", "bazbazbaz_val"));
+  ASSERT_OK(txn2->SetCommitTimestamp(2));
+  ASSERT_OK(txn2->SetName("txn2"));
+  ASSERT_OK(txn2->Prepare());
+  ASSERT_OK(txn2->Commit());
+  txn2.reset();
+
+  std::unique_ptr<Transaction> txn3(
+      NewTxn(WriteOptions(), TransactionOptions()));
+  assert(txn3);
+  std::string value;
+  ReadOptions ropts;
+  std::string read_ts;
+  Slice timestamp = EncodeU64Ts(2, &read_ts);
+  ropts.timestamp = &timestamp;
+  ASSERT_OK(txn3->Get(ropts, handles_[0], "key1", &value));
+  ASSERT_EQ("key1_val", value);
+  ASSERT_OK(txn3->Get(ropts, handles_[0], "key2", &value));
+  ASSERT_EQ("key2_val", value);
+  ASSERT_OK(txn3->Get(ropts, handles_[0], "key3", &value));
+  ASSERT_EQ("key3_val", value);
+  txn3.reset();
+
+  std::unique_ptr<Transaction> txn4(
+      NewTxn(WriteOptions(), TransactionOptions()));
+  assert(txn4);
+  ASSERT_OK(txn4->Get(ReadOptions(), handles_[1], "foo", &value));
+  ASSERT_EQ("foo_val", value);
+  // Incorrect behavior: committed keys cannot be found.
+  ASSERT_TRUE(
+      txn4->Get(ReadOptions(), handles_[1], "foobarbaz", &value).IsNotFound());
+  ASSERT_TRUE(
+      txn4->Get(ReadOptions(), handles_[1], "bazbazbaz", &value).IsNotFound());
+  txn4.reset();
+
+  ASSERT_OK(ReOpenNoDelete(cf_descs, &handles_));
+  std::unique_ptr<Transaction> txn5(
+      NewTxn(WriteOptions(), TransactionOptions()));
+  assert(txn5);
+  ASSERT_OK(txn5->Get(ReadOptions(), handles_[1], "foo", &value));
+  ASSERT_EQ("foo_val", value);
+  // Incorrect behavior:
+  // * The unflushed key can be found after reopen replays its entry from the
+  //   WAL (this is not suggesting flushing as a workaround, only showing a
+  //   possibly misleading behavior).
+  // * The flushed key is corrupted forever.
+  ASSERT_TRUE(
+      txn5->Get(ReadOptions(), handles_[1], "foobarbaz", &value).IsNotFound());
+  ASSERT_OK(txn5->Get(ReadOptions(), handles_[1], "bazbazbaz", &value));
+  ASSERT_EQ("bazbazbaz_val", value);
+  txn5.reset();
+}
+
 TEST_P(WriteCommittedTxnWithTsTest, ReOpenWithTimestamp) {
   options.merge_operator = MergeOperators::CreateUInt64AddOperator();
   ASSERT_OK(ReOpenNoDelete());
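For reference, the MyRocks-style calling pattern this option exists to keep working looks roughly like the sketch below. It is based on the test above; `db` and `cfh` are assumed to be an open `TransactionDB` and a column family whose comparator has a u64 timestamp, and error handling is elided:

```cpp
#include <memory>

#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"

using namespace ROCKSDB_NAMESPACE;

void BypassingWrite(TransactionDB* db, ColumnFamilyHandle* cfh) {
  TransactionOptions txn_opts;
  txn_opts.write_batch_track_timestamp_size = true;  // the new opt-in flag

  std::unique_ptr<Transaction> txn(
      db->BeginTransaction(WriteOptions(), txn_opts));
  // The anti-pattern the flag tolerates: writing to the transaction's inner
  // WriteBatch directly instead of calling Transaction::Put().
  txn->GetWriteBatch()->GetWriteBatch()->Put(cfh, "key", "value");
  txn->SetCommitTimestamp(/*ts=*/42);
  // With the flag on, UpdateTimestamps() during Commit() finds the tracked
  // timestamp size for cfh, so the key gets a correctly sized timestamp.
  txn->Commit();
}
```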