mirror of https://github.com/facebook/rocksdb.git
Fix txn `MultiGet()` return un-committed data with snapshot (#7963)
Summary: TransactionDB uses read callback to filter out un-committed data before a snapshot. But `MultiGet()` API doesn't use that at all, which causes returning unwanted data. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7963 Test Plan: Added unittest to reproduce Reviewed By: anand1976 Differential Revision: D26455851 Pulled By: jay-zhuang fbshipit-source-id: 265276698cf9d8c4cd79e3250ef10d14375bac55
This commit is contained in:
parent
6a85aea5b1
commit
59ba104e4a
|
@ -13,6 +13,7 @@
|
|||
### Bug Fixes
|
||||
* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details.
|
||||
* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees.
|
||||
* Fix `WRITE_PREPARED`, `WRITE_UNPREPARED` TransactionDB `MultiGet()` may return uncommitted data with snapshot.
|
||||
|
||||
### Public API Change
|
||||
* Add new Append and PositionedAppend APIs to FileSystem to bring the data verification information (data checksum information) from upper layer (e.g., WritableFileWriter) to the storage layer. In this way, the customized FileSystem is able to verify the correctness of data being written to the storage on time. Add checksum_handoff_file_types to DBOptions. User can use this option to control which file types (Currently supported file tyes: kWALFile, kTableFile, kDescriptorFile.) should use the new Append and PositionedAppend APIs to handoff the verification information. Currently, RocksDB only use crc32c to calculate the checksum for write handoff.
|
||||
|
|
|
@ -1719,7 +1719,9 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
|
|||
}
|
||||
// If timestamp is used, we use read callback to ensure <key,t,s> is returned
|
||||
// only if t <= read_opts.timestamp and s <= snapshot.
|
||||
if (ts_sz > 0 && !get_impl_options.callback) {
|
||||
if (ts_sz > 0) {
|
||||
assert(!get_impl_options
|
||||
.callback); // timestamp with callback is not supported
|
||||
read_cb.Refresh(snapshot);
|
||||
get_impl_options.callback = &read_cb;
|
||||
}
|
||||
|
@ -2395,8 +2397,9 @@ void DBImpl::MultiGetWithCallback(
|
|||
}
|
||||
|
||||
GetWithTimestampReadCallback timestamp_read_callback(0);
|
||||
ReadCallback* read_callback = nullptr;
|
||||
ReadCallback* read_callback = callback;
|
||||
if (read_options.timestamp && read_options.timestamp->size() > 0) {
|
||||
assert(!read_callback); // timestamp with callback is not supported
|
||||
timestamp_read_callback.Refresh(consistent_seqnum);
|
||||
read_callback = ×tamp_read_callback;
|
||||
}
|
||||
|
|
|
@ -2925,6 +2925,47 @@ TEST_P(TransactionTest, MultiGetLargeBatchedTest) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_P(TransactionTest, MultiGetSnapshot) {
|
||||
WriteOptions write_options;
|
||||
TransactionOptions transaction_options;
|
||||
Transaction* txn1 = db->BeginTransaction(write_options, transaction_options);
|
||||
|
||||
Slice key = "foo";
|
||||
|
||||
Status s = txn1->Put(key, "bar");
|
||||
ASSERT_OK(s);
|
||||
|
||||
s = txn1->SetName("test");
|
||||
ASSERT_OK(s);
|
||||
|
||||
s = txn1->Prepare();
|
||||
ASSERT_OK(s);
|
||||
|
||||
// Get snapshot between prepare and commit
|
||||
// Un-committed data should be invisible to other transactions
|
||||
const Snapshot* s1 = db->GetSnapshot();
|
||||
|
||||
s = txn1->Commit();
|
||||
ASSERT_OK(s);
|
||||
delete txn1;
|
||||
|
||||
Transaction* txn2 = db->BeginTransaction(write_options, transaction_options);
|
||||
ReadOptions read_options;
|
||||
read_options.snapshot = s1;
|
||||
|
||||
std::vector<Slice> keys;
|
||||
std::vector<PinnableSlice> values(1);
|
||||
std::vector<Status> statuses(1);
|
||||
keys.push_back(key);
|
||||
auto cfd = db->DefaultColumnFamily();
|
||||
txn2->MultiGet(read_options, cfd, 1, keys.data(), values.data(),
|
||||
statuses.data());
|
||||
ASSERT_TRUE(statuses[0].IsNotFound());
|
||||
delete txn2;
|
||||
|
||||
db->ReleaseSnapshot(s1);
|
||||
}
|
||||
|
||||
TEST_P(TransactionTest, ColumnFamiliesTest2) {
|
||||
WriteOptions write_options;
|
||||
ReadOptions read_options, snapshot_read_options;
|
||||
|
|
Loading…
Reference in New Issue