mirror of https://github.com/facebook/rocksdb.git
Fix a bug in iterator with UDT + `ReadOptions::pin_data` (#12451)
Summary: with https://github.com/facebook/rocksdb/issues/12414 enabling `ReadOptions::pin_data`, this bug surfaced as corrupted per key-value checksum during crash test. `saved_key_.GetUserKey()` could be pinned user key, so DBIter should not overwrite it. In one case, it only surfaces when iterator skips many keys of the same user key. To stress that code path, this PR also added `max_sequential_skip_in_iterations` to crash test. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12451 Test Plan: - Set ReadOptions::pin_data to true, the bug can be reproed quickly with `./db_stress --persist_user_defined_timestamps=1 --user_timestamp_size=8 --writepercent=35 --delpercent=4 --delrangepercent=1 --iterpercent=20 --nooverwritepercent=1 --prefix_size=8 --prefixpercent=10 --readpercent=30 --memtable_protection_bytes_per_key=8 --block_protection_bytes_per_key=2 --clear_column_family_one_in=0`. - Set max_sequential_skip_in_iterations to 1 for the other occurrence of the bug. Reviewed By: jowlyzhang Differential Revision: D55003766 Pulled By: cbi42 fbshipit-source-id: 23e1049129456684dafb028b6132b70e0afc07fb
This commit is contained in:
parent
f2546b6623
commit
3d5be596a5
|
@ -712,16 +712,21 @@ bool DBIter::ReverseToForward() {
|
|||
// not exist or may have different prefix than the current key().
|
||||
// If that's the case, seek iter_ to current key.
|
||||
if (!expect_total_order_inner_iter() || !iter_.Valid()) {
|
||||
IterKey last_key;
|
||||
ParsedInternalKey pikey(saved_key_.GetUserKey(), kMaxSequenceNumber,
|
||||
kValueTypeForSeek);
|
||||
if (timestamp_size_ > 0) {
|
||||
std::string last_key;
|
||||
if (timestamp_size_ == 0) {
|
||||
AppendInternalKey(
|
||||
&last_key, ParsedInternalKey(saved_key_.GetUserKey(),
|
||||
kMaxSequenceNumber, kValueTypeForSeek));
|
||||
} else {
|
||||
// TODO: pre-create kTsMax.
|
||||
const std::string kTsMax(timestamp_size_, '\xff');
|
||||
pikey.SetTimestamp(kTsMax);
|
||||
AppendInternalKeyWithDifferentTimestamp(
|
||||
&last_key,
|
||||
ParsedInternalKey(saved_key_.GetUserKey(), kMaxSequenceNumber,
|
||||
kValueTypeForSeek),
|
||||
kTsMax);
|
||||
}
|
||||
last_key.SetInternalKey(pikey);
|
||||
iter_.Seek(last_key.GetInternalKey());
|
||||
iter_.Seek(last_key);
|
||||
RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION);
|
||||
}
|
||||
|
||||
|
@ -1371,18 +1376,23 @@ bool DBIter::FindUserKeyBeforeSavedKey() {
|
|||
|
||||
if (num_skipped >= max_skip_) {
|
||||
num_skipped = 0;
|
||||
IterKey last_key;
|
||||
ParsedInternalKey pikey(saved_key_.GetUserKey(), kMaxSequenceNumber,
|
||||
kValueTypeForSeek);
|
||||
if (timestamp_size_ > 0) {
|
||||
std::string last_key;
|
||||
if (timestamp_size_ == 0) {
|
||||
AppendInternalKey(&last_key, ParsedInternalKey(saved_key_.GetUserKey(),
|
||||
kMaxSequenceNumber,
|
||||
kValueTypeForSeek));
|
||||
} else {
|
||||
// TODO: pre-create kTsMax.
|
||||
const std::string kTsMax(timestamp_size_, '\xff');
|
||||
pikey.SetTimestamp(kTsMax);
|
||||
AppendInternalKeyWithDifferentTimestamp(
|
||||
&last_key,
|
||||
ParsedInternalKey(saved_key_.GetUserKey(), kMaxSequenceNumber,
|
||||
kValueTypeForSeek),
|
||||
kTsMax);
|
||||
}
|
||||
last_key.SetInternalKey(pikey);
|
||||
// It would be more efficient to use SeekForPrev() here, but some
|
||||
// iterators may not support it.
|
||||
iter_.Seek(last_key.GetInternalKey());
|
||||
iter_.Seek(last_key);
|
||||
RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION);
|
||||
if (!iter_.Valid()) {
|
||||
break;
|
||||
|
|
|
@ -391,6 +391,7 @@ DECLARE_double(high_pri_pool_ratio);
|
|||
DECLARE_double(low_pri_pool_ratio);
|
||||
DECLARE_uint64(soft_pending_compaction_bytes_limit);
|
||||
DECLARE_uint64(hard_pending_compaction_bytes_limit);
|
||||
DECLARE_uint64(max_sequential_skip_in_iterations);
|
||||
|
||||
constexpr long KB = 1024;
|
||||
constexpr int kRandomValueMaxFactor = 3;
|
||||
|
|
|
@ -36,6 +36,11 @@ DEFINE_int64(max_key, 1 * KB * KB,
|
|||
|
||||
DEFINE_int32(max_key_len, 3, "Maximum length of a key in 8-byte units");
|
||||
|
||||
DEFINE_uint64(max_sequential_skip_in_iterations,
|
||||
ROCKSDB_NAMESPACE::Options().max_sequential_skip_in_iterations,
|
||||
"Iterator will reseek after scanning this number of keys with"
|
||||
"the same user key during Next/Prev().");
|
||||
|
||||
DEFINE_string(key_len_percent_dist, "",
|
||||
"Percentages of keys of various lengths. For example, 1,30,69 "
|
||||
"means 1% of keys are 8 bytes, 30% are 16 bytes, and 69% are "
|
||||
|
|
|
@ -3649,6 +3649,8 @@ void InitializeOptionsFromFlags(
|
|||
FLAGS_soft_pending_compaction_bytes_limit;
|
||||
options.hard_pending_compaction_bytes_limit =
|
||||
FLAGS_hard_pending_compaction_bytes_limit;
|
||||
options.max_sequential_skip_in_iterations =
|
||||
FLAGS_max_sequential_skip_in_iterations;
|
||||
}
|
||||
|
||||
void InitializeOptionsGeneral(
|
||||
|
|
|
@ -96,6 +96,7 @@ default_params = {
|
|||
"max_bytes_for_level_base": 10485760,
|
||||
# max_key has to be the same across invocations for verification to work, hence no lambda
|
||||
"max_key": random.choice([100000, 25000000]),
|
||||
"max_sequential_skip_in_iterations": lambda: random.choice([1, 2, 8, 16]),
|
||||
"max_write_buffer_number": 3,
|
||||
"mmap_read": lambda: random.randint(0, 1),
|
||||
# Setting `nooverwritepercent > 0` is only possible because we do not vary
|
||||
|
|
Loading…
Reference in New Issue