mirror of https://github.com/facebook/rocksdb.git
Fix stress tests failure for auto_readahead_size (#12131)
Summary: When auto_readahead_size is enabled, Prev operation calls SeekForPrev in db_iter so that - BlockBasedTableIterator can point index_iter_ to the right block. - disable readahead_cache_lookup. However, there can be cases where SeekForPrev might not go through Version_set and call BlockBasedTableIterator SeekForPrev. In that case, when BlockBasedTableIterator::Prev is called, it returns NotSupported error. This more like a corner case. So to handle that case, removed SeekForPrev calling from db_iter and reseeking index_iter_ in Prev operation. block_iter_'s key already point to right block. So reseeking to index_iter_ solves the issue. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12131 Test Plan: - Tested on db_stress command that was failing - `./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --allow_data_in_errors=True --async_io=0 --atomic_flush=0 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=0 --best_efforts_recovery=1 --block_protection_bytes_per_key=1 --block_size=16384 --bloom_before_level=2147483646 --bloom_bits=12 --bottommost_compression_type=none --bottommost_file_compaction_delay=0 --bytes_per_sync=262144 --cache_index_and_filter_blocks=0 --cache_size=33554432 --cache_type=lru_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=0 --charge_filter_construction=1 --charge_table_reader=1 --checkpoint_one_in=1000000 --checksum_type=kxxHash64 --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=4 --compaction_readahead_size=1048576 --compaction_ttl=10 --compressed_secondary_cache_size=16777216 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=zlib --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=1 --db=/home/akankshamahajan/rocksdb_auto_tune/dev/shm/rocksdb_test/rocksdb_crashtest_blackbox --db_write_buffer_size=134217728 --delpercent=4 --delrangepercent=1 --destroy_db_initially=0 --detect_filter_construct_corruption=1 --disable_wal=1 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=1 --expected_values_dir=/home/akankshamahajan/rocksdb_auto_tune/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=1 --fifo_allow_compaction=1 --file_checksum_impl=big --flush_one_in=1000000 --format_version=6 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=10 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=1000000 --long_running_snapshots=1 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=0 --max_auto_readahead_size=524288 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=25000000 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=16 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=4194304 --memtable_max_range_deletions=1000 --memtable_prefix_bloom_size_ratio=0 --memtable_protection_bytes_per_key=2 --memtable_whole_key_filtering=0 --memtablerep=skip_list --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=-1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=100000000 --optimize_filters_for_memory=1 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=1 --pause_background_one_in=1000000 --periodic_compaction_seconds=10 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=0 --progress_reports=0 --read_fault_one_in=1000 --readahead_size=524288 --readpercent=50 --recycle_log_file_num=0 --reopen=0 --secondary_cache_fault_one_in=0 --secondary_cache_uri= --set_options_one_in=10000 --skip_verifydb=1 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=0 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=0 --subcompactions=2 --sync=0 --sync_fault_injection=0 --target_file_size_base=2097152 --target_file_size_multiplier=2 --test_batches_snapshots=0 --top_level_index_pinning=3 --unpartitioned_pinning=3 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=1 --use_merge=1 --use_multi_get_entity=0 --use_multiget=1 --use_put_entity_one_in=10 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=0 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=zstd --write_buffer_size=4194304 --write_dbid_to_manifest=0 --write_fault_one_in=0 --writepercent=35` - make crash_test -j32 Reviewed By: anand1976 Differential Revision: D51986326 Pulled By: akankshamahajan15 fbshipit-source-id: 90e11e63d1f1894770b457a44d8b213ae5512df9
This commit is contained in:
parent
d8e47620d7
commit
d926593df5
|
@ -83,8 +83,7 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
|
|||
cfd_(cfd),
|
||||
timestamp_ub_(read_options.timestamp),
|
||||
timestamp_lb_(read_options.iter_start_ts),
|
||||
timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0),
|
||||
auto_readahead_size_(read_options.auto_readahead_size) {
|
||||
timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0) {
|
||||
RecordTick(statistics_, NO_ITERATOR_CREATED);
|
||||
if (pin_thru_lifetime_) {
|
||||
pinned_iters_mgr_.StartPinning();
|
||||
|
@ -747,22 +746,15 @@ bool DBIter::ReverseToBackward() {
|
|||
// When current_entry_is_merged_ is true, iter_ may be positioned on the next
|
||||
// key, which may not exist or may have prefix different from current.
|
||||
// If that's the case, seek to saved_key_.
|
||||
//
|
||||
// In case of auto_readahead_size enabled, index_iter moves forward during
|
||||
// forward scan for block cache lookup and points to different block. If Prev
|
||||
// op is called, it needs to call SeekForPrev to point to right index_iter_ in
|
||||
// BlockBasedTableIterator. This only happens when direction is changed from
|
||||
// forward to backward.
|
||||
if ((current_entry_is_merged_ &&
|
||||
(!expect_total_order_inner_iter() || !iter_.Valid())) ||
|
||||
auto_readahead_size_) {
|
||||
if (current_entry_is_merged_ &&
|
||||
(!expect_total_order_inner_iter() || !iter_.Valid())) {
|
||||
IterKey last_key;
|
||||
// Using kMaxSequenceNumber and kValueTypeForSeek
|
||||
// (not kValueTypeForSeekForPrev) to seek to a key strictly smaller
|
||||
// than saved_key_.
|
||||
last_key.SetInternalKey(ParsedInternalKey(
|
||||
saved_key_.GetUserKey(), kMaxSequenceNumber, kValueTypeForSeek));
|
||||
if (!expect_total_order_inner_iter() || auto_readahead_size_) {
|
||||
if (!expect_total_order_inner_iter()) {
|
||||
iter_.SeekForPrev(last_key.GetInternalKey());
|
||||
} else {
|
||||
// Some iterators may not support SeekForPrev(), so we avoid using it
|
||||
|
|
|
@ -402,7 +402,6 @@ class DBIter final : public Iterator {
|
|||
const Slice* const timestamp_lb_;
|
||||
const size_t timestamp_size_;
|
||||
std::string saved_timestamp_;
|
||||
bool auto_readahead_size_;
|
||||
};
|
||||
|
||||
// Return a new iterator that converts internal keys (yielded by
|
||||
|
|
|
@ -1744,13 +1744,9 @@ struct ReadOptions {
|
|||
// For this feature to enabled, iterate_upper_bound must also be specified.
|
||||
//
|
||||
// NOTE: - Recommended for forward Scans only.
|
||||
// - In case of backward scans like Prev or SeekForPrev, the
|
||||
// cost of these backward operations might increase and affect the
|
||||
// performace. So this option should not be enabled if workload
|
||||
// contains backward scans.
|
||||
// - If there is a backward scans, this option will be
|
||||
// disabled internally and won't be reset if forward scan is done
|
||||
// again.
|
||||
// disabled internally and won't be enabled again if the forward scan
|
||||
// is issued again.
|
||||
//
|
||||
// Default: false
|
||||
bool auto_readahead_size = false;
|
||||
|
|
|
@ -303,12 +303,29 @@ bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) {
|
|||
}
|
||||
|
||||
void BlockBasedTableIterator::Prev() {
|
||||
// Return Error.
|
||||
if (readahead_cache_lookup_) {
|
||||
block_iter_.Invalidate(
|
||||
Status::NotSupported("auto tuning of readahead_size in is not "
|
||||
"supported with Prev operation."));
|
||||
return;
|
||||
if (readahead_cache_lookup_ && !IsIndexAtCurr()) {
|
||||
// In case of readahead_cache_lookup_, index_iter_ has moved forward. So we
|
||||
// need to reseek the index_iter_ to point to current block by using
|
||||
// block_iter_'s key.
|
||||
if (Valid()) {
|
||||
ResetBlockCacheLookupVar();
|
||||
direction_ = IterDirection::kBackward;
|
||||
Slice last_key = key();
|
||||
|
||||
index_iter_->Seek(last_key);
|
||||
is_index_at_curr_block_ = true;
|
||||
|
||||
// Check for IO error.
|
||||
if (!index_iter_->Valid()) {
|
||||
ResetDataIter();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!Valid()) {
|
||||
ResetDataIter();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ResetBlockCacheLookupVar();
|
||||
|
|
|
@ -411,7 +411,6 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|||
uint64_t& start_updated_offset,
|
||||
uint64_t& end_updated_offset,
|
||||
size_t& prev_handles_size);
|
||||
|
||||
// *** END APIs relevant to auto tuning of readahead_size ***
|
||||
};
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Fix a corner case with auto_readahead_size where Prev Operation returns NOT SUPPORTED error when scans direction is changed from forward to backward.
|
Loading…
Reference in New Issue