mirror of
https://github.com/facebook/rocksdb.git
synced 2024-12-01 07:15:51 +00:00
39455974cb
Summary: Not sure where or how it happens, but using a recent CircleCI failure I got a reliable db_stress reproducer. Using std::unique_ptr appropriately for managing them has apparently (and unsurprisingly) fixed the problem without needing to know exactly where the problem was. Suggested follow-up: * Three or even four levels of pointers is very confusing to work with. Surely this part can be cleaned up to be simpler. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12805 Test Plan: Reproducer passes, plus ASAN test and crash test runs. I don't think it's worth the extra work to track down the details and create a careful unit test. ``` ./db_stress --WAL_size_limit_MB=1 --WAL_ttl_seconds=60 --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --adm_policy=2 --advise_random_on_open=1 --allow_data_in_errors=True --allow_fallocate=1 --async_io=0 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_flush_during_shutdown=1 --avoid_unnecessary_blocking_io=1 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=0 --bgerror_resume_retry_interval=1000000 --block_align=1 --block_protection_bytes_per_key=4 --block_size=16384 --bloom_before_level=2147483646 --bloom_bits=15 --bottommost_compression_type=none --bottommost_file_compaction_delay=3600 --bytes_per_sync=262144 --cache_index_and_filter_blocks=0 --cache_index_and_filter_blocks_with_high_priority=0 --cache_size=33554432 --cache_type=tiered_lru_cache --charge_compression_dictionary_building_buffer=0 --charge_file_metadata=1 --charge_filter_construction=0 --charge_table_reader=0 --check_multiget_consistency=1 --check_multiget_entity_consistency=1 --checkpoint_one_in=10000 --checksum_type=kxxHash --clear_column_family_one_in=0 --compact_files_one_in=1000000 --compact_range_one_in=1000 --compaction_pri=0 --compaction_readahead_size=0 --compaction_ttl=0 --compress_format_version=2 --compressed_secondary_cache_ratio=0.2 --compressed_secondary_cache_size=0 
--compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=none --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --daily_offpeak_time_utc= --data_block_index_type=0 --db=/dev/shm/rocksdb.gpxs/rocksdb_crashtest_blackbox --db_write_buffer_size=0 --default_temperature=kWarm --default_write_temperature=kCold --delete_obsolete_files_period_micros=21600000000 --delpercent=4 --delrangepercent=1 --destroy_db_initially=0 --detect_filter_construct_corruption=0 --disable_file_deletions_one_in=10000 --disable_manual_compaction_one_in=1000000 --disable_wal=0 --dump_malloc_stats=1 --enable_checksum_handoff=1 --enable_compaction_filter=0 --enable_custom_split_merge=0 --enable_do_not_compress_roles=0 --enable_index_compression=0 --enable_memtable_insert_with_hint_prefix_extractor=0 --enable_pipelined_write=1 --enable_sst_partitioner_factory=0 --enable_thread_tracking=1 --enable_write_thread_adaptive_yield=0 --error_recovery_with_no_fault_injection=0 --expected_values_dir=/dev/shm/rocksdb.gpxs/rocksdb_crashtest_expected --fail_if_options_file_error=0 --fifo_allow_compaction=0 --file_checksum_impl=none --fill_cache=1 --flush_one_in=1000000 --format_version=3 --get_all_column_family_metadata_one_in=1000000 --get_current_wal_file_one_in=0 --get_live_files_apis_one_in=10000 --get_properties_of_all_tables_one_in=100000 --get_property_one_in=100000 --get_sorted_wal_files_one_in=0 --hard_pending_compaction_bytes_limit=274877906944 --high_pri_pool_ratio=0 --index_block_restart_interval=4 --index_shortening=0 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=16384 --inplace_update_support=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --key_may_exist_one_in=100 --last_level_temperature=kHot --level_compaction_dynamic_level_bytes=0 --lock_wal_one_in=1000000 --log_file_time_to_roll=0 --log_readahead_size=0 
--long_running_snapshots=1 --low_pri_pool_ratio=0 --lowest_used_cache_tier=2 --manifest_preallocation_size=5120 --manual_wal_flush_one_in=1000 --mark_for_compaction_one_file_in=10 --max_auto_readahead_size=16384 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=2500000 --max_key_len=3 --max_log_file_size=0 --max_manifest_file_size=1073741824 --max_sequential_skip_in_iterations=1 --max_total_wal_size=0 --max_write_batch_group_size_bytes=16 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=0 --memtable_insert_hint_per_batch=1 --memtable_max_range_deletions=100 --memtable_prefix_bloom_size_ratio=0 --memtable_protection_bytes_per_key=4 --memtable_whole_key_filtering=0 --memtablerep=skip_list --metadata_charge_policy=0 --metadata_read_fault_one_in=32 --metadata_write_fault_one_in=0 --min_write_buffer_number_to_merge=2 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=0 --open_files=100 --open_metadata_read_fault_one_in=0 --open_metadata_write_fault_one_in=8 --open_read_fault_one_in=0 --open_write_fault_one_in=16 --ops_per_thread=100000000 --optimize_filters_for_hits=1 --optimize_filters_for_memory=0 --optimize_multiget_for_io=1 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=1 --pause_background_one_in=1000000 --periodic_compaction_seconds=0 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=1 --preserve_internal_time_seconds=60 --progress_reports=0 --promote_l0_one_in=0 --read_amp_bytes_per_bit=0 --read_fault_one_in=32 --readahead_size=524288 --readpercent=50 --recycle_log_file_num=1 --reopen=0 --report_bg_io_stats=1 --reset_stats_one_in=10000 --sample_for_compression=5 --secondary_cache_fault_one_in=32 --secondary_cache_uri= --set_options_one_in=10000 --skip_stats_update_on_db_open=0 --snapshot_hold_ops=100000 --soft_pending_compaction_bytes_limit=68719476736 --sqfc_name=bar --sqfc_version=1 --sst_file_manager_bytes_per_sec=104857600 
--sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=0 --stats_history_buffer_size=1048576 --strict_bytes_per_sync=1 --subcompactions=3 --sync=0 --sync_fault_injection=1 --table_cache_numshardbits=0 --target_file_size_base=524288 --target_file_size_multiplier=2 --test_batches_snapshots=0 --test_cf_consistency=1 --top_level_index_pinning=1 --uncache_aggressiveness=5 --universal_max_read_amp=-1 --unpartitioned_pinning=2 --use_adaptive_mutex=0 --use_adaptive_mutex_lru=0 --use_attribute_group=1 --use_delta_encoding=1 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=0 --use_multi_cf_iterator=0 --use_multi_get_entity=0 --use_multiget=1 --use_put_entity_one_in=1 --use_sqfc_for_range_queries=1 --use_timed_put_one_in=0 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_compression=1 --verify_db_one_in=100000 --verify_file_checksums_one_in=0 --verify_iterator_with_expected_state_one_in=0 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=none --write_buffer_size=1048576 --write_dbid_to_manifest=1 --write_fault_one_in=0 --writepercent=35 ``` Reviewed By: cbi42 Differential Revision: D58958390 Pulled By: pdillinger fbshipit-source-id: 1271cfdcc3c574f78cd59f3c68148f7ed4a19c47
129 lines
4.8 KiB
C++
129 lines
4.8 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#pragma once
|
|
#include <stdint.h>
|
|
|
|
#include <string>
|
|
|
|
#include "db/db_impl/db_impl.h"
|
|
#include "db/db_iter.h"
|
|
#include "db/range_del_aggregator.h"
|
|
#include "memory/arena.h"
|
|
#include "options/cf_options.h"
|
|
#include "rocksdb/db.h"
|
|
#include "rocksdb/iterator.h"
|
|
#include "util/autovector.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class Arena;
|
|
class Version;
|
|
|
|
// A wrapper iterator which wraps DB Iterator and the arena, with which the DB
|
|
// iterator is supposed to be allocated. This class is used as an entry point of
|
|
// an iterator hierarchy whose memory can be allocated inline. In that way,
|
|
// accessing the iterator tree can be more cache friendly. It is also faster
|
|
// to allocate.
|
|
// When using the class's Iterator interface, the behavior is exactly
|
|
// the same as the inner DBIter.
|
|
class ArenaWrappedDBIter : public Iterator {
|
|
public:
|
|
~ArenaWrappedDBIter() override {
|
|
if (db_iter_ != nullptr) {
|
|
db_iter_->~DBIter();
|
|
} else {
|
|
assert(false);
|
|
}
|
|
}
|
|
|
|
// Get the arena to be used to allocate memory for DBIter to be wrapped,
|
|
// as well as child iterators in it.
|
|
virtual Arena* GetArena() { return &arena_; }
|
|
|
|
const ReadOptions& GetReadOptions() { return read_options_; }
|
|
|
|
// Set the internal iterator wrapped inside the DB Iterator. Usually it is
|
|
// a merging iterator.
|
|
virtual void SetIterUnderDBIter(InternalIterator* iter) {
|
|
db_iter_->SetIter(iter);
|
|
}
|
|
|
|
void SetMemtableRangetombstoneIter(
|
|
std::unique_ptr<TruncatedRangeDelIterator>* iter) {
|
|
memtable_range_tombstone_iter_ = iter;
|
|
}
|
|
|
|
bool Valid() const override { return db_iter_->Valid(); }
|
|
void SeekToFirst() override { db_iter_->SeekToFirst(); }
|
|
void SeekToLast() override { db_iter_->SeekToLast(); }
|
|
// 'target' does not contain timestamp, even if user timestamp feature is
|
|
// enabled.
|
|
void Seek(const Slice& target) override { db_iter_->Seek(target); }
|
|
void SeekForPrev(const Slice& target) override {
|
|
db_iter_->SeekForPrev(target);
|
|
}
|
|
void Next() override { db_iter_->Next(); }
|
|
void Prev() override { db_iter_->Prev(); }
|
|
Slice key() const override { return db_iter_->key(); }
|
|
Slice value() const override { return db_iter_->value(); }
|
|
const WideColumns& columns() const override { return db_iter_->columns(); }
|
|
Status status() const override { return db_iter_->status(); }
|
|
Slice timestamp() const override { return db_iter_->timestamp(); }
|
|
bool IsBlob() const { return db_iter_->IsBlob(); }
|
|
|
|
Status GetProperty(std::string prop_name, std::string* prop) override;
|
|
|
|
Status Refresh() override;
|
|
Status Refresh(const Snapshot*) override;
|
|
|
|
void Init(Env* env, const ReadOptions& read_options,
|
|
const ImmutableOptions& ioptions,
|
|
const MutableCFOptions& mutable_cf_options, const Version* version,
|
|
const SequenceNumber& sequence,
|
|
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
|
|
ReadCallback* read_callback, ColumnFamilyHandleImpl* cfh,
|
|
bool expose_blob_index, bool allow_refresh);
|
|
|
|
// Store some parameters so we can refresh the iterator at a later point
|
|
// with these same params
|
|
void StoreRefreshInfo(ColumnFamilyHandleImpl* cfh,
|
|
ReadCallback* read_callback, bool expose_blob_index) {
|
|
cfh_ = cfh;
|
|
read_callback_ = read_callback;
|
|
expose_blob_index_ = expose_blob_index;
|
|
}
|
|
|
|
private:
|
|
DBIter* db_iter_ = nullptr;
|
|
Arena arena_;
|
|
uint64_t sv_number_;
|
|
ColumnFamilyHandleImpl* cfh_ = nullptr;
|
|
ReadOptions read_options_;
|
|
ReadCallback* read_callback_;
|
|
bool expose_blob_index_ = false;
|
|
bool allow_refresh_ = true;
|
|
// If this is nullptr, it means the mutable memtable does not contain range
|
|
// tombstone when added under this DBIter.
|
|
std::unique_ptr<TruncatedRangeDelIterator>* memtable_range_tombstone_iter_ =
|
|
nullptr;
|
|
};
|
|
|
|
// Generate the arena wrapped iterator class.
|
|
// `cfh` is used for renewal. If left null, renewal will not
|
|
// be supported.
|
|
// Declaration only; the definition is not in this header. The parameters
// appear in the same order as those of ArenaWrappedDBIter::Init(), with
// default values supplied for the last three.
ArenaWrappedDBIter* NewArenaWrappedDbIterator(
    Env* env,
    const ReadOptions& read_options,
    const ImmutableOptions& ioptions,
    const MutableCFOptions& mutable_cf_options,
    const Version* version,
    const SequenceNumber& sequence,
    uint64_t max_sequential_skip_in_iterations,
    uint64_t version_number,
    ReadCallback* read_callback,
    ColumnFamilyHandleImpl* cfh = nullptr,
    bool expose_blob_index = false,
    bool allow_refresh = true);
|
|
} // namespace ROCKSDB_NAMESPACE
|