2016-02-09 23:12:00 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2013-10-16 21:59:46 +00:00
|
|
|
//
|
2011-03-18 22:37:00 +00:00
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
//
|
|
|
|
// File names used by DB code
|
|
|
|
|
2013-10-05 05:32:05 +00:00
|
|
|
#pragma once
|
2011-03-18 22:37:00 +00:00
|
|
|
#include <stdint.h>
|
2022-10-25 01:34:52 +00:00
|
|
|
|
2011-03-18 22:37:00 +00:00
|
|
|
#include <string>
|
2022-10-25 01:34:52 +00:00
|
|
|
#include <unordered_map>
|
2014-07-02 16:54:20 +00:00
|
|
|
#include <vector>
|
2015-01-09 20:57:11 +00:00
|
|
|
|
2017-04-06 02:02:00 +00:00
|
|
|
#include "options/db_options.h"
|
2015-01-09 20:57:11 +00:00
|
|
|
#include "port/port.h"
|
2020-03-03 00:14:00 +00:00
|
|
|
#include "rocksdb/file_system.h"
|
2015-01-09 20:57:11 +00:00
|
|
|
#include "rocksdb/options.h"
|
2013-08-23 15:38:13 +00:00
|
|
|
#include "rocksdb/slice.h"
|
|
|
|
#include "rocksdb/status.h"
|
2013-10-24 06:39:23 +00:00
|
|
|
#include "rocksdb/transaction_log.h"
|
2011-03-18 22:37:00 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2011-03-18 22:37:00 +00:00
|
|
|
|
|
|
|
class Env;
|
2014-05-06 21:51:33 +00:00
|
|
|
class Directory;
|
2021-01-26 06:07:26 +00:00
|
|
|
class SystemClock;
|
Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env
Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.
Test Plan: Run all existing unit tests.
Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor
Reviewed By: igor
Subscribers: leveldb, dhruba
Differential Revision: https://reviews.facebook.net/D42321
2015-07-17 23:16:11 +00:00
|
|
|
class WritableFileWriter;
|
2011-03-18 22:37:00 +00:00
|
|
|
|
2020-03-21 02:17:54 +00:00
|
|
|
// Path separator character, selected at compile time: a backslash when
// OS_WIN is defined, a forward slash otherwise.
#ifdef OS_WIN
|
2021-10-16 17:03:19 +00:00
|
|
|
constexpr char kFilePathSeparator = '\\';
|
2020-03-21 02:17:54 +00:00
|
|
|
#else
|
2021-10-16 17:03:19 +00:00
|
|
|
constexpr char kFilePathSeparator = '/';
|
2020-03-21 02:17:54 +00:00
|
|
|
#endif
|
|
|
|
|
2011-03-18 22:37:00 +00:00
|
|
|
// Return the name of the log file with the specified number
|
|
|
|
// in the db named by "dbname". The result will be prefixed with
|
|
|
|
// "dbname".
|
|
|
|
extern std::string LogFileName(const std::string& dbname, uint64_t number);
|
|
|
|
|
2019-08-01 22:45:19 +00:00
|
|
|
// Overload of LogFileName that takes only the file number (no "dbname").
// NOTE(review): presumably returns the bare log file name without any
// directory prefix -- confirm against the definition.
extern std::string LogFileName(uint64_t number);
|
|
|
|
|
2020-05-07 16:29:21 +00:00
|
|
|
// Return the name of the blob file with the specified number. This overload
// takes no directory argument.
// NOTE(review): presumably the result has no directory prefix -- confirm
// against the definition.
extern std::string BlobFileName(uint64_t number);
|
|
|
|
|
2017-04-18 19:00:36 +00:00
|
|
|
// Return the name of the blob file with the specified number, given a
// directory name "bdirname".
// NOTE(review): presumably the result is prefixed with "bdirname" -- confirm
// against the definition.
extern std::string BlobFileName(const std::string& bdirname, uint64_t number);
|
|
|
|
|
2018-08-31 18:59:49 +00:00
|
|
|
// Return the name of the blob file with the specified number, given both the
// db name "dbname" and a blob directory "blob_dir".
// NOTE(review): how "dbname" and "blob_dir" are combined is not visible from
// this header -- confirm against the definition.
extern std::string BlobFileName(const std::string& dbname,
|
|
|
|
const std::string& blob_dir, uint64_t number);
|
|
|
|
|
2012-12-08 00:30:22 +00:00
|
|
|
// Return a directory name derived from "dbname".
// NOTE(review): presumably the directory that holds archived log files (see
// ArchivedLogFileName) -- confirm against the definition.
extern std::string ArchivalDirectory(const std::string& dbname);
|
|
|
|
|
2012-11-30 01:28:37 +00:00
|
|
|
// Return the name of the archived log file with the specified number
|
|
|
|
// in the db named by "dbname". The result will be prefixed with "dbname".
|
2022-10-25 01:34:52 +00:00
|
|
|
extern std::string ArchivedLogFileName(const std::string& dbname, uint64_t num);
|
2012-11-30 01:28:37 +00:00
|
|
|
|
2014-07-02 16:54:20 +00:00
|
|
|
// Build a table file name from "name" and the file number.
// TableFileNameToNumber is documented as the reverse of this function.
// NOTE(review): presumably "name" is the directory the result is prefixed
// with -- confirm against the definition.
extern std::string MakeTableFileName(const std::string& name, uint64_t number);
|
|
|
|
|
2019-08-01 22:45:19 +00:00
|
|
|
// Overload of MakeTableFileName that takes only the file number.
// NOTE(review): presumably returns the bare table file name without any
// directory prefix -- confirm against the definition.
extern std::string MakeTableFileName(uint64_t number);
|
|
|
|
|
2015-10-07 00:46:22 +00:00
|
|
|
// Return the name of sstable with LevelDB suffix
|
|
|
|
// created from RocksDB sstable suffixed name
|
|
|
|
extern std::string Rocks2LevelTableFileName(const std::string& fullname);
|
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 22:45:18 +00:00
|
|
|
// the reverse function of MakeTableFileName
|
|
|
|
// TODO(yhchiang): could merge this function with ParseFileName()
|
|
|
|
extern uint64_t TableFileNameToNumber(const std::string& name);
|
|
|
|
|
2011-03-18 22:37:00 +00:00
|
|
|
// Return the name of the sstable with the specified number
|
|
|
|
// in the db named by "dbname". The result will be prefixed with
|
|
|
|
// "dbname".
|
2014-07-14 22:34:30 +00:00
|
|
|
extern std::string TableFileName(const std::vector<DbPath>& db_paths,
|
2014-07-02 16:54:20 +00:00
|
|
|
uint64_t number, uint32_t path_id);
|
|
|
|
|
2014-08-13 18:57:40 +00:00
|
|
|
// Sufficient buffer size for FormatFileNumber.
|
2015-07-01 23:13:49 +00:00
|
|
|
const size_t kFormatFileNumberBufSize = 38;
|
2014-08-13 18:57:40 +00:00
|
|
|
|
|
|
|
// Format "number" and "path_id" as text into the caller-provided buffer
// "out_buf", whose capacity in bytes is "out_buf_size". A buffer of
// kFormatFileNumberBufSize bytes is documented as sufficient.
// NOTE(review): the exact output format is not visible from this header --
// confirm against the definition.
extern void FormatFileNumber(uint64_t number, uint32_t path_id, char* out_buf,
|
|
|
|
size_t out_buf_size);
|
2011-03-18 22:37:00 +00:00
|
|
|
|
|
|
|
// Return the name of the descriptor file for the db named by
|
|
|
|
// "dbname" and the specified incarnation number. The result will be
|
|
|
|
// prefixed with "dbname".
|
|
|
|
extern std::string DescriptorFileName(const std::string& dbname,
|
|
|
|
uint64_t number);
|
|
|
|
|
2021-10-16 17:03:19 +00:00
|
|
|
// Overload of DescriptorFileName that takes only the incarnation number (no
// "dbname").
// NOTE(review): presumably returns the bare descriptor file name without any
// directory prefix -- confirm against the definition.
extern std::string DescriptorFileName(uint64_t number);
|
|
|
|
|
|
|
|
extern const std::string kCurrentFileName; // = "CURRENT"
|
|
|
|
|
2011-03-18 22:37:00 +00:00
|
|
|
// Return the name of the current file. This file contains the name
|
|
|
|
// of the current manifest file. The result will be prefixed with
|
|
|
|
// "dbname".
|
|
|
|
extern std::string CurrentFileName(const std::string& dbname);
|
|
|
|
|
|
|
|
// Return the name of the lock file for the db named by
|
|
|
|
// "dbname". The result will be prefixed with "dbname".
|
|
|
|
extern std::string LockFileName(const std::string& dbname);
|
|
|
|
|
|
|
|
// Return the name of a temporary file owned by the db named "dbname".
|
|
|
|
// The result will be prefixed with "dbname".
|
|
|
|
extern std::string TempFileName(const std::string& dbname, uint64_t number);
|
|
|
|
|
2014-08-14 17:05:16 +00:00
|
|
|
// A helper structure for prefix of info log names.
|
|
|
|
// Holds the prefix used for info log names, built either from the DB's
// absolute path or as a default prefix (see the two constructors).
struct InfoLogPrefix {
|
|
|
|
// Fixed-size character buffer of 260 bytes.
// NOTE(review): presumably the backing storage that `prefix` points into --
// confirm in the constructor definitions.
char buf[260];
|
|
|
|
// The prefix itself, exposed as a Slice.
Slice prefix;
|
|
|
|
// Prefix with DB absolute path encoded
|
|
|
|
explicit InfoLogPrefix(bool has_log_dir, const std::string& db_absolute_path);
|
|
|
|
// Default Prefix
|
|
|
|
explicit InfoLogPrefix();
|
|
|
|
};
|
|
|
|
|
2011-03-18 22:37:00 +00:00
|
|
|
// Return the name of the info log file for "dbname".
|
2012-09-06 00:44:13 +00:00
|
|
|
extern std::string InfoLogFileName(const std::string& dbname,
|
2015-09-23 19:22:44 +00:00
|
|
|
const std::string& db_path = "",
|
|
|
|
const std::string& log_dir = "");
|
2011-03-18 22:37:00 +00:00
|
|
|
|
|
|
|
// Return the name of the old info log file for "dbname".
|
2012-09-06 00:44:13 +00:00
|
|
|
extern std::string OldInfoLogFileName(const std::string& dbname, uint64_t ts,
|
2015-09-23 19:22:44 +00:00
|
|
|
const std::string& db_path = "",
|
|
|
|
const std::string& log_dir = "");
|
2011-03-18 22:37:00 +00:00
|
|
|
|
2021-10-16 17:03:19 +00:00
|
|
|
extern const std::string kOptionsFileNamePrefix; // = "OPTIONS-"
|
|
|
|
extern const std::string kTempFileNameSuffix; // = "dbtmp"
|
2015-11-11 06:58:01 +00:00
|
|
|
|
|
|
|
// Return an options file name given the "dbname" and file number.
|
|
|
|
// Format: OPTIONS-[number]
|
|
|
|
extern std::string OptionsFileName(const std::string& dbname,
|
|
|
|
uint64_t file_num);
|
2021-10-16 17:03:19 +00:00
|
|
|
// Overload of OptionsFileName that takes only the file number (no "dbname").
// NOTE(review): presumably returns the bare options file name without any
// directory prefix -- confirm against the definition.
extern std::string OptionsFileName(uint64_t file_num);
|
2015-11-11 06:58:01 +00:00
|
|
|
|
|
|
|
// Return a temp options file name given the "dbname" and file number.
|
|
|
|
// Format: OPTIONS-[number].dbtmp
|
|
|
|
extern std::string TempOptionsFileName(const std::string& dbname,
|
|
|
|
uint64_t file_num);
|
|
|
|
|
2012-12-17 19:26:59 +00:00
|
|
|
// Return the name to use for a metadatabase. The result will be prefixed with
|
|
|
|
// "dbname".
|
2022-10-25 01:34:52 +00:00
|
|
|
extern std::string MetaDatabaseName(const std::string& dbname, uint64_t number);
|
2012-12-17 19:26:59 +00:00
|
|
|
|
2013-10-18 21:50:54 +00:00
|
|
|
// Return the name of the Identity file which stores a unique number for the db
|
|
|
|
// that will get regenerated if the db loses all its data and is recreated fresh
|
|
|
|
// either from a backup-image or empty
|
|
|
|
extern std::string IdentityFileName(const std::string& dbname);
|
|
|
|
|
2013-10-05 05:32:05 +00:00
|
|
|
// If filename is a rocksdb file, store the type of the file in *type.
|
2011-04-20 22:48:11 +00:00
|
|
|
// The number encoded in the filename is stored in *number. If the
|
|
|
|
// filename was successfully parsed, returns true. Else return false.
|
2014-08-14 17:05:16 +00:00
|
|
|
// info_log_name_prefix is the path of info logs.
|
|
|
|
extern bool ParseFileName(const std::string& filename, uint64_t* number,
|
|
|
|
const Slice& info_log_name_prefix, FileType* type,
|
2013-10-24 06:39:23 +00:00
|
|
|
WalFileType* log_type = nullptr);
|
2014-08-14 17:05:16 +00:00
|
|
|
// Same as previous function, but skip info log files.
|
|
|
|
extern bool ParseFileName(const std::string& filename, uint64_t* number,
|
|
|
|
FileType* type, WalFileType* log_type = nullptr);
|
2011-03-18 22:37:00 +00:00
|
|
|
|
|
|
|
// Make the CURRENT file point to the descriptor file with the
|
Sync dir containing CURRENT after RenameFile on CURRENT as much as possible (#10573)
Summary:
**Context:**
Below crash test revealed a bug that directory containing CURRENT file (short for `dir_contains_current_file` below) was not always get synced after a new CURRENT is created and being called with `RenameFile` as part of the creation.
This bug exposes a risk that such un-synced directory containing the updated CURRENT can’t survive a host crash (e.g, power loss) hence get corrupted. This then will be followed by a recovery from a corrupted CURRENT that we don't want.
The root-cause is that a nullptr `FSDirectory* dir_contains_current_file` sometimes gets passed-down to `SetCurrentFile()` hence in those case `dir_contains_current_file->FSDirectory::FsyncWithDirOptions()` will be skipped (which otherwise will internally call`Env/FS::SyncDic()` )
```
./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --allow_data_in_errors=True --avoid_unnecessary_blocking_io=0 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=8 --block_size=16384 --bloom_bits=134.8015470676662 --bottommost_compression_type=disable --cache_size=8388608 --checkpoint_one_in=1000000 --checksum_type=kCRC32c --clear_column_family_one_in=0 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=2 --compaction_ttl=100 --compression_max_dict_buffer_bytes=511 --compression_max_dict_bytes=16384 --compression_type=zstd --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=65536 --continuous_verification_interval=0 --data_block_index_type=0 --db=$db --db_write_buffer_size=1048576 --delpercent=5 --delrangepercent=0 --destroy_db_initially=0 --disable_wal=0 --enable_compaction_filter=0 --enable_pipelined_write=1 --expected_values_dir=$exp --fail_if_options_file_error=1 --file_checksum_impl=none --flush_one_in=1000000 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=4 --ingest_external_file_one_in=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=True --mark_for_compaction_one_file_in=10 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=10000 --max_key_len=3 --max_manifest_file_size=16384 --max_write_batch_group_size_bytes=64 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=0 --memtable_prefix_bloom_size_ratio=0.001 --memtable_protection_bytes_per_key=1 --memtable_whole_key_filtering=1 --mmap_read=1 --nooverwritepercent=1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=100000000 --optimize_filters_for_memory=1 --paranoid_file_checks=1 --partition_pinning=2 --pause_background_one_in=1000000 --periodic_compaction_seconds=0 
--prefix_size=5 --prefixpercent=5 --prepopulate_block_cache=1 --progress_reports=0 --read_fault_one_in=1000 --readpercent=45 --recycle_log_file_num=0 --reopen=0 --ribbon_starting_level=999 --secondary_cache_fault_one_in=32 --secondary_cache_uri=compressed_secondary_cache://capacity=8388608 --set_options_one_in=10000 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=0 --sst_file_manager_bytes_per_truncate=0 --subcompactions=3 --sync_fault_injection=1 --target_file_size_base=2097 --target_file_size_multiplier=2 --test_batches_snapshots=1 --top_level_index_pinning=1 --use_full_merge_v1=1 --use_merge=1 --value_size_mult=32 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=524288 --write_buffer_size=4194 --writepercent=35
```
```
stderr:
WARNING: prefix_size is non-zero but memtablerep != prefix_hash
db_stress: utilities/fault_injection_fs.cc:748: virtual rocksdb::IOStatus rocksdb::FaultInjectionTestFS::RenameFile(const std::string &, const std::string &, const rocksdb::IOOptions &, rocksdb::IODebugContext *): Assertion `tlist.find(tdn.second) == tlist.end()' failed.`
```
**Summary:**
The PR ensured the non-test path pass down a non-null dir containing CURRENT (which is by current RocksDB assumption just db_dir) by doing the following:
- Renamed `directory_to_fsync` as `dir_contains_current_file` in `SetCurrentFile()` to tighten the association between this directory and CURRENT file
- Changed `SetCurrentFile()` API to require `dir_contains_current_file` being passed-in, instead of making it by default nullptr.
- Because `SetCurrentFile()`'s `dir_contains_current_file` is passed down from `VersionSet::LogAndApply()` then `VersionSet::ProcessManifestWrites()` (i.e, think about this as a chain of 3 functions related to MANIFEST update), these 2 functions also got refactored to require `dir_contains_current_file`
- Updated the non-test-path callers of these 3 functions to obtain and pass in non-nullptr `dir_contains_current_file`, which by current assumption of RocksDB, is the `FSDirectory* db_dir`.
- `db_impl` path will obtain `DBImpl::directories_.getDbDir()` while others with no access to such `directories_` are obtained on the fly by creating such object `FileSystem::NewDirectory(..)` and manage it by unique pointers to ensure short life time.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10573
Test Plan:
- `make check`
- Passed the repro db_stress command
- For future improvement, since we currently don't assert dir containing CURRENT to be non-nullptr due to https://github.com/facebook/rocksdb/pull/10573#pullrequestreview-1087698899, there is still chances that future developers mistakenly pass down nullptr dir containing CURRENT thus resulting skipped sync dir and cause the bug again. Therefore a smarter test (e.g, such as quoted from ajkr "(make) unsynced data loss to be dropping files corresponding to unsynced directory entries") is still needed.
Reviewed By: ajkr
Differential Revision: D39005886
Pulled By: hx235
fbshipit-source-id: 336fb9090d0cfa6ca3dd580db86268007dde7f5a
2022-08-30 00:35:21 +00:00
|
|
|
// specified number. On its success and when dir_contains_current_file is not
|
|
|
|
// nullptr, the function will fsync the directory containing the CURRENT file
|
|
|
|
// (when it is nullptr, no directory fsync is performed).
|
Pass IOStatus to write path and set retryable IO Error as hard error in BG jobs (#6487)
Summary:
In the current code base, we use Status to get and store the returned status from the call. Specifically, for IO related functions, the current Status cannot reflect the IO Error details such as error scope, error retryable attribute, and others. With the implementation of https://github.com/facebook/rocksdb/issues/5761, we have the new Wrapper for IO, which returns IOStatus instead of Status. However, the IOStatus is purged at the lower level of write path and transferred to Status.
The first job of this PR is to pass the IOStatus to the write path (flush, WAL write, and Compaction). The second job is to identify the Retryable IO Error as HardError, and set the bg_error_ as HardError. In this case, the DB Instance becomes read only. User is informed of the Status and need to take actions to deal with it (e.g., call db->Resume()).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6487
Test Plan: Added the testing case to error_handler_fs_test. Pass make asan_check
Reviewed By: anand1976
Differential Revision: D20685017
Pulled By: zhichao-cao
fbshipit-source-id: ff85f042896243abcd6ef37877834e26f36b6eb0
2020-03-27 23:03:05 +00:00
|
|
|
extern IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
|
2020-03-29 02:05:54 +00:00
|
|
|
uint64_t descriptor_number,
|
Sync dir containing CURRENT after RenameFile on CURRENT as much as possible (#10573)
Summary:
**Context:**
Below crash test revealed a bug that directory containing CURRENT file (short for `dir_contains_current_file` below) was not always get synced after a new CURRENT is created and being called with `RenameFile` as part of the creation.
This bug exposes a risk that such un-synced directory containing the updated CURRENT can’t survive a host crash (e.g, power loss) hence get corrupted. This then will be followed by a recovery from a corrupted CURRENT that we don't want.
The root-cause is that a nullptr `FSDirectory* dir_contains_current_file` sometimes gets passed-down to `SetCurrentFile()` hence in those case `dir_contains_current_file->FSDirectory::FsyncWithDirOptions()` will be skipped (which otherwise will internally call`Env/FS::SyncDic()` )
```
./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --allow_data_in_errors=True --avoid_unnecessary_blocking_io=0 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=8 --block_size=16384 --bloom_bits=134.8015470676662 --bottommost_compression_type=disable --cache_size=8388608 --checkpoint_one_in=1000000 --checksum_type=kCRC32c --clear_column_family_one_in=0 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=2 --compaction_ttl=100 --compression_max_dict_buffer_bytes=511 --compression_max_dict_bytes=16384 --compression_type=zstd --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=65536 --continuous_verification_interval=0 --data_block_index_type=0 --db=$db --db_write_buffer_size=1048576 --delpercent=5 --delrangepercent=0 --destroy_db_initially=0 --disable_wal=0 --enable_compaction_filter=0 --enable_pipelined_write=1 --expected_values_dir=$exp --fail_if_options_file_error=1 --file_checksum_impl=none --flush_one_in=1000000 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=4 --ingest_external_file_one_in=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=True --mark_for_compaction_one_file_in=10 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=10000 --max_key_len=3 --max_manifest_file_size=16384 --max_write_batch_group_size_bytes=64 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=0 --memtable_prefix_bloom_size_ratio=0.001 --memtable_protection_bytes_per_key=1 --memtable_whole_key_filtering=1 --mmap_read=1 --nooverwritepercent=1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=100000000 --optimize_filters_for_memory=1 --paranoid_file_checks=1 --partition_pinning=2 --pause_background_one_in=1000000 --periodic_compaction_seconds=0 
--prefix_size=5 --prefixpercent=5 --prepopulate_block_cache=1 --progress_reports=0 --read_fault_one_in=1000 --readpercent=45 --recycle_log_file_num=0 --reopen=0 --ribbon_starting_level=999 --secondary_cache_fault_one_in=32 --secondary_cache_uri=compressed_secondary_cache://capacity=8388608 --set_options_one_in=10000 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=0 --sst_file_manager_bytes_per_truncate=0 --subcompactions=3 --sync_fault_injection=1 --target_file_size_base=2097 --target_file_size_multiplier=2 --test_batches_snapshots=1 --top_level_index_pinning=1 --use_full_merge_v1=1 --use_merge=1 --value_size_mult=32 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=524288 --write_buffer_size=4194 --writepercent=35
```
```
stderr:
WARNING: prefix_size is non-zero but memtablerep != prefix_hash
db_stress: utilities/fault_injection_fs.cc:748: virtual rocksdb::IOStatus rocksdb::FaultInjectionTestFS::RenameFile(const std::string &, const std::string &, const rocksdb::IOOptions &, rocksdb::IODebugContext *): Assertion `tlist.find(tdn.second) == tlist.end()' failed.`
```
**Summary:**
The PR ensured the non-test path pass down a non-null dir containing CURRENT (which is by current RocksDB assumption just db_dir) by doing the following:
- Renamed `directory_to_fsync` as `dir_contains_current_file` in `SetCurrentFile()` to tighten the association between this directory and CURRENT file
- Changed `SetCurrentFile()` API to require `dir_contains_current_file` being passed-in, instead of making it by default nullptr.
- Because `SetCurrentFile()`'s `dir_contains_current_file` is passed down from `VersionSet::LogAndApply()` then `VersionSet::ProcessManifestWrites()` (i.e, think about this as a chain of 3 functions related to MANIFEST update), these 2 functions also got refactored to require `dir_contains_current_file`
- Updated the non-test-path callers of these 3 functions to obtain and pass in non-nullptr `dir_contains_current_file`, which by current assumption of RocksDB, is the `FSDirectory* db_dir`.
- `db_impl` path will obtain `DBImpl::directories_.getDbDir()` while others with no access to such `directories_` are obtained on the fly by creating such object `FileSystem::NewDirectory(..)` and manage it by unique pointers to ensure short life time.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10573
Test Plan:
- `make check`
- Passed the repro db_stress command
- For future improvement, since we currently don't assert dir containing CURRENT to be non-nullptr due to https://github.com/facebook/rocksdb/pull/10573#pullrequestreview-1087698899, there is still chances that future developers mistakenly pass down nullptr dir containing CURRENT thus resulting skipped sync dir and cause the bug again. Therefore a smarter test (e.g, such as quoted from ajkr "(make) unsynced data loss to be dropping files corresponding to unsynced directory entries") is still needed.
Reviewed By: ajkr
Differential Revision: D39005886
Pulled By: hx235
fbshipit-source-id: 336fb9090d0cfa6ca3dd580db86268007dde7f5a
2022-08-30 00:35:21 +00:00
|
|
|
FSDirectory* dir_contains_current_file);
|
2011-03-18 22:37:00 +00:00
|
|
|
|
2013-10-18 21:50:54 +00:00
|
|
|
// Make the IDENTITY file for the db
|
2019-09-03 15:50:47 +00:00
|
|
|
extern Status SetIdentityFile(Env* env, const std::string& dbname,
|
|
|
|
const std::string& db_id = {});
|
2011-03-18 22:37:00 +00:00
|
|
|
|
2015-01-22 19:43:38 +00:00
|
|
|
// Sync manifest file `file`.
|
2021-03-15 11:32:24 +00:00
|
|
|
extern IOStatus SyncManifest(const ImmutableDBOptions* db_options,
|
Pass IOStatus to write path and set retryable IO Error as hard error in BG jobs (#6487)
Summary:
In the current code base, we use Status to get and store the returned status from the call. Specifically, for IO related functions, the current Status cannot reflect the IO Error details such as error scope, error retryable attribute, and others. With the implementation of https://github.com/facebook/rocksdb/issues/5761, we have the new Wrapper for IO, which returns IOStatus instead of Status. However, the IOStatus is purged at the lower level of write path and transferred to Status.
The first job of this PR is to pass the IOStatus to the write path (flush, WAL write, and Compaction). The second job is to identify the Retryable IO Error as HardError, and set the bg_error_ as HardError. In this case, the DB Instance becomes read only. User is informed of the Status and need to take actions to deal with it (e.g., call db->Resume()).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6487
Test Plan: Added the testing case to error_handler_fs_test. Pass make asan_check
Reviewed By: anand1976
Differential Revision: D20685017
Pulled By: zhichao-cao
fbshipit-source-id: ff85f042896243abcd6ef37877834e26f36b6eb0
2020-03-27 23:03:05 +00:00
|
|
|
WritableFileWriter* file);
|
2015-01-22 19:43:38 +00:00
|
|
|
|
2019-05-31 17:45:20 +00:00
|
|
|
// Return list of file names of info logs in `file_names`.
|
|
|
|
// The list only contains file name. The parent directory name is stored
|
|
|
|
// in `parent_dir`.
|
|
|
|
// `db_log_dir` should be the one as in options.db_log_dir
|
2021-01-26 06:07:26 +00:00
|
|
|
extern Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
|
|
|
|
const std::string& db_log_dir,
|
2019-05-31 17:45:20 +00:00
|
|
|
const std::string& dbname,
|
|
|
|
std::string* parent_dir,
|
|
|
|
std::vector<std::string>* file_names);
|
2020-03-21 02:17:54 +00:00
|
|
|
|
|
|
|
// Return a normalized form of "path" as a new string; the input is not
// modified.
// NOTE(review): the exact normalization rules (e.g. handling of repeated
// kFilePathSeparator characters) are not visible from this header -- confirm
// against the definition.
extern std::string NormalizePath(const std::string& path);
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|