Some API clarifications (#9080)

Summary:
* Clarify that RocksDB is not exception safe on many of our callback
and extension interfaces
* Clarify FSRandomAccessFile::MultiRead implementations must accept
non-sorted inputs (see https://github.com/facebook/rocksdb/issues/8953)
* Clarify ConcurrentTaskLimiter and SstFileManager are not (currently)
extensible interfaces
* Mark WriteBufferManager as `final`, so it is then clearly not a
callback interface, even though it smells like one
* Clarify TablePropertiesCollector Status returns are mostly ignored

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9080

Test Plan: comments only (except WriteBufferManager final)

Reviewed By: ajkr

Differential Revision: D31968782

Pulled By: pdillinger

fbshipit-source-id: 11b648ce3ce3c5e5bdc02d2eafc7ea4b864bd1d2
This commit is contained in:
Peter Dillinger 2021-11-02 20:29:07 -07:00 committed by Facebook GitHub Bot
parent f72c834eab
commit 82afa01815
24 changed files with 128 additions and 24 deletions

View File

@ -17,6 +17,8 @@
### Public API change
* Made FileSystem extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
* Clarified in API comments that RocksDB is not exception safe for callbacks and custom extensions. An exception propagating into RocksDB can lead to undefined behavior, including data loss, unreported corruption, deadlocks, and more.
* Marked `WriteBufferManager` as `final` because it is not intended for extension.
## 6.26.0 (2021-10-20)
### Bug Fixes

View File

@ -307,35 +307,39 @@ struct AdvancedColumnFamilyOptions {
// delta_value - Delta value to be merged with the existing_value.
// Stored in transaction logs.
// merged_value - Set when delta is applied on the previous value.
//
// Applicable only when inplace_update_support is true,
// this callback function is called at the time of updating the memtable
// as part of a Put operation, lets say Put(key, delta_value). It allows the
// 'delta_value' specified as part of the Put operation to be merged with
// an 'existing_value' of the key in the database.
//
// If the merged value is smaller in size that the 'existing_value',
// then this function can update the 'existing_value' buffer inplace and
// the corresponding 'existing_value'_size pointer, if it wishes to.
// The callback should return UpdateStatus::UPDATED_INPLACE.
// In this case. (In this case, the snapshot-semantics of the rocksdb
// Iterator is not atomic anymore).
//
// If the merged value is larger in size than the 'existing_value' or the
// application does not wish to modify the 'existing_value' buffer inplace,
// then the merged value should be returned via *merge_value. It is set by
// merging the 'existing_value' and the Put 'delta_value'. The callback should
// return UpdateStatus::UPDATED in this case. This merged value will be added
// to the memtable.
//
// If merging fails or the application does not wish to take any action,
// then the callback should return UpdateStatus::UPDATE_FAILED.
//
// Please remember that the original call from the application is Put(key,
// delta_value). So the transaction log (if enabled) will still contain (key,
// delta_value). The 'merged_value' is not stored in the transaction log.
// Hence the inplace_callback function should be consistent across db reopens.
//
// RocksDB callbacks are NOT exception-safe. A callback completing with an
// exception can lead to undefined behavior in RocksDB, including data loss,
// unreported corruption, deadlocks, and more.
//
// Default: nullptr
UpdateStatus (*inplace_callback)(char* existing_value,
uint32_t* existing_value_size,

View File

@ -170,6 +170,10 @@ class Cache {
// The SizeCallback takes a void* pointer to the object and returns the size
// of the persistable data. It can be used by the secondary cache to allocate
// memory if needed.
//
// RocksDB callbacks are NOT exception-safe. A callback completing with an
// exception can lead to undefined behavior in RocksDB, including data loss,
// unreported corruption, deadlocks, and more.
using SizeCallback = size_t (*)(void* obj);
// The SaveToCallback takes a void* object pointer and saves the persistable

View File

@ -24,7 +24,10 @@ class SliceTransform;
// CompactionFilter allows an application to modify/delete a key-value during
// table file creation.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class CompactionFilter : public Customizable {
public:
enum ValueType {
@ -219,6 +222,10 @@ class CompactionFilter : public Customizable {
// `CompactionFilter` according to `ShouldFilterTableFileCreation()`. This
// allows the application to know about the different ongoing threads of work
// and makes it unnecessary for `CompactionFilter` to provide thread-safety.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class CompactionFilterFactory : public Customizable {
public:
virtual ~CompactionFilterFactory() {}

View File

@ -21,6 +21,10 @@ class Slice;
// used as keys in an sstable or a database. A Comparator implementation
// must be thread-safe since rocksdb may invoke its methods concurrently
// from multiple threads.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class Comparator : public Customizable {
public:
Comparator() : timestamp_size_(0) {}

View File

@ -17,6 +17,8 @@
namespace ROCKSDB_NAMESPACE {
// This is NOT an extensible interface but a public interface for result of
// NewConcurrentTaskLimiter. Any derived classes must be RocksDB internal.
class ConcurrentTaskLimiter {
public:
virtual ~ConcurrentTaskLimiter() {}

View File

@ -145,6 +145,9 @@ struct EnvOptions {
RateLimiter* rate_limiter = nullptr;
};
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class Env {
public:
struct FileAttributes {
@ -1150,6 +1153,10 @@ enum InfoLogLevel : unsigned char {
};
// An interface for writing log messages.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class Logger {
public:
size_t kDoNotSupportGetLogFileSize = (std::numeric_limits<size_t>::max)();

View File

@ -62,6 +62,10 @@ class BlockAccessCipherStream {
};
// BlockCipher
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class BlockCipher : public Customizable {
public:
virtual ~BlockCipher(){};
@ -105,6 +109,10 @@ class BlockCipher : public Customizable {
// The encryption provider is used to create a cipher stream for a specific
// file. The returned cipher stream will be used for actual
// encryption/decryption actions.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class EncryptionProvider : public Customizable {
public:
virtual ~EncryptionProvider(){};

View File

@ -43,6 +43,10 @@ struct FileChecksumGenContext {
// * Finalize is called at most once during the life of the object
// * All calls to Update come before Finalize
// * All calls to GetChecksum come after Finalize
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class FileChecksumGenerator {
public:
virtual ~FileChecksumGenerator() {}
@ -64,6 +68,10 @@ class FileChecksumGenerator {
};
// Create the FileChecksumGenerator object for each SST file.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class FileChecksumGenFactory : public Customizable {
public:
virtual ~FileChecksumGenFactory() {}
@ -86,6 +94,10 @@ class FileChecksumGenFactory : public Customizable {
// the checksum information of all valid SST file of a DB instance. It can
// also be used to store the checksum information of a list of SST files to
// be ingested.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class FileChecksumList {
public:
virtual ~FileChecksumList() {}

View File

@ -208,7 +208,10 @@ struct IODebugContext {
// retryable.
// NewCompositeEnv can be used to create an Env with a custom FileSystem for
// DBOptions::env.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class FileSystem : public Customizable {
public:
FileSystem();
@ -731,10 +734,11 @@ class FSRandomAccessFile {
// Read a bunch of blocks as described by reqs. The blocks can
// optionally be read in parallel. This is a synchronous call, i.e it
// should return after all reads have completed. The reads will be
// non-overlapping. If the function return Status is not ok, status of
// individual requests will be ignored and return status will be assumed
// for all read requests. The function return status is only meant for any
// any errors that occur before even processing specific read requests
// non-overlapping but can be in any order. If the function return Status
// is not ok, status of individual requests will be ignored and return
// status will be assumed for all read requests. The function return status
// is only meant for errors that occur before processing individual read
// requests.
virtual IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
const IOOptions& options, IODebugContext* dbg) {
assert(reqs != nullptr);

View File

@ -18,7 +18,11 @@ struct ConfigOptions;
struct Options;
// FlushBlockPolicy provides a configurable way to determine when to flush a
// block in the block based tables,
// block in the block based tables.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class FlushBlockPolicy {
public:
// Keep track of the key/value sequences and return the boolean value to

View File

@ -476,6 +476,10 @@ struct ExternalFileIngestionInfo {
// the current thread holding any DB mutex. This is to prevent potential
// deadlock and performance issue when using EventListener callback
// in a complex way.
//
// [Exceptions] Exceptions MUST NOT propagate out of overridden functions into
// RocksDB, because RocksDB is not exception-safe. This could cause undefined
// behavior including data loss, unreported corruption, deadlocks, and more.
class EventListener : public Customizable {
public:
static const char* Type() { return "EventListener"; }

View File

@ -44,6 +44,9 @@ class Logger;
//
// Refer to rocksdb-merge wiki for more details and example implementations.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class MergeOperator : public Customizable {
public:
virtual ~MergeOperator() {}

View File

@ -396,6 +396,9 @@ struct CompactionServiceJobInfo {
priority(priority_) {}
};
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class CompactionService : public Customizable {
public:
static const char* Type() { return "CompactionService"; }

View File

@ -15,6 +15,9 @@
namespace ROCKSDB_NAMESPACE {
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class RateLimiter {
public:
enum class OpType {

View File

@ -43,6 +43,10 @@ class SecondaryCacheResultHandle {
//
// Cache interface for caching blocks on a secondary tier (which can include
// non-volatile media, or alternate forms of caching such as compressed data)
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class SecondaryCache : public Customizable {
public:
virtual ~SecondaryCache() {}

View File

@ -25,12 +25,14 @@ namespace ROCKSDB_NAMESPACE {
class Slice;
struct ConfigOptions;
/*
* A SliceTransform is a generic pluggable way of transforming one string
* to another. Its primary use-case is in configuring rocksdb
* to store prefix blooms by setting prefix_extractor in
* ColumnFamilyOptions.
*/
// A SliceTransform is a generic pluggable way of transforming one string
// to another. Its primary use-case is in configuring rocksdb
// to store prefix blooms by setting prefix_extractor in
// ColumnFamilyOptions.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class SliceTransform : public Customizable {
public:
virtual ~SliceTransform(){};

View File

@ -21,7 +21,8 @@ class Logger;
// SstFileManager is used to track SST and blob files in the DB and control
// their deletion rate. All SstFileManager public functions are thread-safe.
// SstFileManager is not extensible.
// SstFileManager is NOT an extensible interface but a public interface for
// result of NewSstFileManager. Any derived classes must be RocksDB internal.
class SstFileManager {
public:
virtual ~SstFileManager() {}

View File

@ -78,6 +78,9 @@ class SstPartitioner {
};
};
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class SstPartitionerFactory : public Customizable {
public:
virtual ~SstPartitionerFactory() {}

View File

@ -573,6 +573,10 @@ enum StatsLevel : uint8_t {
// options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
// HistogramData hist;
// options.statistics->histogramData(FLUSH_TIME, &hist);
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class Statistics : public Customizable {
public:
virtual ~Statistics() {}

View File

@ -80,7 +80,14 @@ extern const std::string kRangeDelBlock;
// a collection of callback functions that will be invoked during table
// building. It is constructed with TablePropertiesCollectorFactory. The methods
// don't need to be thread-safe, as we will create exactly one
// TablePropertiesCollector object per table and then call it sequentially
// TablePropertiesCollector object per table and then call it sequentially.
//
// Statuses from these callbacks are currently logged when not OK, but
// otherwise ignored by RocksDB.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class TablePropertiesCollector {
public:
virtual ~TablePropertiesCollector() {}
@ -133,6 +140,10 @@ class TablePropertiesCollector {
// Constructs TablePropertiesCollector. Internals create a new
// TablePropertiesCollector for each new table
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class TablePropertiesCollectorFactory : public Customizable {
public:
struct Context {

View File

@ -262,6 +262,10 @@ struct CreateBackupOptions {
bool flush_before_backup = false;
// Callback for reporting progress, based on callback_trigger_interval_size.
//
// RocksDB callbacks are NOT exception-safe. A callback completing with an
// exception can lead to undefined behavior in RocksDB, including data loss,
// unreported corruption, deadlocks, and more.
std::function<void()> progress_callback = []() {};
// If false, background_thread_cpu_priority is ignored.

View File

@ -19,6 +19,10 @@ struct ConfigOptions;
// WALFilter allows an application to inspect write-ahead-log (WAL)
// records or modify their processing on recovery.
// Please see the details below.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class WalFilter : public Customizable {
public:
static const char* Type() { return "WalFilter"; }

View File

@ -23,8 +23,8 @@
namespace ROCKSDB_NAMESPACE {
class CacheReservationManager;
// Interface to block and signal DB instances.
// Each DB instance contains ptr to StallInterface.
// Interface to block and signal DB instances, intended for RocksDB
// internal use only. Each DB instance contains ptr to StallInterface.
class StallInterface {
public:
virtual ~StallInterface() {}
@ -34,7 +34,7 @@ class StallInterface {
virtual void Signal() = 0;
};
class WriteBufferManager {
class WriteBufferManager final {
public:
// Parameters:
// _buffer_size: _buffer_size = 0 indicates no limit. Memory won't be capped.