// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#pragma once
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "table/internal_iterator.h"

namespace ROCKSDB_NAMESPACE {

// PinnedIteratorsManager will be notified whenever we need to pin an Iterator
// and it will be responsible for deleting pinned Iterators when they are
// not needed anymore.
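//
// A minimal usage sketch (illustrative only; assumes `iter` is an
// InternalIterator* that was heap-allocated by the caller):
//
//   PinnedIteratorsManager pin_mgr;
//   pin_mgr.StartPinning();
//   pin_mgr.PinIterator(iter);     // pin_mgr now owns and will delete iter
//   // ... Slices backed by iter remain valid here ...
//   pin_mgr.ReleasePinnedData();   // deletes iter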
class PinnedIteratorsManager : public Cleanable {
 public:
  PinnedIteratorsManager() : pinning_enabled(false) {}
  ~PinnedIteratorsManager() {
    if (pinning_enabled) {
      ReleasePinnedData();
    }
  }

  // Move constructor and move assignment are allowed.
  PinnedIteratorsManager(PinnedIteratorsManager&& other) noexcept = default;
  PinnedIteratorsManager& operator=(PinnedIteratorsManager&& other) noexcept =
      default;

  // Enable pinning of Iterators
  void StartPinning() {
    assert(pinning_enabled == false);
    pinning_enabled = true;
  }

  // Is pinning enabled?
  bool PinningEnabled() { return pinning_enabled; }

  // Take ownership of iter and delete it when ReleasePinnedData() is called.
  // If arena is true, iter lives in arena-allocated memory, so only its
  // destructor is invoked; the arena owns and reclaims the memory itself.
  void PinIterator(InternalIterator* iter, bool arena = false) {
    if (arena) {
      PinPtr(iter, &PinnedIteratorsManager::ReleaseArenaInternalIterator);
    } else {
      PinPtr(iter, &PinnedIteratorsManager::ReleaseInternalIterator);
    }
  }
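
  // PinPtr() below generalizes PinIterator(): any pointer may be registered
  // together with the function that releases it when ReleasePinnedData() runs.
  // A minimal sketch (illustrative only; assumes pinning has been started and
  // `blob` is a std::string* allocated with new):
  //
  //   pin_mgr.PinPtr(blob,
  //                  [](void* p) { delete static_cast<std::string*>(p); });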
  using ReleaseFunction = void (*)(void* arg1);
  void PinPtr(void* ptr, ReleaseFunction release_func) {
    assert(pinning_enabled);
    if (ptr == nullptr) {
      return;
    }
    pinned_ptrs_.emplace_back(ptr, release_func);
  }

  // Release pinned Iterators
  inline void ReleasePinnedData() {
    assert(pinning_enabled == true);
    pinning_enabled = false;

    // Remove duplicate pointers so each pinned object is released exactly once
    std::sort(pinned_ptrs_.begin(), pinned_ptrs_.end());
    auto unique_end = std::unique(pinned_ptrs_.begin(), pinned_ptrs_.end());

    for (auto i = pinned_ptrs_.begin(); i != unique_end; ++i) {
      void* ptr = i->first;
      ReleaseFunction release_func = i->second;
      release_func(ptr);
    }

    pinned_ptrs_.clear();
    // Also do cleanups from the base Cleanable
    Cleanable::Reset();
  }

 private:
  // Deletes an iterator that was individually allocated on the heap.
  static void ReleaseInternalIterator(void* ptr) {
    delete static_cast<InternalIterator*>(ptr);
  }

  // Arena-allocated iterators are destroyed in place; the memory itself is
  // owned and reclaimed by the arena.
  static void ReleaseArenaInternalIterator(void* ptr) {
    static_cast<InternalIterator*>(ptr)->~InternalIterator();
  }

  bool pinning_enabled;
  std::vector<std::pair<void*, ReleaseFunction>> pinned_ptrs_;
};

}  // namespace ROCKSDB_NAMESPACE