rocksdb/util/core_local.h
Peter Dillinger f4e4039f00 Add some more bit operations to internal APIs (#11660)
Summary:
BottomNBits() - there is a single fast instruction for this on x86 since BMI2, but testing with godbolt indicates you need at least GCC 10 for the compiler to choose that instruction from the obvious C++ code. https://godbolt.org/z/5a7Ysd41h

BitwiseAnd() - this is a convenience function that works around the language flaw that the type of the result of x & y is the larger of the two input types, when it should be the smaller. This can save some ugly static_cast.

I expect to use both of these in coming HyperClockCache developments, and have applied them in a couple of places in existing code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11660

Test Plan: unit tests added

Reviewed By: jowlyzhang

Differential Revision: D47935531

Pulled By: pdillinger

fbshipit-source-id: d148c43a1e51df4a1c549b93aaf2725a3f8d3bd6
2023-08-02 11:30:10 -07:00

86 lines
2.5 KiB
C++

// Copyright (c) 2017-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cassert>
#include <cstddef>
#include <memory>
#include <thread>
#include <utility>
#include <vector>

#include "port/likely.h"
#include "port/port.h"
#include "util/math.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
// An array of core-local values: one slot of type T per (power-of-two rounded)
// CPU core, so that threads running on different cores can usually work on
// different slots. Ideally the value type, T, is cache aligned to prevent
// false sharing between neighbouring slots.
//
// The slot count is fixed at construction: the smallest power of two that is
// >= 8 and >= the hardware concurrency reported at that time. The array is
// owned through a std::unique_ptr, so the class is not copyable.
template <typename T>
class CoreLocalArray {
 public:
  // Allocates the slot array (see the class comment for how it is sized).
  CoreLocalArray();

  // Returns the number of slots, which is always a power of two.
  size_t Size() const;

  // Returns a pointer to the element corresponding to the core that the
  // calling thread currently runs on.
  T* Access() const;

  // Same as Access(), but also returns the core index, which the client can
  // cache to reduce how often the core ID needs to be retrieved. Only do this
  // if some inaccuracy is tolerable, as the thread may migrate to a different
  // core after the index was obtained.
  std::pair<T*, size_t> AccessElementAndIndex() const;

  // Returns a pointer to the element for the specified core index, which must
  // be < Size(). This can be used, e.g., for aggregation over all slots, or
  // when the client caches a core index.
  T* AccessAtCore(size_t core_idx) const;

 private:
  // Owning pointer to the array of (1 << size_shift_) slots.
  std::unique_ptr<T[]> data_;
  // log2 of the slot count. Initialized in-class to the minimum (8 slots) so
  // the member never holds an indeterminate value.
  int size_shift_ = 3;
};
// Sizes and allocates the slot array: the slot count is the smallest power of
// two that is both >= 8 and >= the reported hardware concurrency.
template <typename T>
CoreLocalArray<T>::CoreLocalArray() {
  const int cpu_count =
      static_cast<int>(std::thread::hardware_concurrency());
  // Start at 2^3 = 8 slots and keep doubling until every CPU is covered.
  int shift = 3;
  for (; (1 << shift) < cpu_count; ++shift) {
  }
  size_shift_ = shift;
  data_.reset(new T[static_cast<size_t>(1) << shift]);
}
// Returns the number of slots in the array, i.e. 2^size_shift_.
template <typename T>
size_t CoreLocalArray<T>::Size() const {
  const size_t one = 1;
  return one << size_shift_;
}
// Returns the slot chosen by AccessElementAndIndex(), discarding the index.
template <typename T>
T* CoreLocalArray<T>::Access() const {
  const std::pair<T*, size_t> slot_and_index = AccessElementAndIndex();
  return slot_and_index.first;
}
// Picks the slot for the calling thread and returns it together with its
// index. The index is derived from the physical core ID when one is
// available; otherwise a slot is picked at random.
template <typename T>
std::pair<T*, size_t> CoreLocalArray<T>::AccessElementAndIndex() const {
  const int physical_core = port::PhysicalCoreID();
  if (UNLIKELY(physical_core < 0)) {
    // cpu id unavailable, just pick randomly among the 2^size_shift_ slots
    const size_t random_slot =
        Random::GetTLSInstance()->Uniform(1 << size_shift_);
    return {AccessAtCore(random_slot), random_slot};
  }
  // Reduce the core ID to its bottom size_shift_ bits to get a slot index.
  const size_t slot =
      static_cast<size_t>(BottomNBits(physical_core, size_shift_));
  return {AccessAtCore(slot), slot};
}
// Returns the address of the slot at core_idx. The index must be smaller than
// the slot count; this is only checked by an assert.
template <typename T>
T* CoreLocalArray<T>::AccessAtCore(size_t core_idx) const {
  assert(core_idx < Size());
  return data_.get() + core_idx;
}
} // namespace ROCKSDB_NAMESPACE