Add some more bit operations to internal APIs (#11660)

Summary:
BottomNBits() - there is a single fast instruction for this on x86 since BMI2, but testing with godbolt indicates you need at least GCC 10 for the compiler to choose that instruction from the obvious C++ code. https://godbolt.org/z/5a7Ysd41h
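
For illustration, a minimal standalone sketch of the two code paths (BottomNBitsSketch is a made-up name for this sketch; the real, templated BottomNBits() is in the util/math.h diff below):

  #include <cassert>
  #include <cstdint>
  #ifdef __BMI2__
  #include <immintrin.h>
  #endif

  // Portable mask expression vs. the explicit BMI2 intrinsic. GCC 10+
  // compiles the mask expression down to BZHI on its own; the intrinsic
  // covers older compilers when BMI2 is enabled at build time.
  inline uint64_t BottomNBitsSketch(uint64_t v, int nbits) {
    assert(nbits >= 0 && nbits < 64);  // full width or more is undefined
  #ifdef __BMI2__
    return _bzhi_u64(v, nbits);  // single instruction on x86
  #else
    return v & ((uint64_t{1} << nbits) - 1);  // the "obvious C++ code"
  #endif
  }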

BitwiseAnd() - this is a convenience function that works around the language flaw that the type of the result of x & y is the larger of the two input types, when it should be the smaller. This can save some ugly static_casts.
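
A minimal sketch of the issue (Example is a made-up function; BitwiseAnd is the helper added to util/math.h in this change):

  #include <cassert>
  #include <cstdint>
  #include "util/math.h"  // for the new BitwiseAnd

  uint32_t Example(uint64_t x, uint32_t mask) {
    // The usual arithmetic conversions make decltype(x & mask) uint64_t,
    // i.e. the larger type, so narrowing requires an explicit cast.
    uint32_t with_cast = static_cast<uint32_t>(x & mask);
    // BitwiseAnd returns the smaller of the two types; no cast needed.
    uint32_t with_helper = BitwiseAnd(x, mask);
    assert(with_cast == with_helper);
    return with_helper;
  }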

I expect to use both of these in coming HyperClockCache developments, and have applied them in a couple of places in existing code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11660

Test Plan: unit tests added

Reviewed By: jowlyzhang

Differential Revision: D47935531

Pulled By: pdillinger

fbshipit-source-id: d148c43a1e51df4a1c549b93aaf2725a3f8d3bd6
Peter Dillinger, 2023-08-02 11:30:10 -07:00 (committed by Facebook GitHub Bot)
parent 946d1009bc
commit f4e4039f00
6 changed files with 136 additions and 16 deletions

cache/clock_cache.h

@@ -24,6 +24,7 @@
 #include "rocksdb/cache.h"
 #include "rocksdb/secondary_cache.h"
 #include "util/autovector.h"
+#include "util/math.h"

 namespace ROCKSDB_NAMESPACE {

@@ -563,7 +564,7 @@ class HyperClockTable : public BaseClockTable {
  private:  // functions
  // Returns x mod 2^{length_bits_}.
  inline size_t ModTableSize(uint64_t x) {
-    return static_cast<size_t>(x) & length_bits_mask_;
+    return BitwiseAnd(x, length_bits_mask_);
  }

  // Returns the first slot in the probe sequence with a handle e such that


@@ -38,7 +38,7 @@ uint32_t DetermineSeed(int32_t hash_seed_option) {
       return GetSliceHash(hostname) & kSeedMask;
     } else {
       // Fall back on something stable within the process.
-      return static_cast<uint32_t>(gen.GetBaseUpper()) & kSeedMask;
+      return BitwiseAnd(gen.GetBaseUpper(), kSeedMask);
     }
   } else {
     // for kQuasiRandomHashSeed and fallback


@@ -13,6 +13,7 @@
 #include "port/likely.h"
 #include "port/port.h"
+#include "util/math.h"
 #include "util/random.h"

 namespace ROCKSDB_NAMESPACE {

@@ -70,7 +71,7 @@ std::pair<T*, size_t> CoreLocalArray<T>::AccessElementAndIndex() const {
     // cpu id unavailable, just pick randomly
     core_idx = Random::GetTLSInstance()->Uniform(1 << size_shift_);
   } else {
-    core_idx = static_cast<size_t>(cpuid & ((1 << size_shift_) - 1));
+    core_idx = static_cast<size_t>(BottomNBits(cpuid, size_shift_));
   }
   return {AccessAtCore(core_idx), core_idx};
 }


@@ -565,6 +565,8 @@ size_t FastRange64(uint64_t hash, size_t range) {
 // Tests for math.h / math128.h (not worth a separate test binary)
 using ROCKSDB_NAMESPACE::BitParity;
 using ROCKSDB_NAMESPACE::BitsSetToOne;
+using ROCKSDB_NAMESPACE::BitwiseAnd;
+using ROCKSDB_NAMESPACE::BottomNBits;
 using ROCKSDB_NAMESPACE::ConstexprFloorLog2;
 using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
 using ROCKSDB_NAMESPACE::DecodeFixed128;

@@ -580,6 +582,19 @@ using ROCKSDB_NAMESPACE::Upper64of128;
 int blah(int x) { return DownwardInvolution(x); }

+template <typename T1, typename T2>
+static void test_BitwiseAnd(T1 v1, T2 v2) {
+  auto a = BitwiseAnd(v1, v2);
+  // Essentially repeating the implementation :-/
+  if constexpr (sizeof(T1) < sizeof(T2)) {
+    static_assert(std::is_same_v<decltype(a), T1>);
+    EXPECT_EQ(a, static_cast<T1>(v1 & v2));
+  } else {
+    static_assert(std::is_same_v<decltype(a), T2>);
+    EXPECT_EQ(a, static_cast<T2>(v1 & v2));
+  }
+}
+
 template <typename T>
 static void test_BitOps() {
   // This complex code is to generalize to 128-bit values. Otherwise

@@ -598,6 +613,22 @@ static void test_BitOps() {
     // If we could directly use arithmetic:
     // T vm1 = static_cast<T>(v - 1);

+    // BottomNBits
+    {
+      // An essentially full length value
+      T x = everyOtherBit;
+      if (i > 2) {
+        // Make it slightly irregular
+        x = x ^ (T{1} << (i / 2));
+      }
+      auto a = BottomNBits(x, i);
+      auto b = BottomNBits(~x, i);
+      EXPECT_EQ(x | a, x);
+      EXPECT_EQ(a | b, vm1);
+      EXPECT_EQ(a & b, T{0});
+      EXPECT_EQ(BottomNBits(x ^ a, i), T{0});
+    }
+
     // FloorLog2
     if (v > 0) {
       EXPECT_EQ(FloorLog2(v), i);

@@ -707,9 +738,22 @@ static void test_BitOps() {
       }
     }
+
+    // BitwiseAnd
+    {
+      test_BitwiseAnd(vm1, static_cast<char>(0x99));
+      test_BitwiseAnd(v, static_cast<char>(0x99));
+      test_BitwiseAnd(char{0x66}, vm1);
+      test_BitwiseAnd(char{0x66}, v);
+      test_BitwiseAnd(v, int16_t{0x6699});
+      test_BitwiseAnd(v, uint16_t{0x9966});
+      test_BitwiseAnd(int64_t{0x1234234534564567}, v);
+      test_BitwiseAnd(uint64_t{0x9876876576545432}, v);
+    }
+
     vm1 = (vm1 << 1) | 1;
   }

   // ConstexprFloorLog2
   EXPECT_EQ(ConstexprFloorLog2(T{1}), 0);
   EXPECT_EQ(ConstexprFloorLog2(T{2}), 1);
   EXPECT_EQ(ConstexprFloorLog2(T{3}), 1);


@@ -9,6 +9,9 @@
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
+#ifdef __BMI2__
+#include <immintrin.h>
+#endif

 #include <cstdint>
 #include <type_traits>

@@ -20,11 +23,33 @@ ASSERT_FEATURE_COMPAT_HEADER();

 namespace ROCKSDB_NAMESPACE {

+// Fast implementation of extracting the bottom n bits of an integer.
+// To ensure fast implementation, undefined if n bits is full width or more.
+template <typename T>
+inline T BottomNBits(T v, int nbits) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+  assert(nbits >= 0);
+  assert(nbits < int{8 * sizeof(T)});
+#ifdef __BMI2__
+  if constexpr (sizeof(T) <= 4) {
+    return static_cast<T>(_bzhi_u32(static_cast<uint32_t>(v), nbits));
+  }
+  if constexpr (sizeof(T) <= 8) {
+    return static_cast<T>(_bzhi_u64(static_cast<uint64_t>(v), nbits));
+  }
+#endif
+  // Newer compilers compile this down to bzhi on x86, but some older
+  // ones don't, thus the need for the intrinsic above.
+  return static_cast<T>(v & ((T{1} << nbits) - 1));
+}
+
 // Fast implementation of floor(log2(v)). Undefined for 0 or negative
 // numbers (in case of signed type).
 template <typename T>
 inline int FloorLog2(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   assert(v > 0);
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");

@@ -63,6 +88,8 @@ inline int FloorLog2(T v) {
 // Constexpr version of FloorLog2
 template <typename T>
 constexpr int ConstexprFloorLog2(T v) {
+  // NOTE: not checking is_integral so that this works with Unsigned128
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   int rv = 0;
   while (v > T{1}) {
     ++rv;

@@ -74,7 +101,8 @@ constexpr int ConstexprFloorLog2(T v) {
 // Number of low-order zero bits before the first 1 bit. Undefined for 0.
 template <typename T>
 inline int CountTrailingZeroBits(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   assert(v != 0);
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");

@@ -115,6 +143,9 @@ namespace detail {
 template <typename T>
 int BitsSetToOneFallback(T v) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
   const int kBits = static_cast<int>(sizeof(T)) * 8;
   static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
   // we static_cast these bit patterns in order to truncate them to the correct

@@ -140,7 +171,9 @@ int BitsSetToOneFallback(T v) {
 // Number of bits set to 1. Also known as "population count".
 template <typename T>
 inline int BitsSetToOne(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
 #ifdef _MSC_VER
   static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
   if (sizeof(T) < sizeof(uint32_t)) {

@@ -192,7 +225,9 @@ inline int BitsSetToOne(T v) {
 template <typename T>
 inline int BitParity(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
 #ifdef _MSC_VER
   // bit parity == oddness of popcount
   return BitsSetToOne(v) & 1;

@@ -214,7 +249,8 @@ inline int BitParity(T v) {
 // encode/decode big endian.
 template <typename T>
 inline T EndianSwapValue(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");

 #ifdef _MSC_VER
   if (sizeof(T) == 2) {

@@ -244,6 +280,9 @@ inline T EndianSwapValue(T v) {
 // Reverses the order of bits in an integral value
 template <typename T>
 inline T ReverseBits(T v) {
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
+
   T r = EndianSwapValue(v);
   const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8);
   const T kEveryByte = kHighestByte | (kHighestByte / 255);

@@ -277,7 +316,8 @@ inline T ReverseBits(T v) {
 // is that all square sub-matrices that include the top row are invertible.
 template <typename T>
 inline T DownwardInvolution(T v) {
-  static_assert(std::is_integral<T>::value, "non-integral type");
+  static_assert(std::is_integral_v<T>, "non-integral type");
+  static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
   static_assert(sizeof(T) <= 8, "only supported up to 64 bits");

   uint64_t r = static_cast<uint64_t>(v);

@@ -296,4 +336,16 @@ inline T DownwardInvolution(T v) {
   return static_cast<T>(r);
 }

+// Bitwise-And with typing that allows you to avoid writing an explicit cast
+// to the smaller type, or the type of the right parameter if same size.
+template <typename A, typename B>
+inline std::conditional_t<sizeof(A) < sizeof(B), A, B> BitwiseAnd(A a, B b) {
+  static_assert(std::is_integral_v<A>, "non-integral type");
+  static_assert(std::is_integral_v<B>, "non-integral type");
+  static_assert(!std::is_reference_v<A>, "use std::remove_reference_t");
+  static_assert(!std::is_reference_v<B>, "use std::remove_reference_t");
+  using Smaller = std::conditional_t<sizeof(A) < sizeof(B), A, B>;
+  return static_cast<Smaller>(a & b);
+}
+
 }  // namespace ROCKSDB_NAMESPACE


@@ -41,13 +41,13 @@ struct Unsigned128 {
     hi = upper;
   }

-  explicit operator uint64_t() { return lo; }
-  explicit operator uint32_t() { return static_cast<uint32_t>(lo); }
-  explicit operator uint16_t() { return static_cast<uint16_t>(lo); }
-  explicit operator uint8_t() { return static_cast<uint8_t>(lo); }
+  // Convert to any integer 64 bits or less.
+  template <typename T,
+            typename = std::enable_if_t<std::is_integral_v<T> &&
+                                        sizeof(T) <= sizeof(uint64_t)> >
+  explicit operator T() {
+    return static_cast<T>(lo);
+  }
 };

 inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {

@@ -190,6 +190,16 @@ inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
 #endif
 }

+template <>
+inline Unsigned128 BottomNBits(Unsigned128 v, int nbits) {
+  if (nbits < 64) {
+    return BottomNBits(Lower64of128(v), nbits);
+  } else {
+    return (Unsigned128{BottomNBits(Upper64of128(v), nbits - 64)} << 64) |
+           Lower64of128(v);
+  }
+}
+
 template <>
 inline int FloorLog2(Unsigned128 v) {
   if (Upper64of128(v) == 0) {

@@ -236,6 +246,18 @@
          DownwardInvolution(Upper64of128(v) ^ Lower64of128(v));
 }

+template <typename A>
+inline std::remove_reference_t<A> BitwiseAnd(A a, Unsigned128 b) {
+  static_assert(sizeof(A) <= sizeof(Unsigned128));
+  return static_cast<A>(a & b);
+}
+
+template <typename B>
+inline std::remove_reference_t<B> BitwiseAnd(Unsigned128 a, B b) {
+  static_assert(sizeof(B) <= sizeof(Unsigned128));
+  return static_cast<B>(a & b);
+}
+
 template <typename T>
 struct IsUnsignedUpTo128
     : std::integral_constant<bool, std::is_unsigned<T>::value ||