diff --git a/CMakeLists.txt b/CMakeLists.txt
index c72e7a0dd5..65cc32004d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -879,6 +879,7 @@ set(SOURCES
         util/compression_context_cache.cc
         util/concurrent_task_limiter_impl.cc
         util/crc32c.cc
+        util/data_structure.cc
         util/dynamic_bloom.cc
         util/hash.cc
         util/murmurhash.cc
diff --git a/TARGETS b/TARGETS
index f77eba770c..5a0c956dd1 100644
--- a/TARGETS
+++ b/TARGETS
@@ -247,6 +247,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
         "util/concurrent_task_limiter_impl.cc",
         "util/crc32c.cc",
         "util/crc32c_arm64.cc",
+        "util/data_structure.cc",
         "util/dynamic_bloom.cc",
         "util/file_checksum_helper.cc",
         "util/hash.cc",
@@ -589,6 +590,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
         "util/concurrent_task_limiter_impl.cc",
         "util/crc32c.cc",
         "util/crc32c_arm64.cc",
+        "util/data_structure.cc",
         "util/dynamic_bloom.cc",
         "util/file_checksum_helper.cc",
         "util/hash.cc",
diff --git a/include/rocksdb/data_structure.h b/include/rocksdb/data_structure.h
index f868a6be59..ffab82c514 100644
--- a/include/rocksdb/data_structure.h
+++ b/include/rocksdb/data_structure.h
@@ -15,37 +15,187 @@
 
 namespace ROCKSDB_NAMESPACE {
 
-// This is a data structure specifically designed as a "Set" for a
-// pretty small scale of Enum structure. For now, it can support up
-// to 64 element, and it is expandable in the future.
-template <typename ENUM_TYPE, ENUM_TYPE MAX_VALUE>
+namespace detail {
+int CountTrailingZeroBitsForSmallEnumSet(uint64_t);
+}  // namespace detail
+
+// Represents a set of values of some enum type with a small number of
+// possible enumerators. For now, it supports enums where no enumerator
+// exceeds 63 when converted to int.
+//
+// ENUM_TYPE is the enum type and MAX_ENUMERATOR is the largest enumerator
+// the set must be able to hold; its range is checked by static_assert below.
+// Membership is stored as one bit per enumerator in a single uint64_t.
+template <typename ENUM_TYPE, ENUM_TYPE MAX_ENUMERATOR>
 class SmallEnumSet {
+ private:
+  using StateT = uint64_t;
+  static constexpr int kStateBits = sizeof(StateT) * 8;
+  static constexpr int kMaxMax = kStateBits - 1;
+  static constexpr int kMaxValue = static_cast<int>(MAX_ENUMERATOR);
+  static_assert(kMaxValue >= 0);
+  static_assert(kMaxValue <= kMaxMax);
+
  public:
-  SmallEnumSet() : state_(0) {}
+  // construct / create
+  // Creates an empty set.
+  constexpr SmallEnumSet() : state_(0) {}
 
-  ~SmallEnumSet() {}
-
-  // Return true if the input enum is included in the "Set" (i.e., changes the
-  // internal scalar state successfully), otherwise, it will return false.
-  bool Add(const ENUM_TYPE value) {
-    static_assert(MAX_VALUE <= 63, "Size currently limited to 64");
-    assert(value >= 0 && value <= MAX_VALUE);
-    uint64_t old_state = state_;
-    uint64_t tmp = 1;
-    state_ |= (tmp << value);
-    return old_state != state_;
-  }
+  // Creates a set holding exactly the listed enumerators, e.g.
+  // SmallEnumSet{A, B}. Implicit so a single enumerator converts to a set.
+  // state_ is set in the member initializer list (not by assigning *this)
+  // so that the constructor is usable in constant expressions under C++17.
+  template <class... TRest>
+  /*implicit*/ constexpr SmallEnumSet(const ENUM_TYPE e, TRest... rest)
+      : state_(SmallEnumSet(rest...).With(e).state_) {}
 
+  // Return the set that includes all valid values, assuming the enum
+  // is "dense" (includes all values converting to 0 through kMaxValue)
+  static constexpr SmallEnumSet All() {
+    StateT tmp = StateT{1} << kMaxValue;
+    return SmallEnumSet(RawStateMarker(), tmp | (tmp - 1));
+  }
+
+  // equality
+  bool operator==(const SmallEnumSet& that) const {
+    return this->state_ == that.state_;
+  }
+  bool operator!=(const SmallEnumSet& that) const { return !(*this == that); }
+
+  // query
+
   // Return true if the input enum is contained in the "Set".
-  bool Contains(const ENUM_TYPE value) {
-    static_assert(MAX_VALUE <= 63, "Size currently limited to 64");
-    assert(value >= 0 && value <= MAX_VALUE);
-    uint64_t tmp = 1;
+  bool Contains(const ENUM_TYPE e) const {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    StateT tmp = 1;
     return state_ & (tmp << value);
   }
 
+  bool empty() const { return state_ == 0; }
+
+  // iterator
+  // Read-only iteration over the contained enumerators in increasing order
+  // of their integer value. pos_ is the bit index of the current element,
+  // or kStateBits for the past-the-end position.
+  class const_iterator {
+   public:
+    // copy
+    const_iterator(const const_iterator& that) = default;
+    const_iterator& operator=(const const_iterator& that) = default;
+
+    // move
+    const_iterator(const_iterator&& that) noexcept = default;
+    const_iterator& operator=(const_iterator&& that) noexcept = default;
+
+    // equality
+    bool operator==(const const_iterator& that) const {
+      assert(set_ == that.set_);
+      return this->pos_ == that.pos_;
+    }
+
+    bool operator!=(const const_iterator& that) const {
+      return !(*this == that);
+    }
+
+    // ++iterator
+    const_iterator& operator++() {
+      if (pos_ < kMaxValue) {
+        pos_ = set_->SkipUnset(pos_ + 1);
+      } else {
+        pos_ = kStateBits;
+      }
+      return *this;
+    }
+
+    // iterator++
+    const_iterator operator++(int) {
+      auto old = *this;
+      ++*this;
+      return old;
+    }
+
+    ENUM_TYPE operator*() const {
+      assert(pos_ <= kMaxValue);
+      return static_cast<ENUM_TYPE>(pos_);
+    }
+
+   private:
+    friend class SmallEnumSet;
+    const_iterator(const SmallEnumSet* set, int pos) : set_(set), pos_(pos) {}
+    const SmallEnumSet* set_;
+    int pos_;
+  };
+
+  const_iterator begin() const { return const_iterator(this, SkipUnset(0)); }
+
+  const_iterator end() const { return const_iterator(this, kStateBits); }
+
+  // mutable ops
+
+  // Modifies the set (if needed) to include the given value. Returns true
+  // iff the set was modified.
+  bool Add(const ENUM_TYPE e) {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    StateT old_state = state_;
+    state_ |= (StateT{1} << value);
+    return old_state != state_;
+  }
+
+  // Modifies the set (if needed) not to include the given value. Returns true
+  // iff the set was modified.
+  bool Remove(const ENUM_TYPE e) {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    StateT old_state = state_;
+    state_ &= ~(StateT{1} << value);
+    return old_state != state_;
+  }
+
+  // applicative ops
+
+  // Return a new set based on this one with the additional value(s) inserted
+  constexpr SmallEnumSet With(const ENUM_TYPE e) const {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    return SmallEnumSet(RawStateMarker(), state_ | (StateT{1} << value));
+  }
+  template <class... TRest>
+  constexpr SmallEnumSet With(const ENUM_TYPE e1, const ENUM_TYPE e2,
+                              TRest... rest) const {
+    return With(e1).With(e2, rest...);
+  }
+
+  // Return a new set based on this one excluding the given value(s)
+  constexpr SmallEnumSet Without(const ENUM_TYPE e) const {
+    int value = static_cast<int>(e);
+    assert(value >= 0 && value <= kMaxValue);
+    return SmallEnumSet(RawStateMarker(), state_ & ~(StateT{1} << value));
+  }
+  template <class... TRest>
+  constexpr SmallEnumSet Without(const ENUM_TYPE e1, const ENUM_TYPE e2,
+                                 TRest... rest) const {
+    return Without(e1).Without(e2, rest...);
+  }
+
  private:
-  uint64_t state_;
+  // Returns the index of the lowest set bit at position >= pos, or
+  // kStateBits if there is none. Requires 0 <= pos < kStateBits.
+  int SkipUnset(int pos) const {
+    StateT tmp = state_ >> pos;
+    if (tmp == 0) {
+      return kStateBits;
+    } else {
+      return pos + detail::CountTrailingZeroBitsForSmallEnumSet(tmp);
+    }
+  }
+  // Tag type selecting the private constructor that takes raw state bits.
+  struct RawStateMarker {};
+  explicit constexpr SmallEnumSet(RawStateMarker, StateT state)
+      : state_(state) {}
+
+  StateT state_;
 };
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/src.mk b/src.mk
index b825706860..ae1523e71c 100644
--- a/src.mk
+++ b/src.mk
@@ -234,6 +234,7 @@ LIB_SOURCES = \
   util/concurrent_task_limiter_impl.cc \
   util/crc32c.cc \
   util/crc32c_arm64.cc \
+  util/data_structure.cc \
   util/dynamic_bloom.cc \
   util/hash.cc \
   util/murmurhash.cc \
diff --git a/util/data_structure.cc b/util/data_structure.cc
new file mode 100644
index 0000000000..d647df5d5b
--- /dev/null
+++ b/util/data_structure.cc
@@ -0,0 +1,18 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "rocksdb/data_structure.h"
+
+#include "util/math.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace detail {
+// Out-of-line forwarder to CountTrailingZeroBits() from util/math.h.
+int CountTrailingZeroBitsForSmallEnumSet(uint64_t v) {
+  return CountTrailingZeroBits(v);
+}
+
+}  // namespace detail
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/util/slice_test.cc b/util/slice_test.cc
index e1c35d567f..010ded3d87 100644
--- a/util/slice_test.cc
+++ b/util/slice_test.cc
@@ -173,13 +173,78 @@ class SmallEnumSetTest : public testing::Test {
   ~SmallEnumSetTest() {}
 };
 
-TEST_F(SmallEnumSetTest, SmallSetTest) {
-  FileTypeSet fs;
+// Exercises Add/Contains/Remove/empty on a set over a legacy enum type.
+TEST_F(SmallEnumSetTest, SmallEnumSetTest1) {
+  FileTypeSet fs;  // based on a legacy enum type
+  ASSERT_TRUE(fs.empty());
   ASSERT_TRUE(fs.Add(FileType::kIdentityFile));
+  ASSERT_FALSE(fs.empty());
   ASSERT_FALSE(fs.Add(FileType::kIdentityFile));
   ASSERT_TRUE(fs.Add(FileType::kInfoLogFile));
   ASSERT_TRUE(fs.Contains(FileType::kIdentityFile));
   ASSERT_FALSE(fs.Contains(FileType::kDBLockFile));
+  ASSERT_FALSE(fs.empty());
+  ASSERT_FALSE(fs.Remove(FileType::kDBLockFile));
+  ASSERT_TRUE(fs.Remove(FileType::kIdentityFile));
+  ASSERT_FALSE(fs.empty());
+  ASSERT_TRUE(fs.Remove(FileType::kInfoLogFile));
+  ASSERT_TRUE(fs.empty());
+}
+
+namespace {
+enum class MyEnumClass { A, B, C };
+}  // namespace
+
+using MyEnumClassSet = SmallEnumSet<MyEnumClass, MyEnumClass::C>;
+
+// Exercises With/Without, equality, All() and iteration on a set over an
+// enum class.
+TEST_F(SmallEnumSetTest, SmallEnumSetTest2) {
+  MyEnumClassSet s;  // based on an enum class type
+  ASSERT_TRUE(s.Add(MyEnumClass::A));
+  ASSERT_TRUE(s.Contains(MyEnumClass::A));
+  ASSERT_FALSE(s.Contains(MyEnumClass::B));
+  ASSERT_TRUE(s.With(MyEnumClass::B).Contains(MyEnumClass::B));
+  ASSERT_TRUE(s.With(MyEnumClass::A).Contains(MyEnumClass::A));
+  ASSERT_FALSE(s.Contains(MyEnumClass::B));
+  ASSERT_FALSE(s.Without(MyEnumClass::A).Contains(MyEnumClass::A));
+  ASSERT_FALSE(
+      s.With(MyEnumClass::B).Without(MyEnumClass::B).Contains(MyEnumClass::B));
+  ASSERT_TRUE(
+      s.Without(MyEnumClass::B).With(MyEnumClass::B).Contains(MyEnumClass::B));
+  ASSERT_TRUE(s.Contains(MyEnumClass::A));
+
+  const MyEnumClassSet cs = s;
+  ASSERT_TRUE(cs.Contains(MyEnumClass::A));
+  ASSERT_EQ(cs, MyEnumClassSet{MyEnumClass::A});
+  ASSERT_EQ(cs.Without(MyEnumClass::A), MyEnumClassSet{});
+  ASSERT_EQ(cs, MyEnumClassSet::All().Without(MyEnumClass::B, MyEnumClass::C));
+  ASSERT_EQ(cs.With(MyEnumClass::B, MyEnumClass::C), MyEnumClassSet::All());
+  ASSERT_EQ(
+      MyEnumClassSet::All(),
+      MyEnumClassSet{}.With(MyEnumClass::A, MyEnumClass::B, MyEnumClass::C));
+  ASSERT_NE(cs, MyEnumClassSet{MyEnumClass::B});
+  ASSERT_NE(cs, MyEnumClassSet::All());
+
+  int count = 0;
+  for (MyEnumClass e : cs) {
+    ASSERT_EQ(e, MyEnumClass::A);
+    ++count;
+  }
+  ASSERT_EQ(count, 1);
+
+  count = 0;
+  for (MyEnumClass e : MyEnumClassSet::All().Without(MyEnumClass::B)) {
+    ASSERT_NE(e, MyEnumClass::B);
+    ++count;
+  }
+  ASSERT_EQ(count, 2);
+
+  // Use a gtest failure, not assert(), so the check survives NDEBUG builds.
+  for (MyEnumClass e : MyEnumClassSet{}) {
+    (void)e;
+    FAIL() << "an empty set must not yield any element";
+  }
 }
 
 }  // namespace ROCKSDB_NAMESPACE