mirror of https://github.com/facebook/rocksdb.git
Ribbon: InterleavedSolutionStorage (#7598)
Summary: The core algorithms for InterleavedSolutionStorage and the implementation SerializableInterleavedSolution make Ribbon fast for filter queries. Example output from new unit test: Simple outside query, hot, incl hashing, ns/key: 117.796 Interleaved outside query, hot, incl hashing, ns/key: 42.2655 Bloom outside query, hot, incl hashing, ns/key: 24.0071 Also includes misc cleanup of previous Ribbon code and comments. Some TODOs and FIXMEs remain for further work / investigation. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7598 Test Plan: unit tests included (integration work and tests coming later) Reviewed By: jay-zhuang Differential Revision: D24559209 Pulled By: pdillinger fbshipit-source-id: fea483cd354ba782aea3e806f2bc96e183d59441
This commit is contained in:
parent
0b94468bba
commit
746909ceda
|
@ -8,6 +8,7 @@ rocksdb.pc
|
||||||
*.gcda
|
*.gcda
|
||||||
*.gcno
|
*.gcno
|
||||||
*.o
|
*.o
|
||||||
|
*.o.tmp
|
||||||
*.so
|
*.so
|
||||||
*.so.*
|
*.so.*
|
||||||
*_test
|
*_test
|
||||||
|
|
|
@ -10,8 +10,10 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "port/port.h" // for PREFETCH
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
#include "util/hash.h"
|
#include "util/hash.h"
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,10 @@ struct Unsigned128 {
|
||||||
explicit operator uint64_t() { return lo; }
|
explicit operator uint64_t() { return lo; }
|
||||||
|
|
||||||
explicit operator uint32_t() { return static_cast<uint32_t>(lo); }
|
explicit operator uint32_t() { return static_cast<uint32_t>(lo); }
|
||||||
|
|
||||||
|
explicit operator uint16_t() { return static_cast<uint16_t>(lo); }
|
||||||
|
|
||||||
|
explicit operator uint8_t() { return static_cast<uint8_t>(lo); }
|
||||||
};
|
};
|
||||||
|
|
||||||
inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
|
inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include "util/math128.h"
|
#include "util/math128.h"
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ namespace ribbon {
|
||||||
// (b) developed by Peter C. Dillinger, though not the first on-the-fly
|
// (b) developed by Peter C. Dillinger, though not the first on-the-fly
|
||||||
// GE algorithm. See "On the fly Gaussian Elimination for LT codes" by
|
// GE algorithm. See "On the fly Gaussian Elimination for LT codes" by
|
||||||
// Bioglio, Grangetto, Gaeta, and Sereno.
|
// Bioglio, Grangetto, Gaeta, and Sereno.
|
||||||
// (c) TODO: not yet implemented here
|
// (c) see "interleaved" solution storage below.
|
||||||
//
|
//
|
||||||
// See ribbon_impl.h for high-level behavioral summary. This file focuses
|
// See ribbon_impl.h for high-level behavioral summary. This file focuses
|
||||||
// on the core design details.
|
// on the core design details.
|
||||||
|
@ -242,7 +243,7 @@ namespace ribbon {
|
||||||
// #################### Ribbon on-the-fly banding #######################
|
// #################### Ribbon on-the-fly banding #######################
|
||||||
//
|
//
|
||||||
// "Banding" is what we call the process of reducing the inputs to an
|
// "Banding" is what we call the process of reducing the inputs to an
|
||||||
// upper-triangluar r-band matrix ready for finishing a solution with
|
// upper-triangular r-band matrix ready for finishing a solution with
|
||||||
// back-substitution. Although the DW paper presents an algorithm for
|
// back-substitution. Although the DW paper presents an algorithm for
|
||||||
// this ("SGauss"), the awesome properties of their construction enable
|
// this ("SGauss"), the awesome properties of their construction enable
|
||||||
// an even simpler, faster, and more backtrackable algorithm. In simplest
|
// an even simpler, faster, and more backtrackable algorithm. In simplest
|
||||||
|
@ -253,7 +254,7 @@ namespace ribbon {
|
||||||
// The enhanced algorithm is based on these observations:
|
// The enhanced algorithm is based on these observations:
|
||||||
// - When processing a coefficient row with first 1 in column j,
|
// - When processing a coefficient row with first 1 in column j,
|
||||||
// - If it's the first at column j to be processed, it can be part of
|
// - If it's the first at column j to be processed, it can be part of
|
||||||
// the banding at row j. (And that descision never overwritten, with
|
// the banding at row j. (And that decision never overwritten, with
|
||||||
// no loss of generality!)
|
// no loss of generality!)
|
||||||
// - Else, it can be combined with existing row j and re-processed,
|
// - Else, it can be combined with existing row j and re-processed,
|
||||||
// which will look for a later "empty" row or reach "no solution".
|
// which will look for a later "empty" row or reach "no solution".
|
||||||
|
@ -299,7 +300,7 @@ namespace ribbon {
|
||||||
// Row-major layout is typical for boolean (bit) matrices, including for
|
// Row-major layout is typical for boolean (bit) matrices, including for
|
||||||
// MWHC (Xor) filters where a query combines k b-bit values, and k is
|
// MWHC (Xor) filters where a query combines k b-bit values, and k is
|
||||||
// typically smaller than b. Even for k=4 and b=2, at least k=4 random
|
// typically smaller than b. Even for k=4 and b=2, at least k=4 random
|
||||||
// lookups are required regardless of layout.
|
// look-ups are required regardless of layout.
|
||||||
//
|
//
|
||||||
// Ribbon PHSFs are quite different, however, because
|
// Ribbon PHSFs are quite different, however, because
|
||||||
// (a) all of the solution rows relevant to a query are within a single
|
// (a) all of the solution rows relevant to a query are within a single
|
||||||
|
@ -343,9 +344,29 @@ namespace ribbon {
|
||||||
// At first glance, PHSFs only offer a whole number of bits per "slot"
|
// At first glance, PHSFs only offer a whole number of bits per "slot"
|
||||||
// (m rather than number of keys n), but coefficient locality in the
|
// (m rather than number of keys n), but coefficient locality in the
|
||||||
// Ribbon construction makes fractional bits/key quite possible and
|
// Ribbon construction makes fractional bits/key quite possible and
|
||||||
// attractive for filter applications.
|
// attractive for filter applications. This works by a prefix of the
|
||||||
|
// structure using b-1 solution columns and the rest using b solution
|
||||||
|
// columns. See InterleavedSolutionStorage below for more detail.
|
||||||
//
|
//
|
||||||
// TODO: more detail
|
// Because false positive rates are non-linear in bits/key, this approach
|
||||||
|
// is not quite optimal in terms of information theory. In common cases,
|
||||||
|
// we see additional space overhead up to about 1.5% vs. theoretical
|
||||||
|
// optimal to achieve the same FP rate. We consider this a quite acceptable
|
||||||
|
// overhead for very efficiently utilizing space that might otherwise be
|
||||||
|
// wasted.
|
||||||
|
//
|
||||||
|
// This property of Ribbon even makes it "elastic." A Ribbon filter and
|
||||||
|
// its small metadata for answering queries can be adapted into another
|
||||||
|
// Ribbon filter filling any smaller multiple of r bits (plus small
|
||||||
|
// metadata), with a correspondingly higher FP rate. None of the data
|
||||||
|
// thrown away during construction needs to be recalled for this reduction.
|
||||||
|
// Similarly a single Ribbon construction can be separated (by solution
|
||||||
|
// column) into two or more structures (or "layers" or "levels") with
|
||||||
|
// independent filtering ability (no FP correlation, just as solution or
|
||||||
|
// result columns in a single structure) despite being constructed as part
|
||||||
|
// of a single linear system. (TODO: implement)
|
||||||
|
// See also "ElasticBF: Fine-grained and Elastic Bloom Filter Towards
|
||||||
|
// Efficient Read for LSM-tree-based KV Stores."
|
||||||
//
|
//
|
||||||
|
|
||||||
// ######################################################################
|
// ######################################################################
|
||||||
|
@ -354,7 +375,8 @@ namespace ribbon {
|
||||||
//
|
//
|
||||||
// These algorithms are templatized for genericity but near-maximum
|
// These algorithms are templatized for genericity but near-maximum
|
||||||
// performance in a given application. The template parameters
|
// performance in a given application. The template parameters
|
||||||
// adhere to class/struct type concepts outlined below.
|
// adhere to informal class/struct type concepts outlined below. (This
|
||||||
|
// code is written for C++11 so does not use formal C++ concepts.)
|
||||||
|
|
||||||
// Rough architecture for these algorithms:
|
// Rough architecture for these algorithms:
|
||||||
//
|
//
|
||||||
|
@ -413,7 +435,7 @@ namespace ribbon {
|
||||||
// // Given a hash value, return the r-bit sequence of coefficients to
|
// // Given a hash value, return the r-bit sequence of coefficients to
|
||||||
// // associate with it. It's generally OK if
|
// // associate with it. It's generally OK if
|
||||||
// // sizeof(CoeffRow) > sizeof(Hash)
|
// // sizeof(CoeffRow) > sizeof(Hash)
|
||||||
// // as long as the hash itself is not too prone to collsions for the
|
// // as long as the hash itself is not too prone to collisions for the
|
||||||
// // applications and the CoeffRow is generated uniformly from
|
// // applications and the CoeffRow is generated uniformly from
|
||||||
// // available hash data, but relatively independent of the start.
|
// // available hash data, but relatively independent of the start.
|
||||||
// //
|
// //
|
||||||
|
@ -699,20 +721,41 @@ bool BandingAddRange(BandingStorage *bs, const BandingHasher &bh,
|
||||||
// for filter queries.
|
// for filter queries.
|
||||||
|
|
||||||
// concept SimpleSolutionStorage extends RibbonTypes {
|
// concept SimpleSolutionStorage extends RibbonTypes {
|
||||||
|
// // This is called at the beginning of back-substitution for the
|
||||||
|
// // solution storage to do any remaining configuration before data
|
||||||
|
// // is stored to it. If configuration is previously finalized, this
|
||||||
|
// // could be a simple assertion or even no-op. Ribbon algorithms
|
||||||
|
// // only call this from back-substitution, and only once per call,
|
||||||
|
// // before other functions here.
|
||||||
// void PrepareForNumStarts(Index num_starts) const;
|
// void PrepareForNumStarts(Index num_starts) const;
|
||||||
|
// // Must return num_starts passed to PrepareForNumStarts, or the most
|
||||||
|
// // recent call to PrepareForNumStarts if this storage object can be
|
||||||
|
// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because
|
||||||
|
// // there must be a run of kCoeffBits slots starting from each start.
|
||||||
// Index GetNumStarts() const;
|
// Index GetNumStarts() const;
|
||||||
|
// // Load the solution row (type ResultRow) for a slot
|
||||||
// ResultRow Load(Index slot_num) const;
|
// ResultRow Load(Index slot_num) const;
|
||||||
|
// // Store the solution row (type ResultRow) for a slot
|
||||||
// void Store(Index slot_num, ResultRow data);
|
// void Store(Index slot_num, ResultRow data);
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// Back-substitution for generating a solution from BandingStorage to
|
// Back-substitution for generating a solution from BandingStorage to
|
||||||
// SimpleSolutionStorage.
|
// SimpleSolutionStorage.
|
||||||
template <typename SimpleSolutionStorage, typename BandingStorage>
|
template <typename SimpleSolutionStorage, typename BandingStorage>
|
||||||
void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &ss) {
|
void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &bs) {
|
||||||
using CoeffRow = typename BandingStorage::CoeffRow;
|
using CoeffRow = typename BandingStorage::CoeffRow;
|
||||||
using Index = typename BandingStorage::Index;
|
using Index = typename BandingStorage::Index;
|
||||||
using ResultRow = typename BandingStorage::ResultRow;
|
using ResultRow = typename BandingStorage::ResultRow;
|
||||||
|
|
||||||
|
static_assert(sizeof(Index) == sizeof(typename SimpleSolutionStorage::Index),
|
||||||
|
"must be same");
|
||||||
|
static_assert(
|
||||||
|
sizeof(CoeffRow) == sizeof(typename SimpleSolutionStorage::CoeffRow),
|
||||||
|
"must be same");
|
||||||
|
static_assert(
|
||||||
|
sizeof(ResultRow) == sizeof(typename SimpleSolutionStorage::ResultRow),
|
||||||
|
"must be same");
|
||||||
|
|
||||||
constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
|
constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);
|
||||||
constexpr auto kResultBits = static_cast<Index>(sizeof(ResultRow) * 8U);
|
constexpr auto kResultBits = static_cast<Index>(sizeof(ResultRow) * 8U);
|
||||||
|
|
||||||
|
@ -722,14 +765,14 @@ void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &ss) {
|
||||||
std::array<CoeffRow, kResultBits> state;
|
std::array<CoeffRow, kResultBits> state;
|
||||||
state.fill(0);
|
state.fill(0);
|
||||||
|
|
||||||
const Index num_starts = ss.GetNumStarts();
|
const Index num_starts = bs.GetNumStarts();
|
||||||
sss->PrepareForNumStarts(num_starts);
|
sss->PrepareForNumStarts(num_starts);
|
||||||
const Index num_slots = num_starts + kCoeffBits - 1;
|
const Index num_slots = num_starts + kCoeffBits - 1;
|
||||||
|
|
||||||
for (Index i = num_slots; i > 0;) {
|
for (Index i = num_slots; i > 0;) {
|
||||||
--i;
|
--i;
|
||||||
CoeffRow cr = *const_cast<BandingStorage &>(ss).CoeffRowPtr(i);
|
CoeffRow cr = *const_cast<BandingStorage &>(bs).CoeffRowPtr(i);
|
||||||
ResultRow rr = *const_cast<BandingStorage &>(ss).ResultRowPtr(i);
|
ResultRow rr = *const_cast<BandingStorage &>(bs).ResultRowPtr(i);
|
||||||
// solution row
|
// solution row
|
||||||
ResultRow sr = 0;
|
ResultRow sr = 0;
|
||||||
for (Index j = 0; j < kResultBits; ++j) {
|
for (Index j = 0; j < kResultBits; ++j) {
|
||||||
|
@ -767,9 +810,9 @@ typename SimpleSolutionStorage::ResultRow SimpleQueryHelper(
|
||||||
|
|
||||||
ResultRow result = 0;
|
ResultRow result = 0;
|
||||||
for (unsigned i = 0; i < kCoeffBits; ++i) {
|
for (unsigned i = 0; i < kCoeffBits; ++i) {
|
||||||
if (static_cast<unsigned>(cr >> i) & 1U) {
|
// Bit masking whole value is generally faster here than 'if'
|
||||||
result ^= sss.Load(start_slot + i);
|
result ^= sss.Load(start_slot + i) &
|
||||||
}
|
(ResultRow{0} - (static_cast<ResultRow>(cr >> i) & ResultRow{1}));
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -781,6 +824,13 @@ typename SimpleSolutionStorage::ResultRow SimplePhsfQuery(
|
||||||
const SimpleSolutionStorage &sss) {
|
const SimpleSolutionStorage &sss) {
|
||||||
const typename PhsfQueryHasher::Hash hash = hasher.GetHash(key);
|
const typename PhsfQueryHasher::Hash hash = hasher.GetHash(key);
|
||||||
|
|
||||||
|
static_assert(sizeof(typename SimpleSolutionStorage::Index) ==
|
||||||
|
sizeof(typename PhsfQueryHasher::Index),
|
||||||
|
"must be same");
|
||||||
|
static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) ==
|
||||||
|
sizeof(typename PhsfQueryHasher::CoeffRow),
|
||||||
|
"must be same");
|
||||||
|
|
||||||
return SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()),
|
return SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()),
|
||||||
hasher.GetCoeffRow(hash), sss);
|
hasher.GetCoeffRow(hash), sss);
|
||||||
}
|
}
|
||||||
|
@ -794,6 +844,16 @@ bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key,
|
||||||
const typename SimpleSolutionStorage::ResultRow expected =
|
const typename SimpleSolutionStorage::ResultRow expected =
|
||||||
hasher.GetResultRowFromHash(hash);
|
hasher.GetResultRowFromHash(hash);
|
||||||
|
|
||||||
|
static_assert(sizeof(typename SimpleSolutionStorage::Index) ==
|
||||||
|
sizeof(typename FilterQueryHasher::Index),
|
||||||
|
"must be same");
|
||||||
|
static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) ==
|
||||||
|
sizeof(typename FilterQueryHasher::CoeffRow),
|
||||||
|
"must be same");
|
||||||
|
static_assert(sizeof(typename SimpleSolutionStorage::ResultRow) ==
|
||||||
|
sizeof(typename FilterQueryHasher::ResultRow),
|
||||||
|
"must be same");
|
||||||
|
|
||||||
return expected ==
|
return expected ==
|
||||||
SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()),
|
SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()),
|
||||||
hasher.GetCoeffRow(hash), sss);
|
hasher.GetCoeffRow(hash), sss);
|
||||||
|
@ -803,18 +863,326 @@ bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key,
|
||||||
|
|
||||||
// InterleavedSolutionStorage is row-major at a high level, for good
|
// InterleavedSolutionStorage is row-major at a high level, for good
|
||||||
// locality, and column-major at a low level, for CPU efficiency
|
// locality, and column-major at a low level, for CPU efficiency
|
||||||
// especially in filter querys or relatively small number of result bits
|
// especially in filter queries or relatively small number of result bits
|
||||||
// (== solution columns). The storage is a sequence of "blocks" where a
|
// (== solution columns). The storage is a sequence of "blocks" where a
|
||||||
// block has one CoeffRow for each solution column.
|
// block has one CoeffRow-sized segment for each solution column. Each
|
||||||
|
// query spans at most two blocks; the starting solution row is typically
|
||||||
|
// in the row-logical middle of a block and spans to the middle of the
|
||||||
|
// next block. (See diagram below.)
|
||||||
|
//
|
||||||
|
// InterleavedSolutionStorage supports choosing b (number of result or
|
||||||
|
// solution columns) at run time, and even supports mixing b and b-1 solution
|
||||||
|
// columns in a single linear system solution, for filters that can
|
||||||
|
// effectively utilize any size space (multiple of CoeffRow) for minimizing
|
||||||
|
// FP rate for any number of added keys. To simplify query implementation
|
||||||
|
// (with lower-index columns first), the b-bit portion comes after the b-1
|
||||||
|
// portion of the structure.
|
||||||
|
//
|
||||||
|
// Diagram (=== marks logical block boundary; b=4; ### is data used by a
|
||||||
|
// query crossing the b-1 to b boundary, each Segment has type CoeffRow):
|
||||||
|
// ...
|
||||||
|
// +======================+
|
||||||
|
// | S e g m e n t col=0 |
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n t col=1 |
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n t col=2 |
|
||||||
|
// +======================+
|
||||||
|
// | S e g m e n #########|
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n #########|
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n #########|
|
||||||
|
// +======================+ Result/solution columns: above = 3, below = 4
|
||||||
|
// |#############t col=0 |
|
||||||
|
// +----------------------+
|
||||||
|
// |#############t col=1 |
|
||||||
|
// +----------------------+
|
||||||
|
// |#############t col=2 |
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n t col=3 |
|
||||||
|
// +======================+
|
||||||
|
// | S e g m e n t col=0 |
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n t col=1 |
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n t col=2 |
|
||||||
|
// +----------------------+
|
||||||
|
// | S e g m e n t col=3 |
|
||||||
|
// +======================+
|
||||||
|
// ...
|
||||||
|
//
|
||||||
|
// InterleavedSolutionStorage will be adapted by the algorithms from
|
||||||
|
// simple array-like segment storage. That array-like storage is templatized
|
||||||
|
// in part so that an implementation may choose to handle byte ordering
|
||||||
|
// at access time.
|
||||||
|
//
|
||||||
// concept InterleavedSolutionStorage extends RibbonTypes {
|
// concept InterleavedSolutionStorage extends RibbonTypes {
|
||||||
// Index GetNumColumns() const;
|
// // This is called at the beginning of back-substitution for the
|
||||||
|
// // solution storage to do any remaining configuration before data
|
||||||
|
// // is stored to it. If configuration is previously finalized, this
|
||||||
|
// // could be a simple assertion or even no-op. Ribbon algorithms
|
||||||
|
// // only call this from back-substitution, and only once per call,
|
||||||
|
// // before other functions here.
|
||||||
|
// void PrepareForNumStarts(Index num_starts) const;
|
||||||
|
// // Must return num_starts passed to PrepareForNumStarts, or the most
|
||||||
|
// // recent call to PrepareForNumStarts if this storage object can be
|
||||||
|
// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because
|
||||||
|
// // there must be a run of kCoeffBits slots starting from each start.
|
||||||
// Index GetNumStarts() const;
|
// Index GetNumStarts() const;
|
||||||
// CoeffRow Load(Index block_num, Index column) const;
|
// // The larger number of solution columns used (called "b" above).
|
||||||
// void Store(Index block_num, Index column, CoeffRow data);
|
// Index GetUpperNumColumns() const;
|
||||||
|
// // If returns > 0, then block numbers below that use
|
||||||
|
// // GetUpperNumColumns() - 1 columns per solution row, and the rest
|
||||||
|
// // use GetUpperNumColumns(). A block represents kCoeffBits "slots",
|
||||||
|
// // where all but the last kCoeffBits - 1 slots are also starts. And
|
||||||
|
// // a block contains a segment for each solution column.
|
||||||
|
// // An implementation may only support uniform columns per solution
|
||||||
|
// // row and return constant 0 here.
|
||||||
|
// Index GetUpperStartBlock() const;
|
||||||
|
//
|
||||||
|
// // ### "Array of segments" portion of API ###
|
||||||
|
// // The number of values of type CoeffRow used in this solution
|
||||||
|
// // representation. (This value can be inferred from the previous
|
||||||
|
// // three functions, but is expected at least for sanity / assertion
|
||||||
|
// // checking.)
|
||||||
|
// Index GetNumSegments() const;
|
||||||
|
// // Load an entry from the logical array of segments
|
||||||
|
// CoeffRow LoadSegment(Index segment_num) const;
|
||||||
|
// // Store an entry to the logical array of segments
|
||||||
|
// void StoreSegment(Index segment_num, CoeffRow data);
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// TODO: not yet implemented here (only in prototype code elsewhere)
|
// Helper for InterleavedBackSubst: back-substitutes one block of slots.
//
// Walks the kCoeffBits banding rows [start_slot, start_slot + kCoeffBits)
// from the highest slot down to start_slot. For every row and each of the
// first num_columns entries of `state`, the entry is shifted left by one and
// the newly solved bit for that row/column is placed in its lowest bit.
// After the call, bit k of state[j] is the solution bit of column j at slot
// start_slot + k.
//
//   state        in/out; at least num_columns entries, carrying the most
//                recently solved rows of each solution column
//   num_columns  number of solution columns to solve for this block
//   bs           banding data (coefficient and result rows), only read here
//   start_slot   first (lowest) slot of the block
template <typename BandingStorage>
inline void BackSubstBlock(typename BandingStorage::CoeffRow *state,
                           typename BandingStorage::Index num_columns,
                           const BandingStorage &bs,
                           typename BandingStorage::Index start_slot) {
  using CoeffRow = typename BandingStorage::CoeffRow;
  using Index = typename BandingStorage::Index;
  using ResultRow = typename BandingStorage::ResultRow;

  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);

  // The row accessors are invoked through a non-const reference; the rows
  // themselves are only read.
  auto &banding = const_cast<BandingStorage &>(bs);

  for (Index slot = start_slot + kCoeffBits; slot-- > start_slot;) {
    CoeffRow coeffs = *banding.CoeffRowPtr(slot);
    ResultRow results = *banding.ResultRowPtr(slot);

    for (Index col = 0; col < num_columns; ++col) {
      // Make room in the lowest bit for the solution bit of this slot.
      CoeffRow shifted = state[col] << 1;

      // A valid solution needs the dot product of the solution column with
      // the coefficient row to equal the result bit of this column:
      //   BitParity(shifted & coeffs) == ((results >> col) & 1)
      // once the new bit is included, so the new bit is whatever makes up
      // the difference.
      int solved = BitParity(shifted & coeffs) ^ ((results >> col) & 1);
      shifted |= static_cast<CoeffRow>(solved);

      state[col] = shifted;
    }
  }
}
|
||||||
|
|
||||||
|
// Back-substitution: computes a solution from the banding data in `bs` and
// writes it, one segment per (block, column), into `iss`.
//
// Blocks are solved from the last to the first. Blocks numbered
// iss->GetUpperStartBlock() and above use iss->GetUpperNumColumns() columns;
// lower-numbered blocks use one column fewer. Segments are stored so that
// the segments of block 0 come first and, within a block, column 0 first.
//
// Expectations checked by assertion:
//  * bs.GetNumStarts() > 0. (An "always false" filter for zero keys is not
//    specifically supported, because it would cost the query another
//    conditional branch.)
//  * The slot count, num_starts + kCoeffBits - 1, is a multiple of
//    kCoeffBits.
//  * iss->GetNumSegments() matches exactly the number of segments implied by
//    the block count and the two column counts.
template <typename InterleavedSolutionStorage, typename BandingStorage>
void InterleavedBackSubst(InterleavedSolutionStorage *iss,
                          const BandingStorage &bs) {
  using CoeffRow = typename BandingStorage::CoeffRow;
  using Index = typename BandingStorage::Index;

  static_assert(
      sizeof(Index) == sizeof(typename InterleavedSolutionStorage::Index),
      "must be same");
  static_assert(
      sizeof(CoeffRow) == sizeof(typename InterleavedSolutionStorage::CoeffRow),
      "must be same");

  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);

  const Index num_starts = bs.GetNumStarts();
  assert(num_starts > 0);
  iss->PrepareForNumStarts(num_starts);

  const Index num_slots = num_starts + kCoeffBits - 1;
  assert(num_slots % kCoeffBits == 0);
  const Index num_blocks = num_slots / kCoeffBits;
  const Index num_segments = iss->GetNumSegments();

  const Index upper_columns = iss->GetUpperNumColumns();
  const Index upper_start_block = iss->GetUpperStartBlock();

  if (upper_columns == 0) {
    // No columns means no segments to fill, presumably because there is not
    // enough space for even one. Queries then always pass (filter) or
    // always yield 0 (PHSF).
    assert(num_segments == 0);
    return;
  }

  // Every available segment must be accounted for.
  assert(num_segments == (upper_start_block * (upper_columns - 1)) +
                             ((num_blocks - upper_start_block) * upper_columns));

  // TODO: consider fixed-column specializations with stack-allocated state

  // Column-major window over the solution matrix: one CoeffRow per column
  // holding the most recently solved rows, which (with the banding data) is
  // all that is needed to solve the next row. Zero-initialized.
  std::unique_ptr<CoeffRow[]> state{new CoeffRow[upper_columns]()};

  Index next_segment = num_segments;

  // Solves one block with the given column count and stores its segments
  // immediately below the ones stored so far.
  const auto solve_and_store = [&](Index block_num, Index columns) {
    BackSubstBlock(state.get(), columns, bs, block_num * kCoeffBits);
    next_segment -= columns;
    for (Index col = 0; col < columns; ++col) {
      iss->StoreSegment(next_segment + col, state[col]);
    }
  };

  Index blocks_left = num_blocks;

  // Region using the upper (larger) number of columns.
  while (blocks_left > upper_start_block) {
    --blocks_left;
    solve_and_store(blocks_left, upper_columns);
  }

  // Region (if any) using one column fewer. Never entered when
  // GetUpperStartBlock() is 0.
  const Index lower_columns = upper_columns - 1;
  while (blocks_left > 0) {
    --blocks_left;
    solve_and_store(blocks_left, lower_columns);
  }

  // Everything must have been consumed.
  assert(blocks_left == 0);
  assert(next_segment == 0);
}
|
||||||
|
|
||||||
|
// General PHSF query of a key against InterleavedSolutionStorage.
//
// Hashes `key`, derives its start slot and coefficient row from `hasher`,
// and returns a ResultRow whose bit i is the parity of (solution column i
// AND coefficient row) over the kCoeffBits slots beginning at the start
// slot. Only the columns in effect at the start block contribute: that is
// GetUpperNumColumns() for blocks at or after GetUpperStartBlock(), and one
// fewer before it. Higher result bits are 0.
//
// The slots of a query span at most two consecutive blocks, so each column
// is read from at most two segments.
template <typename InterleavedSolutionStorage, typename PhsfQueryHasher>
typename InterleavedSolutionStorage::ResultRow InterleavedPhsfQuery(
    const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher,
    const InterleavedSolutionStorage &iss) {
  using Hash = typename PhsfQueryHasher::Hash;

  using CoeffRow = typename InterleavedSolutionStorage::CoeffRow;
  using Index = typename InterleavedSolutionStorage::Index;
  using ResultRow = typename InterleavedSolutionStorage::ResultRow;

  static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index),
                "must be same");
  static_assert(sizeof(CoeffRow) == sizeof(typename PhsfQueryHasher::CoeffRow),
                "must be same");

  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);

  const Hash hash = hasher.GetHash(key);
  const Index start_slot = hasher.GetStart(hash, iss.GetNumStarts());

  // NOTE: `iss` is a reference, so members are reached with '.', not '->'.
  const Index upper_start_block = iss.GetUpperStartBlock();
  Index num_columns = iss.GetUpperNumColumns();
  const Index start_block_num = start_slot / kCoeffBits;
  // Index of the start block's first segment: every earlier block holds
  // num_columns segments, except that blocks below upper_start_block hold
  // one fewer each.
  Index segment = start_block_num * num_columns -
                  std::min(start_block_num, upper_start_block);
  // Change to lower num columns if applicable.
  // (This should not compile to a conditional branch.)
  num_columns -= (start_block_num < upper_start_block) ? 1 : 0;

  const CoeffRow cr = hasher.GetCoeffRow(hash);
  const Index start_bit = start_slot % kCoeffBits;

  ResultRow sr = 0;

  // Part of the query that lies in the start block: coefficient bit k lines
  // up with segment bit start_bit + k.
  const CoeffRow cr_left = cr << static_cast<unsigned>(start_bit);
  for (Index i = 0; i < num_columns; ++i) {
    // Widen the parity to ResultRow before shifting so that columns at or
    // beyond the width of int are handled correctly.
    const ResultRow parity = static_cast<ResultRow>(
        BitParity(iss.LoadSegment(segment + i) & cr_left));
    sr ^= static_cast<ResultRow>(parity << i);
  }

  // Part of the query (if any) that spills into the next block. Guarded so
  // that the shift amount below is always less than kCoeffBits.
  if (start_bit > 0) {
    segment += num_columns;
    const CoeffRow cr_right =
        cr >> static_cast<unsigned>(kCoeffBits - start_bit);
    for (Index i = 0; i < num_columns; ++i) {
      const ResultRow parity = static_cast<ResultRow>(
          BitParity(iss.LoadSegment(segment + i) & cr_right));
      sr ^= static_cast<ResultRow>(parity << i);
    }
  }

  return sr;
}
|
||||||
|
|
||||||
|
// Filter query of a key against InterleavedSolutionStorage.
//
// Returns true iff, for every solution column in effect at the key's start
// block, the parity of (solution column AND coefficient row) over the
// kCoeffBits slots beginning at the key's start slot equals the matching
// bit of the result row the hasher derives from the key's hash. Returns
// false as soon as one column disagrees.
template <typename InterleavedSolutionStorage, typename FilterQueryHasher>
bool InterleavedFilterQuery(const typename FilterQueryHasher::Key &key,
                            const FilterQueryHasher &hasher,
                            const InterleavedSolutionStorage &iss) {
  // The set-up below closely follows InterleavedPhsfQuery.
  using Hash = typename FilterQueryHasher::Hash;

  using CoeffRow = typename InterleavedSolutionStorage::CoeffRow;
  using Index = typename InterleavedSolutionStorage::Index;
  using ResultRow = typename InterleavedSolutionStorage::ResultRow;

  static_assert(sizeof(typename FilterQueryHasher::Index) == sizeof(Index),
                "must be same");
  static_assert(
      sizeof(typename FilterQueryHasher::CoeffRow) == sizeof(CoeffRow),
      "must be same");
  static_assert(
      sizeof(typename FilterQueryHasher::ResultRow) == sizeof(ResultRow),
      "must be same");

  constexpr auto kCoeffBits = static_cast<Index>(sizeof(CoeffRow) * 8U);

  const Hash hash = hasher.GetHash(key);
  const Index start_slot = hasher.GetStart(hash, iss.GetNumStarts());

  const Index upper_start_block = iss.GetUpperStartBlock();
  const Index upper_columns = iss.GetUpperNumColumns();
  const Index start_block = start_slot / kCoeffBits;
  // First segment of the start block: earlier blocks hold upper_columns
  // segments each, minus one for each block below upper_start_block.
  const Index first_segment =
      start_block * upper_columns - std::min(start_block, upper_start_block);
  // One column fewer when the start block is in the lower region.
  // (This should not compile to a conditional branch.)
  const Index num_columns = static_cast<Index>(
      upper_columns - ((start_block < upper_start_block) ? 1 : 0));

  const CoeffRow cr = hasher.GetCoeffRow(hash);
  const Index start_bit = start_slot % kCoeffBits;
  // End of the part shared with InterleavedPhsfQuery.

  const ResultRow expected = hasher.GetResultRowFromHash(hash);

  // True when the given solution column bits reproduce the expected result
  // bit for column `col`.
  const auto column_matches = [&](Index col, CoeffRow soln_col) -> bool {
    return BitParity(soln_col & cr) == (static_cast<int>(expected >> col) & 1);
  };

  if (start_bit == 0) {
    // Query is aligned to a block: each column is a single whole segment.
    for (Index col = 0; col < num_columns; ++col) {
      if (!column_matches(col, iss.LoadSegment(first_segment + col))) {
        return false;
      }
    }
    return true;
  }

  // Query straddles two blocks: stitch each column together from the high
  // bits of its segment in the start block and the low bits of its segment
  // in the next block. Both shift amounts are in (0, kCoeffBits) here.
  const unsigned low_shift = static_cast<unsigned>(start_bit);
  const unsigned high_shift = static_cast<unsigned>(kCoeffBits - start_bit);
  for (Index col = 0; col < num_columns; ++col) {
    CoeffRow soln_col =
        (iss.LoadSegment(first_segment + col) >> low_shift) |
        (iss.LoadSegment(first_segment + num_columns + col) << high_shift);
    if (!column_matches(col, soln_col)) {
      return false;
    }
  }
  // Every column matched.
  return true;
}
|
||||||
|
|
||||||
|
// TODO: refactor Interleaved*Query so that queries can be "prepared" by
|
||||||
|
// prefetching memory, to hide memory latency for multiple queries in a
|
||||||
|
// single thread.
|
||||||
|
|
||||||
} // namespace ribbon
|
} // namespace ribbon
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include "port/port.h" // for PREFETCH
|
#include "port/port.h" // for PREFETCH
|
||||||
#include "util/ribbon_alg.h"
|
#include "util/ribbon_alg.h"
|
||||||
|
|
||||||
|
@ -52,6 +54,14 @@ namespace ribbon {
|
||||||
// // less), so typical num_starts < 10k.
|
// // less), so typical num_starts < 10k.
|
||||||
// static constexpr bool kUseSmash;
|
// static constexpr bool kUseSmash;
|
||||||
//
|
//
|
||||||
|
// // When true, allows number of "starts" to be zero, for best support
|
||||||
|
// // of the "no keys to add" case by always returning false for filter
|
||||||
|
// // queries. (This is distinct from the "keys added but no space for
|
||||||
|
// // any data" case, in which a filter always returns true.) The cost
|
||||||
|
// // of supporting this is a conditional branch (probably predictable) in
|
||||||
|
// // queries.
|
||||||
|
// static constexpr bool kAllowZeroStarts;
|
||||||
|
//
|
||||||
// // A seedable stock hash function on Keys. All bits of Hash must
|
// // A seedable stock hash function on Keys. All bits of Hash must
|
||||||
// // be reasonably high quality. XXH functions recommended, but
|
// // be reasonably high quality. XXH functions recommended, but
|
||||||
// // Murmur, City, Farm, etc. also work.
|
// // Murmur, City, Farm, etc. also work.
|
||||||
|
@ -77,7 +87,7 @@ struct AddInputSelector<Key, ResultRow, true /*IsFilter*/> {
|
||||||
using T = Key;
|
using T = Key;
|
||||||
};
|
};
|
||||||
|
|
||||||
// To avoid writing 'typename' everwhere that we use types like 'Index'
|
// To avoid writing 'typename' everywhere that we use types like 'Index'
|
||||||
#define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \
|
#define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \
|
||||||
using CoeffRow = typename TypesAndSettings::CoeffRow; \
|
using CoeffRow = typename TypesAndSettings::CoeffRow; \
|
||||||
using ResultRow = typename TypesAndSettings::ResultRow; \
|
using ResultRow = typename TypesAndSettings::ResultRow; \
|
||||||
|
@ -135,7 +145,7 @@ class StandardHasher {
|
||||||
// lookup.
|
// lookup.
|
||||||
//
|
//
|
||||||
// FastRange gives us a fast and effective mapping from h to the
|
// FastRange gives us a fast and effective mapping from h to the
|
||||||
// approriate range. This depends most, sometimes exclusively, on
|
// appropriate range. This depends most, sometimes exclusively, on
|
||||||
// upper bits of h.
|
// upper bits of h.
|
||||||
//
|
//
|
||||||
if (TypesAndSettings::kUseSmash) {
|
if (TypesAndSettings::kUseSmash) {
|
||||||
|
@ -150,10 +160,12 @@ class StandardHasher {
|
||||||
// it's usually small enough to be ignorable (less computation in
|
// it's usually small enough to be ignorable (less computation in
|
||||||
// this function) when number of slots is roughly 10k or larger.
|
// this function) when number of slots is roughly 10k or larger.
|
||||||
//
|
//
|
||||||
// TODO: re-check these degress of smash, esp with kFirstCoeffAlwaysOne
|
// The best values for these smash weights might depend on how
|
||||||
|
// densely you're packing entries, but this seems to work well for
|
||||||
|
// 2% overhead and roughly 50% success probability.
|
||||||
//
|
//
|
||||||
constexpr auto kFrontSmash = kCoeffBits / 2 - 1;
|
constexpr auto kFrontSmash = kCoeffBits / 3;
|
||||||
constexpr auto kBackSmash = kCoeffBits / 2;
|
constexpr auto kBackSmash = kCoeffBits / 3;
|
||||||
Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash);
|
Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash);
|
||||||
start = std::max(start, kFrontSmash);
|
start = std::max(start, kFrontSmash);
|
||||||
start -= kFrontSmash;
|
start -= kFrontSmash;
|
||||||
|
@ -184,7 +196,7 @@ class StandardHasher {
|
||||||
return cr;
|
return cr;
|
||||||
}
|
}
|
||||||
inline ResultRow GetResultRowMask() const {
|
inline ResultRow GetResultRowMask() const {
|
||||||
// TODO: will be used with InterleavedSolutionStorage
|
// TODO: will be used with InterleavedSolutionStorage?
|
||||||
// For now, all bits set (note: might be a small type so might need to
|
// For now, all bits set (note: might be a small type so might need to
|
||||||
// narrow after promotion)
|
// narrow after promotion)
|
||||||
return static_cast<ResultRow>(~ResultRow{0});
|
return static_cast<ResultRow>(~ResultRow{0});
|
||||||
|
@ -236,7 +248,7 @@ class StandardHasher {
|
||||||
// to apply a different seed. This hasher seeds a 1-to-1 mixing
|
// to apply a different seed. This hasher seeds a 1-to-1 mixing
|
||||||
// transformation to apply a seed to an existing hash (or hash-sized key).
|
// transformation to apply a seed to an existing hash (or hash-sized key).
|
||||||
//
|
//
|
||||||
// Testing suggests essentially no degredation of solution success rate
|
// Testing suggests essentially no degradation of solution success rate
|
||||||
// vs. going back to original inputs when changing hash seeds. For example:
|
// vs. going back to original inputs when changing hash seeds. For example:
|
||||||
// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys
|
// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys
|
||||||
// is about 1.10 for both StandardHasher and StandardRehasher.
|
// is about 1.10 for both StandardHasher and StandardRehasher.
|
||||||
|
@ -279,6 +291,26 @@ template <class RehasherTypesAndSettings>
|
||||||
using StandardRehasher =
|
using StandardRehasher =
|
||||||
StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>;
|
StandardHasher<StandardRehasherAdapter<RehasherTypesAndSettings>>;
|
||||||
|
|
||||||
|
// Especially with smaller hashes (e.g. 32 bit), there can be noticeable
// false positives due to collisions in the Hash returned by GetHash.
// This function returns the expected FP rate due to those collisions,
// which can be added to the expected FP rate from the underlying data
// structure. (Note: technically, a + b is only a good approximation of
// 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.)
// The number of entries added can be a double here in case it's an
// average.
//
// This `double` overload does the actual computation. It is defined before
// the generic overload so that the generic one finds it by ordinary lookup
// instead of relying on argument-dependent lookup at instantiation time.
template <class Hasher>
double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) {
  // Technically, there could be overlap among the added, but ignoring that
  // is typically close enough.
  // 256^sizeof(Hash) is the number of distinct values a Hash can take.
  return added / std::pow(256.0, sizeof(typename Hasher::Hash));
}

// Convenience overload accepting any numeric type for the number of entries
// added; forwards to the `double` overload above.
template <class Hasher, typename Numerical>
double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) {
  // Standardize on the 'double' overload. The explicit cast (rather than
  // `1.0 * added`) guarantees the argument really is a double, even when
  // Numerical is wider (e.g. long double), so this cannot select itself
  // again.
  return ExpectedCollisionFpRate(hasher, static_cast<double>(added));
}
|
||||||
|
|
||||||
// StandardBanding: a canonical implementation of BandingStorage and
|
// StandardBanding: a canonical implementation of BandingStorage and
|
||||||
// BacktrackStorage, with convenience API for banding (solving with on-the-fly
|
// BacktrackStorage, with convenience API for banding (solving with on-the-fly
|
||||||
// Gaussian elimination) with and without backtracking.
|
// Gaussian elimination) with and without backtracking.
|
||||||
|
@ -288,13 +320,14 @@ class StandardBanding : public StandardHasher<TypesAndSettings> {
|
||||||
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);
|
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);
|
||||||
|
|
||||||
StandardBanding(Index num_slots = 0, Index backtrack_size = 0) {
|
StandardBanding(Index num_slots = 0, Index backtrack_size = 0) {
|
||||||
if (num_slots > 0) {
|
|
||||||
Reset(num_slots, backtrack_size);
|
Reset(num_slots, backtrack_size);
|
||||||
} else {
|
|
||||||
EnsureBacktrackSize(backtrack_size);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void Reset(Index num_slots, Index backtrack_size = 0) {
|
void Reset(Index num_slots, Index backtrack_size = 0) {
|
||||||
|
if (num_slots == 0) {
|
||||||
|
// Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized"
|
||||||
|
num_starts_ = 0;
|
||||||
|
} else {
|
||||||
|
// Normal
|
||||||
assert(num_slots >= kCoeffBits);
|
assert(num_slots >= kCoeffBits);
|
||||||
if (num_slots > num_slots_allocated_) {
|
if (num_slots > num_slots_allocated_) {
|
||||||
coeff_rows_.reset(new CoeffRow[num_slots]());
|
coeff_rows_.reset(new CoeffRow[num_slots]());
|
||||||
|
@ -310,6 +343,7 @@ class StandardBanding : public StandardHasher<TypesAndSettings> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
num_starts_ = num_slots - kCoeffBits + 1;
|
num_starts_ = num_slots - kCoeffBits + 1;
|
||||||
|
}
|
||||||
EnsureBacktrackSize(backtrack_size);
|
EnsureBacktrackSize(backtrack_size);
|
||||||
}
|
}
|
||||||
void EnsureBacktrackSize(Index backtrack_size) {
|
void EnsureBacktrackSize(Index backtrack_size) {
|
||||||
|
@ -323,7 +357,7 @@ class StandardBanding : public StandardHasher<TypesAndSettings> {
|
||||||
// From concept BandingStorage
|
// From concept BandingStorage
|
||||||
|
|
||||||
inline bool UsePrefetch() const {
|
inline bool UsePrefetch() const {
|
||||||
// A rough guestimate of when prefetching during construction pays off.
|
// A rough guesstimate of when prefetching during construction pays off.
|
||||||
// TODO: verify/validate
|
// TODO: verify/validate
|
||||||
return num_starts_ > 1500;
|
return num_starts_ > 1500;
|
||||||
}
|
}
|
||||||
|
@ -352,6 +386,12 @@ class StandardBanding : public StandardHasher<TypesAndSettings> {
|
||||||
//
|
//
|
||||||
template <typename InputIterator>
|
template <typename InputIterator>
|
||||||
bool AddRange(InputIterator begin, InputIterator end) {
|
bool AddRange(InputIterator begin, InputIterator end) {
|
||||||
|
assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual. Can't add any in this case.
|
||||||
|
return begin == end;
|
||||||
|
}
|
||||||
|
// Normal
|
||||||
return BandingAddRange(this, *this, begin, end);
|
return BandingAddRange(this, *this, begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -364,6 +404,12 @@ class StandardBanding : public StandardHasher<TypesAndSettings> {
|
||||||
//
|
//
|
||||||
template <typename InputIterator>
|
template <typename InputIterator>
|
||||||
bool AddRangeOrRollBack(InputIterator begin, InputIterator end) {
|
bool AddRangeOrRollBack(InputIterator begin, InputIterator end) {
|
||||||
|
assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual. Can't add any in this case.
|
||||||
|
return begin == end;
|
||||||
|
}
|
||||||
|
// else Normal
|
||||||
return BandingAddRange(this, this, *this, begin, end);
|
return BandingAddRange(this, this, *this, begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -372,17 +418,22 @@ class StandardBanding : public StandardHasher<TypesAndSettings> {
|
||||||
//
|
//
|
||||||
// Adding can fail even before all the "slots" are completely "full".
|
// Adding can fail even before all the "slots" are completely "full".
|
||||||
//
|
//
|
||||||
bool Add(const AddInput& input) { return AddRange(&input, &input + 1); }
|
bool Add(const AddInput& input) {
|
||||||
|
// Pointer can act as iterator
|
||||||
|
return AddRange(&input, &input + 1);
|
||||||
|
}
|
||||||
|
|
||||||
// Return the number of "occupied" rows (with non-zero coefficients stored).
|
// Return the number of "occupied" rows (with non-zero coefficients stored).
|
||||||
Index GetOccupiedCount() const {
|
Index GetOccupiedCount() const {
|
||||||
Index count = 0;
|
Index count = 0;
|
||||||
|
if (num_starts_ > 0) {
|
||||||
const Index num_slots = num_starts_ + kCoeffBits - 1;
|
const Index num_slots = num_starts_ + kCoeffBits - 1;
|
||||||
for (Index i = 0; i < num_slots; ++i) {
|
for (Index i = 0; i < num_slots; ++i) {
|
||||||
if (coeff_rows_[i] != 0) {
|
if (coeff_rows_[i] != 0) {
|
||||||
++count;
|
++count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -442,6 +493,11 @@ class InMemSimpleSolution {
|
||||||
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);
|
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);
|
||||||
|
|
||||||
void PrepareForNumStarts(Index num_starts) {
|
void PrepareForNumStarts(Index num_starts) {
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) {
|
||||||
|
// Unusual
|
||||||
|
num_starts_ = 0;
|
||||||
|
} else {
|
||||||
|
// Normal
|
||||||
const Index num_slots = num_starts + kCoeffBits - 1;
|
const Index num_slots = num_starts + kCoeffBits - 1;
|
||||||
assert(num_slots >= kCoeffBits);
|
assert(num_slots >= kCoeffBits);
|
||||||
if (num_slots > num_slots_allocated_) {
|
if (num_slots > num_slots_allocated_) {
|
||||||
|
@ -451,6 +507,7 @@ class InMemSimpleSolution {
|
||||||
}
|
}
|
||||||
num_starts_ = num_starts;
|
num_starts_ = num_starts;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Index GetNumStarts() const { return num_starts_; }
|
Index GetNumStarts() const { return num_starts_; }
|
||||||
|
|
||||||
|
@ -464,21 +521,52 @@ class InMemSimpleSolution {
|
||||||
// High-level API
|
// High-level API
|
||||||
|
|
||||||
template <typename BandingStorage>
|
template <typename BandingStorage>
|
||||||
void BackSubstFrom(const BandingStorage& ss) {
|
void BackSubstFrom(const BandingStorage& bs) {
|
||||||
SimpleBackSubst(this, ss);
|
if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) {
|
||||||
|
// Unusual
|
||||||
|
PrepareForNumStarts(0);
|
||||||
|
} else {
|
||||||
|
// Normal
|
||||||
|
SimpleBackSubst(this, bs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename PhsfQueryHasher>
|
template <typename PhsfQueryHasher>
|
||||||
ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) {
|
ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) {
|
||||||
assert(!TypesAndSettings::kIsFilter);
|
assert(!TypesAndSettings::kIsFilter);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
// Normal
|
||||||
return SimplePhsfQuery(input, hasher, *this);
|
return SimplePhsfQuery(input, hasher, *this);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename FilterQueryHasher>
|
template <typename FilterQueryHasher>
|
||||||
bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) {
|
bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) {
|
||||||
assert(TypesAndSettings::kIsFilter);
|
assert(TypesAndSettings::kIsFilter);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual. Zero starts presumes no keys added -> always false
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
// Normal, or upper_num_columns_ == 0 means "no space for data" and
|
||||||
|
// thus will always return true.
|
||||||
return SimpleFilterQuery(input, hasher, *this);
|
return SimpleFilterQuery(input, hasher, *this);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double ExpectedFpRate() {
|
||||||
|
assert(TypesAndSettings::kIsFilter);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual, but we don't have FPs if we always return false.
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
// else Normal
|
||||||
|
|
||||||
|
// Each result (solution) bit (column) cuts FP rate in half
|
||||||
|
return std::pow(0.5, 8U * sizeof(ResultRow));
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// We generally store "starts" instead of slots for speed of GetStart(),
|
// We generally store "starts" instead of slots for speed of GetStart(),
|
||||||
|
@ -488,6 +576,150 @@ class InMemSimpleSolution {
|
||||||
std::unique_ptr<ResultRow[]> solution_rows_;
|
std::unique_ptr<ResultRow[]> solution_rows_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Implements concept InterleavedSolutionStorage always using little-endian
|
||||||
|
// byte order, so easy for serialization/deserialization. This implementation
|
||||||
|
// fully supports fractional bits per key, where any number of segments
|
||||||
|
// (number of bytes multiple of sizeof(CoeffRow)) can be used with any number
|
||||||
|
// of slots that is a multiple of kCoeffBits.
|
||||||
|
//
|
||||||
|
// The structure is passed an externally allocated/de-allocated byte buffer
|
||||||
|
// that is optionally pre-populated (from storage) for answering queries,
|
||||||
|
// or can be populated by BackSubstFrom.
|
||||||
|
//
|
||||||
|
template <class TypesAndSettings>
|
||||||
|
class SerializableInterleavedSolution {
|
||||||
|
public:
|
||||||
|
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings);
|
||||||
|
|
||||||
|
// Does not take ownership of `data` but uses it (up to `data_len` bytes)
|
||||||
|
// throughout lifetime
|
||||||
|
SerializableInterleavedSolution(char* data, size_t data_len)
|
||||||
|
: data_(data), data_len_(data_len) {}
|
||||||
|
|
||||||
|
void PrepareForNumStarts(Index num_starts) {
|
||||||
|
assert(num_starts == 0 || (num_starts % kCoeffBits == 1));
|
||||||
|
num_starts_ = num_starts;
|
||||||
|
|
||||||
|
InternalConfigure();
|
||||||
|
}
|
||||||
|
|
||||||
|
Index GetNumStarts() const { return num_starts_; }
|
||||||
|
|
||||||
|
Index GetNumBlocks() const {
|
||||||
|
const Index num_slots = num_starts_ + kCoeffBits - 1;
|
||||||
|
return num_slots / kCoeffBits;
|
||||||
|
}
|
||||||
|
|
||||||
|
Index GetUpperNumColumns() const { return upper_num_columns_; }
|
||||||
|
|
||||||
|
Index GetUpperStartBlock() const { return upper_start_block_; }
|
||||||
|
|
||||||
|
Index GetNumSegments() const {
|
||||||
|
return static_cast<Index>(data_len_ / sizeof(CoeffRow));
|
||||||
|
}
|
||||||
|
|
||||||
|
CoeffRow LoadSegment(Index segment_num) const {
|
||||||
|
assert(data_ != nullptr); // suppress clang analyzer report
|
||||||
|
return DecodeFixedGeneric<CoeffRow>(data_ + segment_num * sizeof(CoeffRow));
|
||||||
|
}
|
||||||
|
void StoreSegment(Index segment_num, CoeffRow val) {
|
||||||
|
assert(data_ != nullptr); // suppress clang analyzer report
|
||||||
|
EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ********************************************************************
|
||||||
|
// High-level API
|
||||||
|
|
||||||
|
template <typename BandingStorage>
|
||||||
|
void BackSubstFrom(const BandingStorage& bs) {
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) {
|
||||||
|
// Unusual
|
||||||
|
PrepareForNumStarts(0);
|
||||||
|
} else {
|
||||||
|
// Normal
|
||||||
|
InterleavedBackSubst(this, bs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename PhsfQueryHasher>
|
||||||
|
ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) {
|
||||||
|
assert(!TypesAndSettings::kIsFilter);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
// Normal
|
||||||
|
return InterleavedPhsfQuery(input, hasher, *this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename FilterQueryHasher>
|
||||||
|
bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) {
|
||||||
|
assert(TypesAndSettings::kIsFilter);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual. Zero starts presumes no keys added -> always false
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
// Normal, or upper_num_columns_ == 0 means "no space for data" and
|
||||||
|
// thus will always return true.
|
||||||
|
return InterleavedFilterQuery(input, hasher, *this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double ExpectedFpRate() {
|
||||||
|
assert(TypesAndSettings::kIsFilter);
|
||||||
|
if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
|
||||||
|
// Unusual. Zero starts presumes no keys added -> always false
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
// else Normal
|
||||||
|
|
||||||
|
// Note: Ignoring smash setting; still close enough in that case
|
||||||
|
double lower_portion =
|
||||||
|
(upper_start_block_ * kCoeffBits * 1.0) / num_starts_;
|
||||||
|
|
||||||
|
// Each result (solution) bit (column) cuts FP rate in half. Weight that
|
||||||
|
// for upper and lower number of bits (columns).
|
||||||
|
return lower_portion * std::pow(0.5, upper_num_columns_ - 1) +
|
||||||
|
(1.0 - lower_portion) * std::pow(0.5, upper_num_columns_);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void InternalConfigure() {
|
||||||
|
const Index num_blocks = GetNumBlocks();
|
||||||
|
Index num_segments = GetNumSegments();
|
||||||
|
|
||||||
|
if (num_blocks == 0) {
|
||||||
|
// Exceptional
|
||||||
|
upper_num_columns_ = 0;
|
||||||
|
upper_start_block_ = 0;
|
||||||
|
} else {
|
||||||
|
// Normal
|
||||||
|
upper_num_columns_ =
|
||||||
|
(num_segments + /*round up*/ num_blocks - 1) / num_blocks;
|
||||||
|
upper_start_block_ = upper_num_columns_ * num_blocks - num_segments;
|
||||||
|
// Unless that's more columns than supported by ResultRow data type
|
||||||
|
if (upper_num_columns_ > 8U * sizeof(ResultRow)) {
|
||||||
|
// Use maximum columns (there will be space unused)
|
||||||
|
upper_num_columns_ = static_cast<Index>(8U * sizeof(ResultRow));
|
||||||
|
upper_start_block_ = 0;
|
||||||
|
num_segments = num_blocks * upper_num_columns_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Update data_len_ for correct rounding and/or unused space
|
||||||
|
// NOTE: unused space stays gone if we PrepareForNumStarts again.
|
||||||
|
// We are prioritizing minimizing the number of fields over making
|
||||||
|
// the "unusued space" feature work well.
|
||||||
|
data_len_ = num_segments * sizeof(CoeffRow);
|
||||||
|
}
|
||||||
|
|
||||||
|
Index num_starts_ = 0;
|
||||||
|
Index upper_num_columns_ = 0;
|
||||||
|
Index upper_start_block_ = 0;
|
||||||
|
char* const data_;
|
||||||
|
size_t data_len_;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace ribbon
|
} // namespace ribbon
|
||||||
|
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
@ -499,5 +731,10 @@ class InMemSimpleSolution {
|
||||||
ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>; \
|
ROCKSDB_NAMESPACE::ribbon::StandardBanding<TypesAndSettings>; \
|
||||||
using SimpleSoln = \
|
using SimpleSoln = \
|
||||||
ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>; \
|
ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution<TypesAndSettings>; \
|
||||||
static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) > 0, \
|
using InterleavedSoln = \
|
||||||
|
ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution< \
|
||||||
|
TypesAndSettings>; \
|
||||||
|
static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) + \
|
||||||
|
sizeof(InterleavedSoln) > \
|
||||||
|
0, \
|
||||||
"avoid unused warnings, semicolon expected after macro call")
|
"avoid unused warnings, semicolon expected after macro call")
|
||||||
|
|
|
@ -6,9 +6,11 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "test_util/testharness.h"
|
#include "test_util/testharness.h"
|
||||||
|
#include "util/bloom_impl.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/hash.h"
|
#include "util/hash.h"
|
||||||
#include "util/ribbon_impl.h"
|
#include "util/ribbon_impl.h"
|
||||||
|
#include "util/stop_watch.h"
|
||||||
|
|
||||||
#ifndef GFLAGS
|
#ifndef GFLAGS
|
||||||
uint32_t FLAGS_thoroughness = 5;
|
uint32_t FLAGS_thoroughness = 5;
|
||||||
|
@ -35,7 +37,10 @@ struct DefaultTypesAndSettings {
|
||||||
static constexpr bool kIsFilter = true;
|
static constexpr bool kIsFilter = true;
|
||||||
static constexpr bool kFirstCoeffAlwaysOne = true;
|
static constexpr bool kFirstCoeffAlwaysOne = true;
|
||||||
static constexpr bool kUseSmash = false;
|
static constexpr bool kUseSmash = false;
|
||||||
|
static constexpr bool kAllowZeroStarts = false;
|
||||||
static Hash HashFn(const Key& key, Seed seed) {
|
static Hash HashFn(const Key& key, Seed seed) {
|
||||||
|
// TODO/FIXME: is there sufficient independence with sequential keys and
|
||||||
|
// sequential seeds?
|
||||||
return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed);
|
return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -47,10 +52,13 @@ struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings {
|
||||||
struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings {
|
struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings {
|
||||||
using CoeffRow = uint64_t;
|
using CoeffRow = uint64_t;
|
||||||
};
|
};
|
||||||
struct TypesAndSettings_Coeff64Smash : public DefaultTypesAndSettings {
|
struct TypesAndSettings_Coeff64Smash1 : public DefaultTypesAndSettings {
|
||||||
using CoeffRow = uint64_t;
|
using CoeffRow = uint64_t;
|
||||||
static constexpr bool kUseSmash = true;
|
static constexpr bool kUseSmash = true;
|
||||||
};
|
};
|
||||||
|
struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash1 {
|
||||||
|
static constexpr bool kFirstCoeffAlwaysOne = false;
|
||||||
|
};
|
||||||
struct TypesAndSettings_Result16 : public DefaultTypesAndSettings {
|
struct TypesAndSettings_Result16 : public DefaultTypesAndSettings {
|
||||||
using ResultRow = uint16_t;
|
using ResultRow = uint16_t;
|
||||||
};
|
};
|
||||||
|
@ -60,7 +68,7 @@ struct TypesAndSettings_IndexSizeT : public DefaultTypesAndSettings {
|
||||||
struct TypesAndSettings_Hash32 : public DefaultTypesAndSettings {
|
struct TypesAndSettings_Hash32 : public DefaultTypesAndSettings {
|
||||||
using Hash = uint32_t;
|
using Hash = uint32_t;
|
||||||
static Hash HashFn(const Key& key, Seed seed) {
|
static Hash HashFn(const Key& key, Seed seed) {
|
||||||
// NOTE: Using RockDB 32-bit Hash() here fails test below because of
|
// NOTE: Using RocksDB 32-bit Hash() here fails test below because of
|
||||||
// insufficient mixing of seed (or generally insufficient mixing)
|
// insufficient mixing of seed (or generally insufficient mixing)
|
||||||
return ROCKSDB_NAMESPACE::Upper32of64(
|
return ROCKSDB_NAMESPACE::Upper32of64(
|
||||||
ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed));
|
ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed));
|
||||||
|
@ -78,10 +86,13 @@ struct TypesAndSettings_Seed8 : public DefaultTypesAndSettings {
|
||||||
struct TypesAndSettings_NoAlwaysOne : public DefaultTypesAndSettings {
|
struct TypesAndSettings_NoAlwaysOne : public DefaultTypesAndSettings {
|
||||||
static constexpr bool kFirstCoeffAlwaysOne = false;
|
static constexpr bool kFirstCoeffAlwaysOne = false;
|
||||||
};
|
};
|
||||||
|
struct TypesAndSettings_AllowZeroStarts : public DefaultTypesAndSettings {
|
||||||
|
static constexpr bool kAllowZeroStarts = true;
|
||||||
|
};
|
||||||
struct TypesAndSettings_RehasherWrapped : public DefaultTypesAndSettings {
|
struct TypesAndSettings_RehasherWrapped : public DefaultTypesAndSettings {
|
||||||
// This doesn't directly use StandardRehasher as a whole, but simulates
|
// This doesn't directly use StandardRehasher as a whole, but simulates
|
||||||
// its behavior with unseeded hash of key, then seeded hash-to-hash
|
// its behavior with unseeded hash of key, then seeded hash-to-hash
|
||||||
// tranform.
|
// transform.
|
||||||
static Hash HashFn(const Key& key, Seed seed) {
|
static Hash HashFn(const Key& key, Seed seed) {
|
||||||
Hash unseeded = DefaultTypesAndSettings::HashFn(key, /*seed*/ 0);
|
Hash unseeded = DefaultTypesAndSettings::HashFn(key, /*seed*/ 0);
|
||||||
using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter<
|
using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter<
|
||||||
|
@ -89,10 +100,14 @@ struct TypesAndSettings_RehasherWrapped : public DefaultTypesAndSettings {
|
||||||
return Rehasher::HashFn(unseeded, seed);
|
return Rehasher::HashFn(unseeded, seed);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
struct TypesAndSettings_RehasherWrapped_Result16
|
||||||
|
: public TypesAndSettings_RehasherWrapped {
|
||||||
|
using ResultRow = uint16_t;
|
||||||
|
};
|
||||||
struct TypesAndSettings_Rehasher32Wrapped : public TypesAndSettings_Hash32 {
|
struct TypesAndSettings_Rehasher32Wrapped : public TypesAndSettings_Hash32 {
|
||||||
// This doesn't directly use StandardRehasher as a whole, but simulates
|
// This doesn't directly use StandardRehasher as a whole, but simulates
|
||||||
// its behavior with unseeded hash of key, then seeded hash-to-hash
|
// its behavior with unseeded hash of key, then seeded hash-to-hash
|
||||||
// tranform.
|
// transform.
|
||||||
static Hash HashFn(const Key& key, Seed seed) {
|
static Hash HashFn(const Key& key, Seed seed) {
|
||||||
Hash unseeded = TypesAndSettings_Hash32::HashFn(key, /*seed*/ 0);
|
Hash unseeded = TypesAndSettings_Hash32::HashFn(key, /*seed*/ 0);
|
||||||
using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter<
|
using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter<
|
||||||
|
@ -101,14 +116,15 @@ struct TypesAndSettings_Rehasher32Wrapped : public TypesAndSettings_Hash32 {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
using TestTypesAndSettings =
|
using TestTypesAndSettings = ::testing::Types<
|
||||||
::testing::Types<TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash,
|
TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash,
|
||||||
TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash,
|
TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash0,
|
||||||
TypesAndSettings_Result16, TypesAndSettings_IndexSizeT,
|
TypesAndSettings_Coeff64Smash1, TypesAndSettings_Result16,
|
||||||
TypesAndSettings_Hash32, TypesAndSettings_Hash32_Result16,
|
TypesAndSettings_IndexSizeT, TypesAndSettings_Hash32,
|
||||||
TypesAndSettings_KeyString, TypesAndSettings_Seed8,
|
TypesAndSettings_Hash32_Result16, TypesAndSettings_KeyString,
|
||||||
TypesAndSettings_NoAlwaysOne,
|
TypesAndSettings_Seed8, TypesAndSettings_NoAlwaysOne,
|
||||||
TypesAndSettings_RehasherWrapped,
|
TypesAndSettings_AllowZeroStarts, TypesAndSettings_RehasherWrapped,
|
||||||
|
TypesAndSettings_RehasherWrapped_Result16,
|
||||||
TypesAndSettings_Rehasher32Wrapped>;
|
TypesAndSettings_Rehasher32Wrapped>;
|
||||||
TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings);
|
TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings);
|
||||||
|
|
||||||
|
@ -125,11 +141,6 @@ struct KeyGen {
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
KeyGen& operator+=(uint64_t incr) {
|
|
||||||
id_ += incr;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string& operator*() {
|
const std::string& operator*() {
|
||||||
// Use multiplication to mix things up a little in the key
|
// Use multiplication to mix things up a little in the key
|
||||||
ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8],
|
ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8],
|
||||||
|
@ -191,9 +202,6 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
|
|
||||||
// For testing FP rate etc.
|
// For testing FP rate etc.
|
||||||
constexpr Index kNumToCheck = 100000;
|
constexpr Index kNumToCheck = 100000;
|
||||||
constexpr size_t kNumSolutionColumns = 8U * sizeof(ResultRow);
|
|
||||||
const double expected_fp_count =
|
|
||||||
kNumToCheck * std::pow(0.5, kNumSolutionColumns);
|
|
||||||
|
|
||||||
const auto log2_thoroughness =
|
const auto log2_thoroughness =
|
||||||
static_cast<Seed>(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness));
|
static_cast<Seed>(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness));
|
||||||
|
@ -210,16 +218,33 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
uint64_t total_fp_count = 0;
|
uint64_t total_fp_count = 0;
|
||||||
uint64_t total_added = 0;
|
uint64_t total_added = 0;
|
||||||
|
|
||||||
|
uint64_t soln_query_nanos = 0;
|
||||||
|
uint64_t soln_query_count = 0;
|
||||||
|
uint64_t bloom_query_nanos = 0;
|
||||||
|
uint64_t isoln_query_nanos = 0;
|
||||||
|
uint64_t isoln_query_count = 0;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) {
|
for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) {
|
||||||
Index numToAdd =
|
Index num_to_add =
|
||||||
sizeof(CoeffRow) == 16 ? 130000 : TypeParam::kUseSmash ? 5000 : 2500;
|
sizeof(CoeffRow) == 16 ? 130000 : TypeParam::kUseSmash ? 5500 : 2500;
|
||||||
|
|
||||||
// Use different values between that number and 50% of that number
|
// Use different values between that number and 50% of that number
|
||||||
numToAdd -= (i * 15485863) % (numToAdd / 2);
|
num_to_add -= (i * /* misc prime */ 15485863) % (num_to_add / 2);
|
||||||
|
|
||||||
total_added += numToAdd;
|
total_added += num_to_add;
|
||||||
|
|
||||||
const Index kNumSlots = static_cast<Index>(numToAdd * kFactor);
|
// Most of the time, test the Interleaved solution storage, but when
|
||||||
|
// we do we have to make num_slots a multiple of kCoeffBits. So
|
||||||
|
// sometimes we want to test without that limitation.
|
||||||
|
bool test_interleaved = (i % 7) != 6;
|
||||||
|
|
||||||
|
Index num_slots = static_cast<Index>(num_to_add * kFactor);
|
||||||
|
if (test_interleaved) {
|
||||||
|
// Round to nearest multiple of kCoeffBits
|
||||||
|
num_slots = ((num_slots + kCoeffBits / 2) / kCoeffBits) * kCoeffBits;
|
||||||
|
// Re-adjust num_to_add to get as close as possible to kFactor
|
||||||
|
num_to_add = static_cast<Index>(num_slots / kFactor);
|
||||||
|
}
|
||||||
|
|
||||||
std::string prefix;
|
std::string prefix;
|
||||||
// Take different samples if you change thoroughness
|
// Take different samples if you change thoroughness
|
||||||
|
@ -229,7 +254,7 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
// Batch that must be added
|
// Batch that must be added
|
||||||
std::string added_str = prefix + "added";
|
std::string added_str = prefix + "added";
|
||||||
KeyGen keys_begin(added_str, 0);
|
KeyGen keys_begin(added_str, 0);
|
||||||
KeyGen keys_end(added_str, numToAdd);
|
KeyGen keys_end(added_str, num_to_add);
|
||||||
|
|
||||||
// Batch that may or may not be added
|
// Batch that may or may not be added
|
||||||
const Index kBatchSize =
|
const Index kBatchSize =
|
||||||
|
@ -243,6 +268,14 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
KeyGen other_keys_begin(not_str, 0);
|
KeyGen other_keys_begin(not_str, 0);
|
||||||
KeyGen other_keys_end(not_str, kNumToCheck);
|
KeyGen other_keys_end(not_str, kNumToCheck);
|
||||||
|
|
||||||
|
// Vary bytes uniformly for InterleavedSoln to use number of solution
|
||||||
|
// columns varying from 0 to max allowed by ResultRow type (and used by
|
||||||
|
// SimpleSoln).
|
||||||
|
size_t ibytes =
|
||||||
|
(i * /* misc odd */ 67896789) % (sizeof(ResultRow) * num_to_add + 1);
|
||||||
|
std::unique_ptr<char[]> idata(new char[ibytes]);
|
||||||
|
InterleavedSoln isoln(idata.get(), ibytes);
|
||||||
|
|
||||||
SimpleSoln soln;
|
SimpleSoln soln;
|
||||||
Hasher hasher;
|
Hasher hasher;
|
||||||
bool first_single;
|
bool first_single;
|
||||||
|
@ -251,7 +284,7 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
{
|
{
|
||||||
Banding banding;
|
Banding banding;
|
||||||
// Traditional solve for a fixed set.
|
// Traditional solve for a fixed set.
|
||||||
ASSERT_TRUE(banding.ResetAndFindSeedToSolve(kNumSlots, keys_begin,
|
ASSERT_TRUE(banding.ResetAndFindSeedToSolve(num_slots, keys_begin,
|
||||||
keys_end, max_seed));
|
keys_end, max_seed));
|
||||||
|
|
||||||
// Now to test backtracking, starting with guaranteed fail
|
// Now to test backtracking, starting with guaranteed fail
|
||||||
|
@ -276,15 +309,24 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
}
|
}
|
||||||
ASSERT_LE(banding.GetOccupiedCount(), occupied_count + more_added);
|
ASSERT_LE(banding.GetOccupiedCount(), occupied_count + more_added);
|
||||||
|
|
||||||
|
// Also verify that redundant adds are OK (no effect)
|
||||||
|
ASSERT_TRUE(
|
||||||
|
banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8)));
|
||||||
|
ASSERT_LE(banding.GetOccupiedCount(), occupied_count + more_added);
|
||||||
|
|
||||||
// Now back-substitution
|
// Now back-substitution
|
||||||
soln.BackSubstFrom(banding);
|
soln.BackSubstFrom(banding);
|
||||||
|
if (test_interleaved) {
|
||||||
|
isoln.BackSubstFrom(banding);
|
||||||
|
}
|
||||||
|
|
||||||
Seed seed = banding.GetSeed();
|
Seed seed = banding.GetSeed();
|
||||||
total_reseeds += seed;
|
total_reseeds += seed;
|
||||||
if (seed > log2_thoroughness + 1) {
|
if (seed > log2_thoroughness + 1) {
|
||||||
fprintf(stderr, "%s high reseeds at %u, %u: %u\n",
|
fprintf(stderr, "%s high reseeds at %u, %u/%u: %u\n",
|
||||||
seed > log2_thoroughness + 8 ? "FIXME Extremely" : "Somewhat",
|
seed > log2_thoroughness + 8 ? "FIXME Extremely" : "Somewhat",
|
||||||
static_cast<unsigned>(i), static_cast<unsigned>(numToAdd),
|
static_cast<unsigned>(i), static_cast<unsigned>(num_to_add),
|
||||||
static_cast<unsigned>(seed));
|
static_cast<unsigned>(num_slots), static_cast<unsigned>(seed));
|
||||||
}
|
}
|
||||||
hasher.ResetSeed(seed);
|
hasher.ResetSeed(seed);
|
||||||
}
|
}
|
||||||
|
@ -294,19 +336,23 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
KeyGen cur = keys_begin;
|
KeyGen cur = keys_begin;
|
||||||
while (cur != keys_end) {
|
while (cur != keys_end) {
|
||||||
EXPECT_TRUE(soln.FilterQuery(*cur, hasher));
|
EXPECT_TRUE(soln.FilterQuery(*cur, hasher));
|
||||||
|
EXPECT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher));
|
||||||
++cur;
|
++cur;
|
||||||
}
|
}
|
||||||
// We (maybe) snuck these in!
|
// We (maybe) snuck these in!
|
||||||
if (first_single) {
|
if (first_single) {
|
||||||
EXPECT_TRUE(soln.FilterQuery("one_more", hasher));
|
EXPECT_TRUE(soln.FilterQuery("one_more", hasher));
|
||||||
|
EXPECT_TRUE(!test_interleaved || isoln.FilterQuery("one_more", hasher));
|
||||||
}
|
}
|
||||||
if (second_single) {
|
if (second_single) {
|
||||||
EXPECT_TRUE(soln.FilterQuery("two_more", hasher));
|
EXPECT_TRUE(soln.FilterQuery("two_more", hasher));
|
||||||
|
EXPECT_TRUE(!test_interleaved || isoln.FilterQuery("two_more", hasher));
|
||||||
}
|
}
|
||||||
if (batch_success) {
|
if (batch_success) {
|
||||||
cur = batch_begin;
|
cur = batch_begin;
|
||||||
while (cur != batch_end) {
|
while (cur != batch_end) {
|
||||||
EXPECT_TRUE(soln.FilterQuery(*cur, hasher));
|
EXPECT_TRUE(soln.FilterQuery(*cur, hasher));
|
||||||
|
EXPECT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher));
|
||||||
++cur;
|
++cur;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -314,21 +360,89 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
// Check FP rate (depends only on number of result bits == solution columns)
|
// Check FP rate (depends only on number of result bits == solution columns)
|
||||||
Index fp_count = 0;
|
Index fp_count = 0;
|
||||||
cur = other_keys_begin;
|
cur = other_keys_begin;
|
||||||
|
{
|
||||||
|
ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(),
|
||||||
|
true);
|
||||||
while (cur != other_keys_end) {
|
while (cur != other_keys_end) {
|
||||||
fp_count += soln.FilterQuery(*cur, hasher) ? 1 : 0;
|
fp_count += soln.FilterQuery(*cur, hasher) ? 1 : 0;
|
||||||
++cur;
|
++cur;
|
||||||
}
|
}
|
||||||
|
soln_query_nanos += timer.ElapsedNanos();
|
||||||
|
soln_query_count += kNumToCheck;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
double expected_fp_count = soln.ExpectedFpRate() * kNumToCheck;
|
||||||
// For expected FP rate, also include false positives due to collisions
|
// For expected FP rate, also include false positives due to collisions
|
||||||
// in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
|
// in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
|
||||||
double correction =
|
double correction =
|
||||||
1.0 * kNumToCheck * numToAdd / std::pow(256.0, sizeof(Hash));
|
kNumToCheck * ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate(
|
||||||
|
hasher, num_to_add);
|
||||||
EXPECT_LE(fp_count,
|
EXPECT_LE(fp_count,
|
||||||
FrequentPoissonUpperBound(expected_fp_count + correction));
|
FrequentPoissonUpperBound(expected_fp_count + correction));
|
||||||
EXPECT_GE(fp_count,
|
EXPECT_GE(fp_count,
|
||||||
FrequentPoissonLowerBound(expected_fp_count + correction));
|
FrequentPoissonLowerBound(expected_fp_count + correction));
|
||||||
|
|
||||||
total_fp_count += fp_count;
|
|
||||||
}
|
}
|
||||||
|
total_fp_count += fp_count;
|
||||||
|
|
||||||
|
// And also check FP rate for isoln
|
||||||
|
if (test_interleaved) {
|
||||||
|
Index ifp_count = 0;
|
||||||
|
cur = other_keys_begin;
|
||||||
|
ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(),
|
||||||
|
true);
|
||||||
|
while (cur != other_keys_end) {
|
||||||
|
ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0;
|
||||||
|
++cur;
|
||||||
|
}
|
||||||
|
isoln_query_nanos += timer.ElapsedNanos();
|
||||||
|
isoln_query_count += kNumToCheck;
|
||||||
|
{
|
||||||
|
double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck;
|
||||||
|
// For expected FP rate, also include false positives due to collisions
|
||||||
|
// in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
|
||||||
|
double correction =
|
||||||
|
kNumToCheck * ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate(
|
||||||
|
hasher, num_to_add);
|
||||||
|
EXPECT_LE(ifp_count,
|
||||||
|
FrequentPoissonUpperBound(expected_fp_count + correction));
|
||||||
|
EXPECT_GE(ifp_count,
|
||||||
|
FrequentPoissonLowerBound(expected_fp_count + correction));
|
||||||
|
}
|
||||||
|
// Since the bits used in isoln are a subset of the bits used in soln,
|
||||||
|
// it cannot have fewer FPs
|
||||||
|
EXPECT_GE(ifp_count, fp_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// And compare to Bloom time, for fun
|
||||||
|
if (ibytes >= /* minimum Bloom impl bytes*/ 64) {
|
||||||
|
Index bfp_count = 0;
|
||||||
|
cur = other_keys_begin;
|
||||||
|
ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(),
|
||||||
|
true);
|
||||||
|
while (cur != other_keys_end) {
|
||||||
|
uint64_t h = hasher.GetHash(*cur);
|
||||||
|
uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h);
|
||||||
|
uint32_t h2 = sizeof(Hash) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h)
|
||||||
|
: h1 * 0x9e3779b9;
|
||||||
|
bfp_count += ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch(
|
||||||
|
h1, h2, static_cast<uint32_t>(ibytes), 6, idata.get())
|
||||||
|
? 1
|
||||||
|
: 0;
|
||||||
|
++cur;
|
||||||
|
}
|
||||||
|
bloom_query_nanos += timer.ElapsedNanos();
|
||||||
|
// ensure bfp_count is used
|
||||||
|
ASSERT_LT(bfp_count, kNumToCheck);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// "outside" == key not in original set so either negative or false positive
|
||||||
|
fprintf(stderr, "Simple outside query, hot, incl hashing, ns/key: %g\n",
|
||||||
|
1.0 * soln_query_nanos / soln_query_count);
|
||||||
|
fprintf(stderr, "Interleaved outside query, hot, incl hashing, ns/key: %g\n",
|
||||||
|
1.0 * isoln_query_nanos / isoln_query_count);
|
||||||
|
fprintf(stderr, "Bloom outside query, hot, incl hashing, ns/key: %g\n",
|
||||||
|
1.0 * bloom_query_nanos / soln_query_count);
|
||||||
|
|
||||||
{
|
{
|
||||||
double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness;
|
double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness;
|
||||||
|
@ -370,12 +484,14 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
{
|
{
|
||||||
uint64_t total_checked = uint64_t{kNumToCheck} * FLAGS_thoroughness;
|
uint64_t total_checked = uint64_t{kNumToCheck} * FLAGS_thoroughness;
|
||||||
double expected_total_fp_count =
|
double expected_total_fp_count =
|
||||||
total_checked * std::pow(0.5, kNumSolutionColumns);
|
total_checked * std::pow(0.5, 8U * sizeof(ResultRow));
|
||||||
// For expected FP rate, also include false positives due to collisions
|
// For expected FP rate, also include false positives due to collisions
|
||||||
// in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
|
// in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
|
||||||
expected_total_fp_count += 1.0 * total_checked * total_added /
|
double average_added = 1.0 * total_added / FLAGS_thoroughness;
|
||||||
FLAGS_thoroughness /
|
expected_total_fp_count +=
|
||||||
std::pow(256.0, sizeof(Hash));
|
total_checked * ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate(
|
||||||
|
Hasher(), average_added);
|
||||||
|
|
||||||
uint64_t upper_bound = InfrequentPoissonUpperBound(expected_total_fp_count);
|
uint64_t upper_bound = InfrequentPoissonUpperBound(expected_total_fp_count);
|
||||||
uint64_t lower_bound = InfrequentPoissonLowerBound(expected_total_fp_count);
|
uint64_t lower_bound = InfrequentPoissonLowerBound(expected_total_fp_count);
|
||||||
fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n",
|
fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n",
|
||||||
|
@ -383,7 +499,7 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
expected_total_fp_count / total_checked,
|
expected_total_fp_count / total_checked,
|
||||||
1.0 * upper_bound / total_checked,
|
1.0 * upper_bound / total_checked,
|
||||||
1.0 * lower_bound / total_checked);
|
1.0 * lower_bound / total_checked);
|
||||||
// FIXME: this can fail for Result16, e.g. --thoroughness=100
|
// FIXME: this can fail for Result16, e.g. --thoroughness=300
|
||||||
// Seems due to inexpensive hashing in StandardHasher::GetCoeffRow and
|
// Seems due to inexpensive hashing in StandardHasher::GetCoeffRow and
|
||||||
// GetResultRowFromHash as replacing those with different Hash64 instances
|
// GetResultRowFromHash as replacing those with different Hash64 instances
|
||||||
// fixes it, at least mostly.
|
// fixes it, at least mostly.
|
||||||
|
@ -392,11 +508,114 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(RibbonTest, Another) {
|
TYPED_TEST(RibbonTypeParamTest, Extremes) {
|
||||||
IMPORT_RIBBON_TYPES_AND_SETTINGS(DefaultTypesAndSettings);
|
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
|
||||||
IMPORT_RIBBON_IMPL_TYPES(DefaultTypesAndSettings);
|
IMPORT_RIBBON_IMPL_TYPES(TypeParam);
|
||||||
|
|
||||||
// TODO
|
size_t bytes = 128 * 1024;
|
||||||
|
std::unique_ptr<char[]> buf(new char[bytes]);
|
||||||
|
InterleavedSoln isoln(buf.get(), bytes);
|
||||||
|
SimpleSoln soln;
|
||||||
|
Hasher hasher;
|
||||||
|
Banding banding;
|
||||||
|
|
||||||
|
// ########################################
|
||||||
|
// Add zero keys to minimal number of slots
|
||||||
|
KeyGen begin_and_end("foo", 123);
|
||||||
|
ASSERT_TRUE(banding.ResetAndFindSeedToSolve(
|
||||||
|
/*slots*/ kCoeffBits, begin_and_end, begin_and_end, /*max_seed*/ 0));
|
||||||
|
|
||||||
|
soln.BackSubstFrom(banding);
|
||||||
|
isoln.BackSubstFrom(banding);
|
||||||
|
|
||||||
|
// Because there's plenty of memory, we expect the interleaved solution to
|
||||||
|
// use maximum supported columns (same as simple solution)
|
||||||
|
ASSERT_EQ(isoln.GetUpperNumColumns(), 8U * sizeof(ResultRow));
|
||||||
|
ASSERT_EQ(isoln.GetUpperStartBlock(), 0U);
|
||||||
|
|
||||||
|
// Somewhat oddly, we expect same FP rate as if we had essentially filled
|
||||||
|
// up the slots.
|
||||||
|
constexpr Index kNumToCheck = 100000;
|
||||||
|
KeyGen other_keys_begin("not", 0);
|
||||||
|
KeyGen other_keys_end("not", kNumToCheck);
|
||||||
|
|
||||||
|
Index fp_count = 0;
|
||||||
|
KeyGen cur = other_keys_begin;
|
||||||
|
while (cur != other_keys_end) {
|
||||||
|
bool isoln_query_result = isoln.FilterQuery(*cur, hasher);
|
||||||
|
bool soln_query_result = soln.FilterQuery(*cur, hasher);
|
||||||
|
// Solutions are equivalent
|
||||||
|
ASSERT_EQ(isoln_query_result, soln_query_result);
|
||||||
|
// And in fact we only expect an FP when ResultRow is 0
|
||||||
|
ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash(
|
||||||
|
hasher.GetHash(*cur)) == ResultRow{0});
|
||||||
|
|
||||||
|
fp_count += soln_query_result ? 1 : 0;
|
||||||
|
++cur;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
ASSERT_EQ(isoln.ExpectedFpRate(), soln.ExpectedFpRate());
|
||||||
|
double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck;
|
||||||
|
EXPECT_LE(fp_count, InfrequentPoissonUpperBound(expected_fp_count));
|
||||||
|
EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ######################################################
|
||||||
|
// Use zero bytes for interleaved solution (key(s) added)
|
||||||
|
|
||||||
|
// Add one key
|
||||||
|
KeyGen key_begin("added", 0);
|
||||||
|
KeyGen key_end("added", 1);
|
||||||
|
ASSERT_TRUE(banding.ResetAndFindSeedToSolve(
|
||||||
|
/*slots*/ kCoeffBits, key_begin, key_end, /*max_seed*/ 0));
|
||||||
|
|
||||||
|
InterleavedSoln isoln2(nullptr, /*bytes*/ 0);
|
||||||
|
|
||||||
|
isoln2.BackSubstFrom(banding);
|
||||||
|
|
||||||
|
ASSERT_EQ(isoln2.GetUpperNumColumns(), 0U);
|
||||||
|
ASSERT_EQ(isoln2.GetUpperStartBlock(), 0U);
|
||||||
|
|
||||||
|
// All queries return true
|
||||||
|
ASSERT_TRUE(isoln2.FilterQuery(*other_keys_begin, hasher));
|
||||||
|
ASSERT_EQ(isoln2.ExpectedFpRate(), 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(RibbonTest, AllowZeroStarts) {
|
||||||
|
IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings_AllowZeroStarts);
|
||||||
|
IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings_AllowZeroStarts);
|
||||||
|
|
||||||
|
InterleavedSoln isoln(nullptr, /*bytes*/ 0);
|
||||||
|
SimpleSoln soln;
|
||||||
|
Hasher hasher;
|
||||||
|
Banding banding;
|
||||||
|
|
||||||
|
KeyGen begin("foo", 0);
|
||||||
|
KeyGen end("foo", 1);
|
||||||
|
// Can't add 1 entry
|
||||||
|
ASSERT_FALSE(
|
||||||
|
banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin, end, /*max_seed*/ 5));
|
||||||
|
|
||||||
|
KeyGen begin_and_end("foo", 123);
|
||||||
|
// Can add 0 entries
|
||||||
|
ASSERT_TRUE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin_and_end,
|
||||||
|
begin_and_end, /*max_seed*/ 5));
|
||||||
|
|
||||||
|
Seed seed = banding.GetSeed();
|
||||||
|
ASSERT_EQ(seed, 0U);
|
||||||
|
hasher.ResetSeed(seed);
|
||||||
|
|
||||||
|
// Can construct 0-slot solutions
|
||||||
|
isoln.BackSubstFrom(banding);
|
||||||
|
soln.BackSubstFrom(banding);
|
||||||
|
|
||||||
|
// Should always return false
|
||||||
|
ASSERT_FALSE(isoln.FilterQuery(*begin, hasher));
|
||||||
|
ASSERT_FALSE(soln.FilterQuery(*begin, hasher));
|
||||||
|
|
||||||
|
// And report that in FP rate
|
||||||
|
ASSERT_EQ(isoln.ExpectedFpRate(), 0.0);
|
||||||
|
ASSERT_EQ(soln.ExpectedFpRate(), 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
Loading…
Reference in New Issue