mirror of https://github.com/facebook/rocksdb.git
Added bloom filter support.
In particular, we add a new FilterPolicy class. An instance of this class can be supplied in Options when opening a database. If supplied, the instance is used to generate summaries of keys (e.g., a bloom filter), which are placed in sstables. These summaries are consulted by DB::Get() so that we can avoid reading sstable blocks that are guaranteed not to contain the key we are looking for.

This change provides one implementation of FilterPolicy, based on bloom filters.

Other changes:
- Updated version number to 1.4.
- Some build tweaks.
- C binding for CompactRange.
- A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom.
- Minor .gitignore update.
This commit is contained in:
parent
bc1ee4d25e
commit
85584d497e
|
@ -1,5 +1,8 @@
|
||||||
build_config.mk
|
build_config.mk
|
||||||
*.a
|
*.a
|
||||||
*.o
|
*.o
|
||||||
|
*.dylib*
|
||||||
|
*.so
|
||||||
|
*.so.*
|
||||||
*_test
|
*_test
|
||||||
db_bench
|
db_bench
|
||||||
|
|
14
Makefile
14
Makefile
|
@ -17,8 +17,8 @@ OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode)
|
||||||
#-----------------------------------------------
|
#-----------------------------------------------
|
||||||
|
|
||||||
# detect what platform we're building on
|
# detect what platform we're building on
|
||||||
$(shell sh ./build_detect_platform)
|
$(shell ./build_detect_platform build_config.mk)
|
||||||
# this file is generated by build_detect_platform to set build flags and sources
|
# this file is generated by the previous line to set build flags and sources
|
||||||
include build_config.mk
|
include build_config.mk
|
||||||
|
|
||||||
CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
|
CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
|
||||||
|
@ -34,6 +34,7 @@ TESTHARNESS = ./util/testharness.o $(TESTUTIL)
|
||||||
|
|
||||||
TESTS = \
|
TESTS = \
|
||||||
arena_test \
|
arena_test \
|
||||||
|
bloom_test \
|
||||||
c_test \
|
c_test \
|
||||||
cache_test \
|
cache_test \
|
||||||
coding_test \
|
coding_test \
|
||||||
|
@ -43,6 +44,7 @@ TESTS = \
|
||||||
dbformat_test \
|
dbformat_test \
|
||||||
env_test \
|
env_test \
|
||||||
filename_test \
|
filename_test \
|
||||||
|
filter_block_test \
|
||||||
log_test \
|
log_test \
|
||||||
memenv_test \
|
memenv_test \
|
||||||
skiplist_test \
|
skiplist_test \
|
||||||
|
@ -63,7 +65,7 @@ default: all
|
||||||
ifneq ($(PLATFORM_SHARED_EXT),)
|
ifneq ($(PLATFORM_SHARED_EXT),)
|
||||||
# Update db.h if you change these.
|
# Update db.h if you change these.
|
||||||
SHARED_MAJOR = 1
|
SHARED_MAJOR = 1
|
||||||
SHARED_MINOR = 3
|
SHARED_MINOR = 4
|
||||||
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
|
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
|
||||||
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
|
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
|
||||||
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
|
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
|
||||||
|
@ -101,6 +103,9 @@ db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
|
||||||
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
$(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
|
$(CXX) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
$(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
@ -128,6 +133,9 @@ env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
|
$(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
#
|
#
|
||||||
# Detects OS we're compiling on and generates build_config.mk,
|
# Detects OS we're compiling on and outputs a file specified by the first
|
||||||
# which in turn gets read while processing Makefile.
|
# argument, which in turn gets read while processing Makefile.
|
||||||
#
|
#
|
||||||
# build_config.mk will set the following variables:
|
# The output will set the following variables:
|
||||||
# PLATFORM_LDFLAGS Linker flags
|
# PLATFORM_LDFLAGS Linker flags
|
||||||
# PLATFORM_SHARED_EXT Extension for shared libraries
|
# PLATFORM_SHARED_EXT Extension for shared libraries
|
||||||
# PLATFORM_SHARED_LDFLAGS Flags for building shared library
|
# PLATFORM_SHARED_LDFLAGS Flags for building shared library
|
||||||
|
@ -13,11 +13,15 @@
|
||||||
# -DLEVELDB_PLATFORM_POSIX if cstdatomic is present
|
# -DLEVELDB_PLATFORM_POSIX if cstdatomic is present
|
||||||
# -DLEVELDB_PLATFORM_NOATOMIC if it is not
|
# -DLEVELDB_PLATFORM_NOATOMIC if it is not
|
||||||
|
|
||||||
SCRIPT_DIR=`dirname $0`
|
OUTPUT=$1
|
||||||
|
if test -z "$OUTPUT"; then
|
||||||
|
echo "usage: $0 <output-filename>"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# Delete existing build_config.mk
|
# Delete existing output, if it exists
|
||||||
rm -f build_config.mk
|
rm -f $OUTPUT
|
||||||
touch build_config.mk
|
touch $OUTPUT
|
||||||
|
|
||||||
if test -z "$CXX"; then
|
if test -z "$CXX"; then
|
||||||
CXX=g++
|
CXX=g++
|
||||||
|
@ -96,7 +100,7 @@ esac
|
||||||
# except for the test and benchmark files. By default, find will output a list
|
# except for the test and benchmark files. By default, find will output a list
|
||||||
# of all files matching either rule, so we need to append -print to make the
|
# of all files matching either rule, so we need to append -print to make the
|
||||||
# prune take effect.
|
# prune take effect.
|
||||||
DIRS="$SCRIPT_DIR/util $SCRIPT_DIR/db $SCRIPT_DIR/table"
|
DIRS="util db table"
|
||||||
set -f # temporarily disable globbing so that our patterns aren't expanded
|
set -f # temporarily disable globbing so that our patterns aren't expanded
|
||||||
PRUNE_TEST="-name *test*.cc -prune"
|
PRUNE_TEST="-name *test*.cc -prune"
|
||||||
PRUNE_BENCH="-name *_bench.cc -prune"
|
PRUNE_BENCH="-name *_bench.cc -prune"
|
||||||
|
@ -105,8 +109,8 @@ set +f # re-enable globbing
|
||||||
|
|
||||||
# The sources consist of the portable files, plus the platform-specific port
|
# The sources consist of the portable files, plus the platform-specific port
|
||||||
# file.
|
# file.
|
||||||
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> build_config.mk
|
echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> $OUTPUT
|
||||||
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> build_config.mk
|
echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT
|
||||||
|
|
||||||
if [ "$PLATFORM" = "OS_ANDROID_CROSSCOMPILE" ]; then
|
if [ "$PLATFORM" = "OS_ANDROID_CROSSCOMPILE" ]; then
|
||||||
# Cross-compiling; do not try any compilation tests.
|
# Cross-compiling; do not try any compilation tests.
|
||||||
|
@ -147,10 +151,10 @@ fi
|
||||||
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
|
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
|
||||||
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
|
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
|
||||||
|
|
||||||
echo "PLATFORM=$PLATFORM" >> build_config.mk
|
echo "PLATFORM=$PLATFORM" >> $OUTPUT
|
||||||
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> build_config.mk
|
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> $OUTPUT
|
||||||
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> build_config.mk
|
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> $OUTPUT
|
||||||
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> build_config.mk
|
echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> $OUTPUT
|
||||||
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> build_config.mk
|
echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT
|
||||||
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> build_config.mk
|
echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT
|
||||||
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> build_config.mk
|
echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT
|
||||||
|
|
110
db/c.cc
110
db/c.cc
|
@ -10,6 +10,7 @@
|
||||||
#include "leveldb/comparator.h"
|
#include "leveldb/comparator.h"
|
||||||
#include "leveldb/db.h"
|
#include "leveldb/db.h"
|
||||||
#include "leveldb/env.h"
|
#include "leveldb/env.h"
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
#include "leveldb/iterator.h"
|
#include "leveldb/iterator.h"
|
||||||
#include "leveldb/options.h"
|
#include "leveldb/options.h"
|
||||||
#include "leveldb/status.h"
|
#include "leveldb/status.h"
|
||||||
|
@ -21,8 +22,10 @@ using leveldb::CompressionType;
|
||||||
using leveldb::DB;
|
using leveldb::DB;
|
||||||
using leveldb::Env;
|
using leveldb::Env;
|
||||||
using leveldb::FileLock;
|
using leveldb::FileLock;
|
||||||
|
using leveldb::FilterPolicy;
|
||||||
using leveldb::Iterator;
|
using leveldb::Iterator;
|
||||||
using leveldb::Logger;
|
using leveldb::Logger;
|
||||||
|
using leveldb::NewBloomFilterPolicy;
|
||||||
using leveldb::NewLRUCache;
|
using leveldb::NewLRUCache;
|
||||||
using leveldb::Options;
|
using leveldb::Options;
|
||||||
using leveldb::RandomAccessFile;
|
using leveldb::RandomAccessFile;
|
||||||
|
@ -78,6 +81,47 @@ struct leveldb_comparator_t : public Comparator {
|
||||||
virtual void FindShortSuccessor(std::string* key) const { }
|
virtual void FindShortSuccessor(std::string* key) const { }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||||
|
void* state_;
|
||||||
|
void (*destructor_)(void*);
|
||||||
|
const char* (*name_)(void*);
|
||||||
|
char* (*create_)(
|
||||||
|
void*,
|
||||||
|
const char* const* key_array, const size_t* key_length_array,
|
||||||
|
int num_keys,
|
||||||
|
size_t* filter_length);
|
||||||
|
unsigned char (*key_match_)(
|
||||||
|
void*,
|
||||||
|
const char* key, size_t length,
|
||||||
|
const char* filter, size_t filter_length);
|
||||||
|
|
||||||
|
virtual ~leveldb_filterpolicy_t() {
|
||||||
|
(*destructor_)(state_);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual const char* Name() const {
|
||||||
|
return (*name_)(state_);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||||
|
std::vector<const char*> key_pointers(n);
|
||||||
|
std::vector<size_t> key_sizes(n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
key_pointers[i] = keys[i].data();
|
||||||
|
key_sizes[i] = keys[i].size();
|
||||||
|
}
|
||||||
|
size_t len;
|
||||||
|
char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
|
||||||
|
dst->append(filter, len);
|
||||||
|
free(filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||||
|
return (*key_match_)(state_, key.data(), key.size(),
|
||||||
|
filter.data(), filter.size());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct leveldb_env_t {
|
struct leveldb_env_t {
|
||||||
Env* rep;
|
Env* rep;
|
||||||
bool is_default;
|
bool is_default;
|
||||||
|
@ -218,6 +262,17 @@ void leveldb_approximate_sizes(
|
||||||
delete[] ranges;
|
delete[] ranges;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C binding for DB::CompactRange().  A NULL start_key or limit_key is
// forwarded as a NULL Slice pointer for that end of the range.
void leveldb_compact_range(
    leveldb_t* db,
    const char* start_key, size_t start_key_len,
    const char* limit_key, size_t limit_key_len) {
  Slice start_slice;
  Slice limit_slice;
  const Slice* begin = NULL;
  const Slice* end = NULL;
  if (start_key) {
    start_slice = Slice(start_key, start_key_len);
    begin = &start_slice;
  }
  if (limit_key) {
    limit_slice = Slice(limit_key, limit_key_len);
    end = &limit_slice;
  }
  db->rep->CompactRange(begin, end);
}
|
||||||
|
|
||||||
void leveldb_destroy_db(
|
void leveldb_destroy_db(
|
||||||
const leveldb_options_t* options,
|
const leveldb_options_t* options,
|
||||||
const char* name,
|
const char* name,
|
||||||
|
@ -340,6 +395,12 @@ void leveldb_options_set_comparator(
|
||||||
opt->rep.comparator = cmp;
|
opt->rep.comparator = cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores `policy` as the filter_policy of the options wrapped by `opt`.
// The pointer is stored as given, with no check or copy.
void leveldb_options_set_filter_policy(leveldb_options_t* opt,
                                       leveldb_filterpolicy_t* policy) {
  opt->rep.filter_policy = policy;
}
|
||||||
|
|
||||||
void leveldb_options_set_create_if_missing(
|
void leveldb_options_set_create_if_missing(
|
||||||
leveldb_options_t* opt, unsigned char v) {
|
leveldb_options_t* opt, unsigned char v) {
|
||||||
opt->rep.create_if_missing = v;
|
opt->rep.create_if_missing = v;
|
||||||
|
@ -407,6 +468,55 @@ void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
|
||||||
delete cmp;
|
delete cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allocates a leveldb_filterpolicy_t whose behavior is supplied by the given
// C callbacks.  `state` is stored and passed back as the first argument of
// every callback.  Release the result with leveldb_filterpolicy_destroy().
leveldb_filterpolicy_t* leveldb_filterpolicy_create(
    void* state,
    void (*destructor)(void*),
    char* (*create_filter)(
        void*,
        const char* const* key_array, const size_t* key_length_array,
        int num_keys,
        size_t* filter_length),
    unsigned char (*key_may_match)(
        void*,
        const char* key, size_t length,
        const char* filter, size_t filter_length),
    const char* (*name)(void*)) {
  leveldb_filterpolicy_t* policy = new leveldb_filterpolicy_t;
  policy->state_ = state;
  policy->name_ = name;
  policy->create_ = create_filter;
  policy->key_match_ = key_may_match;
  policy->destructor_ = destructor;
  return policy;
}
|
||||||
|
|
||||||
|
// Deletes a filter policy object through its leveldb_filterpolicy_t pointer.
void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) {
  delete filter;
}
|
||||||
|
|
||||||
|
// Returns a filter policy backed by NewBloomFilterPolicy(bits_per_key).
leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
  // Subclass leveldb_filterpolicy_t and override each of its methods so the
  // work is forwarded to a NewBloomFilterPolicy() instance rather than to
  // user-supplied C functions.
  struct BloomAdapter : public leveldb_filterpolicy_t {
    const FilterPolicy* rep_;

    static void DoNothing(void*) { }

    ~BloomAdapter() { delete rep_; }

    const char* Name() const { return rep_->Name(); }

    void CreateFilter(const Slice* keys, int n, std::string* dst) const {
      rep_->CreateFilter(keys, n, dst);
    }

    bool KeyMayMatch(const Slice& key, const Slice& filter) const {
      return rep_->KeyMayMatch(key, filter);
    }
  };

  BloomAdapter* adapter = new BloomAdapter;
  // The base-class destructor still invokes (*destructor_)(state_), so give
  // it a callback that does nothing.
  adapter->state_ = NULL;
  adapter->destructor_ = &BloomAdapter::DoNothing;
  adapter->rep_ = NewBloomFilterPolicy(bits_per_key);
  return adapter;
}
|
||||||
|
|
||||||
leveldb_readoptions_t* leveldb_readoptions_create() {
|
leveldb_readoptions_t* leveldb_readoptions_create() {
|
||||||
return new leveldb_readoptions_t;
|
return new leveldb_readoptions_t;
|
||||||
}
|
}
|
||||||
|
|
77
db/c_test.c
77
db/c_test.c
|
@ -122,6 +122,31 @@ static const char* CmpName(void* arg) {
|
||||||
return "foo";
|
return "foo";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Custom filter policy

// Value returned by FilterKeyMatch(); the "filter" phase sets it to 0 or 1.
static unsigned char fake_filter_result = 1;

// Destructor callback: nothing to release.
static void FilterDestroy(void* arg) {
}

// Name callback: always reports the same fixed policy name.
static const char* FilterName(void* arg) {
  return "TestFilter";
}
|
||||||
|
// Create callback: ignores the keys and always returns a freshly malloc'ed
// 4-byte filter containing "fake" (no terminating NUL).
static char* FilterCreate(
    void* arg,
    const char* const* key_array, const size_t* key_length_array,
    int num_keys,
    size_t* filter_length) {
  char* buffer = (char*) malloc(4);
  memcpy(buffer, "fake", 4);
  *filter_length = 4;
  return buffer;
}
|
||||||
|
unsigned char FilterKeyMatch(
|
||||||
|
void* arg,
|
||||||
|
const char* key, size_t length,
|
||||||
|
const char* filter, size_t filter_length) {
|
||||||
|
CheckCondition(filter_length == 4);
|
||||||
|
CheckCondition(memcmp(filter, "fake", 4) == 0);
|
||||||
|
return fake_filter_result;
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
leveldb_t* db;
|
leveldb_t* db;
|
||||||
leveldb_comparator_t* cmp;
|
leveldb_comparator_t* cmp;
|
||||||
|
@ -131,6 +156,7 @@ int main(int argc, char** argv) {
|
||||||
leveldb_readoptions_t* roptions;
|
leveldb_readoptions_t* roptions;
|
||||||
leveldb_writeoptions_t* woptions;
|
leveldb_writeoptions_t* woptions;
|
||||||
char* err = NULL;
|
char* err = NULL;
|
||||||
|
int run = -1;
|
||||||
|
|
||||||
snprintf(dbname, sizeof(dbname), "/tmp/leveldb_c_test-%d",
|
snprintf(dbname, sizeof(dbname), "/tmp/leveldb_c_test-%d",
|
||||||
((int) geteuid()));
|
((int) geteuid()));
|
||||||
|
@ -180,6 +206,14 @@ int main(int argc, char** argv) {
|
||||||
CheckNoError(err);
|
CheckNoError(err);
|
||||||
CheckGet(db, roptions, "foo", "hello");
|
CheckGet(db, roptions, "foo", "hello");
|
||||||
|
|
||||||
|
StartPhase("compactall");
|
||||||
|
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||||
|
CheckGet(db, roptions, "foo", "hello");
|
||||||
|
|
||||||
|
StartPhase("compactrange");
|
||||||
|
leveldb_compact_range(db, "a", 1, "z", 1);
|
||||||
|
CheckGet(db, roptions, "foo", "hello");
|
||||||
|
|
||||||
StartPhase("writebatch");
|
StartPhase("writebatch");
|
||||||
{
|
{
|
||||||
leveldb_writebatch_t* wb = leveldb_writebatch_create();
|
leveldb_writebatch_t* wb = leveldb_writebatch_create();
|
||||||
|
@ -279,6 +313,49 @@ int main(int argc, char** argv) {
|
||||||
CheckGet(db, roptions, "foo", NULL);
|
CheckGet(db, roptions, "foo", NULL);
|
||||||
CheckGet(db, roptions, "bar", NULL);
|
CheckGet(db, roptions, "bar", NULL);
|
||||||
CheckGet(db, roptions, "box", "c");
|
CheckGet(db, roptions, "box", "c");
|
||||||
|
leveldb_options_set_create_if_missing(options, 1);
|
||||||
|
leveldb_options_set_error_if_exists(options, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
StartPhase("filter");
|
||||||
|
for (run = 0; run < 2; run++) {
|
||||||
|
// First run uses custom filter, second run uses bloom filter
|
||||||
|
CheckNoError(err);
|
||||||
|
leveldb_filterpolicy_t* policy;
|
||||||
|
if (run == 0) {
|
||||||
|
policy = leveldb_filterpolicy_create(
|
||||||
|
NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName);
|
||||||
|
} else {
|
||||||
|
policy = leveldb_filterpolicy_create_bloom(10);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create new database
|
||||||
|
leveldb_close(db);
|
||||||
|
leveldb_destroy_db(options, dbname, &err);
|
||||||
|
leveldb_options_set_filter_policy(options, policy);
|
||||||
|
db = leveldb_open(options, dbname, &err);
|
||||||
|
CheckNoError(err);
|
||||||
|
leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
|
||||||
|
CheckNoError(err);
|
||||||
|
leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
|
||||||
|
CheckNoError(err);
|
||||||
|
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||||
|
|
||||||
|
fake_filter_result = 1;
|
||||||
|
CheckGet(db, roptions, "foo", "foovalue");
|
||||||
|
CheckGet(db, roptions, "bar", "barvalue");
|
||||||
|
if (run == 0) {
  // Only the first run installs the custom filter, whose answer is
  // fake_filter_result, so only that run can be made to report "no match".
  // Must not find value when custom filter returns false
  fake_filter_result = 0;
  CheckGet(db, roptions, "foo", NULL);
  CheckGet(db, roptions, "bar", NULL);
  fake_filter_result = 1;

  CheckGet(db, roptions, "foo", "foovalue");
  CheckGet(db, roptions, "bar", "barvalue");
}
|
||||||
|
leveldb_options_set_filter_policy(options, NULL);
|
||||||
|
leveldb_filterpolicy_destroy(policy);
|
||||||
}
|
}
|
||||||
|
|
||||||
StartPhase("cleanup");
|
StartPhase("cleanup");
|
||||||
|
|
|
@ -25,15 +25,20 @@
|
||||||
// overwrite -- overwrite N values in random key order in async mode
|
// overwrite -- overwrite N values in random key order in async mode
|
||||||
// fillsync -- write N/100 values in random key order in sync mode
|
// fillsync -- write N/100 values in random key order in sync mode
|
||||||
// fill100K -- write N/1000 100K values in random order in async mode
|
// fill100K -- write N/1000 100K values in random order in async mode
|
||||||
|
// deleteseq -- delete N keys in sequential order
|
||||||
|
// deleterandom -- delete N keys in random order
|
||||||
// readseq -- read N times sequentially
|
// readseq -- read N times sequentially
|
||||||
// readreverse -- read N times in reverse order
|
// readreverse -- read N times in reverse order
|
||||||
// readrandom -- read N times in random order
|
// readrandom -- read N times in random order
|
||||||
|
// readmissing -- read N missing keys in random order
|
||||||
// readhot -- read N times in random order from 1% section of DB
|
// readhot -- read N times in random order from 1% section of DB
|
||||||
|
// seekrandom -- N random seeks
|
||||||
// crc32c -- repeated crc32c of 4K of data
|
// crc32c -- repeated crc32c of 4K of data
|
||||||
// acquireload -- load N*1000 times
|
// acquireload -- load N*1000 times
|
||||||
// Meta operations:
|
// Meta operations:
|
||||||
// compact -- Compact the entire DB
|
// compact -- Compact the entire DB
|
||||||
// stats -- Print DB stats
|
// stats -- Print DB stats
|
||||||
|
// sstables -- Print sstable info
|
||||||
// heapprofile -- Dump a heap profile (if supported by this port)
|
// heapprofile -- Dump a heap profile (if supported by this port)
|
||||||
static const char* FLAGS_benchmarks =
|
static const char* FLAGS_benchmarks =
|
||||||
"fillseq,"
|
"fillseq,"
|
||||||
|
@ -85,6 +90,10 @@ static int FLAGS_cache_size = -1;
|
||||||
// Maximum number of files to keep open at the same time (use default if == 0)
|
// Maximum number of files to keep open at the same time (use default if == 0)
|
||||||
static int FLAGS_open_files = 0;
|
static int FLAGS_open_files = 0;
|
||||||
|
|
||||||
|
// Bloom filter bits per key.
|
||||||
|
// Negative means use default settings.
|
||||||
|
static int FLAGS_bloom_bits = -1;
|
||||||
|
|
||||||
// If true, do not destroy the existing database. If you set this
|
// If true, do not destroy the existing database. If you set this
|
||||||
// flag and also specify a benchmark that wants a fresh database, that
|
// flag and also specify a benchmark that wants a fresh database, that
|
||||||
// benchmark will fail.
|
// benchmark will fail.
|
||||||
|
@ -293,6 +302,7 @@ struct ThreadState {
|
||||||
class Benchmark {
|
class Benchmark {
|
||||||
private:
|
private:
|
||||||
Cache* cache_;
|
Cache* cache_;
|
||||||
|
const FilterPolicy* filter_policy_;
|
||||||
DB* db_;
|
DB* db_;
|
||||||
int num_;
|
int num_;
|
||||||
int value_size_;
|
int value_size_;
|
||||||
|
@ -378,6 +388,9 @@ class Benchmark {
|
||||||
public:
|
public:
|
||||||
Benchmark()
|
Benchmark()
|
||||||
: cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
|
: cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
|
||||||
|
filter_policy_(FLAGS_bloom_bits >= 0
|
||||||
|
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
||||||
|
: NULL),
|
||||||
db_(NULL),
|
db_(NULL),
|
||||||
num_(FLAGS_num),
|
num_(FLAGS_num),
|
||||||
value_size_(FLAGS_value_size),
|
value_size_(FLAGS_value_size),
|
||||||
|
@ -399,6 +412,7 @@ class Benchmark {
|
||||||
~Benchmark() {
|
~Benchmark() {
|
||||||
delete db_;
|
delete db_;
|
||||||
delete cache_;
|
delete cache_;
|
||||||
|
delete filter_policy_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Run() {
|
void Run() {
|
||||||
|
@ -457,11 +471,19 @@ class Benchmark {
|
||||||
method = &Benchmark::ReadReverse;
|
method = &Benchmark::ReadReverse;
|
||||||
} else if (name == Slice("readrandom")) {
|
} else if (name == Slice("readrandom")) {
|
||||||
method = &Benchmark::ReadRandom;
|
method = &Benchmark::ReadRandom;
|
||||||
|
} else if (name == Slice("readmissing")) {
|
||||||
|
method = &Benchmark::ReadMissing;
|
||||||
|
} else if (name == Slice("seekrandom")) {
|
||||||
|
method = &Benchmark::SeekRandom;
|
||||||
} else if (name == Slice("readhot")) {
|
} else if (name == Slice("readhot")) {
|
||||||
method = &Benchmark::ReadHot;
|
method = &Benchmark::ReadHot;
|
||||||
} else if (name == Slice("readrandomsmall")) {
|
} else if (name == Slice("readrandomsmall")) {
|
||||||
reads_ /= 1000;
|
reads_ /= 1000;
|
||||||
method = &Benchmark::ReadRandom;
|
method = &Benchmark::ReadRandom;
|
||||||
|
} else if (name == Slice("deleteseq")) {
|
||||||
|
method = &Benchmark::DeleteSeq;
|
||||||
|
} else if (name == Slice("deleterandom")) {
|
||||||
|
method = &Benchmark::DeleteRandom;
|
||||||
} else if (name == Slice("readwhilewriting")) {
|
} else if (name == Slice("readwhilewriting")) {
|
||||||
num_threads++; // Add extra thread for writing
|
num_threads++; // Add extra thread for writing
|
||||||
method = &Benchmark::ReadWhileWriting;
|
method = &Benchmark::ReadWhileWriting;
|
||||||
|
@ -478,7 +500,9 @@ class Benchmark {
|
||||||
} else if (name == Slice("heapprofile")) {
|
} else if (name == Slice("heapprofile")) {
|
||||||
HeapProfile();
|
HeapProfile();
|
||||||
} else if (name == Slice("stats")) {
|
} else if (name == Slice("stats")) {
|
||||||
PrintStats();
|
PrintStats("leveldb.stats");
|
||||||
|
} else if (name == Slice("sstables")) {
|
||||||
|
PrintStats("leveldb.sstables");
|
||||||
} else {
|
} else {
|
||||||
if (name != Slice()) { // No error message for empty name
|
if (name != Slice()) { // No error message for empty name
|
||||||
fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
|
fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
|
||||||
|
@ -669,6 +693,7 @@ class Benchmark {
|
||||||
options.create_if_missing = !FLAGS_use_existing_db;
|
options.create_if_missing = !FLAGS_use_existing_db;
|
||||||
options.block_cache = cache_;
|
options.block_cache = cache_;
|
||||||
options.write_buffer_size = FLAGS_write_buffer_size;
|
options.write_buffer_size = FLAGS_write_buffer_size;
|
||||||
|
options.filter_policy = filter_policy_;
|
||||||
Status s = DB::Open(options, FLAGS_db, &db_);
|
Status s = DB::Open(options, FLAGS_db, &db_);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
|
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
|
||||||
|
@ -743,10 +768,28 @@ class Benchmark {
|
||||||
void ReadRandom(ThreadState* thread) {
|
void ReadRandom(ThreadState* thread) {
|
||||||
ReadOptions options;
|
ReadOptions options;
|
||||||
std::string value;
|
std::string value;
|
||||||
|
int found = 0;
|
||||||
for (int i = 0; i < reads_; i++) {
|
for (int i = 0; i < reads_; i++) {
|
||||||
char key[100];
|
char key[100];
|
||||||
const int k = thread->rand.Next() % FLAGS_num;
|
const int k = thread->rand.Next() % FLAGS_num;
|
||||||
snprintf(key, sizeof(key), "%016d", k);
|
snprintf(key, sizeof(key), "%016d", k);
|
||||||
|
if (db_->Get(options, key, &value).ok()) {
|
||||||
|
found++;
|
||||||
|
}
|
||||||
|
thread->stats.FinishedSingleOp();
|
||||||
|
}
|
||||||
|
char msg[100];
|
||||||
|
snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
|
||||||
|
thread->stats.AddMessage(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReadMissing(ThreadState* thread) {
|
||||||
|
ReadOptions options;
|
||||||
|
std::string value;
|
||||||
|
for (int i = 0; i < reads_; i++) {
|
||||||
|
char key[100];
|
||||||
|
const int k = thread->rand.Next() % FLAGS_num;
|
||||||
|
snprintf(key, sizeof(key), "%016d.", k);
|
||||||
db_->Get(options, key, &value);
|
db_->Get(options, key, &value);
|
||||||
thread->stats.FinishedSingleOp();
|
thread->stats.FinishedSingleOp();
|
||||||
}
|
}
|
||||||
|
@ -765,6 +808,54 @@ class Benchmark {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Benchmark body: performs reads_ seeks to randomly chosen keys, using a new
// iterator for each seek, and records how many seeks landed exactly on the
// requested key.
void SeekRandom(ThreadState* thread) {
  ReadOptions options;
  std::string value;
  int hits = 0;
  for (int n = 0; n < reads_; n++) {
    Iterator* it = db_->NewIterator(options);
    char target[100];
    const int k = thread->rand.Next() % FLAGS_num;
    snprintf(target, sizeof(target), "%016d", k);
    it->Seek(target);
    if (it->Valid() && it->key() == target) {
      hits++;
    }
    delete it;
    thread->stats.FinishedSingleOp();
  }
  char summary[100];
  snprintf(summary, sizeof(summary), "(%d of %d found)", hits, num_);
  thread->stats.AddMessage(summary);
}
|
||||||
|
|
||||||
|
// Shared body of the delete benchmarks.  Issues deletes in batches of
// entries_per_batch_ until at least num_ keys have been covered.  When `seq`
// is true the keys are i+j in increasing order; otherwise each key is drawn
// at random from [0, FLAGS_num).  A failed write prints the error and exits.
void DoDelete(ThreadState* thread, bool seq) {
  WriteBatch batch;
  Status s;
  for (int i = 0; i < num_; i += entries_per_batch_) {
    batch.Clear();
    for (int j = 0; j < entries_per_batch_; j++) {
      const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
      char key[100];
      snprintf(key, sizeof(key), "%016d", k);
      batch.Delete(key);
      thread->stats.FinishedSingleOp();
    }
    s = db_->Write(write_options_, &batch);
    if (!s.ok()) {
      fprintf(stderr, "del error: %s\n", s.ToString().c_str());
      exit(1);
    }
  }
}
|
||||||
|
|
||||||
|
// "deleteseq" benchmark: runs DoDelete() in sequential-key mode.
void DeleteSeq(ThreadState* thread) {
  const bool sequential = true;
  DoDelete(thread, sequential);
}
|
||||||
|
|
||||||
|
// "deleterandom" benchmark: runs DoDelete() in random-key mode.
void DeleteRandom(ThreadState* thread) {
  const bool sequential = false;
  DoDelete(thread, sequential);
}
|
||||||
|
|
||||||
void ReadWhileWriting(ThreadState* thread) {
|
void ReadWhileWriting(ThreadState* thread) {
|
||||||
if (thread->tid > 0) {
|
if (thread->tid > 0) {
|
||||||
ReadRandom(thread);
|
ReadRandom(thread);
|
||||||
|
@ -799,9 +890,9 @@ class Benchmark {
|
||||||
db_->CompactRange(NULL, NULL);
|
db_->CompactRange(NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrintStats() {
|
void PrintStats(const char* key) {
|
||||||
std::string stats;
|
std::string stats;
|
||||||
if (!db_->GetProperty("leveldb.stats", &stats)) {
|
if (!db_->GetProperty(key, &stats)) {
|
||||||
stats = "(failed)";
|
stats = "(failed)";
|
||||||
}
|
}
|
||||||
fprintf(stdout, "\n%s\n", stats.c_str());
|
fprintf(stdout, "\n%s\n", stats.c_str());
|
||||||
|
@ -861,6 +952,8 @@ int main(int argc, char** argv) {
|
||||||
FLAGS_write_buffer_size = n;
|
FLAGS_write_buffer_size = n;
|
||||||
} else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
|
} else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
|
||||||
FLAGS_cache_size = n;
|
FLAGS_cache_size = n;
|
||||||
|
} else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
|
||||||
|
FLAGS_bloom_bits = n;
|
||||||
} else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
|
} else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
|
||||||
FLAGS_open_files = n;
|
FLAGS_open_files = n;
|
||||||
} else if (strncmp(argv[i], "--db=", 5) == 0) {
|
} else if (strncmp(argv[i], "--db=", 5) == 0) {
|
||||||
|
|
|
@ -87,12 +87,14 @@ static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
||||||
}
|
}
|
||||||
Options SanitizeOptions(const std::string& dbname,
|
Options SanitizeOptions(const std::string& dbname,
|
||||||
const InternalKeyComparator* icmp,
|
const InternalKeyComparator* icmp,
|
||||||
|
const InternalFilterPolicy* ipolicy,
|
||||||
const Options& src) {
|
const Options& src) {
|
||||||
Options result = src;
|
Options result = src;
|
||||||
result.comparator = icmp;
|
result.comparator = icmp;
|
||||||
ClipToRange(&result.max_open_files, 20, 50000);
|
result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
|
||||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
ClipToRange(&result.max_open_files, 20, 50000);
|
||||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||||
|
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||||
if (result.info_log == NULL) {
|
if (result.info_log == NULL) {
|
||||||
// Open a log file in the same directory as the db
|
// Open a log file in the same directory as the db
|
||||||
src.env->CreateDir(dbname); // In case it does not exist
|
src.env->CreateDir(dbname); // In case it does not exist
|
||||||
|
@ -112,7 +114,9 @@ Options SanitizeOptions(const std::string& dbname,
|
||||||
DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
||||||
: env_(options.env),
|
: env_(options.env),
|
||||||
internal_comparator_(options.comparator),
|
internal_comparator_(options.comparator),
|
||||||
options_(SanitizeOptions(dbname, &internal_comparator_, options)),
|
internal_filter_policy_(options.filter_policy),
|
||||||
|
options_(SanitizeOptions(
|
||||||
|
dbname, &internal_comparator_, &internal_filter_policy_, options)),
|
||||||
owns_info_log_(options_.info_log != options.info_log),
|
owns_info_log_(options_.info_log != options.info_log),
|
||||||
owns_cache_(options_.block_cache != options.block_cache),
|
owns_cache_(options_.block_cache != options.block_cache),
|
||||||
dbname_(dbname),
|
dbname_(dbname),
|
||||||
|
|
|
@ -105,6 +105,7 @@ class DBImpl : public DB {
|
||||||
// Constant after construction
|
// Constant after construction
|
||||||
Env* const env_;
|
Env* const env_;
|
||||||
const InternalKeyComparator internal_comparator_;
|
const InternalKeyComparator internal_comparator_;
|
||||||
|
const InternalFilterPolicy internal_filter_policy_;
|
||||||
const Options options_; // options_.comparator == &internal_comparator_
|
const Options options_; // options_.comparator == &internal_comparator_
|
||||||
bool owns_info_log_;
|
bool owns_info_log_;
|
||||||
bool owns_cache_;
|
bool owns_cache_;
|
||||||
|
@ -185,6 +186,7 @@ class DBImpl : public DB {
|
||||||
// it is not equal to src.info_log.
|
// it is not equal to src.info_log.
|
||||||
extern Options SanitizeOptions(const std::string& db,
|
extern Options SanitizeOptions(const std::string& db,
|
||||||
const InternalKeyComparator* icmp,
|
const InternalKeyComparator* icmp,
|
||||||
|
const InternalFilterPolicy* ipolicy,
|
||||||
const Options& src);
|
const Options& src);
|
||||||
|
|
||||||
} // namespace leveldb
|
} // namespace leveldb
|
||||||
|
|
1034
db/db_test.cc
1034
db/db_test.cc
File diff suppressed because it is too large
Load Diff
|
@ -98,6 +98,26 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char* InternalFilterPolicy::Name() const {
|
||||||
|
return user_policy_->Name();
|
||||||
|
}
|
||||||
|
|
||||||
|
void InternalFilterPolicy::CreateFilter(const Slice* keys, int n,
|
||||||
|
std::string* dst) const {
|
||||||
|
// We rely on the fact that the code in table.cc does not mind us
|
||||||
|
// adjusting keys[].
|
||||||
|
Slice* mkey = const_cast<Slice*>(keys);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
mkey[i] = ExtractUserKey(keys[i]);
|
||||||
|
// TODO(sanjay): Suppress dups?
|
||||||
|
}
|
||||||
|
user_policy_->CreateFilter(keys, n, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const {
|
||||||
|
return user_policy_->KeyMayMatch(ExtractUserKey(key), f);
|
||||||
|
}
|
||||||
|
|
||||||
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
|
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
|
||||||
size_t usize = user_key.size();
|
size_t usize = user_key.size();
|
||||||
size_t needed = usize + 13; // A conservative estimate
|
size_t needed = usize + 13; // A conservative estimate
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "leveldb/comparator.h"
|
#include "leveldb/comparator.h"
|
||||||
#include "leveldb/db.h"
|
#include "leveldb/db.h"
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
#include "leveldb/slice.h"
|
#include "leveldb/slice.h"
|
||||||
#include "leveldb/table_builder.h"
|
#include "leveldb/table_builder.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
|
@ -123,6 +124,17 @@ class InternalKeyComparator : public Comparator {
|
||||||
int Compare(const InternalKey& a, const InternalKey& b) const;
|
int Compare(const InternalKey& a, const InternalKey& b) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Filter policy wrapper that converts from internal keys to user keys
|
||||||
|
class InternalFilterPolicy : public FilterPolicy {
|
||||||
|
private:
|
||||||
|
const FilterPolicy* const user_policy_;
|
||||||
|
public:
|
||||||
|
explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
|
||||||
|
virtual const char* Name() const;
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
|
||||||
|
};
|
||||||
|
|
||||||
// Modules in this directory should keep internal keys wrapped inside
|
// Modules in this directory should keep internal keys wrapped inside
|
||||||
// the following class instead of plain strings so that we do not
|
// the following class instead of plain strings so that we do not
|
||||||
// incorrectly use string comparisons instead of an InternalKeyComparator.
|
// incorrectly use string comparisons instead of an InternalKeyComparator.
|
||||||
|
|
|
@ -48,7 +48,8 @@ class Repairer {
|
||||||
: dbname_(dbname),
|
: dbname_(dbname),
|
||||||
env_(options.env),
|
env_(options.env),
|
||||||
icmp_(options.comparator),
|
icmp_(options.comparator),
|
||||||
options_(SanitizeOptions(dbname, &icmp_, options)),
|
ipolicy_(options.filter_policy),
|
||||||
|
options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)),
|
||||||
owns_info_log_(options_.info_log != options.info_log),
|
owns_info_log_(options_.info_log != options.info_log),
|
||||||
owns_cache_(options_.block_cache != options.block_cache),
|
owns_cache_(options_.block_cache != options.block_cache),
|
||||||
next_file_number_(1) {
|
next_file_number_(1) {
|
||||||
|
@ -99,6 +100,7 @@ class Repairer {
|
||||||
std::string const dbname_;
|
std::string const dbname_;
|
||||||
Env* const env_;
|
Env* const env_;
|
||||||
InternalKeyComparator const icmp_;
|
InternalKeyComparator const icmp_;
|
||||||
|
InternalFilterPolicy const ipolicy_;
|
||||||
Options const options_;
|
Options const options_;
|
||||||
bool owns_info_log_;
|
bool owns_info_log_;
|
||||||
bool owns_cache_;
|
bool owns_cache_;
|
||||||
|
|
|
@ -42,23 +42,18 @@ TableCache::~TableCache() {
|
||||||
delete cache_;
|
delete cache_;
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator* TableCache::NewIterator(const ReadOptions& options,
|
Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
|
||||||
uint64_t file_number,
|
Cache::Handle** handle) {
|
||||||
uint64_t file_size,
|
Status s;
|
||||||
Table** tableptr) {
|
|
||||||
if (tableptr != NULL) {
|
|
||||||
*tableptr = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
char buf[sizeof(file_number)];
|
char buf[sizeof(file_number)];
|
||||||
EncodeFixed64(buf, file_number);
|
EncodeFixed64(buf, file_number);
|
||||||
Slice key(buf, sizeof(buf));
|
Slice key(buf, sizeof(buf));
|
||||||
Cache::Handle* handle = cache_->Lookup(key);
|
*handle = cache_->Lookup(key);
|
||||||
if (handle == NULL) {
|
if (*handle == NULL) {
|
||||||
std::string fname = TableFileName(dbname_, file_number);
|
std::string fname = TableFileName(dbname_, file_number);
|
||||||
RandomAccessFile* file = NULL;
|
RandomAccessFile* file = NULL;
|
||||||
Table* table = NULL;
|
Table* table = NULL;
|
||||||
Status s = env_->NewRandomAccessFile(fname, &file);
|
s = env_->NewRandomAccessFile(fname, &file);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = Table::Open(*options_, file, file_size, &table);
|
s = Table::Open(*options_, file, file_size, &table);
|
||||||
}
|
}
|
||||||
|
@ -68,13 +63,28 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||||
delete file;
|
delete file;
|
||||||
// We do not cache error results so that if the error is transient,
|
// We do not cache error results so that if the error is transient,
|
||||||
// or somebody repairs the file, we recover automatically.
|
// or somebody repairs the file, we recover automatically.
|
||||||
return NewErrorIterator(s);
|
} else {
|
||||||
|
TableAndFile* tf = new TableAndFile;
|
||||||
|
tf->file = file;
|
||||||
|
tf->table = table;
|
||||||
|
*handle = cache_->Insert(key, tf, 1, &DeleteEntry);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
TableAndFile* tf = new TableAndFile;
|
Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||||
tf->file = file;
|
uint64_t file_number,
|
||||||
tf->table = table;
|
uint64_t file_size,
|
||||||
handle = cache_->Insert(key, tf, 1, &DeleteEntry);
|
Table** tableptr) {
|
||||||
|
if (tableptr != NULL) {
|
||||||
|
*tableptr = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
Cache::Handle* handle = NULL;
|
||||||
|
Status s = FindTable(file_number, file_size, &handle);
|
||||||
|
if (!s.ok()) {
|
||||||
|
return NewErrorIterator(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
Table* table = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
|
Table* table = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
|
||||||
|
@ -86,6 +96,22 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status TableCache::Get(const ReadOptions& options,
|
||||||
|
uint64_t file_number,
|
||||||
|
uint64_t file_size,
|
||||||
|
const Slice& k,
|
||||||
|
void* arg,
|
||||||
|
void (*saver)(void*, const Slice&, const Slice&)) {
|
||||||
|
Cache::Handle* handle = NULL;
|
||||||
|
Status s = FindTable(file_number, file_size, &handle);
|
||||||
|
if (s.ok()) {
|
||||||
|
Table* t = reinterpret_cast<TableAndFile*>(cache_->Value(handle))->table;
|
||||||
|
s = t->InternalGet(options, k, arg, saver);
|
||||||
|
cache_->Release(handle);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
void TableCache::Evict(uint64_t file_number) {
|
void TableCache::Evict(uint64_t file_number) {
|
||||||
char buf[sizeof(file_number)];
|
char buf[sizeof(file_number)];
|
||||||
EncodeFixed64(buf, file_number);
|
EncodeFixed64(buf, file_number);
|
||||||
|
|
|
@ -35,6 +35,15 @@ class TableCache {
|
||||||
uint64_t file_size,
|
uint64_t file_size,
|
||||||
Table** tableptr = NULL);
|
Table** tableptr = NULL);
|
||||||
|
|
||||||
|
// If a seek to internal key "k" in specified file finds an entry,
|
||||||
|
// call (*handle_result)(arg, found_key, found_value).
|
||||||
|
Status Get(const ReadOptions& options,
|
||||||
|
uint64_t file_number,
|
||||||
|
uint64_t file_size,
|
||||||
|
const Slice& k,
|
||||||
|
void* arg,
|
||||||
|
void (*handle_result)(void*, const Slice&, const Slice&));
|
||||||
|
|
||||||
// Evict any entry for the specified file number
|
// Evict any entry for the specified file number
|
||||||
void Evict(uint64_t file_number);
|
void Evict(uint64_t file_number);
|
||||||
|
|
||||||
|
@ -43,6 +52,8 @@ class TableCache {
|
||||||
const std::string dbname_;
|
const std::string dbname_;
|
||||||
const Options* options_;
|
const Options* options_;
|
||||||
Cache* cache_;
|
Cache* cache_;
|
||||||
|
|
||||||
|
Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace leveldb
|
} // namespace leveldb
|
||||||
|
|
|
@ -255,35 +255,34 @@ void Version::AddIterators(const ReadOptions& options,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If "*iter" points at a value or deletion for user_key, store
|
// Callback from TableCache::Get()
|
||||||
// either the value, or a NotFound error and return true.
|
namespace {
|
||||||
// Else return false.
|
enum SaverState {
|
||||||
static bool GetValue(const Comparator* cmp,
|
kNotFound,
|
||||||
Iterator* iter, const Slice& user_key,
|
kFound,
|
||||||
std::string* value,
|
kDeleted,
|
||||||
Status* s) {
|
kCorrupt,
|
||||||
if (!iter->Valid()) {
|
};
|
||||||
return false;
|
struct Saver {
|
||||||
}
|
SaverState state;
|
||||||
|
const Comparator* ucmp;
|
||||||
|
Slice user_key;
|
||||||
|
std::string* value;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
static void SaveValue(void* arg, const Slice& ikey, const Slice& v) {
|
||||||
|
Saver* s = reinterpret_cast<Saver*>(arg);
|
||||||
ParsedInternalKey parsed_key;
|
ParsedInternalKey parsed_key;
|
||||||
if (!ParseInternalKey(iter->key(), &parsed_key)) {
|
if (!ParseInternalKey(ikey, &parsed_key)) {
|
||||||
*s = Status::Corruption("corrupted key for ", user_key);
|
s->state = kCorrupt;
|
||||||
return true;
|
} else {
|
||||||
}
|
if (s->ucmp->Compare(parsed_key.user_key, s->user_key) == 0) {
|
||||||
if (cmp->Compare(parsed_key.user_key, user_key) != 0) {
|
s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
|
||||||
return false;
|
if (s->state == kFound) {
|
||||||
}
|
s->value->assign(v.data(), v.size());
|
||||||
switch (parsed_key.type) {
|
}
|
||||||
case kTypeDeletion:
|
|
||||||
*s = Status::NotFound(Slice()); // Use an empty error message for speed
|
|
||||||
break;
|
|
||||||
case kTypeValue: {
|
|
||||||
Slice v = iter->value();
|
|
||||||
value->assign(v.data(), v.size());
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
|
static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
|
||||||
|
@ -361,21 +360,27 @@ Status Version::Get(const ReadOptions& options,
|
||||||
last_file_read = f;
|
last_file_read = f;
|
||||||
last_file_read_level = level;
|
last_file_read_level = level;
|
||||||
|
|
||||||
Iterator* iter = vset_->table_cache_->NewIterator(
|
Saver saver;
|
||||||
options,
|
saver.state = kNotFound;
|
||||||
f->number,
|
saver.ucmp = ucmp;
|
||||||
f->file_size);
|
saver.user_key = user_key;
|
||||||
iter->Seek(ikey);
|
saver.value = value;
|
||||||
const bool done = GetValue(ucmp, iter, user_key, value, &s);
|
s = vset_->table_cache_->Get(options, f->number, f->file_size,
|
||||||
if (!iter->status().ok()) {
|
ikey, &saver, SaveValue);
|
||||||
s = iter->status();
|
if (!s.ok()) {
|
||||||
delete iter;
|
|
||||||
return s;
|
return s;
|
||||||
} else {
|
}
|
||||||
delete iter;
|
switch (saver.state) {
|
||||||
if (done) {
|
case kNotFound:
|
||||||
|
break; // Keep searching in other files
|
||||||
|
case kFound:
|
||||||
|
return s;
|
||||||
|
case kDeleted:
|
||||||
|
s = Status::NotFound(Slice()); // Use empty error message for speed
|
||||||
|
return s;
|
||||||
|
case kCorrupt:
|
||||||
|
s = Status::Corruption("corrupted key for ", user_key);
|
||||||
return s;
|
return s;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -400,6 +400,69 @@ We might want to prefix <code>filename</code> keys with one letter (say '/') and
|
||||||
over just the metadata do not force us to fetch and cache bulky file
|
over just the metadata do not force us to fetch and cache bulky file
|
||||||
contents.
|
contents.
|
||||||
<p>
|
<p>
|
||||||
|
<h2>Filters</h2>
|
||||||
|
<p>
|
||||||
|
Because of the way <code>leveldb</code> data is organized on disk,
|
||||||
|
a single <code>Get()</code> call may involve multiple reads from disk.
|
||||||
|
The optional <code>FilterPolicy</code> mechanism can be used to reduce
|
||||||
|
the number of disk reads substantially.
|
||||||
|
<pre>
|
||||||
|
leveldb::Options options;
|
||||||
|
options.filter_policy = NewBloomFilterPolicy(10);
|
||||||
|
leveldb::DB* db;
|
||||||
|
leveldb::DB::Open(options, "/tmp/testdb", &db);
|
||||||
|
... use the database ...
|
||||||
|
delete db;
|
||||||
|
delete options.filter_policy;
|
||||||
|
</pre>
|
||||||
|
The preceding code associates a
|
||||||
|
<a href="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</a>
|
||||||
|
based filtering policy with the database. Bloom filter based
|
||||||
|
filtering relies on keeping some number of bits of data in memory per
|
||||||
|
key (in this case 10 bits per key since that is the argument we passed
|
||||||
|
to NewBloomFilterPolicy).  This filter will reduce the number of unnecessary
|
||||||
|
disk reads needed for <code>Get()</code> calls by a factor of
|
||||||
|
approximately 100.  Increasing the bits per key will lead to a
|
||||||
|
larger reduction at the cost of more memory usage. We recommend that
|
||||||
|
applications whose working set does not fit in memory and that do a
|
||||||
|
lot of random reads set a filter policy.
|
||||||
|
<p>
|
||||||
|
If you are using a custom comparator, you should ensure that the filter
|
||||||
|
policy you are using is compatible with your comparator. For example,
|
||||||
|
consider a comparator that ignores trailing spaces when comparing keys.
|
||||||
|
<code>NewBloomFilterPolicy</code> must not be used with such a comparator.
|
||||||
|
Instead, the application should provide a custom filter policy that
|
||||||
|
also ignores trailing spaces. For example:
|
||||||
|
<pre>
|
||||||
|
class CustomFilterPolicy : public leveldb::FilterPolicy {
|
||||||
|
private:
|
||||||
|
const FilterPolicy* builtin_policy_;
|
||||||
|
public:
|
||||||
|
CustomFilterPolicy() : builtin_policy_(NewBloomFilterPolicy(10)) { }
|
||||||
|
~CustomFilterPolicy() { delete builtin_policy_; }
|
||||||
|
|
||||||
|
const char* Name() const { return "IgnoreTrailingSpacesFilter"; }
|
||||||
|
|
||||||
|
void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||||
|
// Use builtin bloom filter code after removing trailing spaces
|
||||||
|
std::vector<Slice> trimmed(n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
trimmed[i] = RemoveTrailingSpaces(keys[i]);
|
||||||
|
}
|
||||||
|
return builtin_policy_->CreateFilter(trimmed.data(), n, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||||
|
// Use builtin bloom filter code after removing trailing spaces
|
||||||
|
return builtin_policy_->KeyMayMatch(RemoveTrailingSpaces(key), filter);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
</pre>
|
||||||
|
<p>
|
||||||
|
Advanced applications may provide a filter policy that does not use
|
||||||
|
a bloom filter but uses some other mechanism for summarizing a set
|
||||||
|
of keys. See <code>leveldb/filter_policy.h</code> for detail.
|
||||||
|
<p>
|
||||||
<h1>Checksums</h1>
|
<h1>Checksums</h1>
|
||||||
<p>
|
<p>
|
||||||
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
||||||
|
|
|
@ -47,6 +47,47 @@ the BlockHandle of the metaindex and index blocks as well as a magic number.
|
||||||
// (40==2*BlockHandle::kMaxEncodedLength)
|
// (40==2*BlockHandle::kMaxEncodedLength)
|
||||||
magic: fixed64; // == 0xdb4775248b80fb57
|
magic: fixed64; // == 0xdb4775248b80fb57
|
||||||
|
|
||||||
|
"filter" Meta Block
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
If a "FilterPolicy" was specified when the database was opened, a
|
||||||
|
filter block is stored in each table. The "metaindex" block contains
|
||||||
|
an entry that maps from "filter.<N>" to the BlockHandle for the filter
|
||||||
|
block where "<N>" is the string returned by the filter policy's
|
||||||
|
"Name()" method.
|
||||||
|
|
||||||
|
The filter block stores a sequence of filters, where filter i contains
|
||||||
|
the output of FilterPolicy::CreateFilter() on all keys that are stored
|
||||||
|
in a block whose file offset falls within the range
|
||||||
|
|
||||||
|
[ i*base ... (i+1)*base-1 ]
|
||||||
|
|
||||||
|
Currently, "base" is 2KB. So for example, if blocks X and Y start in
|
||||||
|
the range [ 0KB .. 2KB-1 ], all of the keys in X and Y will be
|
||||||
|
converted to a filter by calling FilterPolicy::CreateFilter(), and the
|
||||||
|
resulting filter will be stored as the first filter in the filter
|
||||||
|
block.
|
||||||
|
|
||||||
|
The filter block is formatted as follows:
|
||||||
|
|
||||||
|
[filter 0]
|
||||||
|
[filter 1]
|
||||||
|
[filter 2]
|
||||||
|
...
|
||||||
|
[filter N-1]
|
||||||
|
|
||||||
|
[offset of filter 0] : 4 bytes
|
||||||
|
[offset of filter 1] : 4 bytes
|
||||||
|
[offset of filter 2] : 4 bytes
|
||||||
|
...
|
||||||
|
[offset of filter N-1] : 4 bytes
|
||||||
|
|
||||||
|
[offset of beginning of offset array] : 4 bytes
|
||||||
|
lg(base) : 1 byte
|
||||||
|
|
||||||
|
The offset array at the end of the filter block allows efficient
|
||||||
|
mapping from a data block offset to the corresponding filter.
|
||||||
|
|
||||||
"stats" Meta Block
|
"stats" Meta Block
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,7 @@ typedef struct leveldb_cache_t leveldb_cache_t;
|
||||||
typedef struct leveldb_comparator_t leveldb_comparator_t;
|
typedef struct leveldb_comparator_t leveldb_comparator_t;
|
||||||
typedef struct leveldb_env_t leveldb_env_t;
|
typedef struct leveldb_env_t leveldb_env_t;
|
||||||
typedef struct leveldb_filelock_t leveldb_filelock_t;
|
typedef struct leveldb_filelock_t leveldb_filelock_t;
|
||||||
|
typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t;
|
||||||
typedef struct leveldb_iterator_t leveldb_iterator_t;
|
typedef struct leveldb_iterator_t leveldb_iterator_t;
|
||||||
typedef struct leveldb_logger_t leveldb_logger_t;
|
typedef struct leveldb_logger_t leveldb_logger_t;
|
||||||
typedef struct leveldb_options_t leveldb_options_t;
|
typedef struct leveldb_options_t leveldb_options_t;
|
||||||
|
@ -127,6 +128,11 @@ extern void leveldb_approximate_sizes(
|
||||||
const char* const* range_limit_key, const size_t* range_limit_key_len,
|
const char* const* range_limit_key, const size_t* range_limit_key_len,
|
||||||
uint64_t* sizes);
|
uint64_t* sizes);
|
||||||
|
|
||||||
|
extern void leveldb_compact_range(
|
||||||
|
leveldb_t* db,
|
||||||
|
const char* start_key, size_t start_key_len,
|
||||||
|
const char* limit_key, size_t limit_key_len);
|
||||||
|
|
||||||
/* Management operations */
|
/* Management operations */
|
||||||
|
|
||||||
extern void leveldb_destroy_db(
|
extern void leveldb_destroy_db(
|
||||||
|
@ -177,6 +183,9 @@ extern void leveldb_options_destroy(leveldb_options_t*);
|
||||||
extern void leveldb_options_set_comparator(
|
extern void leveldb_options_set_comparator(
|
||||||
leveldb_options_t*,
|
leveldb_options_t*,
|
||||||
leveldb_comparator_t*);
|
leveldb_comparator_t*);
|
||||||
|
extern void leveldb_options_set_filter_policy(
|
||||||
|
leveldb_options_t*,
|
||||||
|
leveldb_filterpolicy_t*);
|
||||||
extern void leveldb_options_set_create_if_missing(
|
extern void leveldb_options_set_create_if_missing(
|
||||||
leveldb_options_t*, unsigned char);
|
leveldb_options_t*, unsigned char);
|
||||||
extern void leveldb_options_set_error_if_exists(
|
extern void leveldb_options_set_error_if_exists(
|
||||||
|
@ -209,6 +218,26 @@ extern leveldb_comparator_t* leveldb_comparator_create(
|
||||||
const char* (*name)(void*));
|
const char* (*name)(void*));
|
||||||
extern void leveldb_comparator_destroy(leveldb_comparator_t*);
|
extern void leveldb_comparator_destroy(leveldb_comparator_t*);
|
||||||
|
|
||||||
|
/* Filter policy */
|
||||||
|
|
||||||
|
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||||
|
void* state,
|
||||||
|
void (*destructor)(void*),
|
||||||
|
char* (*create_filter)(
|
||||||
|
void*,
|
||||||
|
const char* const* key_array, const size_t* key_length_array,
|
||||||
|
int num_keys,
|
||||||
|
size_t* filter_length),
|
||||||
|
unsigned char (*key_may_match)(
|
||||||
|
void*,
|
||||||
|
const char* key, size_t length,
|
||||||
|
const char* filter, size_t filter_length),
|
||||||
|
const char* (*name)(void*));
|
||||||
|
extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*);
|
||||||
|
|
||||||
|
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(
|
||||||
|
int bits_per_key);
|
||||||
|
|
||||||
/* Read options */
|
/* Read options */
|
||||||
|
|
||||||
extern leveldb_readoptions_t* leveldb_readoptions_create();
|
extern leveldb_readoptions_t* leveldb_readoptions_create();
|
||||||
|
|
|
@ -14,7 +14,7 @@ namespace leveldb {
|
||||||
|
|
||||||
// Update Makefile if you change these
|
// Update Makefile if you change these
|
||||||
static const int kMajorVersion = 1;
|
static const int kMajorVersion = 1;
|
||||||
static const int kMinorVersion = 3;
|
static const int kMinorVersion = 4;
|
||||||
|
|
||||||
struct Options;
|
struct Options;
|
||||||
struct ReadOptions;
|
struct ReadOptions;
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// A database can be configured with a custom FilterPolicy object.
|
||||||
|
// This object is responsible for creating a small filter from a set
|
||||||
|
// of keys. These filters are stored in leveldb and are consulted
|
||||||
|
// automatically by leveldb to decide whether or not to read some
|
||||||
|
// information from disk. In many cases, a filter can cut down the
|
||||||
|
// number of disk seeks from a handful to a single disk seek per
|
||||||
|
// DB::Get() call.
|
||||||
|
//
|
||||||
|
// Most people will want to use the builtin bloom filter support (see
|
||||||
|
// NewBloomFilterPolicy() below).
|
||||||
|
|
||||||
|
#ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
|
||||||
|
#define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
class Slice;
|
||||||
|
|
||||||
|
class FilterPolicy {
|
||||||
|
public:
|
||||||
|
virtual ~FilterPolicy();
|
||||||
|
|
||||||
|
// Return the name of this policy. Note that if the filter encoding
|
||||||
|
// changes in an incompatible way, the name returned by this method
|
||||||
|
// must be changed. Otherwise, old incompatible filters may be
|
||||||
|
// passed to methods of this type.
|
||||||
|
virtual const char* Name() const = 0;
|
||||||
|
|
||||||
|
// keys[0,n-1] contains a list of keys (potentially with duplicates)
|
||||||
|
// that are ordered according to the user supplied comparator.
|
||||||
|
// Append a filter that summarizes keys[0,n-1] to *dst.
|
||||||
|
//
|
||||||
|
// Warning: do not change the initial contents of *dst. Instead,
|
||||||
|
// append the newly constructed filter to *dst.
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
|
||||||
|
const = 0;
|
||||||
|
|
||||||
|
// "filter" contains the data appended by a preceding call to
|
||||||
|
// CreateFilter() on this class. This method must return true if
|
||||||
|
// the key was in the list of keys passed to CreateFilter().
|
||||||
|
// This method may return true or false if the key was not on the
|
||||||
|
// list, but it should aim to return false with a high probability.
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Return a new filter policy that uses a bloom filter with approximately
|
||||||
|
// the specified number of bits per key. A good value for bits_per_key
|
||||||
|
// is 10, which yields a filter with ~ 1% false positive rate.
|
||||||
|
//
|
||||||
|
// Callers must delete the result after any database that is using the
|
||||||
|
// result has been closed.
|
||||||
|
//
|
||||||
|
// Note: if you are using a custom comparator that ignores some parts
|
||||||
|
// of the keys being compared, you must not use NewBloomFilterPolicy()
|
||||||
|
// and must provide your own FilterPolicy that also ignores the
|
||||||
|
// corresponding parts of the keys. For example, if the comparator
|
||||||
|
// ignores trailing spaces, it would be incorrect to use a
|
||||||
|
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
|
||||||
|
// trailing spaces in keys.
|
||||||
|
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
|
|
@ -12,6 +12,7 @@ namespace leveldb {
|
||||||
class Cache;
|
class Cache;
|
||||||
class Comparator;
|
class Comparator;
|
||||||
class Env;
|
class Env;
|
||||||
|
class FilterPolicy;
|
||||||
class Logger;
|
class Logger;
|
||||||
class Snapshot;
|
class Snapshot;
|
||||||
|
|
||||||
|
@ -127,6 +128,13 @@ struct Options {
|
||||||
// efficiently detect that and will switch to uncompressed mode.
|
// efficiently detect that and will switch to uncompressed mode.
|
||||||
CompressionType compression;
|
CompressionType compression;
|
||||||
|
|
||||||
|
// If non-NULL, use the specified filter policy to reduce disk reads.
|
||||||
|
// Many applications will benefit from passing the result of
|
||||||
|
// NewBloomFilterPolicy() here.
|
||||||
|
//
|
||||||
|
// Default: NULL
|
||||||
|
const FilterPolicy* filter_policy;
|
||||||
|
|
||||||
// Create an Options object with default values for all fields.
|
// Create an Options object with default values for all fields.
|
||||||
Options();
|
Options();
|
||||||
};
|
};
|
||||||
|
|
|
@ -12,9 +12,11 @@ namespace leveldb {
|
||||||
|
|
||||||
class Block;
|
class Block;
|
||||||
class BlockHandle;
|
class BlockHandle;
|
||||||
|
class Footer;
|
||||||
struct Options;
|
struct Options;
|
||||||
class RandomAccessFile;
|
class RandomAccessFile;
|
||||||
struct ReadOptions;
|
struct ReadOptions;
|
||||||
|
class TableCache;
|
||||||
|
|
||||||
// A Table is a sorted map from strings to strings. Tables are
|
// A Table is a sorted map from strings to strings. Tables are
|
||||||
// immutable and persistent. A Table may be safely accessed from
|
// immutable and persistent. A Table may be safely accessed from
|
||||||
|
@ -60,6 +62,19 @@ class Table {
|
||||||
explicit Table(Rep* rep) { rep_ = rep; }
|
explicit Table(Rep* rep) { rep_ = rep; }
|
||||||
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&);
|
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&);
|
||||||
|
|
||||||
|
// Calls (*handle_result)(arg, ...) with the entry found after a call
|
||||||
|
// to Seek(key). May not make such a call if filter policy says
|
||||||
|
// that key is not present.
|
||||||
|
friend class TableCache;
|
||||||
|
Status InternalGet(
|
||||||
|
const ReadOptions&, const Slice& key,
|
||||||
|
void* arg,
|
||||||
|
void (*handle_result)(void* arg, const Slice& k, const Slice& v));
|
||||||
|
|
||||||
|
|
||||||
|
void ReadMeta(const Footer& footer);
|
||||||
|
void ReadFilter(const Slice& filter_handle_value);
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
Table(const Table&);
|
Table(const Table&);
|
||||||
void operator=(const Table&);
|
void operator=(const Table&);
|
||||||
|
|
|
@ -77,6 +77,7 @@ class TableBuilder {
|
||||||
private:
|
private:
|
||||||
bool ok() const { return status().ok(); }
|
bool ok() const { return status().ok(); }
|
||||||
void WriteBlock(BlockBuilder* block, BlockHandle* handle);
|
void WriteBlock(BlockBuilder* block, BlockHandle* handle);
|
||||||
|
void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle);
|
||||||
|
|
||||||
struct Rep;
|
struct Rep;
|
||||||
Rep* rep_;
|
Rep* rep_;
|
||||||
|
|
|
@ -78,6 +78,9 @@ class CondVar {
|
||||||
// On ARM chipsets <V6, 0xffff0fa0 is the hard coded address of a
|
// On ARM chipsets <V6, 0xffff0fa0 is the hard coded address of a
|
||||||
// memory barrier function provided by the kernel.
|
// memory barrier function provided by the kernel.
|
||||||
typedef void (*LinuxKernelMemoryBarrierFunc)(void);
|
typedef void (*LinuxKernelMemoryBarrierFunc)(void);
|
||||||
|
// TODO(user): ATTRIBUTE_WEAK is undefined, so this fails to build on
|
||||||
|
// non-ARMV6_OR_7. We may be able to replace it with __attribute__((weak)) for
|
||||||
|
// older ARM builds, but x86 builds will require a different memory barrier.
|
||||||
LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK =
|
LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK =
|
||||||
(LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
|
(LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "leveldb/comparator.h"
|
#include "leveldb/comparator.h"
|
||||||
|
#include "table/format.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
|
|
||||||
|
@ -19,10 +20,10 @@ inline uint32_t Block::NumRestarts() const {
|
||||||
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
Block::Block(const char* data, size_t size, bool take_ownership)
|
Block::Block(const BlockContents& contents)
|
||||||
: data_(data),
|
: data_(contents.data.data()),
|
||||||
size_(size),
|
size_(contents.data.size()),
|
||||||
owned_(take_ownership) {
|
owned_(contents.heap_allocated) {
|
||||||
if (size_ < sizeof(uint32_t)) {
|
if (size_ < sizeof(uint32_t)) {
|
||||||
size_ = 0; // Error marker
|
size_ = 0; // Error marker
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -11,14 +11,13 @@
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
|
struct BlockContents;
|
||||||
class Comparator;
|
class Comparator;
|
||||||
|
|
||||||
class Block {
|
class Block {
|
||||||
public:
|
public:
|
||||||
// Initialize the block with the specified contents.
|
// Initialize the block with the specified contents.
|
||||||
// Takes ownership of data[] and will delete[] it when done iff
|
explicit Block(const BlockContents& contents);
|
||||||
// "take_ownership is true.
|
|
||||||
Block(const char* data, size_t size, bool take_ownership);
|
|
||||||
|
|
||||||
~Block();
|
~Block();
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/filter_block.h"
|
||||||
|
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
// See doc/table_format.txt for an explanation of the filter block format.
|
||||||
|
|
||||||
|
// A new filter is generated for every 2KB (1 << 11 bytes) of data block
// offsets.  kFilterBaseLg is the log2 of that granularity and is also the
// value stored as the final byte of an encoded filter block.
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = static_cast<size_t>(1) << kFilterBaseLg;
|
||||||
|
|
||||||
|
// Creates a builder that will ask "policy" to generate each filter.
// The pointer is stored as-is; the builder does not delete it.
FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy)
    : policy_(policy) {}
|
||||||
|
|
||||||
|
void FilterBlockBuilder::StartBlock(uint64_t block_offset) {
|
||||||
|
uint64_t filter_index = (block_offset / kFilterBase);
|
||||||
|
assert(filter_index >= filter_offsets_.size());
|
||||||
|
while (filter_index > filter_offsets_.size()) {
|
||||||
|
GenerateFilter();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void FilterBlockBuilder::AddKey(const Slice& key) {
|
||||||
|
Slice k = key;
|
||||||
|
start_.push_back(keys_.size());
|
||||||
|
keys_.append(k.data(), k.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Completes the filter block and returns a Slice constructed from result_.
// Layout of the result: the filter data generated so far, then one fixed32
// offset per filter, then a fixed32 holding the position of that offset
// array, then a single byte holding kFilterBaseLg.
Slice FilterBlockBuilder::Finish() {
  // Turn any keys that are still pending into a final filter.
  if (!start_.empty()) {
    GenerateFilter();
  }

  // Append array of per-filter offsets
  const uint32_t array_offset = result_.size();
  for (std::vector<uint32_t>::const_iterator it = filter_offsets_.begin();
       it != filter_offsets_.end(); ++it) {
    PutFixed32(&result_, *it);
  }

  PutFixed32(&result_, array_offset);
  result_.push_back(kFilterBaseLg);  // Save encoding parameter in result
  return Slice(result_);
}
|
||||||
|
|
||||||
|
void FilterBlockBuilder::GenerateFilter() {
|
||||||
|
const size_t num_keys = start_.size();
|
||||||
|
if (num_keys == 0) {
|
||||||
|
// Fast path if there are no keys for this filter
|
||||||
|
filter_offsets_.push_back(result_.size());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make list of keys from flattened key structure
|
||||||
|
start_.push_back(keys_.size()); // Simplify length computation
|
||||||
|
tmp_keys_.resize(num_keys);
|
||||||
|
for (size_t i = 0; i < num_keys; i++) {
|
||||||
|
const char* base = keys_.data() + start_[i];
|
||||||
|
size_t length = start_[i+1] - start_[i];
|
||||||
|
tmp_keys_[i] = Slice(base, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate filter for current set of keys and append to result_.
|
||||||
|
filter_offsets_.push_back(result_.size());
|
||||||
|
policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_);
|
||||||
|
|
||||||
|
tmp_keys_.clear();
|
||||||
|
keys_.clear();
|
||||||
|
start_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parses the trailer of an encoded filter block.  The last byte is the
// base_lg encoding parameter and the four bytes before it hold the position
// of the offset array.  If "contents" is shorter than that trailer, or the
// recorded position lies beyond the data, data_/offset_ stay NULL and num_
// stays 0, so every later lookup is treated as a potential match.
FilterBlockReader::FilterBlockReader(const FilterPolicy* policy,
                                     const Slice& contents)
    : policy_(policy), data_(NULL), offset_(NULL), num_(0), base_lg_(0) {
  const size_t total = contents.size();
  if (total < 5) {
    return;  // 1 byte for base_lg_ and 4 for start of offset array
  }
  base_lg_ = contents[total - 1];
  const uint32_t array_start = DecodeFixed32(contents.data() + total - 5);
  if (array_start > total - 5) {
    return;
  }
  data_ = contents.data();
  offset_ = data_ + array_start;
  num_ = (total - 5 - array_start) / 4;
}
|
||||||
|
|
||||||
|
bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
|
||||||
|
uint64_t index = block_offset >> base_lg_;
|
||||||
|
if (index < num_) {
|
||||||
|
uint32_t start = DecodeFixed32(offset_ + index*4);
|
||||||
|
uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
|
||||||
|
if (start <= limit && limit <= (offset_ - data_)) {
|
||||||
|
Slice filter = Slice(data_ + start, limit - start);
|
||||||
|
return policy_->KeyMayMatch(key, filter);
|
||||||
|
} else if (start == limit) {
|
||||||
|
// Empty filters do not match any keys
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true; // Errors are treated as potential matches
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,68 @@
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// A filter block is stored near the end of a Table file. It contains
|
||||||
|
// filters (e.g., bloom filters) for all data blocks in the table combined
|
||||||
|
// into a single filter block.
|
||||||
|
|
||||||
|
#ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
|
||||||
|
#define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "leveldb/slice.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
class FilterPolicy;
|
||||||
|
|
||||||
|
// A FilterBlockBuilder is used to construct all of the filters for a
|
||||||
|
// particular Table. It generates a single string which is stored as
|
||||||
|
// a special block in the Table.
|
||||||
|
//
|
||||||
|
// The sequence of calls to FilterBlockBuilder must match the regexp:
|
||||||
|
// (StartBlock AddKey*)* Finish
|
||||||
|
class FilterBlockBuilder {
|
||||||
|
public:
|
||||||
|
explicit FilterBlockBuilder(const FilterPolicy*);
|
||||||
|
|
||||||
|
void StartBlock(uint64_t block_offset);
|
||||||
|
void AddKey(const Slice& key);
|
||||||
|
Slice Finish();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void GenerateFilter();
|
||||||
|
|
||||||
|
const FilterPolicy* policy_;
|
||||||
|
std::string keys_; // Flattened key contents
|
||||||
|
std::vector<size_t> start_; // Starting index in keys_ of each key
|
||||||
|
std::string result_; // Filter data computed so far
|
||||||
|
std::vector<Slice> tmp_keys_; // policy_->CreateFilter() argument
|
||||||
|
std::vector<uint32_t> filter_offsets_;
|
||||||
|
|
||||||
|
// No copying allowed
|
||||||
|
FilterBlockBuilder(const FilterBlockBuilder&);
|
||||||
|
void operator=(const FilterBlockBuilder&);
|
||||||
|
};
|
||||||
|
|
||||||
|
class FilterBlockReader {
|
||||||
|
public:
|
||||||
|
// REQUIRES: "contents" and *policy must stay live while *this is live.
|
||||||
|
FilterBlockReader(const FilterPolicy* policy, const Slice& contents);
|
||||||
|
bool KeyMayMatch(uint64_t block_offset, const Slice& key);
|
||||||
|
|
||||||
|
private:
|
||||||
|
const FilterPolicy* policy_;
|
||||||
|
const char* data_; // Pointer to filter data (at block-start)
|
||||||
|
const char* offset_; // Pointer to beginning of offset array (at block-end)
|
||||||
|
size_t num_; // Number of entries in offset array
|
||||||
|
size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file)
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_
|
|
@ -0,0 +1,128 @@
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/filter_block.h"
|
||||||
|
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/testharness.h"
|
||||||
|
#include "util/testutil.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
// For testing: emit an array with one hash value per key
|
||||||
|
class TestHashFilter : public FilterPolicy {
|
||||||
|
public:
|
||||||
|
virtual const char* Name() const {
|
||||||
|
return "TestHashFilter";
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
|
||||||
|
PutFixed32(dst, h);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
|
||||||
|
uint32_t h = Hash(key.data(), key.size(), 1);
|
||||||
|
for (int i = 0; i + 4 <= filter.size(); i += 4) {
|
||||||
|
if (h == DecodeFixed32(filter.data() + i)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class FilterBlockTest {
|
||||||
|
public:
|
||||||
|
TestHashFilter policy_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// A builder that never saw a block encodes only the trailer: a zero array
// offset followed by the base-lg byte.  Lookups against it report "may match".
TEST(FilterBlockTest, EmptyBuilder) {
  FilterBlockBuilder builder(&policy_);
  const Slice encoded = builder.Finish();
  ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(encoded));

  FilterBlockReader reader(&policy_, encoded);
  ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(100000, "foo"));
}
|
||||||
|
|
||||||
|
// All three blocks start below offset 2048, so their keys end up in a single
// filter; every added key matches at offset 100 and unknown keys do not.
TEST(FilterBlockTest, SingleChunk) {
  FilterBlockBuilder builder(&policy_);

  builder.StartBlock(100);
  builder.AddKey("foo");
  builder.AddKey("bar");
  builder.AddKey("box");

  builder.StartBlock(200);
  builder.AddKey("box");

  builder.StartBlock(300);
  builder.AddKey("hello");

  const Slice encoded = builder.Finish();
  FilterBlockReader reader(&policy_, encoded);

  // Keys that were added
  ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "bar"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "box"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "hello"));
  ASSERT_TRUE(reader.KeyMayMatch(100, "foo"));

  // Keys that were never added
  ASSERT_TRUE(! reader.KeyMayMatch(100, "missing"));
  ASSERT_TRUE(! reader.KeyMayMatch(100, "other"));
}
|
||||||
|
|
||||||
|
// Blocks spread across several 2KB ranges produce separate filters, with an
// empty filter for a range that received no block.
TEST(FilterBlockTest, MultiChunk) {
  FilterBlockBuilder builder(&policy_);

  // First filter
  builder.StartBlock(0);
  builder.AddKey("foo");
  builder.StartBlock(2000);
  builder.AddKey("bar");

  // Second filter
  builder.StartBlock(3100);
  builder.AddKey("box");

  // Third filter is empty

  // Last filter
  builder.StartBlock(9000);
  builder.AddKey("box");
  builder.AddKey("hello");

  const Slice encoded = builder.Finish();
  FilterBlockReader reader(&policy_, encoded);

  // Check first filter
  ASSERT_TRUE(reader.KeyMayMatch(0, "foo"));
  ASSERT_TRUE(reader.KeyMayMatch(2000, "bar"));
  ASSERT_TRUE(! reader.KeyMayMatch(0, "box"));
  ASSERT_TRUE(! reader.KeyMayMatch(0, "hello"));

  // Check second filter
  ASSERT_TRUE(reader.KeyMayMatch(3100, "box"));
  ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar"));
  ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello"));

  // Check third filter (empty)
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar"));
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "box"));
  ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello"));

  // Check last filter
  ASSERT_TRUE(reader.KeyMayMatch(9000, "box"));
  ASSERT_TRUE(reader.KeyMayMatch(9000, "hello"));
  ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo"));
  ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar"));
}
|
||||||
|
|
||||||
|
} // namespace leveldb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
return leveldb::test::RunAllTests();
|
||||||
|
}
|
|
@ -66,10 +66,10 @@ Status Footer::DecodeFrom(Slice* input) {
|
||||||
Status ReadBlock(RandomAccessFile* file,
|
Status ReadBlock(RandomAccessFile* file,
|
||||||
const ReadOptions& options,
|
const ReadOptions& options,
|
||||||
const BlockHandle& handle,
|
const BlockHandle& handle,
|
||||||
Block** block,
|
BlockContents* result) {
|
||||||
bool* may_cache) {
|
result->data = Slice();
|
||||||
*block = NULL;
|
result->cachable = false;
|
||||||
*may_cache = false;
|
result->heap_allocated = false;
|
||||||
|
|
||||||
// Read the block contents as well as the type/crc footer.
|
// Read the block contents as well as the type/crc footer.
|
||||||
// See table_builder.cc for the code that built this structure.
|
// See table_builder.cc for the code that built this structure.
|
||||||
|
@ -105,11 +105,13 @@ Status ReadBlock(RandomAccessFile* file,
|
||||||
// Use it directly under the assumption that it will be live
|
// Use it directly under the assumption that it will be live
|
||||||
// while the file is open.
|
// while the file is open.
|
||||||
delete[] buf;
|
delete[] buf;
|
||||||
*block = new Block(data, n, false /* do not take ownership */);
|
result->data = Slice(data, n);
|
||||||
*may_cache = false; // Do not double-cache
|
result->heap_allocated = false;
|
||||||
|
result->cachable = false; // Do not double-cache
|
||||||
} else {
|
} else {
|
||||||
*block = new Block(buf, n, true /* take ownership */);
|
result->data = Slice(buf, n);
|
||||||
*may_cache = true;
|
result->heap_allocated = true;
|
||||||
|
result->cachable = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ok
|
// Ok
|
||||||
|
@ -127,8 +129,9 @@ Status ReadBlock(RandomAccessFile* file,
|
||||||
return Status::Corruption("corrupted compressed block contents");
|
return Status::Corruption("corrupted compressed block contents");
|
||||||
}
|
}
|
||||||
delete[] buf;
|
delete[] buf;
|
||||||
*block = new Block(ubuf, ulength, true /* take ownership */);
|
result->data = Slice(ubuf, ulength);
|
||||||
*may_cache = true;
|
result->heap_allocated = true;
|
||||||
|
result->cachable = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -83,16 +83,18 @@ static const uint64_t kTableMagicNumber = 0xdb4775248b80fb57ull;
|
||||||
// 1-byte type + 32-bit crc
|
// 1-byte type + 32-bit crc
|
||||||
static const size_t kBlockTrailerSize = 5;
|
static const size_t kBlockTrailerSize = 5;
|
||||||
|
|
||||||
// Read the block identified by "handle" from "file". On success,
|
struct BlockContents {
|
||||||
// store a pointer to the heap-allocated result in *block and return
|
Slice data; // Actual contents of data
|
||||||
// OK. On failure store NULL in *block and return non-OK.
|
bool cachable; // True iff data can be cached
|
||||||
// On success, stores true in *may_cache if the result may be
|
bool heap_allocated; // True iff caller should delete[] data.data()
|
||||||
// cached, false if it must not be cached.
|
};
|
||||||
|
|
||||||
|
// Read the block identified by "handle" from "file". On failure
|
||||||
|
// return non-OK. On success fill *result and return OK.
|
||||||
extern Status ReadBlock(RandomAccessFile* file,
|
extern Status ReadBlock(RandomAccessFile* file,
|
||||||
const ReadOptions& options,
|
const ReadOptions& options,
|
||||||
const BlockHandle& handle,
|
const BlockHandle& handle,
|
||||||
Block** block,
|
BlockContents* result);
|
||||||
bool* may_cache);
|
|
||||||
|
|
||||||
// Implementation details follow. Clients should ignore,
|
// Implementation details follow. Clients should ignore,
|
||||||
|
|
||||||
|
|
116
table/table.cc
116
table/table.cc
|
@ -5,8 +5,12 @@
|
||||||
#include "leveldb/table.h"
|
#include "leveldb/table.h"
|
||||||
|
|
||||||
#include "leveldb/cache.h"
|
#include "leveldb/cache.h"
|
||||||
|
#include "leveldb/comparator.h"
|
||||||
#include "leveldb/env.h"
|
#include "leveldb/env.h"
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
|
#include "leveldb/options.h"
|
||||||
#include "table/block.h"
|
#include "table/block.h"
|
||||||
|
#include "table/filter_block.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "table/two_level_iterator.h"
|
#include "table/two_level_iterator.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
|
@ -15,6 +19,8 @@ namespace leveldb {
|
||||||
|
|
||||||
struct Table::Rep {
|
struct Table::Rep {
|
||||||
~Rep() {
|
~Rep() {
|
||||||
|
delete filter;
|
||||||
|
delete [] filter_data;
|
||||||
delete index_block;
|
delete index_block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,6 +28,8 @@ struct Table::Rep {
|
||||||
Status status;
|
Status status;
|
||||||
RandomAccessFile* file;
|
RandomAccessFile* file;
|
||||||
uint64_t cache_id;
|
uint64_t cache_id;
|
||||||
|
FilterBlockReader* filter;
|
||||||
|
const char* filter_data;
|
||||||
|
|
||||||
BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
|
BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
|
||||||
Block* index_block;
|
Block* index_block;
|
||||||
|
@ -47,11 +55,13 @@ Status Table::Open(const Options& options,
|
||||||
if (!s.ok()) return s;
|
if (!s.ok()) return s;
|
||||||
|
|
||||||
// Read the index block
|
// Read the index block
|
||||||
|
BlockContents contents;
|
||||||
Block* index_block = NULL;
|
Block* index_block = NULL;
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
bool may_cache; // Ignored result
|
s = ReadBlock(file, ReadOptions(), footer.index_handle(), &contents);
|
||||||
s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block,
|
if (s.ok()) {
|
||||||
&may_cache);
|
index_block = new Block(contents);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
|
@ -63,7 +73,10 @@ Status Table::Open(const Options& options,
|
||||||
rep->metaindex_handle = footer.metaindex_handle();
|
rep->metaindex_handle = footer.metaindex_handle();
|
||||||
rep->index_block = index_block;
|
rep->index_block = index_block;
|
||||||
rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
|
rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
|
||||||
|
rep->filter_data = NULL;
|
||||||
|
rep->filter = NULL;
|
||||||
*table = new Table(rep);
|
*table = new Table(rep);
|
||||||
|
(*table)->ReadMeta(footer);
|
||||||
} else {
|
} else {
|
||||||
if (index_block) delete index_block;
|
if (index_block) delete index_block;
|
||||||
}
|
}
|
||||||
|
@ -71,6 +84,52 @@ Status Table::Open(const Options& options,
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Table::ReadMeta(const Footer& footer) {
|
||||||
|
if (rep_->options.filter_policy == NULL) {
|
||||||
|
return; // Do not need any metadata
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
|
||||||
|
// it is an empty block.
|
||||||
|
ReadOptions opt;
|
||||||
|
BlockContents contents;
|
||||||
|
if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
|
||||||
|
// Do not propagate errors since meta info is not needed for operation
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Block* meta = new Block(contents);
|
||||||
|
|
||||||
|
Iterator* iter = meta->NewIterator(BytewiseComparator());
|
||||||
|
std::string key = "filter.";
|
||||||
|
key.append(rep_->options.filter_policy->Name());
|
||||||
|
iter->Seek(key);
|
||||||
|
if (iter->Valid() && iter->key() == Slice(key)) {
|
||||||
|
ReadFilter(iter->value());
|
||||||
|
}
|
||||||
|
delete iter;
|
||||||
|
delete meta;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Table::ReadFilter(const Slice& filter_handle_value) {
|
||||||
|
Slice v = filter_handle_value;
|
||||||
|
BlockHandle filter_handle;
|
||||||
|
if (!filter_handle.DecodeFrom(&v).ok()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We might want to unify with ReadBlock() if we start
|
||||||
|
// requiring checksum verification in Table::Open.
|
||||||
|
ReadOptions opt;
|
||||||
|
BlockContents block;
|
||||||
|
if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (block.heap_allocated) {
|
||||||
|
rep_->filter_data = block.data.data(); // Will need to delete later
|
||||||
|
}
|
||||||
|
rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
|
||||||
|
}
|
||||||
|
|
||||||
Table::~Table() {
|
Table::~Table() {
|
||||||
delete rep_;
|
delete rep_;
|
||||||
}
|
}
|
||||||
|
@ -107,7 +166,7 @@ Iterator* Table::BlockReader(void* arg,
|
||||||
// can add more features in the future.
|
// can add more features in the future.
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
bool may_cache;
|
BlockContents contents;
|
||||||
if (block_cache != NULL) {
|
if (block_cache != NULL) {
|
||||||
char cache_key_buffer[16];
|
char cache_key_buffer[16];
|
||||||
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
|
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
|
||||||
|
@ -117,14 +176,20 @@ Iterator* Table::BlockReader(void* arg,
|
||||||
if (cache_handle != NULL) {
|
if (cache_handle != NULL) {
|
||||||
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
|
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
|
||||||
} else {
|
} else {
|
||||||
s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
|
s = ReadBlock(table->rep_->file, options, handle, &contents);
|
||||||
if (s.ok() && may_cache && options.fill_cache) {
|
if (s.ok()) {
|
||||||
cache_handle = block_cache->Insert(
|
block = new Block(contents);
|
||||||
key, block, block->size(), &DeleteCachedBlock);
|
if (contents.cachable && options.fill_cache) {
|
||||||
|
cache_handle = block_cache->Insert(
|
||||||
|
key, block, block->size(), &DeleteCachedBlock);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
|
s = ReadBlock(table->rep_->file, options, handle, &contents);
|
||||||
|
if (s.ok()) {
|
||||||
|
block = new Block(contents);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,6 +213,39 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {
|
||||||
&Table::BlockReader, const_cast<Table*>(this), options);
|
&Table::BlockReader, const_cast<Table*>(this), options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Seeks the index block to "k".  Unless the filter for the selected data
// block reports that "k" is absent, seeks that data block to "k" and, if an
// entry is found there, calls (*saver)(arg, key, value) with it.
// Returns the data-block iterator status if one was created and it is not
// OK; otherwise returns the index iterator status.
Status Table::InternalGet(const ReadOptions& options, const Slice& k,
                          void* arg,
                          void (*saver)(void*, const Slice&, const Slice&)) {
  Status s;
  Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
  iiter->Seek(k);
  if (iiter->Valid()) {
    // DecodeFrom() receives a copy of the index value, so the untouched
    // iiter->value() is still available for BlockReader() below.
    Slice handle_value = iiter->value();
    FilterBlockReader* filter = rep_->filter;
    BlockHandle handle;
    if (filter != NULL &&
        handle.DecodeFrom(&handle_value).ok() &&
        !filter->KeyMayMatch(handle.offset(), k)) {
      // Not found
    } else {
      // The original declared an unused local "Slice handle" here that
      // shadowed the BlockHandle above; it has been removed.
      Iterator* block_iter = BlockReader(this, options, iiter->value());
      block_iter->Seek(k);
      if (block_iter->Valid()) {
        (*saver)(arg, block_iter->key(), block_iter->value());
      }
      s = block_iter->status();
      delete block_iter;
    }
  }
  if (s.ok()) {
    s = iiter->status();
  }
  delete iiter;
  return s;
}
|
||||||
|
|
||||||
|
|
||||||
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
|
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
|
||||||
Iterator* index_iter =
|
Iterator* index_iter =
|
||||||
rep_->index_block->NewIterator(rep_->options.comparator);
|
rep_->index_block->NewIterator(rep_->options.comparator);
|
||||||
|
|
|
@ -5,14 +5,15 @@
|
||||||
#include "leveldb/table_builder.h"
|
#include "leveldb/table_builder.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
|
||||||
#include "leveldb/comparator.h"
|
#include "leveldb/comparator.h"
|
||||||
#include "leveldb/env.h"
|
#include "leveldb/env.h"
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
|
#include "leveldb/options.h"
|
||||||
#include "table/block_builder.h"
|
#include "table/block_builder.h"
|
||||||
|
#include "table/filter_block.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/crc32c.h"
|
#include "util/crc32c.h"
|
||||||
#include "util/logging.h"
|
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
|
@ -27,6 +28,7 @@ struct TableBuilder::Rep {
|
||||||
std::string last_key;
|
std::string last_key;
|
||||||
int64_t num_entries;
|
int64_t num_entries;
|
||||||
bool closed; // Either Finish() or Abandon() has been called.
|
bool closed; // Either Finish() or Abandon() has been called.
|
||||||
|
FilterBlockBuilder* filter_block;
|
||||||
|
|
||||||
// We do not emit the index entry for a block until we have seen the
|
// We do not emit the index entry for a block until we have seen the
|
||||||
// first key for the next data block. This allows us to use shorter
|
// first key for the next data block. This allows us to use shorter
|
||||||
|
@ -51,6 +53,8 @@ struct TableBuilder::Rep {
|
||||||
index_block(&index_block_options),
|
index_block(&index_block_options),
|
||||||
num_entries(0),
|
num_entries(0),
|
||||||
closed(false),
|
closed(false),
|
||||||
|
filter_block(opt.filter_policy == NULL ? NULL
|
||||||
|
: new FilterBlockBuilder(opt.filter_policy)),
|
||||||
pending_index_entry(false) {
|
pending_index_entry(false) {
|
||||||
index_block_options.block_restart_interval = 1;
|
index_block_options.block_restart_interval = 1;
|
||||||
}
|
}
|
||||||
|
@ -58,10 +62,14 @@ struct TableBuilder::Rep {
|
||||||
|
|
||||||
TableBuilder::TableBuilder(const Options& options, WritableFile* file)
|
TableBuilder::TableBuilder(const Options& options, WritableFile* file)
|
||||||
: rep_(new Rep(options, file)) {
|
: rep_(new Rep(options, file)) {
|
||||||
|
if (rep_->filter_block != NULL) {
|
||||||
|
rep_->filter_block->StartBlock(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TableBuilder::~TableBuilder() {
|
TableBuilder::~TableBuilder() {
|
||||||
assert(rep_->closed); // Catch errors where caller forgot to call Finish()
|
assert(rep_->closed); // Catch errors where caller forgot to call Finish()
|
||||||
|
delete rep_->filter_block;
|
||||||
delete rep_;
|
delete rep_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,6 +106,10 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
|
||||||
r->pending_index_entry = false;
|
r->pending_index_entry = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (r->filter_block != NULL) {
|
||||||
|
r->filter_block->AddKey(key);
|
||||||
|
}
|
||||||
|
|
||||||
r->last_key.assign(key.data(), key.size());
|
r->last_key.assign(key.data(), key.size());
|
||||||
r->num_entries++;
|
r->num_entries++;
|
||||||
r->data_block.Add(key, value);
|
r->data_block.Add(key, value);
|
||||||
|
@ -119,6 +131,9 @@ void TableBuilder::Flush() {
|
||||||
r->pending_index_entry = true;
|
r->pending_index_entry = true;
|
||||||
r->status = r->file->Flush();
|
r->status = r->file->Flush();
|
||||||
}
|
}
|
||||||
|
if (r->filter_block != NULL) {
|
||||||
|
r->filter_block->StartBlock(r->offset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
|
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
|
||||||
|
@ -152,6 +167,15 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
WriteRawBlock(block_contents, type, handle);
|
||||||
|
r->compressed_output.clear();
|
||||||
|
block->Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
void TableBuilder::WriteRawBlock(const Slice& block_contents,
|
||||||
|
CompressionType type,
|
||||||
|
BlockHandle* handle) {
|
||||||
|
Rep* r = rep_;
|
||||||
handle->set_offset(r->offset);
|
handle->set_offset(r->offset);
|
||||||
handle->set_size(block_contents.size());
|
handle->set_size(block_contents.size());
|
||||||
r->status = r->file->Append(block_contents);
|
r->status = r->file->Append(block_contents);
|
||||||
|
@ -166,8 +190,6 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
|
||||||
r->offset += block_contents.size() + kBlockTrailerSize;
|
r->offset += block_contents.size() + kBlockTrailerSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
r->compressed_output.clear();
|
|
||||||
block->Reset();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Status TableBuilder::status() const {
|
Status TableBuilder::status() const {
|
||||||
|
@ -179,13 +201,32 @@ Status TableBuilder::Finish() {
|
||||||
Flush();
|
Flush();
|
||||||
assert(!r->closed);
|
assert(!r->closed);
|
||||||
r->closed = true;
|
r->closed = true;
|
||||||
BlockHandle metaindex_block_handle;
|
|
||||||
BlockHandle index_block_handle;
|
BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
|
||||||
|
|
||||||
|
// Write filter block
|
||||||
|
if (ok() && r->filter_block != NULL) {
|
||||||
|
WriteRawBlock(r->filter_block->Finish(), kNoCompression,
|
||||||
|
&filter_block_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write metaindex block
|
||||||
if (ok()) {
|
if (ok()) {
|
||||||
BlockBuilder meta_index_block(&r->options);
|
BlockBuilder meta_index_block(&r->options);
|
||||||
|
if (r->filter_block != NULL) {
|
||||||
|
// Add mapping from "filter.Name" to location of filter data
|
||||||
|
std::string key = "filter.";
|
||||||
|
key.append(r->options.filter_policy->Name());
|
||||||
|
std::string handle_encoding;
|
||||||
|
filter_block_handle.EncodeTo(&handle_encoding);
|
||||||
|
meta_index_block.Add(key, handle_encoding);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(postrelease): Add stats and other meta blocks
|
// TODO(postrelease): Add stats and other meta blocks
|
||||||
WriteBlock(&meta_index_block, &metaindex_block_handle);
|
WriteBlock(&meta_index_block, &metaindex_block_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write index block
|
||||||
if (ok()) {
|
if (ok()) {
|
||||||
if (r->pending_index_entry) {
|
if (r->pending_index_entry) {
|
||||||
r->options.comparator->FindShortSuccessor(&r->last_key);
|
r->options.comparator->FindShortSuccessor(&r->last_key);
|
||||||
|
@ -196,6 +237,8 @@ Status TableBuilder::Finish() {
|
||||||
}
|
}
|
||||||
WriteBlock(&r->index_block, &index_block_handle);
|
WriteBlock(&r->index_block, &index_block_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Write footer
|
||||||
if (ok()) {
|
if (ok()) {
|
||||||
Footer footer;
|
Footer footer;
|
||||||
footer.set_metaindex_handle(metaindex_block_handle);
|
footer.set_metaindex_handle(metaindex_block_handle);
|
||||||
|
|
|
@ -168,8 +168,6 @@ class Constructor {
|
||||||
// Construct the data structure from the data in "data"
|
// Construct the data structure from the data in "data"
|
||||||
virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
|
virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
|
||||||
|
|
||||||
virtual size_t NumBytes() const = 0;
|
|
||||||
|
|
||||||
virtual Iterator* NewIterator() const = 0;
|
virtual Iterator* NewIterator() const = 0;
|
||||||
|
|
||||||
virtual const KVMap& data() { return data_; }
|
virtual const KVMap& data() { return data_; }
|
||||||
|
@ -185,7 +183,6 @@ class BlockConstructor: public Constructor {
|
||||||
explicit BlockConstructor(const Comparator* cmp)
|
explicit BlockConstructor(const Comparator* cmp)
|
||||||
: Constructor(cmp),
|
: Constructor(cmp),
|
||||||
comparator_(cmp),
|
comparator_(cmp),
|
||||||
block_size_(-1),
|
|
||||||
block_(NULL) { }
|
block_(NULL) { }
|
||||||
~BlockConstructor() {
|
~BlockConstructor() {
|
||||||
delete block_;
|
delete block_;
|
||||||
|
@ -201,22 +198,21 @@ class BlockConstructor: public Constructor {
|
||||||
builder.Add(it->first, it->second);
|
builder.Add(it->first, it->second);
|
||||||
}
|
}
|
||||||
// Open the block
|
// Open the block
|
||||||
Slice block_data = builder.Finish();
|
data_ = builder.Finish().ToString();
|
||||||
block_size_ = block_data.size();
|
BlockContents contents;
|
||||||
char* block_data_copy = new char[block_size_];
|
contents.data = data_;
|
||||||
memcpy(block_data_copy, block_data.data(), block_size_);
|
contents.cachable = false;
|
||||||
block_ = new Block(block_data_copy, block_size_, true /* take ownership */);
|
contents.heap_allocated = false;
|
||||||
|
block_ = new Block(contents);
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
virtual size_t NumBytes() const { return block_size_; }
|
|
||||||
|
|
||||||
virtual Iterator* NewIterator() const {
|
virtual Iterator* NewIterator() const {
|
||||||
return block_->NewIterator(comparator_);
|
return block_->NewIterator(comparator_);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Comparator* comparator_;
|
const Comparator* comparator_;
|
||||||
int block_size_;
|
std::string data_;
|
||||||
Block* block_;
|
Block* block_;
|
||||||
|
|
||||||
BlockConstructor();
|
BlockConstructor();
|
||||||
|
@ -253,7 +249,6 @@ class TableConstructor: public Constructor {
|
||||||
table_options.comparator = options.comparator;
|
table_options.comparator = options.comparator;
|
||||||
return Table::Open(table_options, source_, sink.contents().size(), &table_);
|
return Table::Open(table_options, source_, sink.contents().size(), &table_);
|
||||||
}
|
}
|
||||||
virtual size_t NumBytes() const { return source_->Size(); }
|
|
||||||
|
|
||||||
virtual Iterator* NewIterator() const {
|
virtual Iterator* NewIterator() const {
|
||||||
return table_->NewIterator(ReadOptions());
|
return table_->NewIterator(ReadOptions());
|
||||||
|
@ -342,10 +337,6 @@ class MemTableConstructor: public Constructor {
|
||||||
}
|
}
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
virtual size_t NumBytes() const {
|
|
||||||
return memtable_->ApproximateMemoryUsage();
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual Iterator* NewIterator() const {
|
virtual Iterator* NewIterator() const {
|
||||||
return new KeyConvertingIterator(memtable_->NewIterator());
|
return new KeyConvertingIterator(memtable_->NewIterator());
|
||||||
}
|
}
|
||||||
|
@ -379,13 +370,6 @@ class DBConstructor: public Constructor {
|
||||||
}
|
}
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
virtual size_t NumBytes() const {
|
|
||||||
Range r("", "\xff\xff");
|
|
||||||
uint64_t size;
|
|
||||||
db_->GetApproximateSizes(&r, 1, &size);
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual Iterator* NewIterator() const {
|
virtual Iterator* NewIterator() const {
|
||||||
return db_->NewIterator(ReadOptions());
|
return db_->NewIterator(ReadOptions());
|
||||||
}
|
}
|
||||||
|
@ -809,7 +793,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
|
||||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
|
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
|
||||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
|
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
|
||||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
|
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
|
||||||
ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 611000));
|
ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,95 @@
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
|
|
||||||
|
#include "leveldb/slice.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
static uint32_t BloomHash(const Slice& key) {
|
||||||
|
return Hash(key.data(), key.size(), 0xbc9f1d34);
|
||||||
|
}
|
||||||
|
|
||||||
|
class BloomFilterPolicy : public FilterPolicy {
|
||||||
|
private:
|
||||||
|
size_t bits_per_key_;
|
||||||
|
size_t k_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit BloomFilterPolicy(int bits_per_key)
|
||||||
|
: bits_per_key_(bits_per_key) {
|
||||||
|
// We intentionally round down to reduce probing cost a little bit
|
||||||
|
k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
|
||||||
|
if (k_ < 1) k_ = 1;
|
||||||
|
if (k_ > 30) k_ = 30;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual const char* Name() const {
|
||||||
|
return "leveldb.BuiltinBloomFilter";
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
|
||||||
|
// Compute bloom filter size (in both bits and bytes)
|
||||||
|
size_t bits = n * bits_per_key_;
|
||||||
|
|
||||||
|
// For small n, we can see a very high false positive rate. Fix it
|
||||||
|
// by enforcing a minimum bloom filter length.
|
||||||
|
if (bits < 64) bits = 64;
|
||||||
|
|
||||||
|
size_t bytes = (bits + 7) / 8;
|
||||||
|
bits = bytes * 8;
|
||||||
|
|
||||||
|
const size_t init_size = dst->size();
|
||||||
|
dst->resize(init_size + bytes, 0);
|
||||||
|
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
|
||||||
|
char* array = &(*dst)[init_size];
|
||||||
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
// Use double-hashing to generate a sequence of hash values.
|
||||||
|
// See analysis in [Kirsch,Mitzenmacher 2006].
|
||||||
|
uint32_t h = BloomHash(keys[i]);
|
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
for (size_t j = 0; j < k_; j++) {
|
||||||
|
const uint32_t bitpos = h % bits;
|
||||||
|
array[bitpos/8] |= (1 << (bitpos % 8));
|
||||||
|
h += delta;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
|
||||||
|
const size_t len = bloom_filter.size();
|
||||||
|
if (len < 2) return false;
|
||||||
|
|
||||||
|
const char* array = bloom_filter.data();
|
||||||
|
const size_t bits = (len - 1) * 8;
|
||||||
|
|
||||||
|
// Use the encoded k so that we can read filters generated by
|
||||||
|
// bloom filters created using different parameters.
|
||||||
|
const size_t k = array[len-1];
|
||||||
|
if (k > 30) {
|
||||||
|
// Reserved for potentially new encodings for short bloom filters.
|
||||||
|
// Consider it a match.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t h = BloomHash(key);
|
||||||
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
|
for (size_t j = 0; j < k; j++) {
|
||||||
|
const uint32_t bitpos = h % bits;
|
||||||
|
if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
|
||||||
|
h += delta;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a bloom-filter based policy that budgets "bits_per_key" bits of
// filter per key.  The returned object is allocated with new.
const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
  const FilterPolicy* policy = new BloomFilterPolicy(bits_per_key);
  return policy;
}
|
||||||
|
|
||||||
|
} // namespace leveldb
|
|
@ -0,0 +1,159 @@
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
|
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/testharness.h"
|
||||||
|
#include "util/testutil.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
static const int kVerbose = 1;
|
||||||
|
|
||||||
|
// Copies the in-memory bytes of "i" into "buffer" and returns a Slice that
// refers to them.  "buffer" must have room for sizeof(int) bytes and must
// stay alive for as long as the returned Slice is used.
static Slice Key(int i, char* buffer) {
  const size_t width = sizeof(i);
  memcpy(buffer, &i, width);
  return Slice(buffer, width);
}
|
||||||
|
|
||||||
|
class BloomTest {
|
||||||
|
private:
|
||||||
|
const FilterPolicy* policy_;
|
||||||
|
std::string filter_;
|
||||||
|
std::vector<std::string> keys_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
|
||||||
|
|
||||||
|
~BloomTest() {
|
||||||
|
delete policy_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Reset() {
|
||||||
|
keys_.clear();
|
||||||
|
filter_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Add(const Slice& s) {
|
||||||
|
keys_.push_back(s.ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
void Build() {
|
||||||
|
std::vector<Slice> key_slices;
|
||||||
|
for (size_t i = 0; i < keys_.size(); i++) {
|
||||||
|
key_slices.push_back(Slice(keys_[i]));
|
||||||
|
}
|
||||||
|
filter_.clear();
|
||||||
|
policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
|
||||||
|
keys_.clear();
|
||||||
|
if (kVerbose >= 2) DumpFilter();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t FilterSize() const {
|
||||||
|
return filter_.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DumpFilter() {
|
||||||
|
fprintf(stderr, "F(");
|
||||||
|
for (size_t i = 0; i+1 < filter_.size(); i++) {
|
||||||
|
const unsigned int c = static_cast<unsigned int>(filter_[i]);
|
||||||
|
for (int j = 0; j < 8; j++) {
|
||||||
|
fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr, ")\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Matches(const Slice& s) {
|
||||||
|
if (!keys_.empty()) {
|
||||||
|
Build();
|
||||||
|
}
|
||||||
|
return policy_->KeyMayMatch(s, filter_);
|
||||||
|
}
|
||||||
|
|
||||||
|
double FalsePositiveRate() {
|
||||||
|
char buffer[sizeof(int)];
|
||||||
|
int result = 0;
|
||||||
|
for (int i = 0; i < 10000; i++) {
|
||||||
|
if (Matches(Key(i + 1000000000, buffer))) {
|
||||||
|
result++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result / 10000.0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// With no keys added, no filter is built and no probe may match.
TEST(BloomTest, EmptyFilter) {
  const bool hello_matches = Matches("hello");
  ASSERT_TRUE(!hello_matches);
  const bool world_matches = Matches("world");
  ASSERT_TRUE(!world_matches);
}
|
||||||
|
|
||||||
|
// A two-key filter must accept both keys and is expected to reject the two
// unrelated probes below.
TEST(BloomTest, Small) {
  Add("hello");
  Add("world");

  // Keys that were added.
  const bool has_hello = Matches("hello");
  ASSERT_TRUE(has_hello);
  const bool has_world = Matches("world");
  ASSERT_TRUE(has_world);

  // Keys that were never added.
  const bool has_x = Matches("x");
  ASSERT_TRUE(!has_x);
  const bool has_foo = Matches("foo");
  ASSERT_TRUE(!has_foo);
}
|
||||||
|
|
||||||
|
// Returns the next key count to test after "length".  The step grows with
// the magnitude: 1 below 10, 10 below 100, 100 below 1000, 1000 otherwise.
static int NextLength(int length) {
  int step = 1000;
  if (length < 10) {
    step = 1;
  } else if (length < 100) {
    step = 10;
  } else if (length < 1000) {
    step = 100;
  }
  return length + step;
}
|
||||||
|
|
||||||
|
// Builds filters over 1..10000 keys (stepping via NextLength) and checks,
// for each one, the filter size, that every added key matches, and that the
// measured false positive rate stays within bounds.
TEST(BloomTest, VaryingLengths) {
  char buffer[sizeof(int)];

  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;

  for (int length = 1; length <= 10000; length = NextLength(length)) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();

    // FilterSize() is a size_t; cast the bound so the comparison does not
    // mix signed and unsigned operands.
    ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40))
        << length;

    // All added keys must match
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }

    // Check false positive rate
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate*100.0, length, static_cast<int>(FilterSize()));
    }
    ASSERT_LE(rate, 0.02);   // Must not be over 2%
    if (rate > 0.0125) {
      mediocre_filters++;    // Allowed, but not too often
    } else {
      good_filters++;
    }
  }
  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n",
            good_filters, mediocre_filters);
  }
  ASSERT_LE(mediocre_filters, good_filters/5);
}
|
||||||
|
|
||||||
|
// Different bits-per-byte
|
||||||
|
|
||||||
|
} // namespace leveldb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
return leveldb::test::RunAllTests();
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "leveldb/filter_policy.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
// Out-of-line destructor definition; there is nothing to release here.
FilterPolicy::~FilterPolicy() {
}
|
||||||
|
|
||||||
|
} // namespace leveldb
|
|
@ -21,7 +21,8 @@ Options::Options()
|
||||||
block_cache(NULL),
|
block_cache(NULL),
|
||||||
block_size(4096),
|
block_size(4096),
|
||||||
block_restart_interval(16),
|
block_restart_interval(16),
|
||||||
compression(kSnappyCompression) {
|
compression(kSnappyCompression),
|
||||||
|
filter_policy(NULL) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue