mirror of https://github.com/facebook/rocksdb.git
reverting disastrous MOE commit, returning to r21
git-svn-id: https://leveldb.googlecode.com/svn/trunk@23 62dab493-f737-651d-591e-8d6aee1b9529
This commit is contained in:
parent
b743906eea
commit
69c6d38342
|
@ -27,6 +27,7 @@ LIBOBJECTS = \
|
||||||
./db/version_set.o \
|
./db/version_set.o \
|
||||||
./db/write_batch.o \
|
./db/write_batch.o \
|
||||||
./port/port_posix.o \
|
./port/port_posix.o \
|
||||||
|
./port/sha1_portable.o \
|
||||||
./table/block.o \
|
./table/block.o \
|
||||||
./table/block_builder.o \
|
./table/block_builder.o \
|
||||||
./table/format.o \
|
./table/format.o \
|
||||||
|
@ -62,6 +63,7 @@ TESTS = \
|
||||||
env_test \
|
env_test \
|
||||||
filename_test \
|
filename_test \
|
||||||
log_test \
|
log_test \
|
||||||
|
sha1_test \
|
||||||
skiplist_test \
|
skiplist_test \
|
||||||
table_test \
|
table_test \
|
||||||
version_edit_test \
|
version_edit_test \
|
||||||
|
@ -113,6 +115,9 @@ log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CC) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
$(CC) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||||
|
|
||||||
|
sha1_test: port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
|
$(CC) $(LDFLAGS) port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||||
|
|
||||||
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CC) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
$(CC) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||||
|
|
|
@ -5,7 +5,7 @@ The code under this directory implements a system for maintaining a
|
||||||
persistent key/value store.
|
persistent key/value store.
|
||||||
|
|
||||||
See doc/index.html for more explanation.
|
See doc/index.html for more explanation.
|
||||||
See doc/impl.html for a brief overview of the implementation.
|
See doc/db_layout.txt for a brief overview of the implementation.
|
||||||
|
|
||||||
The public interface is in include/*.h. Callers should not include or
|
The public interface is in include/*.h. Callers should not include or
|
||||||
rely on the details of any other header files in this package. Those
|
rely on the details of any other header files in this package. Those
|
|
@ -8,7 +8,7 @@ db
|
||||||
object stores, etc. can be done in the background anyway, so
|
object stores, etc. can be done in the background anyway, so
|
||||||
probably not that important.
|
probably not that important.
|
||||||
|
|
||||||
api changes:
|
api changes?
|
||||||
- Make it wrappable
|
- Efficient large value reading and writing
|
||||||
|
|
||||||
Faster Get implementation
|
Faster Get implementation
|
|
@ -38,6 +38,15 @@ Status BuildTable(const std::string& dbname,
|
||||||
for (; iter->Valid(); iter->Next()) {
|
for (; iter->Valid(); iter->Next()) {
|
||||||
Slice key = iter->key();
|
Slice key = iter->key();
|
||||||
meta->largest.DecodeFrom(key);
|
meta->largest.DecodeFrom(key);
|
||||||
|
if (ExtractValueType(key) == kTypeLargeValueRef) {
|
||||||
|
if (iter->value().size() != LargeValueRef::ByteSize()) {
|
||||||
|
s = Status::Corruption("invalid indirect reference hash value (L0)");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
edit->AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
|
||||||
|
meta->number,
|
||||||
|
iter->key());
|
||||||
|
}
|
||||||
builder->Add(key, iter->value());
|
builder->Add(key, iter->value());
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,9 +20,9 @@ class VersionEdit;
|
||||||
// Build a Table file from the contents of *iter. The generated file
|
// Build a Table file from the contents of *iter. The generated file
|
||||||
// will be named according to meta->number. On success, the rest of
|
// will be named according to meta->number. On success, the rest of
|
||||||
// *meta will be filled with metadata about the generated table, and
|
// *meta will be filled with metadata about the generated table, and
|
||||||
// the file information will be added to *edit. If no data is present
|
// large value refs and the added file information will be added to
|
||||||
// in *iter, meta->file_size will be set to zero, and no Table file
|
// *edit. If no data is present in *iter, meta->file_size will be set
|
||||||
// will be produced.
|
// to zero, and no Table file will be produced.
|
||||||
extern Status BuildTable(const std::string& dbname,
|
extern Status BuildTable(const std::string& dbname,
|
||||||
Env* env,
|
Env* env,
|
||||||
const Options& options,
|
const Options& options,
|
|
@ -121,10 +121,11 @@ class CorruptionTest {
|
||||||
std::vector<std::string> filenames;
|
std::vector<std::string> filenames;
|
||||||
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
|
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
|
LargeValueRef large_ref;
|
||||||
FileType type;
|
FileType type;
|
||||||
std::vector<std::string> candidates;
|
std::vector<std::string> candidates;
|
||||||
for (int i = 0; i < filenames.size(); i++) {
|
for (int i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &type) &&
|
if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
|
||||||
type == filetype) {
|
type == filetype) {
|
||||||
candidates.push_back(dbname_ + "/" + filenames[i]);
|
candidates.push_back(dbname_ + "/" + filenames[i]);
|
||||||
}
|
}
|
||||||
|
@ -275,6 +276,29 @@ TEST(CorruptionTest, SequenceNumberRecovery) {
|
||||||
ASSERT_EQ("v6", v);
|
ASSERT_EQ("v6", v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(CorruptionTest, LargeValueRecovery) {
|
||||||
|
Options options;
|
||||||
|
options.large_value_threshold = 10000;
|
||||||
|
Reopen(&options);
|
||||||
|
|
||||||
|
Random rnd(301);
|
||||||
|
std::string big;
|
||||||
|
ASSERT_OK(db_->Put(WriteOptions(),
|
||||||
|
"foo", test::RandomString(&rnd, 100000, &big)));
|
||||||
|
std::string v;
|
||||||
|
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
||||||
|
ASSERT_EQ(big, v);
|
||||||
|
|
||||||
|
RepairDB();
|
||||||
|
Reopen();
|
||||||
|
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
||||||
|
ASSERT_EQ(big, v);
|
||||||
|
|
||||||
|
Reopen();
|
||||||
|
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
||||||
|
ASSERT_EQ(big, v);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(CorruptionTest, CorruptedDescriptor) {
|
TEST(CorruptionTest, CorruptedDescriptor) {
|
||||||
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
|
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
|
||||||
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
|
@ -28,6 +28,7 @@
|
||||||
// readreverse -- read N values in reverse order
|
// readreverse -- read N values in reverse order
|
||||||
// readrandom -- read N values in random order
|
// readrandom -- read N values in random order
|
||||||
// crc32c -- repeated crc32c of 4K of data
|
// crc32c -- repeated crc32c of 4K of data
|
||||||
|
// sha1 -- repeated SHA1 computation over 4K of data
|
||||||
// Meta operations:
|
// Meta operations:
|
||||||
// compact -- Compact the entire DB
|
// compact -- Compact the entire DB
|
||||||
// stats -- Print DB stats
|
// stats -- Print DB stats
|
||||||
|
@ -47,6 +48,7 @@ static const char* FLAGS_benchmarks =
|
||||||
"readreverse,"
|
"readreverse,"
|
||||||
"fill100K,"
|
"fill100K,"
|
||||||
"crc32c,"
|
"crc32c,"
|
||||||
|
"sha1,"
|
||||||
"snappycomp,"
|
"snappycomp,"
|
||||||
"snappyuncomp,"
|
"snappyuncomp,"
|
||||||
;
|
;
|
||||||
|
@ -364,6 +366,8 @@ class Benchmark {
|
||||||
Compact();
|
Compact();
|
||||||
} else if (name == Slice("crc32c")) {
|
} else if (name == Slice("crc32c")) {
|
||||||
Crc32c(4096, "(4K per op)");
|
Crc32c(4096, "(4K per op)");
|
||||||
|
} else if (name == Slice("sha1")) {
|
||||||
|
SHA1(4096, "(4K per op)");
|
||||||
} else if (name == Slice("snappycomp")) {
|
} else if (name == Slice("snappycomp")) {
|
||||||
SnappyCompress();
|
SnappyCompress();
|
||||||
} else if (name == Slice("snappyuncomp")) {
|
} else if (name == Slice("snappyuncomp")) {
|
||||||
|
@ -402,6 +406,24 @@ class Benchmark {
|
||||||
message_ = label;
|
message_ = label;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SHA1(int size, const char* label) {
|
||||||
|
// SHA1 about 100MB of data total
|
||||||
|
std::string data(size, 'x');
|
||||||
|
int64_t bytes = 0;
|
||||||
|
char sha1[20];
|
||||||
|
while (bytes < 100 * 1048576) {
|
||||||
|
port::SHA1_Hash(data.data(), size, sha1);
|
||||||
|
FinishedSingleOp();
|
||||||
|
bytes += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print so result is not dead
|
||||||
|
fprintf(stderr, "... sha1=%02x...\r", static_cast<unsigned int>(sha1[0]));
|
||||||
|
|
||||||
|
bytes_ = bytes;
|
||||||
|
message_ = label;
|
||||||
|
}
|
||||||
|
|
||||||
void SnappyCompress() {
|
void SnappyCompress() {
|
||||||
Slice input = gen_.Generate(Options().block_size);
|
Slice input = gen_.Generate(Options().block_size);
|
||||||
int64_t bytes = 0;
|
int64_t bytes = 0;
|
|
@ -81,8 +81,8 @@ class NullWritableFile : public WritableFile {
|
||||||
// Fix user-supplied options to be reasonable
|
// Fix user-supplied options to be reasonable
|
||||||
template <class T,class V>
|
template <class T,class V>
|
||||||
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
||||||
if (static_cast<V>(*ptr) > maxvalue) *ptr = maxvalue;
|
if (*ptr > maxvalue) *ptr = maxvalue;
|
||||||
if (static_cast<V>(*ptr) < minvalue) *ptr = minvalue;
|
if (*ptr < minvalue) *ptr = minvalue;
|
||||||
}
|
}
|
||||||
Options SanitizeOptions(const std::string& dbname,
|
Options SanitizeOptions(const std::string& dbname,
|
||||||
const InternalKeyComparator* icmp,
|
const InternalKeyComparator* icmp,
|
||||||
|
@ -91,6 +91,7 @@ Options SanitizeOptions(const std::string& dbname,
|
||||||
result.comparator = icmp;
|
result.comparator = icmp;
|
||||||
ClipToRange(&result.max_open_files, 20, 50000);
|
ClipToRange(&result.max_open_files, 20, 50000);
|
||||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||||
|
ClipToRange(&result.large_value_threshold, 16<<10, 1<<30);
|
||||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||||
if (result.info_log == NULL) {
|
if (result.info_log == NULL) {
|
||||||
// Open a log file in the same directory as the db
|
// Open a log file in the same directory as the db
|
||||||
|
@ -212,12 +213,15 @@ void DBImpl::DeleteObsoleteFiles() {
|
||||||
std::set<uint64_t> live = pending_outputs_;
|
std::set<uint64_t> live = pending_outputs_;
|
||||||
versions_->AddLiveFiles(&live);
|
versions_->AddLiveFiles(&live);
|
||||||
|
|
||||||
|
versions_->CleanupLargeValueRefs(live);
|
||||||
|
|
||||||
std::vector<std::string> filenames;
|
std::vector<std::string> filenames;
|
||||||
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
|
LargeValueRef large_ref;
|
||||||
FileType type;
|
FileType type;
|
||||||
for (size_t i = 0; i < filenames.size(); i++) {
|
for (int i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &type)) {
|
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
||||||
bool keep = true;
|
bool keep = true;
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case kLogFile:
|
case kLogFile:
|
||||||
|
@ -237,6 +241,9 @@ void DBImpl::DeleteObsoleteFiles() {
|
||||||
// be recorded in pending_outputs_, which is inserted into "live"
|
// be recorded in pending_outputs_, which is inserted into "live"
|
||||||
keep = (live.find(number) != live.end());
|
keep = (live.find(number) != live.end());
|
||||||
break;
|
break;
|
||||||
|
case kLargeValueFile:
|
||||||
|
keep = versions_->LargeValueIsLive(large_ref);
|
||||||
|
break;
|
||||||
case kCurrentFile:
|
case kCurrentFile:
|
||||||
case kDBLockFile:
|
case kDBLockFile:
|
||||||
case kInfoLogFile:
|
case kInfoLogFile:
|
||||||
|
@ -592,7 +599,7 @@ void DBImpl::CleanupCompaction(CompactionState* compact) {
|
||||||
assert(compact->outfile == NULL);
|
assert(compact->outfile == NULL);
|
||||||
}
|
}
|
||||||
delete compact->outfile;
|
delete compact->outfile;
|
||||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||||
const CompactionState::Output& out = compact->outputs[i];
|
const CompactionState::Output& out = compact->outputs[i];
|
||||||
pending_outputs_.erase(out.number);
|
pending_outputs_.erase(out.number);
|
||||||
}
|
}
|
||||||
|
@ -688,7 +695,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
||||||
// Add compaction outputs
|
// Add compaction outputs
|
||||||
compact->compaction->AddInputDeletions(compact->compaction->edit());
|
compact->compaction->AddInputDeletions(compact->compaction->edit());
|
||||||
const int level = compact->compaction->level();
|
const int level = compact->compaction->level();
|
||||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||||
const CompactionState::Output& out = compact->outputs[i];
|
const CompactionState::Output& out = compact->outputs[i];
|
||||||
compact->compaction->edit()->AddFile(
|
compact->compaction->edit()->AddFile(
|
||||||
level + 1,
|
level + 1,
|
||||||
|
@ -703,7 +710,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
||||||
DeleteObsoleteFiles();
|
DeleteObsoleteFiles();
|
||||||
} else {
|
} else {
|
||||||
// Discard any files we may have created during this failed compaction
|
// Discard any files we may have created during this failed compaction
|
||||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||||
env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number));
|
env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -804,7 +811,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
||||||
" Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
|
" Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
|
||||||
"%d smallest_snapshot: %d",
|
"%d smallest_snapshot: %d",
|
||||||
ikey.user_key.ToString().c_str(),
|
ikey.user_key.ToString().c_str(),
|
||||||
(int)ikey.sequence, ikey.type, kTypeValue, drop,
|
(int)ikey.sequence, ikey.type, kTypeLargeValueRef, drop,
|
||||||
compact->compaction->IsBaseLevelForKey(ikey.user_key),
|
compact->compaction->IsBaseLevelForKey(ikey.user_key),
|
||||||
(int)last_sequence_for_key, (int)compact->smallest_snapshot);
|
(int)last_sequence_for_key, (int)compact->smallest_snapshot);
|
||||||
#endif
|
#endif
|
||||||
|
@ -821,7 +828,26 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
||||||
compact->current_output()->smallest.DecodeFrom(key);
|
compact->current_output()->smallest.DecodeFrom(key);
|
||||||
}
|
}
|
||||||
compact->current_output()->largest.DecodeFrom(key);
|
compact->current_output()->largest.DecodeFrom(key);
|
||||||
|
|
||||||
|
if (ikey.type == kTypeLargeValueRef) {
|
||||||
|
if (input->value().size() != LargeValueRef::ByteSize()) {
|
||||||
|
if (options_.paranoid_checks) {
|
||||||
|
status = Status::Corruption("invalid large value ref");
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
Log(env_, options_.info_log,
|
||||||
|
"compaction found invalid large value ref");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
compact->compaction->edit()->AddLargeValueRef(
|
||||||
|
LargeValueRef::FromRef(input->value()),
|
||||||
|
compact->current_output()->number,
|
||||||
|
input->key());
|
||||||
compact->builder->Add(key, input->value());
|
compact->builder->Add(key, input->value());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
compact->builder->Add(key, input->value());
|
||||||
|
}
|
||||||
|
|
||||||
// Close output file if it is big enough
|
// Close output file if it is big enough
|
||||||
if (compact->builder->FileSize() >=
|
if (compact->builder->FileSize() >=
|
||||||
|
@ -855,7 +881,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
||||||
stats.bytes_read += compact->compaction->input(which, i)->file_size;
|
stats.bytes_read += compact->compaction->input(which, i)->file_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||||
stats.bytes_written += compact->outputs[i].file_size;
|
stats.bytes_written += compact->outputs[i].file_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -959,27 +985,40 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
||||||
|
|
||||||
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
||||||
Status status;
|
Status status;
|
||||||
|
|
||||||
|
WriteBatch* final = NULL;
|
||||||
|
{
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
status = MakeRoomForWrite(false); // May temporarily release lock and wait
|
status = MakeRoomForWrite(false); // May temporarily release lock and wait
|
||||||
|
|
||||||
uint64_t last_sequence = versions_->LastSequence();
|
uint64_t last_sequence = versions_->LastSequence();
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
WriteBatchInternal::SetSequence(updates, last_sequence + 1);
|
status = HandleLargeValues(last_sequence + 1, updates, &final);
|
||||||
last_sequence += WriteBatchInternal::Count(updates);
|
}
|
||||||
|
if (status.ok()) {
|
||||||
|
WriteBatchInternal::SetSequence(final, last_sequence + 1);
|
||||||
|
last_sequence += WriteBatchInternal::Count(final);
|
||||||
versions_->SetLastSequence(last_sequence);
|
versions_->SetLastSequence(last_sequence);
|
||||||
|
|
||||||
// Add to log and apply to memtable
|
// Add to log and apply to memtable
|
||||||
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
|
status = log_->AddRecord(WriteBatchInternal::Contents(final));
|
||||||
if (status.ok() && options.sync) {
|
if (status.ok() && options.sync) {
|
||||||
status = logfile_->Sync();
|
status = logfile_->Sync();
|
||||||
}
|
}
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
status = WriteBatchInternal::InsertInto(updates, mem_);
|
status = WriteBatchInternal::InsertInto(final, mem_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options.post_write_snapshot != NULL) {
|
if (options.post_write_snapshot != NULL) {
|
||||||
*options.post_write_snapshot =
|
*options.post_write_snapshot =
|
||||||
status.ok() ? snapshots_.New(last_sequence) : NULL;
|
status.ok() ? snapshots_.New(last_sequence) : NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (final != updates) {
|
||||||
|
delete final;
|
||||||
|
}
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1031,6 +1070,124 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DBImpl::HasLargeValues(const WriteBatch& batch) const {
|
||||||
|
if (WriteBatchInternal::ByteSize(&batch) >= options_.large_value_threshold) {
|
||||||
|
for (WriteBatchInternal::Iterator it(batch); !it.Done(); it.Next()) {
|
||||||
|
if (it.op() == kTypeValue &&
|
||||||
|
it.value().size() >= options_.large_value_threshold) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given "raw_value", determines the appropriate compression format to use
|
||||||
|
// and stores the data that should be written to the large value file in
|
||||||
|
// "*file_bytes", and sets "*ref" to the appropriate large value reference.
|
||||||
|
// May use "*scratch" as backing store for "*file_bytes".
|
||||||
|
void DBImpl::MaybeCompressLargeValue(
|
||||||
|
const Slice& raw_value,
|
||||||
|
Slice* file_bytes,
|
||||||
|
std::string* scratch,
|
||||||
|
LargeValueRef* ref) {
|
||||||
|
switch (options_.compression) {
|
||||||
|
case kSnappyCompression: {
|
||||||
|
if (port::Snappy_Compress(raw_value.data(), raw_value.size(), scratch) &&
|
||||||
|
(scratch->size() < (raw_value.size() / 8) * 7)) {
|
||||||
|
*file_bytes = *scratch;
|
||||||
|
*ref = LargeValueRef::Make(raw_value, kSnappyCompression);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Less than 12.5% compression: just leave as uncompressed data
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case kNoCompression:
|
||||||
|
// Use default code outside of switch
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Store as uncompressed data
|
||||||
|
*file_bytes = raw_value;
|
||||||
|
*ref = LargeValueRef::Make(raw_value, kNoCompression);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status DBImpl::HandleLargeValues(SequenceNumber assigned_seq,
|
||||||
|
WriteBatch* updates,
|
||||||
|
WriteBatch** final) {
|
||||||
|
if (!HasLargeValues(*updates)) {
|
||||||
|
// Fast path: no large values found
|
||||||
|
*final = updates;
|
||||||
|
} else {
|
||||||
|
// Copy *updates to a new WriteBatch, replacing the references to
|
||||||
|
*final = new WriteBatch;
|
||||||
|
SequenceNumber seq = assigned_seq;
|
||||||
|
for (WriteBatchInternal::Iterator it(*updates); !it.Done(); it.Next()) {
|
||||||
|
switch (it.op()) {
|
||||||
|
case kTypeValue:
|
||||||
|
if (it.value().size() < options_.large_value_threshold) {
|
||||||
|
(*final)->Put(it.key(), it.value());
|
||||||
|
} else {
|
||||||
|
std::string scratch;
|
||||||
|
Slice file_bytes;
|
||||||
|
LargeValueRef large_ref;
|
||||||
|
MaybeCompressLargeValue(
|
||||||
|
it.value(), &file_bytes, &scratch, &large_ref);
|
||||||
|
InternalKey ikey(it.key(), seq, kTypeLargeValueRef);
|
||||||
|
if (versions_->RegisterLargeValueRef(
|
||||||
|
large_ref, versions_->LogNumber(), ikey)) {
|
||||||
|
// TODO(opt): avoid holding the lock here (but be careful about
|
||||||
|
// another thread doing a Write and switching logs or
|
||||||
|
// having us get a different "assigned_seq" value).
|
||||||
|
|
||||||
|
uint64_t tmp_number = versions_->NewFileNumber();
|
||||||
|
pending_outputs_.insert(tmp_number);
|
||||||
|
std::string tmp = TempFileName(dbname_, tmp_number);
|
||||||
|
WritableFile* file;
|
||||||
|
Status s = env_->NewWritableFile(tmp, &file);
|
||||||
|
if (!s.ok()) {
|
||||||
|
return s; // Caller will delete *final
|
||||||
|
}
|
||||||
|
|
||||||
|
file->Append(file_bytes);
|
||||||
|
|
||||||
|
s = file->Close();
|
||||||
|
delete file;
|
||||||
|
|
||||||
|
if (s.ok()) {
|
||||||
|
const std::string fname =
|
||||||
|
LargeValueFileName(dbname_, large_ref);
|
||||||
|
s = env_->RenameFile(tmp, fname);
|
||||||
|
} else {
|
||||||
|
Log(env_, options_.info_log, "Write large value: %s",
|
||||||
|
s.ToString().c_str());
|
||||||
|
}
|
||||||
|
pending_outputs_.erase(tmp_number);
|
||||||
|
|
||||||
|
if (!s.ok()) {
|
||||||
|
env_->DeleteFile(tmp); // Cleanup; intentionally ignoring error
|
||||||
|
return s; // Caller will delete *final
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put an indirect reference in the write batch in place
|
||||||
|
// of large value
|
||||||
|
WriteBatchInternal::PutLargeValueRef(*final, it.key(), large_ref);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case kTypeLargeValueRef:
|
||||||
|
return Status::Corruption("Corrupted write batch");
|
||||||
|
break;
|
||||||
|
case kTypeDeletion:
|
||||||
|
(*final)->Delete(it.key());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
seq = seq + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||||
value->clear();
|
value->clear();
|
||||||
|
|
||||||
|
@ -1048,8 +1205,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
char buf[100];
|
char buf[100];
|
||||||
snprintf(buf, sizeof(buf), "%d",
|
snprintf(buf, sizeof(buf), "%d", versions_->NumLevelFiles(level));
|
||||||
versions_->NumLevelFiles(static_cast<int>(level)));
|
|
||||||
*value = buf;
|
*value = buf;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1169,9 +1325,10 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
|
||||||
Status result = env->LockFile(LockFileName(dbname), &lock);
|
Status result = env->LockFile(LockFileName(dbname), &lock);
|
||||||
if (result.ok()) {
|
if (result.ok()) {
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
|
LargeValueRef large_ref;
|
||||||
FileType type;
|
FileType type;
|
||||||
for (size_t i = 0; i < filenames.size(); i++) {
|
for (int i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &type)) {
|
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
||||||
Status del = env->DeleteFile(dbname + "/" + filenames[i]);
|
Status del = env->DeleteFile(dbname + "/" + filenames[i]);
|
||||||
if (result.ok() && !del.ok()) {
|
if (result.ok() && !del.ok()) {
|
||||||
result = del;
|
result = del;
|
|
@ -92,6 +92,29 @@ class DBImpl : public DB {
|
||||||
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
|
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
|
||||||
|
|
||||||
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
|
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
|
||||||
|
bool HasLargeValues(const WriteBatch& batch) const;
|
||||||
|
|
||||||
|
// Process data in "*updates" and return a status. "assigned_seq"
|
||||||
|
// is the sequence number assigned to the first mod in "*updates".
|
||||||
|
// If no large values are encountered, "*final" is set to "updates".
|
||||||
|
// If large values were encountered, registers the references of the
|
||||||
|
// large values with the VersionSet, writes the large values to
|
||||||
|
// files (if appropriate), and allocates a new WriteBatch with the
|
||||||
|
// large values replaced with indirect references and stores a
|
||||||
|
// pointer to the new WriteBatch in *final. If *final != updates on
|
||||||
|
// return, then the client should delete *final when no longer
|
||||||
|
// needed. Returns OK on success, and an appropriate error
|
||||||
|
// otherwise.
|
||||||
|
Status HandleLargeValues(SequenceNumber assigned_seq,
|
||||||
|
WriteBatch* updates,
|
||||||
|
WriteBatch** final);
|
||||||
|
|
||||||
|
// Helper routine for HandleLargeValues
|
||||||
|
void MaybeCompressLargeValue(
|
||||||
|
const Slice& raw_value,
|
||||||
|
Slice* file_bytes,
|
||||||
|
std::string* scratch,
|
||||||
|
LargeValueRef* ref);
|
||||||
|
|
||||||
struct CompactionState;
|
struct CompactionState;
|
||||||
|
|
|
@ -53,11 +53,13 @@ class DBIter: public Iterator {
|
||||||
user_comparator_(cmp),
|
user_comparator_(cmp),
|
||||||
iter_(iter),
|
iter_(iter),
|
||||||
sequence_(s),
|
sequence_(s),
|
||||||
|
large_(NULL),
|
||||||
direction_(kForward),
|
direction_(kForward),
|
||||||
valid_(false) {
|
valid_(false) {
|
||||||
}
|
}
|
||||||
virtual ~DBIter() {
|
virtual ~DBIter() {
|
||||||
delete iter_;
|
delete iter_;
|
||||||
|
delete large_;
|
||||||
}
|
}
|
||||||
virtual bool Valid() const { return valid_; }
|
virtual bool Valid() const { return valid_; }
|
||||||
virtual Slice key() const {
|
virtual Slice key() const {
|
||||||
|
@ -66,10 +68,20 @@ class DBIter: public Iterator {
|
||||||
}
|
}
|
||||||
virtual Slice value() const {
|
virtual Slice value() const {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
return (direction_ == kForward) ? iter_->value() : saved_value_;
|
Slice raw_value = (direction_ == kForward) ? iter_->value() : saved_value_;
|
||||||
|
if (large_ == NULL) {
|
||||||
|
return raw_value;
|
||||||
|
} else {
|
||||||
|
MutexLock l(&large_->mutex);
|
||||||
|
if (!large_->produced) {
|
||||||
|
ReadIndirectValue(raw_value);
|
||||||
|
}
|
||||||
|
return large_->value;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
virtual Status status() const {
|
virtual Status status() const {
|
||||||
if (status_.ok()) {
|
if (status_.ok()) {
|
||||||
|
if (large_ != NULL && !large_->status.ok()) return large_->status;
|
||||||
return iter_->status();
|
return iter_->status();
|
||||||
} else {
|
} else {
|
||||||
return status_;
|
return status_;
|
||||||
|
@ -83,14 +95,29 @@ class DBIter: public Iterator {
|
||||||
virtual void SeekToLast();
|
virtual void SeekToLast();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
struct Large {
|
||||||
|
port::Mutex mutex;
|
||||||
|
std::string value;
|
||||||
|
bool produced;
|
||||||
|
Status status;
|
||||||
|
};
|
||||||
|
|
||||||
void FindNextUserEntry(bool skipping, std::string* skip);
|
void FindNextUserEntry(bool skipping, std::string* skip);
|
||||||
void FindPrevUserEntry();
|
void FindPrevUserEntry();
|
||||||
bool ParseKey(ParsedInternalKey* key);
|
bool ParseKey(ParsedInternalKey* key);
|
||||||
|
void ReadIndirectValue(Slice ref) const;
|
||||||
|
|
||||||
inline void SaveKey(const Slice& k, std::string* dst) {
|
inline void SaveKey(const Slice& k, std::string* dst) {
|
||||||
dst->assign(k.data(), k.size());
|
dst->assign(k.data(), k.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void ForgetLargeValue() {
|
||||||
|
if (large_ != NULL) {
|
||||||
|
delete large_;
|
||||||
|
large_ = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline void ClearSavedValue() {
|
inline void ClearSavedValue() {
|
||||||
if (saved_value_.capacity() > 1048576) {
|
if (saved_value_.capacity() > 1048576) {
|
||||||
std::string empty;
|
std::string empty;
|
||||||
|
@ -109,6 +136,7 @@ class DBIter: public Iterator {
|
||||||
Status status_;
|
Status status_;
|
||||||
std::string saved_key_; // == current key when direction_==kReverse
|
std::string saved_key_; // == current key when direction_==kReverse
|
||||||
std::string saved_value_; // == current raw value when direction_==kReverse
|
std::string saved_value_; // == current raw value when direction_==kReverse
|
||||||
|
Large* large_; // Non-NULL if value is an indirect reference
|
||||||
Direction direction_;
|
Direction direction_;
|
||||||
bool valid_;
|
bool valid_;
|
||||||
|
|
||||||
|
@ -128,6 +156,7 @@ inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
|
||||||
|
|
||||||
void DBIter::Next() {
|
void DBIter::Next() {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
|
ForgetLargeValue();
|
||||||
|
|
||||||
if (direction_ == kReverse) { // Switch directions?
|
if (direction_ == kReverse) { // Switch directions?
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
|
@ -156,6 +185,7 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
||||||
// Loop until we hit an acceptable entry to yield
|
// Loop until we hit an acceptable entry to yield
|
||||||
assert(iter_->Valid());
|
assert(iter_->Valid());
|
||||||
assert(direction_ == kForward);
|
assert(direction_ == kForward);
|
||||||
|
assert(large_ == NULL);
|
||||||
do {
|
do {
|
||||||
ParsedInternalKey ikey;
|
ParsedInternalKey ikey;
|
||||||
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
|
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
|
||||||
|
@ -167,12 +197,17 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
||||||
skipping = true;
|
skipping = true;
|
||||||
break;
|
break;
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
|
case kTypeLargeValueRef:
|
||||||
if (skipping &&
|
if (skipping &&
|
||||||
user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
|
user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
|
||||||
// Entry hidden
|
// Entry hidden
|
||||||
} else {
|
} else {
|
||||||
valid_ = true;
|
valid_ = true;
|
||||||
saved_key_.clear();
|
saved_key_.clear();
|
||||||
|
if (ikey.type == kTypeLargeValueRef) {
|
||||||
|
large_ = new Large;
|
||||||
|
large_->produced = false;
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -186,6 +221,7 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
||||||
|
|
||||||
void DBIter::Prev() {
|
void DBIter::Prev() {
|
||||||
assert(valid_);
|
assert(valid_);
|
||||||
|
ForgetLargeValue();
|
||||||
|
|
||||||
if (direction_ == kForward) { // Switch directions?
|
if (direction_ == kForward) { // Switch directions?
|
||||||
// iter_ is pointing at the current entry. Scan backwards until
|
// iter_ is pointing at the current entry. Scan backwards until
|
||||||
|
@ -213,6 +249,7 @@ void DBIter::Prev() {
|
||||||
|
|
||||||
void DBIter::FindPrevUserEntry() {
|
void DBIter::FindPrevUserEntry() {
|
||||||
assert(direction_ == kReverse);
|
assert(direction_ == kReverse);
|
||||||
|
assert(large_ == NULL);
|
||||||
|
|
||||||
ValueType value_type = kTypeDeletion;
|
ValueType value_type = kTypeDeletion;
|
||||||
if (iter_->Valid()) {
|
if (iter_->Valid()) {
|
||||||
|
@ -249,11 +286,16 @@ void DBIter::FindPrevUserEntry() {
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
} else {
|
} else {
|
||||||
valid_ = true;
|
valid_ = true;
|
||||||
|
if (value_type == kTypeLargeValueRef) {
|
||||||
|
large_ = new Large;
|
||||||
|
large_->produced = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBIter::Seek(const Slice& target) {
|
void DBIter::Seek(const Slice& target) {
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
|
ForgetLargeValue();
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
saved_key_.clear();
|
saved_key_.clear();
|
||||||
AppendInternalKey(
|
AppendInternalKey(
|
||||||
|
@ -268,6 +310,7 @@ void DBIter::Seek(const Slice& target) {
|
||||||
|
|
||||||
void DBIter::SeekToFirst() {
|
void DBIter::SeekToFirst() {
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
|
ForgetLargeValue();
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
iter_->SeekToFirst();
|
iter_->SeekToFirst();
|
||||||
if (iter_->Valid()) {
|
if (iter_->Valid()) {
|
||||||
|
@ -279,11 +322,67 @@ void DBIter::SeekToFirst() {
|
||||||
|
|
||||||
void DBIter::SeekToLast() {
|
void DBIter::SeekToLast() {
|
||||||
direction_ = kReverse;
|
direction_ = kReverse;
|
||||||
|
ForgetLargeValue();
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
iter_->SeekToLast();
|
iter_->SeekToLast();
|
||||||
FindPrevUserEntry();
|
FindPrevUserEntry();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DBIter::ReadIndirectValue(Slice ref) const {
|
||||||
|
assert(!large_->produced);
|
||||||
|
large_->produced = true;
|
||||||
|
LargeValueRef large_ref;
|
||||||
|
if (ref.size() != LargeValueRef::ByteSize()) {
|
||||||
|
large_->status = Status::Corruption("malformed large value reference");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
memcpy(large_ref.data, ref.data(), LargeValueRef::ByteSize());
|
||||||
|
std::string fname = LargeValueFileName(*dbname_, large_ref);
|
||||||
|
RandomAccessFile* file;
|
||||||
|
Status s = env_->NewRandomAccessFile(fname, &file);
|
||||||
|
uint64_t file_size = 0;
|
||||||
|
if (s.ok()) {
|
||||||
|
s = env_->GetFileSize(fname, &file_size);
|
||||||
|
}
|
||||||
|
if (s.ok()) {
|
||||||
|
uint64_t value_size = large_ref.ValueSize();
|
||||||
|
large_->value.resize(value_size);
|
||||||
|
Slice result;
|
||||||
|
s = file->Read(0, file_size, &result,
|
||||||
|
const_cast<char*>(large_->value.data()));
|
||||||
|
if (s.ok()) {
|
||||||
|
if (result.size() == file_size) {
|
||||||
|
switch (large_ref.compression_type()) {
|
||||||
|
case kNoCompression: {
|
||||||
|
if (result.data() != large_->value.data()) {
|
||||||
|
large_->value.assign(result.data(), result.size());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case kSnappyCompression: {
|
||||||
|
std::string uncompressed;
|
||||||
|
if (port::Snappy_Uncompress(result.data(), result.size(),
|
||||||
|
&uncompressed) &&
|
||||||
|
uncompressed.size() == large_ref.ValueSize()) {
|
||||||
|
swap(uncompressed, large_->value);
|
||||||
|
} else {
|
||||||
|
s = Status::Corruption(
|
||||||
|
"Unable to read entire compressed large value file");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
s = Status::Corruption("Unable to read entire large value file");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete file; // Ignore errors on closing
|
||||||
|
}
|
||||||
|
if (!s.ok()) {
|
||||||
|
large_->value.clear();
|
||||||
|
large_->status = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
Iterator* NewDBIterator(
|
Iterator* NewDBIterator(
|
|
@ -119,6 +119,9 @@ class DBTest {
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
result += iter->value().ToString();
|
result += iter->value().ToString();
|
||||||
break;
|
break;
|
||||||
|
case kTypeLargeValueRef:
|
||||||
|
result += "LARGEVALUE(" + EscapeString(iter->value()) + ")";
|
||||||
|
break;
|
||||||
case kTypeDeletion:
|
case kTypeDeletion:
|
||||||
result += "DEL";
|
result += "DEL";
|
||||||
break;
|
break;
|
||||||
|
@ -150,6 +153,26 @@ class DBTest {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::set<LargeValueRef> LargeValueFiles() const {
|
||||||
|
// Return the set of large value files that exist in the database
|
||||||
|
std::vector<std::string> filenames;
|
||||||
|
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
||||||
|
uint64_t number;
|
||||||
|
LargeValueRef large_ref;
|
||||||
|
FileType type;
|
||||||
|
std::set<LargeValueRef> live;
|
||||||
|
for (int i = 0; i < filenames.size(); i++) {
|
||||||
|
if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
|
||||||
|
type == kLargeValueFile) {
|
||||||
|
fprintf(stderr, " live: %s\n",
|
||||||
|
LargeValueRefToFilenameString(large_ref).c_str());
|
||||||
|
live.insert(large_ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr, "Found %d live large value files\n", (int)live.size());
|
||||||
|
return live;
|
||||||
|
}
|
||||||
|
|
||||||
void Compact(const Slice& start, const Slice& limit) {
|
void Compact(const Slice& start, const Slice& limit) {
|
||||||
dbfull()->TEST_CompactMemTable();
|
dbfull()->TEST_CompactMemTable();
|
||||||
int max_level_with_files = 1;
|
int max_level_with_files = 1;
|
||||||
|
@ -448,6 +471,7 @@ TEST(DBTest, MinorCompactionsHappen) {
|
||||||
TEST(DBTest, RecoverWithLargeLog) {
|
TEST(DBTest, RecoverWithLargeLog) {
|
||||||
{
|
{
|
||||||
Options options;
|
Options options;
|
||||||
|
options.large_value_threshold = 1048576;
|
||||||
Reopen(&options);
|
Reopen(&options);
|
||||||
ASSERT_OK(Put("big1", std::string(200000, '1')));
|
ASSERT_OK(Put("big1", std::string(200000, '1')));
|
||||||
ASSERT_OK(Put("big2", std::string(200000, '2')));
|
ASSERT_OK(Put("big2", std::string(200000, '2')));
|
||||||
|
@ -460,6 +484,7 @@ TEST(DBTest, RecoverWithLargeLog) {
|
||||||
// we flush table files in the middle of a large log file.
|
// we flush table files in the middle of a large log file.
|
||||||
Options options;
|
Options options;
|
||||||
options.write_buffer_size = 100000;
|
options.write_buffer_size = 100000;
|
||||||
|
options.large_value_threshold = 1048576;
|
||||||
Reopen(&options);
|
Reopen(&options);
|
||||||
ASSERT_EQ(NumTableFilesAtLevel(0), 3);
|
ASSERT_EQ(NumTableFilesAtLevel(0), 3);
|
||||||
ASSERT_EQ(std::string(200000, '1'), Get("big1"));
|
ASSERT_EQ(std::string(200000, '1'), Get("big1"));
|
||||||
|
@ -472,6 +497,7 @@ TEST(DBTest, RecoverWithLargeLog) {
|
||||||
TEST(DBTest, CompactionsGenerateMultipleFiles) {
|
TEST(DBTest, CompactionsGenerateMultipleFiles) {
|
||||||
Options options;
|
Options options;
|
||||||
options.write_buffer_size = 100000000; // Large write buffer
|
options.write_buffer_size = 100000000; // Large write buffer
|
||||||
|
options.large_value_threshold = 1048576;
|
||||||
Reopen(&options);
|
Reopen(&options);
|
||||||
|
|
||||||
Random rnd(301);
|
Random rnd(301);
|
||||||
|
@ -544,7 +570,11 @@ static bool Between(uint64_t val, uint64_t low, uint64_t high) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DBTest, ApproximateSizes) {
|
TEST(DBTest, ApproximateSizes) {
|
||||||
|
for (int test = 0; test < 2; test++) {
|
||||||
|
// test==0: default large_value_threshold
|
||||||
|
// test==1: 1 MB large_value_threshold
|
||||||
Options options;
|
Options options;
|
||||||
|
options.large_value_threshold = (test == 0) ? 65536 : 1048576;
|
||||||
options.write_buffer_size = 100000000; // Large write buffer
|
options.write_buffer_size = 100000000; // Large write buffer
|
||||||
options.compression = kNoCompression;
|
options.compression = kNoCompression;
|
||||||
DestroyAndReopen();
|
DestroyAndReopen();
|
||||||
|
@ -560,9 +590,15 @@ TEST(DBTest, ApproximateSizes) {
|
||||||
for (int i = 0; i < N; i++) {
|
for (int i = 0; i < N; i++) {
|
||||||
ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
|
ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
|
||||||
}
|
}
|
||||||
|
if (test == 1) {
|
||||||
// 0 because GetApproximateSizes() does not account for memtable space
|
// 0 because GetApproximateSizes() does not account for memtable space for
|
||||||
|
// non-large values
|
||||||
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
|
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
|
||||||
|
} else {
|
||||||
|
ASSERT_TRUE(Between(Size("", Key(50)), 100000*50, 100000*50 + 10000));
|
||||||
|
ASSERT_TRUE(Between(Size(Key(20), Key(30)),
|
||||||
|
100000*10, 100000*10 + 10000));
|
||||||
|
}
|
||||||
|
|
||||||
// Check sizes across recovery by reopening a few times
|
// Check sizes across recovery by reopening a few times
|
||||||
for (int run = 0; run < 3; run++) {
|
for (int run = 0; run < 3; run++) {
|
||||||
|
@ -588,9 +624,11 @@ TEST(DBTest, ApproximateSizes) {
|
||||||
ASSERT_GT(NumTableFilesAtLevel(1), 0);
|
ASSERT_GT(NumTableFilesAtLevel(1), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
|
TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
|
||||||
Options options;
|
Options options;
|
||||||
|
options.large_value_threshold = 65536;
|
||||||
options.compression = kNoCompression;
|
options.compression = kNoCompression;
|
||||||
Reopen();
|
Reopen();
|
||||||
|
|
||||||
|
@ -763,6 +801,146 @@ TEST(DBTest, ComparatorCheck) {
|
||||||
<< s.ToString();
|
<< s.ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool LargeValuesOK(DBTest* db,
|
||||||
|
const std::set<LargeValueRef>& expected) {
|
||||||
|
std::set<LargeValueRef> actual = db->LargeValueFiles();
|
||||||
|
if (actual.size() != expected.size()) {
|
||||||
|
fprintf(stderr, "Sets differ in size: %d vs %d\n",
|
||||||
|
(int)actual.size(), (int)expected.size());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (std::set<LargeValueRef>::const_iterator it = expected.begin();
|
||||||
|
it != expected.end();
|
||||||
|
++it) {
|
||||||
|
if (actual.count(*it) != 1) {
|
||||||
|
fprintf(stderr, " key '%s' not found in actual set\n",
|
||||||
|
LargeValueRefToFilenameString(*it).c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DBTest, LargeValues1) {
|
||||||
|
Options options;
|
||||||
|
options.large_value_threshold = 10000;
|
||||||
|
Reopen(&options);
|
||||||
|
|
||||||
|
Random rnd(301);
|
||||||
|
|
||||||
|
std::string big1;
|
||||||
|
test::CompressibleString(&rnd, 1.0, 100000, &big1); // Not compressible
|
||||||
|
std::set<LargeValueRef> expected;
|
||||||
|
|
||||||
|
ASSERT_OK(Put("big1", big1));
|
||||||
|
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
ASSERT_OK(Delete("big1"));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||||
|
// No handling of deletion markers on memtable compactions, so big1 remains
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
dbfull()->TEST_CompactRange(0, "", "z");
|
||||||
|
expected.erase(LargeValueRef::Make(big1, kNoCompression));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool SnappyCompressionSupported() {
|
||||||
|
std::string out;
|
||||||
|
Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
|
||||||
|
return port::Snappy_Compress(in.data(), in.size(), &out);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DBTest, LargeValues2) {
|
||||||
|
Options options;
|
||||||
|
options.large_value_threshold = 10000;
|
||||||
|
Reopen(&options);
|
||||||
|
|
||||||
|
Random rnd(301);
|
||||||
|
|
||||||
|
std::string big1, big2;
|
||||||
|
test::CompressibleString(&rnd, 1.0, 20000, &big1); // Not compressible
|
||||||
|
test::CompressibleString(&rnd, 0.6, 40000, &big2); // Compressible
|
||||||
|
std::set<LargeValueRef> expected;
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
ASSERT_OK(Put("big1", big1));
|
||||||
|
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
||||||
|
ASSERT_EQ(big1, Get("big1"));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
ASSERT_OK(Put("big2", big2));
|
||||||
|
ASSERT_EQ(big2, Get("big2"));
|
||||||
|
if (SnappyCompressionSupported()) {
|
||||||
|
expected.insert(LargeValueRef::Make(big2, kSnappyCompression));
|
||||||
|
} else {
|
||||||
|
expected.insert(LargeValueRef::Make(big2, kNoCompression));
|
||||||
|
}
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
dbfull()->TEST_CompactRange(0, "", "z");
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
ASSERT_OK(Put("big2", big2));
|
||||||
|
ASSERT_OK(Put("big2_b", big2));
|
||||||
|
ASSERT_EQ(big1, Get("big1"));
|
||||||
|
ASSERT_EQ(big2, Get("big2"));
|
||||||
|
ASSERT_EQ(big2, Get("big2_b"));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
ASSERT_OK(Delete("big1"));
|
||||||
|
ASSERT_EQ("NOT_FOUND", Get("big1"));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
dbfull()->TEST_CompactRange(0, "", "z");
|
||||||
|
expected.erase(LargeValueRef::Make(big1, kNoCompression));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
dbfull()->TEST_CompactRange(1, "", "z");
|
||||||
|
|
||||||
|
ASSERT_OK(Delete("big2"));
|
||||||
|
ASSERT_EQ("NOT_FOUND", Get("big2"));
|
||||||
|
ASSERT_EQ(big2, Get("big2_b"));
|
||||||
|
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
dbfull()->TEST_CompactRange(0, "", "z");
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
|
||||||
|
// Make sure the large value refs survive a reload and compactions after
|
||||||
|
// the reload.
|
||||||
|
Reopen();
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
ASSERT_OK(Put("foo", "bar"));
|
||||||
|
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||||
|
dbfull()->TEST_CompactRange(0, "", "z");
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DBTest, LargeValues3) {
|
||||||
|
// Make sure we don't compress values if
|
||||||
|
Options options;
|
||||||
|
options.large_value_threshold = 10000;
|
||||||
|
options.compression = kNoCompression;
|
||||||
|
Reopen(&options);
|
||||||
|
|
||||||
|
Random rnd(301);
|
||||||
|
|
||||||
|
std::string big1 = std::string(100000, 'x'); // Very compressible
|
||||||
|
std::set<LargeValueRef> expected;
|
||||||
|
|
||||||
|
ASSERT_OK(Put("big1", big1));
|
||||||
|
ASSERT_EQ(big1, Get("big1"));
|
||||||
|
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
||||||
|
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
TEST(DBTest, DBOpen_Options) {
|
TEST(DBTest, DBOpen_Options) {
|
||||||
std::string dbname = test::TmpDir() + "/db_options_test";
|
std::string dbname = test::TmpDir() + "/db_options_test";
|
||||||
DestroyDB(dbname, Options());
|
DestroyDB(dbname, Options());
|
||||||
|
@ -847,6 +1025,9 @@ class ModelDB: public DB {
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
map_[it.key().ToString()] = it.value().ToString();
|
map_[it.key().ToString()] = it.value().ToString();
|
||||||
break;
|
break;
|
||||||
|
case kTypeLargeValueRef:
|
||||||
|
assert(false); // Should not occur
|
||||||
|
break;
|
||||||
case kTypeDeletion:
|
case kTypeDeletion:
|
||||||
map_.erase(it.key().ToString());
|
map_.erase(it.key().ToString());
|
||||||
break;
|
break;
|
|
@ -84,4 +84,69 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LargeValueRef LargeValueRef::Make(const Slice& value, CompressionType ctype) {
|
||||||
|
LargeValueRef result;
|
||||||
|
port::SHA1_Hash(value.data(), value.size(), &result.data[0]);
|
||||||
|
EncodeFixed64(&result.data[20], value.size());
|
||||||
|
result.data[28] = static_cast<unsigned char>(ctype);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string LargeValueRefToFilenameString(const LargeValueRef& h) {
|
||||||
|
assert(sizeof(h.data) == LargeValueRef::ByteSize());
|
||||||
|
assert(sizeof(h.data) == 29); // So we can hardcode the array size of buf
|
||||||
|
static const char tohex[] = "0123456789abcdef";
|
||||||
|
char buf[20*2];
|
||||||
|
for (int i = 0; i < 20; i++) {
|
||||||
|
buf[2*i] = tohex[(h.data[i] >> 4) & 0xf];
|
||||||
|
buf[2*i+1] = tohex[h.data[i] & 0xf];
|
||||||
|
}
|
||||||
|
std::string result = std::string(buf, sizeof(buf));
|
||||||
|
result += "-";
|
||||||
|
result += NumberToString(h.ValueSize());
|
||||||
|
result += "-";
|
||||||
|
result += NumberToString(static_cast<uint64_t>(h.compression_type()));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t hexvalue(char c) {
|
||||||
|
if (c >= '0' && c <= '9') {
|
||||||
|
return c - '0';
|
||||||
|
} else if (c >= 'A' && c <= 'F') {
|
||||||
|
return 10 + c - 'A';
|
||||||
|
} else {
|
||||||
|
assert(c >= 'a' && c <= 'f');
|
||||||
|
return 10 + c - 'a';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FilenameStringToLargeValueRef(const Slice& s, LargeValueRef* h) {
|
||||||
|
Slice in = s;
|
||||||
|
if (in.size() < 40) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 20; i++) {
|
||||||
|
if (!isxdigit(in[i*2]) || !isxdigit(in[i*2+1])) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
unsigned char c = (hexvalue(in[i*2])<<4) | hexvalue(in[i*2+1]);
|
||||||
|
h->data[i] = c;
|
||||||
|
}
|
||||||
|
in.remove_prefix(40);
|
||||||
|
uint64_t value_size, ctype;
|
||||||
|
|
||||||
|
if (ConsumeChar(&in, '-') &&
|
||||||
|
ConsumeDecimalNumber(&in, &value_size) &&
|
||||||
|
ConsumeChar(&in, '-') &&
|
||||||
|
ConsumeDecimalNumber(&in, &ctype) &&
|
||||||
|
in.empty() &&
|
||||||
|
(ctype <= kSnappyCompression)) {
|
||||||
|
EncodeFixed64(&h->data[20], value_size);
|
||||||
|
h->data[28] = static_cast<unsigned char>(ctype);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -29,6 +29,7 @@ class InternalKey;
|
||||||
enum ValueType {
|
enum ValueType {
|
||||||
kTypeDeletion = 0x0,
|
kTypeDeletion = 0x0,
|
||||||
kTypeValue = 0x1,
|
kTypeValue = 0x1,
|
||||||
|
kTypeLargeValueRef = 0x2,
|
||||||
};
|
};
|
||||||
// kValueTypeForSeek defines the ValueType that should be passed when
|
// kValueTypeForSeek defines the ValueType that should be passed when
|
||||||
// constructing a ParsedInternalKey object for seeking to a particular
|
// constructing a ParsedInternalKey object for seeking to a particular
|
||||||
|
@ -36,7 +37,7 @@ enum ValueType {
|
||||||
// and the value type is embedded as the low 8 bits in the sequence
|
// and the value type is embedded as the low 8 bits in the sequence
|
||||||
// number in internal keys, we need to use the highest-numbered
|
// number in internal keys, we need to use the highest-numbered
|
||||||
// ValueType, not the lowest).
|
// ValueType, not the lowest).
|
||||||
static const ValueType kValueTypeForSeek = kTypeValue;
|
static const ValueType kValueTypeForSeek = kTypeLargeValueRef;
|
||||||
|
|
||||||
typedef uint64_t SequenceNumber;
|
typedef uint64_t SequenceNumber;
|
||||||
|
|
||||||
|
@ -138,6 +139,54 @@ inline int InternalKeyComparator::Compare(
|
||||||
return Compare(a.Encode(), b.Encode());
|
return Compare(a.Encode(), b.Encode());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LargeValueRef is a 160-bit hash value (20 bytes), plus an 8 byte
|
||||||
|
// uncompressed size, and a 1 byte CompressionType code. An
|
||||||
|
// encoded form of it is embedded in the filenames of large value
|
||||||
|
// files stored in the database, and the raw binary form is stored as
|
||||||
|
// the iter->value() result for values of type kTypeLargeValueRef in
|
||||||
|
// the table and log files that make up the database.
|
||||||
|
struct LargeValueRef {
|
||||||
|
char data[29];
|
||||||
|
|
||||||
|
// Initialize a large value ref for the given data
|
||||||
|
static LargeValueRef Make(const Slice& data,
|
||||||
|
CompressionType compression_type);
|
||||||
|
|
||||||
|
// Initialize a large value ref from a serialized, 29-byte reference value
|
||||||
|
static LargeValueRef FromRef(const Slice& ref) {
|
||||||
|
LargeValueRef result;
|
||||||
|
assert(ref.size() == sizeof(result.data));
|
||||||
|
memcpy(result.data, ref.data(), sizeof(result.data));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the number of bytes in a LargeValueRef (not the
|
||||||
|
// number of bytes in the value referenced).
|
||||||
|
static size_t ByteSize() { return sizeof(LargeValueRef().data); }
|
||||||
|
|
||||||
|
// Return the number of bytes in the value referenced by "*this".
|
||||||
|
uint64_t ValueSize() const { return DecodeFixed64(&data[20]); }
|
||||||
|
|
||||||
|
CompressionType compression_type() const {
|
||||||
|
return static_cast<CompressionType>(data[28]);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const LargeValueRef& b) const {
|
||||||
|
return memcmp(data, b.data, sizeof(data)) == 0;
|
||||||
|
}
|
||||||
|
bool operator<(const LargeValueRef& b) const {
|
||||||
|
return memcmp(data, b.data, sizeof(data)) < 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Convert the large value ref to a human-readable string suitable
|
||||||
|
// for embedding in a large value filename.
|
||||||
|
extern std::string LargeValueRefToFilenameString(const LargeValueRef& h);
|
||||||
|
|
||||||
|
// Parse the large value filename string in "input" and store it in
|
||||||
|
// "*h". If successful, returns true. Otherwise returns false.
|
||||||
|
extern bool FilenameStringToLargeValueRef(const Slice& in, LargeValueRef* ref);
|
||||||
|
|
||||||
inline bool ParseInternalKey(const Slice& internal_key,
|
inline bool ParseInternalKey(const Slice& internal_key,
|
||||||
ParsedInternalKey* result) {
|
ParsedInternalKey* result) {
|
||||||
const size_t n = internal_key.size();
|
const size_t n = internal_key.size();
|
||||||
|
@ -147,7 +196,7 @@ inline bool ParseInternalKey(const Slice& internal_key,
|
||||||
result->sequence = num >> 8;
|
result->sequence = num >> 8;
|
||||||
result->type = static_cast<ValueType>(c);
|
result->type = static_cast<ValueType>(c);
|
||||||
result->user_key = Slice(internal_key.data(), n - 8);
|
result->user_key = Slice(internal_key.data(), n - 8);
|
||||||
return (c <= static_cast<unsigned char>(kTypeValue));
|
return (c <= static_cast<unsigned char>(kTypeLargeValueRef));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -76,6 +76,9 @@ TEST(FormatTest, InternalKeyShortSeparator) {
|
||||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||||
Shorten(IKey("foo", 100, kTypeValue),
|
Shorten(IKey("foo", 100, kTypeValue),
|
||||||
IKey("foo", 100, kTypeDeletion)));
|
IKey("foo", 100, kTypeDeletion)));
|
||||||
|
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||||
|
Shorten(IKey("foo", 100, kTypeValue),
|
||||||
|
IKey("foo", 100, kTypeLargeValueRef)));
|
||||||
|
|
||||||
// When user keys are misordered
|
// When user keys are misordered
|
||||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||||
|
@ -105,6 +108,18 @@ TEST(FormatTest, InternalKeyShortestSuccessor) {
|
||||||
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
|
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(FormatTest, SHA1) {
|
||||||
|
// Check that we are computing the same value as sha1.
|
||||||
|
// Note that the last two numbers are the length of the input and the
|
||||||
|
// compression type.
|
||||||
|
ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-0", // SHA1, uncompr
|
||||||
|
LargeValueRefToFilenameString(
|
||||||
|
LargeValueRef::Make("hello", kNoCompression)));
|
||||||
|
ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-1", // SHA1, lwcompr
|
||||||
|
LargeValueRefToFilenameString(
|
||||||
|
LargeValueRef::Make("hello", kSnappyCompression)));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
|
@ -30,6 +30,14 @@ std::string TableFileName(const std::string& name, uint64_t number) {
|
||||||
return MakeFileName(name, number, "sst");
|
return MakeFileName(name, number, "sst");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string LargeValueFileName(const std::string& name,
|
||||||
|
const LargeValueRef& large_ref) {
|
||||||
|
std::string result = name + "/";
|
||||||
|
result += LargeValueRefToFilenameString(large_ref);
|
||||||
|
result += ".val";
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
|
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
|
||||||
assert(number > 0);
|
assert(number > 0);
|
||||||
char buf[100];
|
char buf[100];
|
||||||
|
@ -67,9 +75,11 @@ std::string OldInfoLogFileName(const std::string& dbname) {
|
||||||
// dbname/LOG
|
// dbname/LOG
|
||||||
// dbname/LOG.old
|
// dbname/LOG.old
|
||||||
// dbname/MANIFEST-[0-9]+
|
// dbname/MANIFEST-[0-9]+
|
||||||
|
// dbname/[0-9a-f]{20}-[0-9]+-[0-9]+.val
|
||||||
// dbname/[0-9]+.(log|sst)
|
// dbname/[0-9]+.(log|sst)
|
||||||
bool ParseFileName(const std::string& fname,
|
bool ParseFileName(const std::string& fname,
|
||||||
uint64_t* number,
|
uint64_t* number,
|
||||||
|
LargeValueRef* large_ref,
|
||||||
FileType* type) {
|
FileType* type) {
|
||||||
Slice rest(fname);
|
Slice rest(fname);
|
||||||
if (rest == "CURRENT") {
|
if (rest == "CURRENT") {
|
||||||
|
@ -81,6 +91,15 @@ bool ParseFileName(const std::string& fname,
|
||||||
} else if (rest == "LOG" || rest == "LOG.old") {
|
} else if (rest == "LOG" || rest == "LOG.old") {
|
||||||
*number = 0;
|
*number = 0;
|
||||||
*type = kInfoLogFile;
|
*type = kInfoLogFile;
|
||||||
|
} else if (rest.size() >= 4 &&
|
||||||
|
Slice(rest.data() + rest.size() - 4, 4) == ".val") {
|
||||||
|
LargeValueRef h;
|
||||||
|
if (!FilenameStringToLargeValueRef(Slice(rest.data(), rest.size() - 4),
|
||||||
|
&h)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*large_ref = h;
|
||||||
|
*type = kLargeValueFile;
|
||||||
} else if (rest.starts_with("MANIFEST-")) {
|
} else if (rest.starts_with("MANIFEST-")) {
|
||||||
rest.remove_prefix(strlen("MANIFEST-"));
|
rest.remove_prefix(strlen("MANIFEST-"));
|
||||||
uint64_t num;
|
uint64_t num;
|
|
@ -16,11 +16,13 @@
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
class Env;
|
class Env;
|
||||||
|
struct LargeValueRef;
|
||||||
|
|
||||||
enum FileType {
|
enum FileType {
|
||||||
kLogFile,
|
kLogFile,
|
||||||
kDBLockFile,
|
kDBLockFile,
|
||||||
kTableFile,
|
kTableFile,
|
||||||
|
kLargeValueFile,
|
||||||
kDescriptorFile,
|
kDescriptorFile,
|
||||||
kCurrentFile,
|
kCurrentFile,
|
||||||
kTempFile,
|
kTempFile,
|
||||||
|
@ -37,6 +39,12 @@ extern std::string LogFileName(const std::string& dbname, uint64_t number);
|
||||||
// "dbname".
|
// "dbname".
|
||||||
extern std::string TableFileName(const std::string& dbname, uint64_t number);
|
extern std::string TableFileName(const std::string& dbname, uint64_t number);
|
||||||
|
|
||||||
|
// Return the name of the large value file with the specified large
|
||||||
|
// value reference in the db named by "dbname". The result will be
|
||||||
|
// prefixed with "dbname".
|
||||||
|
extern std::string LargeValueFileName(const std::string& dbname,
|
||||||
|
const LargeValueRef& large_ref);
|
||||||
|
|
||||||
// Return the name of the descriptor file for the db named by
|
// Return the name of the descriptor file for the db named by
|
||||||
// "dbname" and the specified incarnation number. The result will be
|
// "dbname" and the specified incarnation number. The result will be
|
||||||
// prefixed with "dbname".
|
// prefixed with "dbname".
|
||||||
|
@ -63,10 +71,14 @@ extern std::string InfoLogFileName(const std::string& dbname);
|
||||||
extern std::string OldInfoLogFileName(const std::string& dbname);
|
extern std::string OldInfoLogFileName(const std::string& dbname);
|
||||||
|
|
||||||
// If filename is a leveldb file, store the type of the file in *type.
|
// If filename is a leveldb file, store the type of the file in *type.
|
||||||
// The number encoded in the filename is stored in *number. If the
|
// If *type is kLargeValueFile, then the large value reference data
|
||||||
// filename was successfully parsed, returns true. Else return false.
|
// from the filename is stored in "*large_ref. For all other types of
|
||||||
|
// files, the number encoded in the filename is stored in *number. If
|
||||||
|
// the filename was successfully parsed, returns true. Else return
|
||||||
|
// false.
|
||||||
extern bool ParseFileName(const std::string& filename,
|
extern bool ParseFileName(const std::string& filename,
|
||||||
uint64_t* number,
|
uint64_t* number,
|
||||||
|
LargeValueRef* large_ref,
|
||||||
FileType* type);
|
FileType* type);
|
||||||
|
|
||||||
// Make the CURRENT file point to the descriptor file with the
|
// Make the CURRENT file point to the descriptor file with the
|
|
@ -0,0 +1,156 @@
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "db/filename.h"
|
||||||
|
|
||||||
|
#include "db/dbformat.h"
|
||||||
|
#include "port/port.h"
|
||||||
|
#include "util/logging.h"
|
||||||
|
#include "util/testharness.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
class FileNameTest { };
|
||||||
|
|
||||||
|
TEST(FileNameTest, Parse) {
|
||||||
|
Slice db;
|
||||||
|
FileType type;
|
||||||
|
uint64_t number;
|
||||||
|
LargeValueRef large_ref;
|
||||||
|
|
||||||
|
// Successful parses
|
||||||
|
static struct {
|
||||||
|
const char* fname;
|
||||||
|
uint64_t number;
|
||||||
|
const char* large_ref;
|
||||||
|
FileType type;
|
||||||
|
} cases[] = {
|
||||||
|
{ "100.log", 100, "", kLogFile },
|
||||||
|
{ "0.log", 0, "", kLogFile },
|
||||||
|
{ "0.sst", 0, "", kTableFile },
|
||||||
|
{ "CURRENT", 0, "", kCurrentFile },
|
||||||
|
{ "LOCK", 0, "", kDBLockFile },
|
||||||
|
{ "MANIFEST-2", 2, "", kDescriptorFile },
|
||||||
|
{ "MANIFEST-7", 7, "", kDescriptorFile },
|
||||||
|
{ "LOG", 0, "", kInfoLogFile },
|
||||||
|
{ "LOG.old", 0, "", kInfoLogFile },
|
||||||
|
{ "18446744073709551615.log", 18446744073709551615ull, "",
|
||||||
|
kLogFile },
|
||||||
|
{ "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0.val", 0,
|
||||||
|
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0", kLargeValueFile },
|
||||||
|
{ "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0.val", 0,
|
||||||
|
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0",
|
||||||
|
kLargeValueFile },
|
||||||
|
};
|
||||||
|
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||||||
|
std::string f = cases[i].fname;
|
||||||
|
ASSERT_TRUE(ParseFileName(f, &number, &large_ref, &type)) << f;
|
||||||
|
ASSERT_EQ(cases[i].type, type) << f;
|
||||||
|
if (type == kLargeValueFile) {
|
||||||
|
ASSERT_EQ(cases[i].large_ref, LargeValueRefToFilenameString(large_ref))
|
||||||
|
<< f;
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(cases[i].number, number) << f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Errors
|
||||||
|
static const char* errors[] = {
|
||||||
|
"",
|
||||||
|
"foo",
|
||||||
|
"foo-dx-100.log",
|
||||||
|
".log",
|
||||||
|
"",
|
||||||
|
"manifest",
|
||||||
|
"CURREN",
|
||||||
|
"CURRENTX",
|
||||||
|
"MANIFES",
|
||||||
|
"MANIFEST",
|
||||||
|
"MANIFEST-",
|
||||||
|
"XMANIFEST-3",
|
||||||
|
"MANIFEST-3x",
|
||||||
|
"LOC",
|
||||||
|
"LOCKx",
|
||||||
|
"LO",
|
||||||
|
"LOGx",
|
||||||
|
"18446744073709551616.log",
|
||||||
|
"184467440737095516150.log",
|
||||||
|
"100",
|
||||||
|
"100.",
|
||||||
|
"100.lop",
|
||||||
|
"100.val",
|
||||||
|
".val",
|
||||||
|
"123456789012345678901234567890123456789-12340.val",
|
||||||
|
"1234567890123456789012345678901234567-123-0.val",
|
||||||
|
"12345678901234567890123456789012345678902-100-1-.val",
|
||||||
|
// Overflow on value size
|
||||||
|
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000000000000000000-1.val",
|
||||||
|
// '03.val' is a bad compression type
|
||||||
|
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000-3.val" };
|
||||||
|
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
||||||
|
std::string f = errors[i];
|
||||||
|
ASSERT_TRUE(!ParseFileName(f, &number, &large_ref, &type)) << f;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(FileNameTest, Construction) {
|
||||||
|
uint64_t number;
|
||||||
|
FileType type;
|
||||||
|
LargeValueRef large_ref;
|
||||||
|
std::string fname;
|
||||||
|
|
||||||
|
fname = CurrentFileName("foo");
|
||||||
|
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||||
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||||
|
ASSERT_EQ(0, number);
|
||||||
|
ASSERT_EQ(kCurrentFile, type);
|
||||||
|
|
||||||
|
fname = LockFileName("foo");
|
||||||
|
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||||
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||||
|
ASSERT_EQ(0, number);
|
||||||
|
ASSERT_EQ(kDBLockFile, type);
|
||||||
|
|
||||||
|
fname = LogFileName("foo", 192);
|
||||||
|
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||||
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||||
|
ASSERT_EQ(192, number);
|
||||||
|
ASSERT_EQ(kLogFile, type);
|
||||||
|
|
||||||
|
fname = TableFileName("bar", 200);
|
||||||
|
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||||
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||||
|
ASSERT_EQ(200, number);
|
||||||
|
ASSERT_EQ(kTableFile, type);
|
||||||
|
|
||||||
|
fname = DescriptorFileName("bar", 100);
|
||||||
|
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||||
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||||
|
ASSERT_EQ(100, number);
|
||||||
|
ASSERT_EQ(kDescriptorFile, type);
|
||||||
|
|
||||||
|
fname = TempFileName("tmp", 999);
|
||||||
|
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
||||||
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||||
|
ASSERT_EQ(999, number);
|
||||||
|
ASSERT_EQ(kTempFile, type);
|
||||||
|
|
||||||
|
for (int i = 0; i <= kSnappyCompression; i++) {
|
||||||
|
CompressionType ctype = static_cast<CompressionType>(i);
|
||||||
|
std::string value = "abcdef";
|
||||||
|
LargeValueRef real_large_ref = LargeValueRef::Make(Slice(value), ctype);
|
||||||
|
fname = LargeValueFileName("tmp", real_large_ref);
|
||||||
|
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
||||||
|
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||||
|
ASSERT_TRUE(real_large_ref == large_ref);
|
||||||
|
ASSERT_EQ(kLargeValueFile, type);
|
||||||
|
ASSERT_EQ(large_ref.compression_type(), ctype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
return leveldb::test::RunAllTests();
|
||||||
|
}
|
|
@ -46,9 +46,9 @@ Status Writer::AddRecord(const Slice& slice) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Invariant: we never leave < kHeaderSize bytes in a block.
|
// Invariant: we never leave < kHeaderSize bytes in a block.
|
||||||
assert(kBlockSize - block_offset_ - kHeaderSize >= 0);
|
const int avail = kBlockSize - block_offset_ - kHeaderSize;
|
||||||
|
assert(avail >= 0);
|
||||||
|
|
||||||
const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
|
|
||||||
const size_t fragment_length = (left < avail) ? left : avail;
|
const size_t fragment_length = (left < avail) ? left : avail;
|
||||||
|
|
||||||
RecordType type;
|
RecordType type;
|
|
@ -6,7 +6,8 @@
|
||||||
// (1) Any log files are first converted to tables
|
// (1) Any log files are first converted to tables
|
||||||
// (2) We scan every table to compute
|
// (2) We scan every table to compute
|
||||||
// (a) smallest/largest for the table
|
// (a) smallest/largest for the table
|
||||||
// (b) largest sequence number in the table
|
// (b) large value refs from the table
|
||||||
|
// (c) largest sequence number in the table
|
||||||
// (3) We generate descriptor contents:
|
// (3) We generate descriptor contents:
|
||||||
// - log number is set to zero
|
// - log number is set to zero
|
||||||
// - next-file-number is set to 1 + largest file number we found
|
// - next-file-number is set to 1 + largest file number we found
|
||||||
|
@ -21,8 +22,9 @@
|
||||||
// (c) For each table: if it overlaps earlier table, place in level-0,
|
// (c) For each table: if it overlaps earlier table, place in level-0,
|
||||||
// else place in level-M.
|
// else place in level-M.
|
||||||
// Possible optimization 2:
|
// Possible optimization 2:
|
||||||
// Store per-table metadata (smallest, largest, largest-seq#, ...)
|
// Store per-table metadata (smallest, largest, largest-seq#,
|
||||||
// in the table's meta section to speed up ScanTable.
|
// large-value-refs, ...) in the table's meta section to speed up
|
||||||
|
// ScanTable.
|
||||||
|
|
||||||
#include "db/builder.h"
|
#include "db/builder.h"
|
||||||
#include "db/db_impl.h"
|
#include "db/db_impl.h"
|
||||||
|
@ -71,7 +73,7 @@ class Repairer {
|
||||||
}
|
}
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
unsigned long long bytes = 0;
|
unsigned long long bytes = 0;
|
||||||
for (size_t i = 0; i < tables_.size(); i++) {
|
for (int i = 0; i < tables_.size(); i++) {
|
||||||
bytes += tables_[i].meta.file_size;
|
bytes += tables_[i].meta.file_size;
|
||||||
}
|
}
|
||||||
Log(env_, options_.info_log,
|
Log(env_, options_.info_log,
|
||||||
|
@ -117,10 +119,13 @@ class Repairer {
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
|
LargeValueRef large_ref;
|
||||||
FileType type;
|
FileType type;
|
||||||
for (size_t i = 0; i < filenames.size(); i++) {
|
for (int i = 0; i < filenames.size(); i++) {
|
||||||
if (ParseFileName(filenames[i], &number, &type)) {
|
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
||||||
if (type == kDescriptorFile) {
|
if (type == kLargeValueFile) {
|
||||||
|
// Will be picked up when we process a Table that points to it
|
||||||
|
} else if (type == kDescriptorFile) {
|
||||||
manifests_.push_back(filenames[i]);
|
manifests_.push_back(filenames[i]);
|
||||||
} else {
|
} else {
|
||||||
if (number + 1 > next_file_number_) {
|
if (number + 1 > next_file_number_) {
|
||||||
|
@ -140,7 +145,7 @@ class Repairer {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConvertLogFilesToTables() {
|
void ConvertLogFilesToTables() {
|
||||||
for (size_t i = 0; i < logs_.size(); i++) {
|
for (int i = 0; i < logs_.size(); i++) {
|
||||||
std::string logname = LogFileName(dbname_, logs_[i]);
|
std::string logname = LogFileName(dbname_, logs_[i]);
|
||||||
Status status = ConvertLogToTable(logs_[i]);
|
Status status = ConvertLogToTable(logs_[i]);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
|
@ -234,7 +239,7 @@ class Repairer {
|
||||||
|
|
||||||
void ExtractMetaData() {
|
void ExtractMetaData() {
|
||||||
std::vector<TableInfo> kept;
|
std::vector<TableInfo> kept;
|
||||||
for (size_t i = 0; i < table_numbers_.size(); i++) {
|
for (int i = 0; i < table_numbers_.size(); i++) {
|
||||||
TableInfo t;
|
TableInfo t;
|
||||||
t.meta.number = table_numbers_[i];
|
t.meta.number = table_numbers_[i];
|
||||||
Status status = ScanTable(&t);
|
Status status = ScanTable(&t);
|
||||||
|
@ -278,6 +283,17 @@ class Repairer {
|
||||||
if (parsed.sequence > t->max_sequence) {
|
if (parsed.sequence > t->max_sequence) {
|
||||||
t->max_sequence = parsed.sequence;
|
t->max_sequence = parsed.sequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ExtractValueType(key) == kTypeLargeValueRef) {
|
||||||
|
if (iter->value().size() != LargeValueRef::ByteSize()) {
|
||||||
|
Log(env_, options_.info_log, "Table #%llu: bad large value ref",
|
||||||
|
(unsigned long long) t->meta.number);
|
||||||
|
} else {
|
||||||
|
edit_.AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
|
||||||
|
t->meta.number,
|
||||||
|
key);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!iter->status().ok()) {
|
if (!iter->status().ok()) {
|
||||||
status = iter->status();
|
status = iter->status();
|
||||||
|
@ -300,7 +316,7 @@ class Repairer {
|
||||||
}
|
}
|
||||||
|
|
||||||
SequenceNumber max_sequence = 0;
|
SequenceNumber max_sequence = 0;
|
||||||
for (size_t i = 0; i < tables_.size(); i++) {
|
for (int i = 0; i < tables_.size(); i++) {
|
||||||
if (max_sequence < tables_[i].max_sequence) {
|
if (max_sequence < tables_[i].max_sequence) {
|
||||||
max_sequence = tables_[i].max_sequence;
|
max_sequence = tables_[i].max_sequence;
|
||||||
}
|
}
|
||||||
|
@ -311,7 +327,7 @@ class Repairer {
|
||||||
edit_.SetNextFile(next_file_number_);
|
edit_.SetNextFile(next_file_number_);
|
||||||
edit_.SetLastSequence(max_sequence);
|
edit_.SetLastSequence(max_sequence);
|
||||||
|
|
||||||
for (size_t i = 0; i < tables_.size(); i++) {
|
for (int i = 0; i < tables_.size(); i++) {
|
||||||
// TODO(opt): separate out into multiple levels
|
// TODO(opt): separate out into multiple levels
|
||||||
const TableInfo& t = tables_[i];
|
const TableInfo& t = tables_[i];
|
||||||
edit_.AddFile(0, t.meta.number, t.meta.file_size,
|
edit_.AddFile(0, t.meta.number, t.meta.file_size,
|
||||||
|
@ -335,7 +351,7 @@ class Repairer {
|
||||||
env_->DeleteFile(tmp);
|
env_->DeleteFile(tmp);
|
||||||
} else {
|
} else {
|
||||||
// Discard older manifests
|
// Discard older manifests
|
||||||
for (size_t i = 0; i < manifests_.size(); i++) {
|
for (int i = 0; i < manifests_.size(); i++) {
|
||||||
ArchiveFile(dbname_ + "/" + manifests_[i]);
|
ArchiveFile(dbname_ + "/" + manifests_[i]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ enum Tag {
|
||||||
kCompactPointer = 5,
|
kCompactPointer = 5,
|
||||||
kDeletedFile = 6,
|
kDeletedFile = 6,
|
||||||
kNewFile = 7,
|
kNewFile = 7,
|
||||||
// 8 was used for large value refs
|
kLargeValueRef = 8,
|
||||||
kPrevLogNumber = 9,
|
kPrevLogNumber = 9,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ void VersionEdit::Clear() {
|
||||||
has_last_sequence_ = false;
|
has_last_sequence_ = false;
|
||||||
deleted_files_.clear();
|
deleted_files_.clear();
|
||||||
new_files_.clear();
|
new_files_.clear();
|
||||||
|
large_refs_added_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void VersionEdit::EncodeTo(std::string* dst) const {
|
void VersionEdit::EncodeTo(std::string* dst) const {
|
||||||
|
@ -60,7 +61,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||||
PutVarint64(dst, last_sequence_);
|
PutVarint64(dst, last_sequence_);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
for (int i = 0; i < compact_pointers_.size(); i++) {
|
||||||
PutVarint32(dst, kCompactPointer);
|
PutVarint32(dst, kCompactPointer);
|
||||||
PutVarint32(dst, compact_pointers_[i].first); // level
|
PutVarint32(dst, compact_pointers_[i].first); // level
|
||||||
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
||||||
|
@ -74,7 +75,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||||
PutVarint64(dst, iter->second); // file number
|
PutVarint64(dst, iter->second); // file number
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
for (int i = 0; i < new_files_.size(); i++) {
|
||||||
const FileMetaData& f = new_files_[i].second;
|
const FileMetaData& f = new_files_[i].second;
|
||||||
PutVarint32(dst, kNewFile);
|
PutVarint32(dst, kNewFile);
|
||||||
PutVarint32(dst, new_files_[i].first); // level
|
PutVarint32(dst, new_files_[i].first); // level
|
||||||
|
@ -83,6 +84,15 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||||
PutLengthPrefixedSlice(dst, f.smallest.Encode());
|
PutLengthPrefixedSlice(dst, f.smallest.Encode());
|
||||||
PutLengthPrefixedSlice(dst, f.largest.Encode());
|
PutLengthPrefixedSlice(dst, f.largest.Encode());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < large_refs_added_.size(); i++) {
|
||||||
|
const VersionEdit::Large& l = large_refs_added_[i];
|
||||||
|
PutVarint32(dst, kLargeValueRef);
|
||||||
|
PutLengthPrefixedSlice(dst,
|
||||||
|
Slice(l.large_ref.data, LargeValueRef::ByteSize()));
|
||||||
|
PutVarint64(dst, l.fnum);
|
||||||
|
PutLengthPrefixedSlice(dst, l.internal_key.Encode());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
||||||
|
@ -117,6 +127,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||||
uint64_t number;
|
uint64_t number;
|
||||||
FileMetaData f;
|
FileMetaData f;
|
||||||
Slice str;
|
Slice str;
|
||||||
|
Large large;
|
||||||
InternalKey key;
|
InternalKey key;
|
||||||
|
|
||||||
while (msg == NULL && GetVarint32(&input, &tag)) {
|
while (msg == NULL && GetVarint32(&input, &tag)) {
|
||||||
|
@ -192,6 +203,18 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case kLargeValueRef:
|
||||||
|
if (GetLengthPrefixedSlice(&input, &str) &&
|
||||||
|
(str.size() == LargeValueRef::ByteSize()) &&
|
||||||
|
GetVarint64(&input, &large.fnum) &&
|
||||||
|
GetInternalKey(&input, &large.internal_key)) {
|
||||||
|
large.large_ref = LargeValueRef::FromRef(str);
|
||||||
|
large_refs_added_.push_back(large);
|
||||||
|
} else {
|
||||||
|
msg = "large ref";
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
msg = "unknown tag";
|
msg = "unknown tag";
|
||||||
break;
|
break;
|
||||||
|
@ -232,7 +255,7 @@ std::string VersionEdit::DebugString() const {
|
||||||
r.append("\n LastSeq: ");
|
r.append("\n LastSeq: ");
|
||||||
AppendNumberTo(&r, last_sequence_);
|
AppendNumberTo(&r, last_sequence_);
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
for (int i = 0; i < compact_pointers_.size(); i++) {
|
||||||
r.append("\n CompactPointer: ");
|
r.append("\n CompactPointer: ");
|
||||||
AppendNumberTo(&r, compact_pointers_[i].first);
|
AppendNumberTo(&r, compact_pointers_[i].first);
|
||||||
r.append(" '");
|
r.append(" '");
|
||||||
|
@ -247,7 +270,7 @@ std::string VersionEdit::DebugString() const {
|
||||||
r.append(" ");
|
r.append(" ");
|
||||||
AppendNumberTo(&r, iter->second);
|
AppendNumberTo(&r, iter->second);
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
for (int i = 0; i < new_files_.size(); i++) {
|
||||||
const FileMetaData& f = new_files_[i].second;
|
const FileMetaData& f = new_files_[i].second;
|
||||||
r.append("\n AddFile: ");
|
r.append("\n AddFile: ");
|
||||||
AppendNumberTo(&r, new_files_[i].first);
|
AppendNumberTo(&r, new_files_[i].first);
|
||||||
|
@ -261,6 +284,16 @@ std::string VersionEdit::DebugString() const {
|
||||||
AppendEscapedStringTo(&r, f.largest.Encode());
|
AppendEscapedStringTo(&r, f.largest.Encode());
|
||||||
r.append("'");
|
r.append("'");
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < large_refs_added_.size(); i++) {
|
||||||
|
const VersionEdit::Large& l = large_refs_added_[i];
|
||||||
|
r.append("\n LargeRef: ");
|
||||||
|
AppendNumberTo(&r, l.fnum);
|
||||||
|
r.append(" ");
|
||||||
|
r.append(LargeValueRefToFilenameString(l.large_ref));
|
||||||
|
r.append(" '");
|
||||||
|
AppendEscapedStringTo(&r, l.internal_key.Encode());
|
||||||
|
r.append("'");
|
||||||
|
}
|
||||||
r.append("\n}\n");
|
r.append("\n}\n");
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
|
@ -75,6 +75,18 @@ class VersionEdit {
|
||||||
deleted_files_.insert(std::make_pair(level, file));
|
deleted_files_.insert(std::make_pair(level, file));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Record that a large value with the specified large_ref was
|
||||||
|
// written to the output file numbered "fnum"
|
||||||
|
void AddLargeValueRef(const LargeValueRef& large_ref,
|
||||||
|
uint64_t fnum,
|
||||||
|
const Slice& internal_key) {
|
||||||
|
large_refs_added_.resize(large_refs_added_.size() + 1);
|
||||||
|
Large* large = &(large_refs_added_.back());
|
||||||
|
large->large_ref = large_ref;
|
||||||
|
large->fnum = fnum;
|
||||||
|
large->internal_key.DecodeFrom(internal_key);
|
||||||
|
}
|
||||||
|
|
||||||
void EncodeTo(std::string* dst) const;
|
void EncodeTo(std::string* dst) const;
|
||||||
Status DecodeFrom(const Slice& src);
|
Status DecodeFrom(const Slice& src);
|
||||||
|
|
||||||
|
@ -99,6 +111,12 @@ class VersionEdit {
|
||||||
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
||||||
DeletedFileSet deleted_files_;
|
DeletedFileSet deleted_files_;
|
||||||
std::vector< std::pair<int, FileMetaData> > new_files_;
|
std::vector< std::pair<int, FileMetaData> > new_files_;
|
||||||
|
struct Large {
|
||||||
|
LargeValueRef large_ref;
|
||||||
|
uint64_t fnum;
|
||||||
|
InternalKey internal_key;
|
||||||
|
};
|
||||||
|
std::vector<Large> large_refs_added_;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -26,9 +26,13 @@ TEST(VersionEditTest, EncodeDecode) {
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
TestEncodeDecode(edit);
|
TestEncodeDecode(edit);
|
||||||
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
|
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
|
||||||
InternalKey("foo", kBig + 500 + i, kTypeValue),
|
InternalKey("foo", kBig + 500 + i, kTypeLargeValueRef),
|
||||||
InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
|
InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
|
||||||
edit.DeleteFile(4, kBig + 700 + i);
|
edit.DeleteFile(4, kBig + 700 + i);
|
||||||
|
edit.AddLargeValueRef(LargeValueRef::Make("big", kNoCompression),
|
||||||
|
kBig + 800 + i, "foobar");
|
||||||
|
edit.AddLargeValueRef(LargeValueRef::Make("big2", kSnappyCompression),
|
||||||
|
kBig + 801 + i, "baz");
|
||||||
edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
|
edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,7 +58,7 @@ std::string IntSetToString(const std::set<uint64_t>& s) {
|
||||||
Version::~Version() {
|
Version::~Version() {
|
||||||
assert(refs_ == 0);
|
assert(refs_ == 0);
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
for (size_t i = 0; i < files_[level].size(); i++) {
|
for (int i = 0; i < files_[level].size(); i++) {
|
||||||
FileMetaData* f = files_[level][i];
|
FileMetaData* f = files_[level][i];
|
||||||
assert(f->refs >= 0);
|
assert(f->refs >= 0);
|
||||||
f->refs--;
|
f->refs--;
|
||||||
|
@ -134,7 +134,7 @@ class Version::LevelFileNumIterator : public Iterator {
|
||||||
private:
|
private:
|
||||||
const InternalKeyComparator icmp_;
|
const InternalKeyComparator icmp_;
|
||||||
const std::vector<FileMetaData*>* const flist_;
|
const std::vector<FileMetaData*>* const flist_;
|
||||||
uint32_t index_;
|
int index_;
|
||||||
|
|
||||||
// Backing store for value(). Holds the file number and size.
|
// Backing store for value(). Holds the file number and size.
|
||||||
mutable char value_buf_[16];
|
mutable char value_buf_[16];
|
||||||
|
@ -164,7 +164,7 @@ Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
|
||||||
void Version::AddIterators(const ReadOptions& options,
|
void Version::AddIterators(const ReadOptions& options,
|
||||||
std::vector<Iterator*>* iters) {
|
std::vector<Iterator*>* iters) {
|
||||||
// Merge all level zero files together since they may overlap
|
// Merge all level zero files together since they may overlap
|
||||||
for (size_t i = 0; i < files_[0].size(); i++) {
|
for (int i = 0; i < files_[0].size(); i++) {
|
||||||
iters->push_back(
|
iters->push_back(
|
||||||
vset_->table_cache_->NewIterator(
|
vset_->table_cache_->NewIterator(
|
||||||
options, files_[0][i]->number, files_[0][i]->file_size));
|
options, files_[0][i]->number, files_[0][i]->file_size));
|
||||||
|
@ -201,7 +201,7 @@ std::string Version::DebugString() const {
|
||||||
AppendNumberTo(&r, level);
|
AppendNumberTo(&r, level);
|
||||||
r.push_back(':');
|
r.push_back(':');
|
||||||
const std::vector<FileMetaData*>& files = files_[level];
|
const std::vector<FileMetaData*>& files = files_[level];
|
||||||
for (size_t i = 0; i < files.size(); i++) {
|
for (int i = 0; i < files.size(); i++) {
|
||||||
r.push_back(' ');
|
r.push_back(' ');
|
||||||
AppendNumberTo(&r, files[i]->number);
|
AppendNumberTo(&r, files[i]->number);
|
||||||
r.push_back(':');
|
r.push_back(':');
|
||||||
|
@ -232,7 +232,7 @@ class VersionSet::Builder {
|
||||||
: vset_(vset) {
|
: vset_(vset) {
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = base->files_[level];
|
const std::vector<FileMetaData*>& files = base->files_[level];
|
||||||
for (size_t i = 0; i < files.size(); i++) {
|
for (int i = 0; i < files.size(); i++) {
|
||||||
FileMetaData* f = files[i];
|
FileMetaData* f = files[i];
|
||||||
f->refs++;
|
f->refs++;
|
||||||
files_[level].insert(std::make_pair(f->number, f));
|
files_[level].insert(std::make_pair(f->number, f));
|
||||||
|
@ -258,7 +258,7 @@ class VersionSet::Builder {
|
||||||
// Apply all of the edits in *edit to the current state.
|
// Apply all of the edits in *edit to the current state.
|
||||||
void Apply(VersionEdit* edit) {
|
void Apply(VersionEdit* edit) {
|
||||||
// Update compaction pointers
|
// Update compaction pointers
|
||||||
for (size_t i = 0; i < edit->compact_pointers_.size(); i++) {
|
for (int i = 0; i < edit->compact_pointers_.size(); i++) {
|
||||||
const int level = edit->compact_pointers_[i].first;
|
const int level = edit->compact_pointers_[i].first;
|
||||||
vset_->compact_pointer_[level] =
|
vset_->compact_pointer_[level] =
|
||||||
edit->compact_pointers_[i].second.Encode().ToString();
|
edit->compact_pointers_[i].second.Encode().ToString();
|
||||||
|
@ -284,13 +284,19 @@ class VersionSet::Builder {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add new files
|
// Add new files
|
||||||
for (size_t i = 0; i < edit->new_files_.size(); i++) {
|
for (int i = 0; i < edit->new_files_.size(); i++) {
|
||||||
const int level = edit->new_files_[i].first;
|
const int level = edit->new_files_[i].first;
|
||||||
FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
|
FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
|
||||||
f->refs = 1;
|
f->refs = 1;
|
||||||
assert(files_[level].count(f->number) == 0);
|
assert(files_[level].count(f->number) == 0);
|
||||||
files_[level].insert(std::make_pair(f->number, f));
|
files_[level].insert(std::make_pair(f->number, f));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add large value refs
|
||||||
|
for (int i = 0; i < edit->large_refs_added_.size(); i++) {
|
||||||
|
const VersionEdit::Large& l = edit->large_refs_added_[i];
|
||||||
|
vset_->RegisterLargeValueRef(l.large_ref, l.fnum, l.internal_key);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save the current state in *v.
|
// Save the current state in *v.
|
||||||
|
@ -539,7 +545,7 @@ Status VersionSet::Recover() {
|
||||||
|
|
||||||
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
||||||
int64_t sum = 0;
|
int64_t sum = 0;
|
||||||
for (size_t i = 0; i < files.size(); i++) {
|
for (int i = 0; i < files.size(); i++) {
|
||||||
sum += files[i]->file_size;
|
sum += files[i]->file_size;
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
@ -604,12 +610,25 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
|
||||||
// Save files
|
// Save files
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = current_->files_[level];
|
const std::vector<FileMetaData*>& files = current_->files_[level];
|
||||||
for (size_t i = 0; i < files.size(); i++) {
|
for (int i = 0; i < files.size(); i++) {
|
||||||
const FileMetaData* f = files[i];
|
const FileMetaData* f = files[i];
|
||||||
edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
|
edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Save large value refs
|
||||||
|
for (LargeValueMap::const_iterator it = large_value_refs_.begin();
|
||||||
|
it != large_value_refs_.end();
|
||||||
|
++it) {
|
||||||
|
const LargeValueRef& ref = it->first;
|
||||||
|
const LargeReferencesSet& pointers = it->second;
|
||||||
|
for (LargeReferencesSet::const_iterator j = pointers.begin();
|
||||||
|
j != pointers.end();
|
||||||
|
++j) {
|
||||||
|
edit.AddLargeValueRef(ref, j->first, j->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string record;
|
std::string record;
|
||||||
edit.EncodeTo(&record);
|
edit.EncodeTo(&record);
|
||||||
return log->AddRecord(record);
|
return log->AddRecord(record);
|
||||||
|
@ -632,7 +651,7 @@ Status VersionSet::SortLevel(Version* v, uint64_t level) {
|
||||||
|
|
||||||
if (result.ok() && level > 0) {
|
if (result.ok() && level > 0) {
|
||||||
// There should be no overlap
|
// There should be no overlap
|
||||||
for (size_t i = 1; i < v->files_[level].size(); i++) {
|
for (int i = 1; i < v->files_[level].size(); i++) {
|
||||||
const InternalKey& prev_end = v->files_[level][i-1]->largest;
|
const InternalKey& prev_end = v->files_[level][i-1]->largest;
|
||||||
const InternalKey& this_begin = v->files_[level][i]->smallest;
|
const InternalKey& this_begin = v->files_[level][i]->smallest;
|
||||||
if (icmp_.Compare(prev_end, this_begin) >= 0) {
|
if (icmp_.Compare(prev_end, this_begin) >= 0) {
|
||||||
|
@ -657,7 +676,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
|
||||||
uint64_t result = 0;
|
uint64_t result = 0;
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = v->files_[level];
|
const std::vector<FileMetaData*>& files = v->files_[level];
|
||||||
for (size_t i = 0; i < files.size(); i++) {
|
for (int i = 0; i < files.size(); i++) {
|
||||||
if (icmp_.Compare(files[i]->largest, ikey) <= 0) {
|
if (icmp_.Compare(files[i]->largest, ikey) <= 0) {
|
||||||
// Entire file is before "ikey", so just add the file size
|
// Entire file is before "ikey", so just add the file size
|
||||||
result += files[i]->file_size;
|
result += files[i]->file_size;
|
||||||
|
@ -682,9 +701,83 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add in large value files which are references from internal keys
|
||||||
|
// stored in the table files
|
||||||
|
//
|
||||||
|
// TODO(opt): this is O(# large values in db). If this becomes too slow,
|
||||||
|
// we could store an auxiliary data structure indexed by internal key
|
||||||
|
for (LargeValueMap::const_iterator it = large_value_refs_.begin();
|
||||||
|
it != large_value_refs_.end();
|
||||||
|
++it) {
|
||||||
|
const LargeValueRef& lref = it->first;
|
||||||
|
for (LargeReferencesSet::const_iterator it2 = it->second.begin();
|
||||||
|
it2 != it->second.end();
|
||||||
|
++it2) {
|
||||||
|
if (icmp_.Compare(it2->second, ikey.Encode()) <= 0) {
|
||||||
|
// Internal key for large value is before our key of interest
|
||||||
|
result += lref.ValueSize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool VersionSet::RegisterLargeValueRef(const LargeValueRef& large_ref,
|
||||||
|
uint64_t fnum,
|
||||||
|
const InternalKey& internal_key) {
|
||||||
|
LargeReferencesSet* refs = &large_value_refs_[large_ref];
|
||||||
|
bool is_first = refs->empty();
|
||||||
|
refs->insert(make_pair(fnum, internal_key.Encode().ToString()));
|
||||||
|
return is_first;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VersionSet::CleanupLargeValueRefs(const std::set<uint64_t>& live_tables) {
|
||||||
|
for (LargeValueMap::iterator it = large_value_refs_.begin();
|
||||||
|
it != large_value_refs_.end();
|
||||||
|
) {
|
||||||
|
LargeReferencesSet* refs = &it->second;
|
||||||
|
for (LargeReferencesSet::iterator ref_it = refs->begin();
|
||||||
|
ref_it != refs->end();
|
||||||
|
) {
|
||||||
|
if (ref_it->first != log_number_ && // Not in log file
|
||||||
|
ref_it->first != prev_log_number_ && // Not in prev log
|
||||||
|
live_tables.count(ref_it->first) == 0) { // Not in a live table
|
||||||
|
// No longer live: erase
|
||||||
|
LargeReferencesSet::iterator to_erase = ref_it;
|
||||||
|
++ref_it;
|
||||||
|
refs->erase(to_erase);
|
||||||
|
} else {
|
||||||
|
// Still live: leave this reference alone
|
||||||
|
++ref_it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (refs->empty()) {
|
||||||
|
// No longer any live references to this large value: remove from
|
||||||
|
// large_value_refs
|
||||||
|
Log(env_, options_->info_log, "large value is dead: '%s'",
|
||||||
|
LargeValueRefToFilenameString(it->first).c_str());
|
||||||
|
LargeValueMap::iterator to_erase = it;
|
||||||
|
++it;
|
||||||
|
large_value_refs_.erase(to_erase);
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VersionSet::LargeValueIsLive(const LargeValueRef& large_ref) {
|
||||||
|
LargeValueMap::iterator it = large_value_refs_.find(large_ref);
|
||||||
|
if (it == large_value_refs_.end()) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
assert(!it->second.empty());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void VersionSet::MaybeDeleteOldVersions() {
|
void VersionSet::MaybeDeleteOldVersions() {
|
||||||
// Note: it is important to delete versions in order since a newer
|
// Note: it is important to delete versions in order since a newer
|
||||||
// version with zero refs may be holding a pointer to a memtable
|
// version with zero refs may be holding a pointer to a memtable
|
||||||
|
@ -700,7 +793,7 @@ void VersionSet::AddLiveFiles(std::set<uint64_t>* live) {
|
||||||
for (Version* v = oldest_; v != NULL; v = v->next_) {
|
for (Version* v = oldest_; v != NULL; v = v->next_) {
|
||||||
for (int level = 0; level < config::kNumLevels; level++) {
|
for (int level = 0; level < config::kNumLevels; level++) {
|
||||||
const std::vector<FileMetaData*>& files = v->files_[level];
|
const std::vector<FileMetaData*>& files = v->files_[level];
|
||||||
for (size_t i = 0; i < files.size(); i++) {
|
for (int i = 0; i < files.size(); i++) {
|
||||||
live->insert(files[i]->number);
|
live->insert(files[i]->number);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -717,7 +810,7 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
|
||||||
int64_t result = 0;
|
int64_t result = 0;
|
||||||
std::vector<FileMetaData*> overlaps;
|
std::vector<FileMetaData*> overlaps;
|
||||||
for (int level = 0; level < config::kNumLevels - 1; level++) {
|
for (int level = 0; level < config::kNumLevels - 1; level++) {
|
||||||
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
for (int i = 0; i < current_->files_[level].size(); i++) {
|
||||||
const FileMetaData* f = current_->files_[level][i];
|
const FileMetaData* f = current_->files_[level][i];
|
||||||
GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps);
|
GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps);
|
||||||
const int64_t sum = TotalFileSize(overlaps);
|
const int64_t sum = TotalFileSize(overlaps);
|
||||||
|
@ -739,7 +832,7 @@ void VersionSet::GetOverlappingInputs(
|
||||||
Slice user_begin = begin.user_key();
|
Slice user_begin = begin.user_key();
|
||||||
Slice user_end = end.user_key();
|
Slice user_end = end.user_key();
|
||||||
const Comparator* user_cmp = icmp_.user_comparator();
|
const Comparator* user_cmp = icmp_.user_comparator();
|
||||||
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
for (int i = 0; i < current_->files_[level].size(); i++) {
|
||||||
FileMetaData* f = current_->files_[level][i];
|
FileMetaData* f = current_->files_[level][i];
|
||||||
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
|
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
|
||||||
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
||||||
|
@ -759,7 +852,7 @@ void VersionSet::GetRange(const std::vector<FileMetaData*>& inputs,
|
||||||
assert(!inputs.empty());
|
assert(!inputs.empty());
|
||||||
smallest->Clear();
|
smallest->Clear();
|
||||||
largest->Clear();
|
largest->Clear();
|
||||||
for (size_t i = 0; i < inputs.size(); i++) {
|
for (int i = 0; i < inputs.size(); i++) {
|
||||||
FileMetaData* f = inputs[i];
|
FileMetaData* f = inputs[i];
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
*smallest = f->smallest;
|
*smallest = f->smallest;
|
||||||
|
@ -802,7 +895,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
||||||
if (!c->inputs_[which].empty()) {
|
if (!c->inputs_[which].empty()) {
|
||||||
if (c->level() + which == 0) {
|
if (c->level() + which == 0) {
|
||||||
const std::vector<FileMetaData*>& files = c->inputs_[which];
|
const std::vector<FileMetaData*>& files = c->inputs_[which];
|
||||||
for (size_t i = 0; i < files.size(); i++) {
|
for (int i = 0; i < files.size(); i++) {
|
||||||
list[num++] = table_cache_->NewIterator(
|
list[num++] = table_cache_->NewIterator(
|
||||||
options, files[i]->number, files[i]->file_size);
|
options, files[i]->number, files[i]->file_size);
|
||||||
}
|
}
|
||||||
|
@ -834,7 +927,7 @@ Compaction* VersionSet::PickCompaction() {
|
||||||
c->input_version_->Ref();
|
c->input_version_->Ref();
|
||||||
|
|
||||||
// Pick the first file that comes after compact_pointer_[level]
|
// Pick the first file that comes after compact_pointer_[level]
|
||||||
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
for (int i = 0; i < current_->files_[level].size(); i++) {
|
||||||
FileMetaData* f = current_->files_[level][i];
|
FileMetaData* f = current_->files_[level][i];
|
||||||
if (compact_pointer_[level].empty() ||
|
if (compact_pointer_[level].empty() ||
|
||||||
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
||||||
|
@ -969,7 +1062,7 @@ bool Compaction::IsTrivialMove() const {
|
||||||
|
|
||||||
void Compaction::AddInputDeletions(VersionEdit* edit) {
|
void Compaction::AddInputDeletions(VersionEdit* edit) {
|
||||||
for (int which = 0; which < 2; which++) {
|
for (int which = 0; which < 2; which++) {
|
||||||
for (size_t i = 0; i < inputs_[which].size(); i++) {
|
for (int i = 0; i < inputs_[which].size(); i++) {
|
||||||
edit->DeleteFile(level_ + which, inputs_[which][i]->number);
|
edit->DeleteFile(level_ + which, inputs_[which][i]->number);
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -171,6 +171,22 @@ class VersionSet {
|
||||||
// "key" as of version "v".
|
// "key" as of version "v".
|
||||||
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
||||||
|
|
||||||
|
// Register a reference to a large value with the specified
|
||||||
|
// large_ref from the specified file number. Returns "true" if this
|
||||||
|
// is the first recorded reference to the "large_ref" value in the
|
||||||
|
// database, and false otherwise.
|
||||||
|
bool RegisterLargeValueRef(const LargeValueRef& large_ref,
|
||||||
|
uint64_t filenum,
|
||||||
|
const InternalKey& internal_key);
|
||||||
|
|
||||||
|
// Cleanup the large value reference state by eliminating any
|
||||||
|
// references from files that are not includes in either "live_tables"
|
||||||
|
// or the current log.
|
||||||
|
void CleanupLargeValueRefs(const std::set<uint64_t>& live_tables);
|
||||||
|
|
||||||
|
// Returns true if a large value with the given reference is live.
|
||||||
|
bool LargeValueIsLive(const LargeValueRef& large_ref);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Builder;
|
class Builder;
|
||||||
|
|
||||||
|
@ -221,6 +237,14 @@ class VersionSet {
|
||||||
Version* current_; // Pointer to the last (newest) list entry
|
Version* current_; // Pointer to the last (newest) list entry
|
||||||
Version* oldest_; // Pointer to the first (oldest) list entry
|
Version* oldest_; // Pointer to the first (oldest) list entry
|
||||||
|
|
||||||
|
// Map from large value reference to the set of <file numbers,internal_key>
|
||||||
|
// values containing references to the value. We keep the
|
||||||
|
// internal key as a std::string rather than as an InternalKey because
|
||||||
|
// we want to be able to easily use a set.
|
||||||
|
typedef std::set<std::pair<uint64_t, std::string> > LargeReferencesSet;
|
||||||
|
typedef std::map<LargeValueRef, LargeReferencesSet> LargeValueMap;
|
||||||
|
LargeValueMap large_value_refs_;
|
||||||
|
|
||||||
// Per-level key at which the next compaction at that level should start.
|
// Per-level key at which the next compaction at that level should start.
|
||||||
// Either an empty string, or a valid InternalKey.
|
// Either an empty string, or a valid InternalKey.
|
||||||
std::string compact_pointer_[config::kNumLevels];
|
std::string compact_pointer_[config::kNumLevels];
|
||||||
|
@ -289,7 +313,7 @@ class Compaction {
|
||||||
// State used to check for number of of overlapping grandparent files
|
// State used to check for number of of overlapping grandparent files
|
||||||
// (parent == level_ + 1, grandparent == level_ + 2)
|
// (parent == level_ + 1, grandparent == level_ + 2)
|
||||||
std::vector<FileMetaData*> grandparents_;
|
std::vector<FileMetaData*> grandparents_;
|
||||||
size_t grandparent_index_; // Index in grandparent_starts_
|
int grandparent_index_; // Index in grandparent_starts_
|
||||||
bool seen_key_; // Some output key has been seen
|
bool seen_key_; // Some output key has been seen
|
||||||
int64_t overlapped_bytes_; // Bytes of overlap between current output
|
int64_t overlapped_bytes_; // Bytes of overlap between current output
|
||||||
// and grandparent files
|
// and grandparent files
|
||||||
|
@ -300,7 +324,7 @@ class Compaction {
|
||||||
// is that we are positioned at one of the file ranges for each
|
// is that we are positioned at one of the file ranges for each
|
||||||
// higher level than the ones involved in this compaction (i.e. for
|
// higher level than the ones involved in this compaction (i.e. for
|
||||||
// all L >= level_ + 2).
|
// all L >= level_ + 2).
|
||||||
size_t level_ptrs_[config::kNumLevels];
|
int level_ptrs_[config::kNumLevels];
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -8,6 +8,7 @@
|
||||||
// data: record[count]
|
// data: record[count]
|
||||||
// record :=
|
// record :=
|
||||||
// kTypeValue varstring varstring |
|
// kTypeValue varstring varstring |
|
||||||
|
// kTypeLargeValueRef varstring varstring |
|
||||||
// kTypeDeletion varstring
|
// kTypeDeletion varstring
|
||||||
// varstring :=
|
// varstring :=
|
||||||
// len: varint32
|
// len: varint32
|
||||||
|
@ -57,6 +58,16 @@ void WriteBatch::Put(const Slice& key, const Slice& value) {
|
||||||
PutLengthPrefixedSlice(&rep_, value);
|
PutLengthPrefixedSlice(&rep_, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WriteBatchInternal::PutLargeValueRef(WriteBatch* b,
|
||||||
|
const Slice& key,
|
||||||
|
const LargeValueRef& large_ref) {
|
||||||
|
WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
|
||||||
|
b->rep_.push_back(static_cast<char>(kTypeLargeValueRef));
|
||||||
|
PutLengthPrefixedSlice(&b->rep_, key);
|
||||||
|
PutLengthPrefixedSlice(&b->rep_,
|
||||||
|
Slice(large_ref.data, sizeof(large_ref.data)));
|
||||||
|
}
|
||||||
|
|
||||||
void WriteBatch::Delete(const Slice& key) {
|
void WriteBatch::Delete(const Slice& key) {
|
||||||
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
||||||
rep_.push_back(static_cast<char>(kTypeDeletion));
|
rep_.push_back(static_cast<char>(kTypeDeletion));
|
||||||
|
@ -76,6 +87,10 @@ Status WriteBatchInternal::InsertInto(const WriteBatch* b,
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
memtable->Add(it.sequence_number(), kTypeValue, it.key(), it.value());
|
memtable->Add(it.sequence_number(), kTypeValue, it.key(), it.value());
|
||||||
break;
|
break;
|
||||||
|
case kTypeLargeValueRef:
|
||||||
|
memtable->Add(it.sequence_number(), kTypeLargeValueRef,
|
||||||
|
it.key(), it.value());
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
found++;
|
found++;
|
||||||
}
|
}
|
||||||
|
@ -119,6 +134,7 @@ void WriteBatchInternal::Iterator::GetNextEntry() {
|
||||||
input_.remove_prefix(1);
|
input_.remove_prefix(1);
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case kTypeValue:
|
case kTypeValue:
|
||||||
|
case kTypeLargeValueRef:
|
||||||
if (GetLengthPrefixedSlice(&input_, &key_) &&
|
if (GetLengthPrefixedSlice(&input_, &key_) &&
|
||||||
GetLengthPrefixedSlice(&input_, &value_)) {
|
GetLengthPrefixedSlice(&input_, &value_)) {
|
||||||
op_ = static_cast<ValueType>(tag);
|
op_ = static_cast<ValueType>(tag);
|
|
@ -13,6 +13,10 @@ namespace leveldb {
|
||||||
// WriteBatch that we don't want in the public WriteBatch interface.
|
// WriteBatch that we don't want in the public WriteBatch interface.
|
||||||
class WriteBatchInternal {
|
class WriteBatchInternal {
|
||||||
public:
|
public:
|
||||||
|
static void PutLargeValueRef(WriteBatch* batch,
|
||||||
|
const Slice& key,
|
||||||
|
const LargeValueRef& large_ref);
|
||||||
|
|
||||||
// Return the number of entries in the batch.
|
// Return the number of entries in the batch.
|
||||||
static int Count(const WriteBatch* batch);
|
static int Count(const WriteBatch* batch);
|
||||||
|
|
|
@ -29,6 +29,13 @@ static std::string PrintContents(WriteBatch* b) {
|
||||||
state.append(iter->value().ToString());
|
state.append(iter->value().ToString());
|
||||||
state.append(")");
|
state.append(")");
|
||||||
break;
|
break;
|
||||||
|
case kTypeLargeValueRef:
|
||||||
|
state.append("PutRef(");
|
||||||
|
state.append(ikey.user_key.ToString());
|
||||||
|
state.append(", ");
|
||||||
|
state.append(iter->value().ToString());
|
||||||
|
state.append(")");
|
||||||
|
break;
|
||||||
case kTypeDeletion:
|
case kTypeDeletion:
|
||||||
state.append("Delete(");
|
state.append("Delete(");
|
||||||
state.append(ikey.user_key.ToString());
|
state.append(ikey.user_key.ToString());
|
||||||
|
@ -67,6 +74,22 @@ TEST(WriteBatchTest, Multiple) {
|
||||||
PrintContents(&batch));
|
PrintContents(&batch));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(WriteBatchTest, PutIndirect) {
|
||||||
|
WriteBatch batch;
|
||||||
|
batch.Put(Slice("baz"), Slice("boo"));
|
||||||
|
LargeValueRef h;
|
||||||
|
for (int i = 0; i < LargeValueRef::ByteSize(); i++) {
|
||||||
|
h.data[i] = (i < 20) ? 'a' : 'b';
|
||||||
|
}
|
||||||
|
WriteBatchInternal::PutLargeValueRef(&batch, Slice("foo"), h);
|
||||||
|
WriteBatchInternal::SetSequence(&batch, 100);
|
||||||
|
ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
|
||||||
|
ASSERT_EQ(2, WriteBatchInternal::Count(&batch));
|
||||||
|
ASSERT_EQ("Put(baz, boo)@100"
|
||||||
|
"PutRef(foo, aaaaaaaaaaaaaaaaaaaabbbbbbbbb)@101",
|
||||||
|
PrintContents(&batch));
|
||||||
|
}
|
||||||
|
|
||||||
TEST(WriteBatchTest, Corruption) {
|
TEST(WriteBatchTest, Corruption) {
|
||||||
WriteBatch batch;
|
WriteBatch batch;
|
||||||
batch.Put(Slice("foo"), Slice("bar"));
|
batch.Put(Slice("foo"), Slice("bar"));
|
|
@ -57,6 +57,15 @@ These merges have the effect of gradually migrating new updates from
|
||||||
the young level to the largest level using only bulk reads and writes
|
the young level to the largest level using only bulk reads and writes
|
||||||
(i.e., minimizing expensive seeks).
|
(i.e., minimizing expensive seeks).
|
||||||
|
|
||||||
|
<h2>Large value files</h2>
|
||||||
|
<p>
|
||||||
|
Each large value (greater than 64KB by default) is placed in a large
|
||||||
|
value file (*.val) of its own. An entry is maintained in the log
|
||||||
|
and/or sorted tables that maps from the corresponding key to the
|
||||||
|
name of this large value file. The name of the large value file
|
||||||
|
is derived from a SHA1 hash of the value and its length so that
|
||||||
|
identical values share the same file.
|
||||||
|
<p>
|
||||||
<h2>Manifest</h2>
|
<h2>Manifest</h2>
|
||||||
<p>
|
<p>
|
||||||
A MANIFEST file lists the set of sorted tables that make up each
|
A MANIFEST file lists the set of sorted tables that make up each
|
||||||
|
@ -211,7 +220,9 @@ So maybe even the sharding is not necessary on modern filesystems?
|
||||||
compaction and at the end of recovery. It finds the names of all
|
compaction and at the end of recovery. It finds the names of all
|
||||||
files in the database. It deletes all log files that are not the
|
files in the database. It deletes all log files that are not the
|
||||||
current log file. It deletes all table files that are not referenced
|
current log file. It deletes all table files that are not referenced
|
||||||
from some level and are not the output of an active compaction.
|
from some level and are not the output of an active compaction. It
|
||||||
|
deletes all large value files that are not referenced from any live
|
||||||
|
table or log file.
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
|
@ -412,6 +412,17 @@ We might want to prefix <code>filename</code> keys with one letter (say '/') and
|
||||||
over just the metadata do not force us to fetch and cache bulky file
|
over just the metadata do not force us to fetch and cache bulky file
|
||||||
contents.
|
contents.
|
||||||
<p>
|
<p>
|
||||||
|
<h2>Large Values</h2>
|
||||||
|
<p>
|
||||||
|
<code>leveldb</code> has special treatment of large values (by default, a value
|
||||||
|
of length greater than or equal to 64K is considered large, though a
|
||||||
|
field in Options can be used to adjust this threshold). Each such
|
||||||
|
large value is placed in a separate operating system file, and the
|
||||||
|
normal database blocks just contain pointers to such files.
|
||||||
|
<p>
|
||||||
|
Furthermore, if the same large value occurs multiple times in a single
|
||||||
|
database, it will be stored just once.
|
||||||
|
<p>
|
||||||
<h1>Checksums</h1>
|
<h1>Checksums</h1>
|
||||||
<p>
|
<p>
|
||||||
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
|
@ -86,6 +86,16 @@ struct Options {
|
||||||
// Default: 1000
|
// Default: 1000
|
||||||
int max_open_files;
|
int max_open_files;
|
||||||
|
|
||||||
|
// Handle values larger than "large_value_threshold" bytes
|
||||||
|
// specially, by writing them into their own files (to avoid
|
||||||
|
// compaction overhead) and doing content-based elimination of
|
||||||
|
// duplicate values to save space.
|
||||||
|
//
|
||||||
|
// We recommend against changing this value.
|
||||||
|
//
|
||||||
|
// Default: 64K
|
||||||
|
size_t large_value_threshold;
|
||||||
|
|
||||||
// Control over blocks (user data is stored in a set of blocks, and
|
// Control over blocks (user data is stored in a set of blocks, and
|
||||||
// a block is the unit of reading from disk).
|
// a block is the unit of reading from disk).
|
||||||
|
|
||||||
|
@ -100,7 +110,7 @@ struct Options {
|
||||||
// compression is enabled. This parameter can be changed dynamically.
|
// compression is enabled. This parameter can be changed dynamically.
|
||||||
//
|
//
|
||||||
// Default: 4K
|
// Default: 4K
|
||||||
size_t block_size;
|
int block_size;
|
||||||
|
|
||||||
// Number of keys between restart points for delta encoding of keys.
|
// Number of keys between restart points for delta encoding of keys.
|
||||||
// This parameter can be changed dynamically. Most clients should
|
// This parameter can be changed dynamically. Most clients should
|
|
@ -96,6 +96,8 @@
|
||||||
'port/port_example.h',
|
'port/port_example.h',
|
||||||
'port/port_posix.cc',
|
'port/port_posix.cc',
|
||||||
'port/port_posix.h',
|
'port/port_posix.h',
|
||||||
|
'port/sha1_portable.cc',
|
||||||
|
'port/sha1_portable.h',
|
||||||
'table/block.cc',
|
'table/block.cc',
|
||||||
'table/block.h',
|
'table/block.h',
|
||||||
'table/block_builder.cc',
|
'table/block_builder.cc',
|
||||||
|
@ -265,6 +267,16 @@
|
||||||
'db/log_test.cc',
|
'db/log_test.cc',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'target_name': 'leveldb_sha1_test',
|
||||||
|
'type': 'executable',
|
||||||
|
'dependencies': [
|
||||||
|
'leveldb_testutil',
|
||||||
|
],
|
||||||
|
'sources': [
|
||||||
|
'port/sha1_test.cc',
|
||||||
|
],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'target_name': 'leveldb_skiplist_test',
|
'target_name': 'leveldb_skiplist_test',
|
||||||
'type': 'executable',
|
'type': 'executable',
|
|
@ -1,122 +0,0 @@
|
||||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style license that can be
|
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
||||||
|
|
||||||
#include "db/filename.h"
|
|
||||||
|
|
||||||
#include "db/dbformat.h"
|
|
||||||
#include "port/port.h"
|
|
||||||
#include "util/logging.h"
|
|
||||||
#include "util/testharness.h"
|
|
||||||
|
|
||||||
namespace leveldb {
|
|
||||||
|
|
||||||
class FileNameTest { };
|
|
||||||
|
|
||||||
TEST(FileNameTest, Parse) {
|
|
||||||
Slice db;
|
|
||||||
FileType type;
|
|
||||||
uint64_t number;
|
|
||||||
|
|
||||||
// Successful parses
|
|
||||||
static struct {
|
|
||||||
const char* fname;
|
|
||||||
uint64_t number;
|
|
||||||
FileType type;
|
|
||||||
} cases[] = {
|
|
||||||
{ "100.log", 100, kLogFile },
|
|
||||||
{ "0.log", 0, kLogFile },
|
|
||||||
{ "0.sst", 0, kTableFile },
|
|
||||||
{ "CURRENT", 0, kCurrentFile },
|
|
||||||
{ "LOCK", 0, kDBLockFile },
|
|
||||||
{ "MANIFEST-2", 2, kDescriptorFile },
|
|
||||||
{ "MANIFEST-7", 7, kDescriptorFile },
|
|
||||||
{ "LOG", 0, kInfoLogFile },
|
|
||||||
{ "LOG.old", 0, kInfoLogFile },
|
|
||||||
{ "18446744073709551615.log", 18446744073709551615ull, kLogFile },
|
|
||||||
};
|
|
||||||
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
|
||||||
std::string f = cases[i].fname;
|
|
||||||
ASSERT_TRUE(ParseFileName(f, &number, &type)) << f;
|
|
||||||
ASSERT_EQ(cases[i].type, type) << f;
|
|
||||||
ASSERT_EQ(cases[i].number, number) << f;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Errors
|
|
||||||
static const char* errors[] = {
|
|
||||||
"",
|
|
||||||
"foo",
|
|
||||||
"foo-dx-100.log",
|
|
||||||
".log",
|
|
||||||
"",
|
|
||||||
"manifest",
|
|
||||||
"CURREN",
|
|
||||||
"CURRENTX",
|
|
||||||
"MANIFES",
|
|
||||||
"MANIFEST",
|
|
||||||
"MANIFEST-",
|
|
||||||
"XMANIFEST-3",
|
|
||||||
"MANIFEST-3x",
|
|
||||||
"LOC",
|
|
||||||
"LOCKx",
|
|
||||||
"LO",
|
|
||||||
"LOGx",
|
|
||||||
"18446744073709551616.log",
|
|
||||||
"184467440737095516150.log",
|
|
||||||
"100",
|
|
||||||
"100.",
|
|
||||||
"100.lop"
|
|
||||||
};
|
|
||||||
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
|
||||||
std::string f = errors[i];
|
|
||||||
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(FileNameTest, Construction) {
|
|
||||||
uint64_t number;
|
|
||||||
FileType type;
|
|
||||||
std::string fname;
|
|
||||||
|
|
||||||
fname = CurrentFileName("foo");
|
|
||||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
|
||||||
ASSERT_EQ(0, number);
|
|
||||||
ASSERT_EQ(kCurrentFile, type);
|
|
||||||
|
|
||||||
fname = LockFileName("foo");
|
|
||||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
|
||||||
ASSERT_EQ(0, number);
|
|
||||||
ASSERT_EQ(kDBLockFile, type);
|
|
||||||
|
|
||||||
fname = LogFileName("foo", 192);
|
|
||||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
|
||||||
ASSERT_EQ(192, number);
|
|
||||||
ASSERT_EQ(kLogFile, type);
|
|
||||||
|
|
||||||
fname = TableFileName("bar", 200);
|
|
||||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
|
||||||
ASSERT_EQ(200, number);
|
|
||||||
ASSERT_EQ(kTableFile, type);
|
|
||||||
|
|
||||||
fname = DescriptorFileName("bar", 100);
|
|
||||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
|
||||||
ASSERT_EQ(100, number);
|
|
||||||
ASSERT_EQ(kDescriptorFile, type);
|
|
||||||
|
|
||||||
fname = TempFileName("tmp", 999);
|
|
||||||
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
|
||||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
|
||||||
ASSERT_EQ(999, number);
|
|
||||||
ASSERT_EQ(kTempFile, type);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
return leveldb::test::RunAllTests();
|
|
||||||
}
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include <endian.h>
|
#include <endian.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <sha1.h>
|
||||||
#include <cstdatomic>
|
#include <cstdatomic>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
@ -133,6 +134,13 @@ inline bool Snappy_Uncompress(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
||||||
|
SHA1_CTX sha1_ctx;
|
||||||
|
SHA1Init(&sha1_ctx);
|
||||||
|
SHA1Update(&sha1_ctx, (const u_char*)data, len);
|
||||||
|
SHA1Final((u_char*)hash_array, &sha1_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
inline uint64_t ThreadIdentifier() {
|
inline uint64_t ThreadIdentifier() {
|
||||||
pthread_t tid = pthread_self();
|
pthread_t tid = pthread_self();
|
||||||
uint64_t r = 0;
|
uint64_t r = 0;
|
|
@ -13,6 +13,7 @@
|
||||||
#include "base/atomicops.h"
|
#include "base/atomicops.h"
|
||||||
#include "base/basictypes.h"
|
#include "base/basictypes.h"
|
||||||
#include "base/logging.h"
|
#include "base/logging.h"
|
||||||
|
#include "base/sha1.h"
|
||||||
#include "base/synchronization/condition_variable.h"
|
#include "base/synchronization/condition_variable.h"
|
||||||
#include "base/synchronization/lock.h"
|
#include "base/synchronization/lock.h"
|
||||||
|
|
||||||
|
@ -82,6 +83,12 @@ class AtomicPointer {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
||||||
|
return ::base::SHA1HashBytes(reinterpret_cast<const unsigned char*>(data),
|
||||||
|
len,
|
||||||
|
reinterpret_cast<unsigned char*>(hash_array));
|
||||||
|
}
|
||||||
|
|
||||||
bool Snappy_Compress(const char* input, size_t input_length,
|
bool Snappy_Compress(const char* input, size_t input_length,
|
||||||
std::string* output);
|
std::string* output);
|
||||||
bool Snappy_Uncompress(const char* input_data, size_t input_length,
|
bool Snappy_Uncompress(const char* input_data, size_t input_length,
|
|
@ -89,6 +89,11 @@ class AtomicPointer {
|
||||||
void NoBarrier_Store(void* v);
|
void NoBarrier_Store(void* v);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ------------------ Checksumming -------------------
|
||||||
|
|
||||||
|
// Store a 160-bit hash of "data[0..len-1]" in "hash_array[0]..hash_array[19]"
|
||||||
|
extern void SHA1_Hash(const char* data, size_t len, char* hash_array);
|
||||||
|
|
||||||
// ------------------ Compression -------------------
|
// ------------------ Compression -------------------
|
||||||
|
|
||||||
// Store the snappy compression of "input[0,input_length-1]" in *output.
|
// Store the snappy compression of "input[0,input_length-1]" in *output.
|
|
@ -13,6 +13,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstdatomic>
|
#include <cstdatomic>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include "port/sha1_portable.h"
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
namespace port {
|
namespace port {
|
||||||
|
@ -72,6 +73,10 @@ class AtomicPointer {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
||||||
|
SHA1_Hash_Portable(data, len, hash_array);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(gabor): Implement actual compress
|
// TODO(gabor): Implement actual compress
|
||||||
inline bool Snappy_Compress(const char* input, size_t input_length,
|
inline bool Snappy_Compress(const char* input, size_t input_length,
|
||||||
std::string* output) {
|
std::string* output) {
|
|
@ -0,0 +1,298 @@
|
||||||
|
// Portions copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
//
|
||||||
|
// This module provides a slow but portable implementation of
|
||||||
|
// the SHA1 hash function.
|
||||||
|
//
|
||||||
|
// It is adapted from free code written by Paul E. Jones
|
||||||
|
// <paulej@packetizer.com>. See http://www.packetizer.com/security/sha1/
|
||||||
|
//
|
||||||
|
// The license for the original code is:
|
||||||
|
/*
|
||||||
|
Copyright (C) 1998, 2009
|
||||||
|
Paul E. Jones <paulej@packetizer.com>
|
||||||
|
|
||||||
|
Freeware Public License (FPL)
|
||||||
|
|
||||||
|
This software is licensed as "freeware." Permission to distribute
|
||||||
|
this software in source and binary forms, including incorporation
|
||||||
|
into other products, is hereby granted without a fee. THIS SOFTWARE
|
||||||
|
IS PROVIDED 'AS IS' AND WITHOUT ANY EXPRESSED OR IMPLIED WARRANTIES,
|
||||||
|
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHOR SHALL NOT BE HELD
|
||||||
|
LIABLE FOR ANY DAMAGES RESULTING FROM THE USE OF THIS SOFTWARE, EITHER
|
||||||
|
DIRECTLY OR INDIRECTLY, INCLUDING, BUT NOT LIMITED TO, LOSS OF DATA
|
||||||
|
OR DATA BEING RENDERED INACCURATE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "port/sha1_portable.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
namespace port {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Description:
|
||||||
|
* This class implements the Secure Hashing Standard as defined
|
||||||
|
* in FIPS PUB 180-1 published April 17, 1995.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This structure will hold context information for the hashing
|
||||||
|
* operation
|
||||||
|
*/
|
||||||
|
typedef struct SHA1Context {
|
||||||
|
unsigned Message_Digest[5]; /* Message Digest (output) */
|
||||||
|
|
||||||
|
unsigned Length_Low; /* Message length in bits */
|
||||||
|
unsigned Length_High; /* Message length in bits */
|
||||||
|
|
||||||
|
unsigned char Message_Block[64]; /* 512-bit message blocks */
|
||||||
|
int Message_Block_Index; /* Index into message block array */
|
||||||
|
|
||||||
|
bool Computed; /* Is the digest computed? */
|
||||||
|
bool Corrupted; /* Is the message digest corruped? */
|
||||||
|
} SHA1Context;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Portability Issues:
|
||||||
|
* SHA-1 is defined in terms of 32-bit "words". This code was
|
||||||
|
* written with the expectation that the processor has at least
|
||||||
|
* a 32-bit machine word size. If the machine word size is larger,
|
||||||
|
* the code should still function properly. One caveat to that
|
||||||
|
* is that the input functions taking characters and character
|
||||||
|
* arrays assume that only 8 bits of information are stored in each
|
||||||
|
* character.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Define the circular shift macro
|
||||||
|
*/
|
||||||
|
#define SHA1CircularShift(bits,word) \
|
||||||
|
((((word) << (bits)) & 0xFFFFFFFF) | \
|
||||||
|
((word) >> (32-(bits))))
|
||||||
|
|
||||||
|
/* Function prototypes */
|
||||||
|
static void SHA1ProcessMessageBlock(SHA1Context *);
|
||||||
|
static void SHA1PadMessage(SHA1Context *);
|
||||||
|
|
||||||
|
// Initialize the SHA1Context in preparation for computing a new
|
||||||
|
// message digest.
|
||||||
|
static void SHA1Reset(SHA1Context* context) {
|
||||||
|
context->Length_Low = 0;
|
||||||
|
context->Length_High = 0;
|
||||||
|
context->Message_Block_Index = 0;
|
||||||
|
|
||||||
|
context->Message_Digest[0] = 0x67452301;
|
||||||
|
context->Message_Digest[1] = 0xEFCDAB89;
|
||||||
|
context->Message_Digest[2] = 0x98BADCFE;
|
||||||
|
context->Message_Digest[3] = 0x10325476;
|
||||||
|
context->Message_Digest[4] = 0xC3D2E1F0;
|
||||||
|
|
||||||
|
context->Computed = false;
|
||||||
|
context->Corrupted = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function will return the 160-bit message digest into the
|
||||||
|
// Message_Digest array within the SHA1Context provided
|
||||||
|
static bool SHA1Result(SHA1Context *context) {
|
||||||
|
if (context->Corrupted) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!context->Computed) {
|
||||||
|
SHA1PadMessage(context);
|
||||||
|
context->Computed = true;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function accepts an array of bytes as the next portion of
|
||||||
|
// the message.
|
||||||
|
static void SHA1Input(SHA1Context *context,
|
||||||
|
const unsigned char *message_array,
|
||||||
|
unsigned length) {
|
||||||
|
if (!length) return;
|
||||||
|
|
||||||
|
if (context->Computed || context->Corrupted) {
|
||||||
|
context->Corrupted = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
while(length-- && !context->Corrupted) {
|
||||||
|
context->Message_Block[context->Message_Block_Index++] =
|
||||||
|
(*message_array & 0xFF);
|
||||||
|
|
||||||
|
context->Length_Low += 8;
|
||||||
|
/* Force it to 32 bits */
|
||||||
|
context->Length_Low &= 0xFFFFFFFF;
|
||||||
|
if (context->Length_Low == 0) {
|
||||||
|
context->Length_High++;
|
||||||
|
/* Force it to 32 bits */
|
||||||
|
context->Length_High &= 0xFFFFFFFF;
|
||||||
|
if (context->Length_High == 0)
|
||||||
|
{
|
||||||
|
/* Message is too long */
|
||||||
|
context->Corrupted = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (context->Message_Block_Index == 64)
|
||||||
|
{
|
||||||
|
SHA1ProcessMessageBlock(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
message_array++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function will process the next 512 bits of the message stored
|
||||||
|
// in the Message_Block array.
|
||||||
|
static void SHA1ProcessMessageBlock(SHA1Context *context) {
|
||||||
|
const unsigned K[] = // Constants defined in SHA-1
|
||||||
|
{
|
||||||
|
0x5A827999,
|
||||||
|
0x6ED9EBA1,
|
||||||
|
0x8F1BBCDC,
|
||||||
|
0xCA62C1D6
|
||||||
|
};
|
||||||
|
int t; // Loop counter
|
||||||
|
unsigned temp; // Temporary word value
|
||||||
|
unsigned W[80]; // Word sequence
|
||||||
|
unsigned A, B, C, D, E; // Word buffers
|
||||||
|
|
||||||
|
// Initialize the first 16 words in the array W
|
||||||
|
for(t = 0; t < 16; t++) {
|
||||||
|
W[t] = ((unsigned) context->Message_Block[t * 4]) << 24;
|
||||||
|
W[t] |= ((unsigned) context->Message_Block[t * 4 + 1]) << 16;
|
||||||
|
W[t] |= ((unsigned) context->Message_Block[t * 4 + 2]) << 8;
|
||||||
|
W[t] |= ((unsigned) context->Message_Block[t * 4 + 3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(t = 16; t < 80; t++) {
|
||||||
|
W[t] = SHA1CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
|
||||||
|
}
|
||||||
|
|
||||||
|
A = context->Message_Digest[0];
|
||||||
|
B = context->Message_Digest[1];
|
||||||
|
C = context->Message_Digest[2];
|
||||||
|
D = context->Message_Digest[3];
|
||||||
|
E = context->Message_Digest[4];
|
||||||
|
|
||||||
|
for(t = 0; t < 20; t++) {
|
||||||
|
temp = SHA1CircularShift(5,A) +
|
||||||
|
((B & C) | ((~B) & D)) + E + W[t] + K[0];
|
||||||
|
temp &= 0xFFFFFFFF;
|
||||||
|
E = D;
|
||||||
|
D = C;
|
||||||
|
C = SHA1CircularShift(30,B);
|
||||||
|
B = A;
|
||||||
|
A = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(t = 20; t < 40; t++) {
|
||||||
|
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
|
||||||
|
temp &= 0xFFFFFFFF;
|
||||||
|
E = D;
|
||||||
|
D = C;
|
||||||
|
C = SHA1CircularShift(30,B);
|
||||||
|
B = A;
|
||||||
|
A = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(t = 40; t < 60; t++) {
|
||||||
|
temp = SHA1CircularShift(5,A) +
|
||||||
|
((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
|
||||||
|
temp &= 0xFFFFFFFF;
|
||||||
|
E = D;
|
||||||
|
D = C;
|
||||||
|
C = SHA1CircularShift(30,B);
|
||||||
|
B = A;
|
||||||
|
A = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(t = 60; t < 80; t++) {
|
||||||
|
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
|
||||||
|
temp &= 0xFFFFFFFF;
|
||||||
|
E = D;
|
||||||
|
D = C;
|
||||||
|
C = SHA1CircularShift(30,B);
|
||||||
|
B = A;
|
||||||
|
A = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
context->Message_Digest[0] = (context->Message_Digest[0] + A) & 0xFFFFFFFF;
|
||||||
|
context->Message_Digest[1] = (context->Message_Digest[1] + B) & 0xFFFFFFFF;
|
||||||
|
context->Message_Digest[2] = (context->Message_Digest[2] + C) & 0xFFFFFFFF;
|
||||||
|
context->Message_Digest[3] = (context->Message_Digest[3] + D) & 0xFFFFFFFF;
|
||||||
|
context->Message_Digest[4] = (context->Message_Digest[4] + E) & 0xFFFFFFFF;
|
||||||
|
|
||||||
|
context->Message_Block_Index = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// According to the standard, the message must be padded to an even
|
||||||
|
// 512 bits. The first padding bit must be a '1'. The last 64 bits
|
||||||
|
// represent the length of the original message. All bits in between
|
||||||
|
// should be 0. This function will pad the message according to those
|
||||||
|
// rules by filling the Message_Block array accordingly. It will also
|
||||||
|
// call SHA1ProcessMessageBlock() appropriately. When it returns, it
|
||||||
|
// can be assumed that the message digest has been computed.
|
||||||
|
static void SHA1PadMessage(SHA1Context *context) {
|
||||||
|
// Check to see if the current message block is too small to hold
|
||||||
|
// the initial padding bits and length. If so, we will pad the
|
||||||
|
// block, process it, and then continue padding into a second block.
|
||||||
|
if (context->Message_Block_Index > 55) {
|
||||||
|
context->Message_Block[context->Message_Block_Index++] = 0x80;
|
||||||
|
while(context->Message_Block_Index < 64) {
|
||||||
|
context->Message_Block[context->Message_Block_Index++] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
SHA1ProcessMessageBlock(context);
|
||||||
|
|
||||||
|
while(context->Message_Block_Index < 56) {
|
||||||
|
context->Message_Block[context->Message_Block_Index++] = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
context->Message_Block[context->Message_Block_Index++] = 0x80;
|
||||||
|
while(context->Message_Block_Index < 56) {
|
||||||
|
context->Message_Block[context->Message_Block_Index++] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the message length as the last 8 octets
|
||||||
|
context->Message_Block[56] = (context->Length_High >> 24) & 0xFF;
|
||||||
|
context->Message_Block[57] = (context->Length_High >> 16) & 0xFF;
|
||||||
|
context->Message_Block[58] = (context->Length_High >> 8) & 0xFF;
|
||||||
|
context->Message_Block[59] = (context->Length_High) & 0xFF;
|
||||||
|
context->Message_Block[60] = (context->Length_Low >> 24) & 0xFF;
|
||||||
|
context->Message_Block[61] = (context->Length_Low >> 16) & 0xFF;
|
||||||
|
context->Message_Block[62] = (context->Length_Low >> 8) & 0xFF;
|
||||||
|
context->Message_Block[63] = (context->Length_Low) & 0xFF;
|
||||||
|
|
||||||
|
SHA1ProcessMessageBlock(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void SHA1_Hash_Portable(const char* data, size_t len, char* hash_array) {
|
||||||
|
SHA1Context context;
|
||||||
|
SHA1Reset(&context);
|
||||||
|
SHA1Input(&context, reinterpret_cast<const unsigned char*>(data), len);
|
||||||
|
bool ok = SHA1Result(&context);
|
||||||
|
if (!ok) {
|
||||||
|
fprintf(stderr, "Unexpected error in SHA1_Hash_Portable code\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
uint32_t value = context.Message_Digest[i];
|
||||||
|
hash_array[i*4 + 0] = (value >> 24) & 0xff;
|
||||||
|
hash_array[i*4 + 1] = (value >> 16) & 0xff;
|
||||||
|
hash_array[i*4 + 2] = (value >> 8) & 0xff;
|
||||||
|
hash_array[i*4 + 3] = value & 0xff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_
#define STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_

#include <stddef.h>

namespace leveldb {
namespace port {

// Compute the SHA1 hash value of "data[0..len-1]" and store it in
// "hash_array[0..19]". hash_array must have 20 bytes of space available.
//
// This function is portable but may not be as fast as a version
// optimized for your platform.  It is provided as a default method
// that can be used when porting leveldb to a new platform if no
// better SHA1 hash implementation is available.
void SHA1_Hash_Portable(const char* data, size_t len, char* hash_array);

}  // namespace port
}  // namespace leveldb

#endif  // STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_
|
|
@ -0,0 +1,39 @@
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "port/port.h"
#include "util/testharness.h"

namespace leveldb {
namespace port {

class SHA1 { };

// Hash data[0..len-1] and return the digest as a 40-character lowercase
// hex string, for easy comparison against published SHA-1 test vectors.
static std::string TestSHA1(const char* data, size_t len) {
  char hash_val[20];
  SHA1_Hash(data, len, hash_val);
  char buf[41];
  for (int i = 0; i < 20; i++) {
    snprintf(buf + i * 2, 41 - i * 2,
             "%02x",
             static_cast<unsigned int>(static_cast<unsigned char>(
                 hash_val[i])));
  }
  return std::string(buf, 40);
}

TEST(SHA1, Simple) {
  // Known digests for the empty string, "hello", and a 10000-byte input
  // (exercises the multi-block and padding paths).
  ASSERT_EQ("da39a3ee5e6b4b0d3255bfef95601890afd80709", TestSHA1("", 0));
  ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d", TestSHA1("hello", 5));
  std::string x(10000, 'x');
  ASSERT_EQ("f8c5cde791c5056cf515881e701c8a9ecb439a75",
            TestSHA1(x.data(), x.size()));
}

}  // namespace port
}  // namespace leveldb

int main(int argc, char** argv) {
  return leveldb::test::RunAllTests();
}
|
|
@ -62,9 +62,7 @@ static inline const char* DecodeEntry(const char* p, const char* limit,
|
||||||
if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
|
if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (static_cast<uint32>(limit - p) < (*non_shared + *value_length)) {
|
if (limit - p < (*non_shared + *value_length)) return NULL;
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,7 +62,7 @@ size_t BlockBuilder::CurrentSizeEstimate() const {
|
||||||
|
|
||||||
Slice BlockBuilder::Finish() {
|
Slice BlockBuilder::Finish() {
|
||||||
// Append restart array
|
// Append restart array
|
||||||
for (size_t i = 0; i < restarts_.size(); i++) {
|
for (int i = 0; i < restarts_.size(); i++) {
|
||||||
PutFixed32(&buffer_, restarts_[i]);
|
PutFixed32(&buffer_, restarts_[i]);
|
||||||
}
|
}
|
||||||
PutFixed32(&buffer_, restarts_.size());
|
PutFixed32(&buffer_, restarts_.size());
|
|
@ -36,7 +36,7 @@ void Footer::EncodeTo(std::string* dst) const {
|
||||||
metaindex_handle_.EncodeTo(dst);
|
metaindex_handle_.EncodeTo(dst);
|
||||||
index_handle_.EncodeTo(dst);
|
index_handle_.EncodeTo(dst);
|
||||||
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
|
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
|
||||||
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu));
|
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber));
|
||||||
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
|
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
|
||||||
assert(dst->size() == original_size + kEncodedLength);
|
assert(dst->size() == original_size + kEncodedLength);
|
||||||
}
|
}
|
||||||
|
@ -71,7 +71,7 @@ Status ReadBlock(RandomAccessFile* file,
|
||||||
|
|
||||||
// Read the block contents as well as the type/crc footer.
|
// Read the block contents as well as the type/crc footer.
|
||||||
// See table_builder.cc for the code that built this structure.
|
// See table_builder.cc for the code that built this structure.
|
||||||
size_t n = static_cast<size_t>(handle.size());
|
size_t n = handle.size();
|
||||||
char* buf = new char[n + kBlockTrailerSize];
|
char* buf = new char[n + kBlockTrailerSize];
|
||||||
Slice contents;
|
Slice contents;
|
||||||
Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
|
Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
|
|
@ -16,7 +16,7 @@ Arena::Arena() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Arena::~Arena() {
|
Arena::~Arena() {
|
||||||
for (size_t i = 0; i < blocks_.size(); i++) {
|
for (int i = 0; i < blocks_.size(); i++) {
|
||||||
delete[] blocks_[i];
|
delete[] blocks_[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -85,7 +85,7 @@ char* EncodeVarint64(char* dst, uint64_t v) {
|
||||||
*(ptr++) = (v & (B-1)) | B;
|
*(ptr++) = (v & (B-1)) | B;
|
||||||
v >>= 7;
|
v >>= 7;
|
||||||
}
|
}
|
||||||
*(ptr++) = static_cast<unsigned char>(v);
|
*(ptr++) = v;
|
||||||
return reinterpret_cast<char*>(ptr);
|
return reinterpret_cast<char*>(ptr);
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,7 @@ class BytewiseComparatorImpl : public Comparator {
|
||||||
virtual void FindShortSuccessor(std::string* key) const {
|
virtual void FindShortSuccessor(std::string* key) const {
|
||||||
// Find first character that can be incremented
|
// Find first character that can be incremented
|
||||||
size_t n = key->size();
|
size_t n = key->size();
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
const uint8_t byte = (*key)[i];
|
const uint8_t byte = (*key)[i];
|
||||||
if (byte != static_cast<uint8_t>(0xff)) {
|
if (byte != static_cast<uint8_t>(0xff)) {
|
||||||
(*key)[i] = byte + 1;
|
(*key)[i] = byte + 1;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue