mirror of https://github.com/facebook/rocksdb.git
132 lines
4.5 KiB
C++
132 lines
4.5 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
//
|
|
// BlockBuilder generates blocks where keys are prefix-compressed:
|
|
//
|
|
// When we store a key, we drop the prefix shared with the previous
|
|
// string. This helps reduce the space requirement significantly.
|
|
// Furthermore, once every K keys, we do not apply the prefix
|
|
// compression and store the entire key. We call this a "restart
|
|
// point". The tail end of the block stores the offsets of all of the
|
|
// restart points, and can be used to do a binary search when looking
|
|
// for a particular key. Values are stored as-is (without compression)
|
|
// immediately following the corresponding key.
|
|
//
|
|
// An entry for a particular key-value pair has the form:
|
|
// shared_bytes: varint32
|
|
// unshared_bytes: varint32
|
|
// value_length: varint32
|
|
// key_delta: char[unshared_bytes]
|
|
// value: char[value_length]
|
|
// shared_bytes == 0 for restart points.
|
|
//
|
|
// The trailer of the block has the form:
|
|
// restarts: uint32[num_restarts]
|
|
// num_restarts: uint32
|
|
// restarts[i] contains the offset within the block of the ith restart point.
|
|
|
|
#include "table/block_builder.h"
|
|
|
|
#include <algorithm>
|
|
#include <assert.h>
|
|
#include "rocksdb/comparator.h"
|
|
#include "db/dbformat.h"
|
|
#include "util/coding.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
BlockBuilder::BlockBuilder(int block_restart_interval, bool use_delta_encoding)
|
|
: block_restart_interval_(block_restart_interval),
|
|
use_delta_encoding_(use_delta_encoding),
|
|
restarts_(),
|
|
counter_(0),
|
|
finished_(false) {
|
|
assert(block_restart_interval_ >= 1);
|
|
restarts_.push_back(0); // First restart point is at offset 0
|
|
estimate_ = sizeof(uint32_t) + sizeof(uint32_t);
|
|
}
|
|
|
|
void BlockBuilder::Reset() {
|
|
buffer_.clear();
|
|
restarts_.clear();
|
|
restarts_.push_back(0); // First restart point is at offset 0
|
|
estimate_ = sizeof(uint32_t) + sizeof(uint32_t);
|
|
counter_ = 0;
|
|
finished_ = false;
|
|
last_key_.clear();
|
|
}
|
|
|
|
size_t BlockBuilder::EstimateSizeAfterKV(const Slice& key, const Slice& value)
|
|
const {
|
|
size_t estimate = CurrentSizeEstimate();
|
|
estimate += key.size() + value.size();
|
|
if (counter_ >= block_restart_interval_) {
|
|
estimate += sizeof(uint32_t); // a new restart entry.
|
|
}
|
|
|
|
estimate += sizeof(int32_t); // varint for shared prefix length.
|
|
estimate += VarintLength(key.size()); // varint for key length.
|
|
estimate += VarintLength(value.size()); // varint for value length.
|
|
|
|
return estimate;
|
|
}
|
|
|
|
Slice BlockBuilder::Finish() {
|
|
// Append restart array
|
|
for (size_t i = 0; i < restarts_.size(); i++) {
|
|
PutFixed32(&buffer_, restarts_[i]);
|
|
}
|
|
PutFixed32(&buffer_, static_cast<uint32_t>(restarts_.size()));
|
|
finished_ = true;
|
|
return Slice(buffer_);
|
|
}
|
|
|
|
void BlockBuilder::Add(const Slice& key, const Slice& value) {
|
|
assert(!finished_);
|
|
assert(counter_ <= block_restart_interval_);
|
|
size_t shared = 0; // number of bytes shared with prev key
|
|
if (counter_ >= block_restart_interval_) {
|
|
// Restart compression
|
|
restarts_.push_back(static_cast<uint32_t>(buffer_.size()));
|
|
estimate_ += sizeof(uint32_t);
|
|
counter_ = 0;
|
|
|
|
if (use_delta_encoding_) {
|
|
// Update state
|
|
last_key_.assign(key.data(), key.size());
|
|
}
|
|
} else if (use_delta_encoding_) {
|
|
Slice last_key_piece(last_key_);
|
|
// See how much sharing to do with previous string
|
|
shared = key.difference_offset(last_key_piece);
|
|
|
|
// Update state
|
|
// We used to just copy the changed data here, but it appears to be
|
|
// faster to just copy the whole thing.
|
|
last_key_.assign(key.data(), key.size());
|
|
}
|
|
|
|
const size_t non_shared = key.size() - shared;
|
|
const size_t curr_size = buffer_.size();
|
|
|
|
// Add "<shared><non_shared><value_size>" to buffer_
|
|
PutVarint32Varint32Varint32(&buffer_, static_cast<uint32_t>(shared),
|
|
static_cast<uint32_t>(non_shared),
|
|
static_cast<uint32_t>(value.size()));
|
|
|
|
// Add string delta to buffer_ followed by value
|
|
buffer_.append(key.data() + shared, non_shared);
|
|
buffer_.append(value.data(), value.size());
|
|
|
|
counter_++;
|
|
estimate_ += buffer_.size() - curr_size;
|
|
}
|
|
|
|
} // namespace rocksdb
|