Simple blob file dumper

Summary:
A simple blob file dumper.
Closes https://github.com/facebook/rocksdb/pull/2242

Differential Revision: D5097553

Pulled By: yiwu-arbug

fbshipit-source-id: c6e00d949fcd3658f9f68da9352f06339fac418d
This commit is contained in:
Yi Wu 2017-05-23 10:30:04 -07:00 committed by Facebook Github Bot
parent ac39d6bec5
commit 578fb0b1dc
11 changed files with 471 additions and 29 deletions

1
.gitignore vendored
View File

@ -31,6 +31,7 @@ build/
ldb
manifest_dump
sst_dump
blob_dump
column_aware_encoding_exp
util/build_version.cc
build_tools/VALGRIND_LOGS/

View File

@ -485,7 +485,8 @@ TOOLS = \
ldb \
db_repl_stress \
rocksdb_dump \
rocksdb_undump
rocksdb_undump \
blob_dump \
TEST_LIBS = \
librocksdb_env_basic_test.a
@ -1343,6 +1344,9 @@ transaction_test: utilities/transactions/transaction_test.o $(LIBOBJECTS) $(TEST
sst_dump: tools/sst_dump.o $(LIBOBJECTS)
$(AM_LINK)
blob_dump: tools/blob_dump.o $(LIBOBJECTS)
$(AM_LINK)
column_aware_encoding_exp: utilities/column_aware_encoding_exp.o $(EXPOBJECTS)
$(AM_LINK)

1
src.mk
View File

@ -202,6 +202,7 @@ TOOL_LIB_SOURCES = \
tools/ldb_cmd.cc \
tools/ldb_tool.cc \
tools/sst_dump_tool.cc \
utilities/blob_db/blob_dump_tool.cc \
MOCK_LIB_SOURCES = \
env/mock_env.cc \

91
tools/blob_dump.cc Normal file
View File

@ -0,0 +1,91 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
#ifndef ROCKSDB_LITE
#include <getopt.h>
#include <cstdio>
#include <string>
#include <unordered_map>
#include "utilities/blob_db/blob_dump_tool.h"
using namespace rocksdb;
using namespace rocksdb::blob_db;
// Entry point of the blob_dump command line tool.
//
// Recognized options:
//   --help / -h               print the usage line and exit with 0.
//   --file / -f <filename>    blob file to dump.
//   --show_key[=TYPE]  / -k   how keys are displayed; defaults to raw and is
//                             left unchanged when no TYPE is given.
//   --show_blob[=TYPE] / -b   how blobs are displayed; defaults to none and
//                             becomes detail when given without a TYPE.
// TYPE is one of none|raw|hex|detail.
//
// Returns 0 on success and -1 on a bad option or when the dump fails.
int main(int argc, char** argv) {
  using DisplayType = BlobDumpTool::DisplayType;
  const std::unordered_map<std::string, DisplayType> display_types = {
      {"none", DisplayType::kNone},
      {"raw", DisplayType::kRaw},
      {"hex", DisplayType::kHex},
      {"detail", DisplayType::kDetail},
  };
  const struct option options[] = {
      {"help", no_argument, nullptr, 'h'},
      {"file", required_argument, nullptr, 'f'},
      {"show_key", optional_argument, nullptr, 'k'},
      {"show_blob", optional_argument, nullptr, 'b'},
      // getopt_long() requires the last element of the array to be filled
      // with zeros; without it an unknown long option reads past the array.
      {nullptr, 0, nullptr, 0},
  };
  DisplayType show_key = DisplayType::kRaw;
  DisplayType show_blob = DisplayType::kNone;
  std::string file;
  while (true) {
    int c = getopt_long(argc, argv, "hk::b::f:", options, nullptr);
    if (c < 0) {
      break;
    }
    // optarg is null for options given without a value.
    std::string arg_str(optarg ? optarg : "");
    switch (c) {
      case 'h':
        fprintf(stdout,
                "Usage: blob_dump --file=filename "
                "[--show_key[=none|raw|hex|detail]] "
                "[--show_blob[=none|raw|hex|detail]]\n");
        return 0;
      case 'f':
        file = arg_str;
        break;
      case 'k':
        if (optarg) {
          auto it = display_types.find(arg_str);
          if (it == display_types.end()) {
            fprintf(stderr, "Unrecognized key display type.\n");
            return -1;
          }
          show_key = it->second;
        }
        break;
      case 'b':
        if (optarg) {
          auto it = display_types.find(arg_str);
          if (it == display_types.end()) {
            fprintf(stderr, "Unrecognized blob display type.\n");
            return -1;
          }
          show_blob = it->second;
        } else {
          // A bare --show_blob asks for the most verbose rendering.
          show_blob = DisplayType::kDetail;
        }
        break;
      default:
        fprintf(stderr, "Unrecognized option.\n");
        return -1;
    }
  }
  BlobDumpTool tool;
  Status s = tool.Run(file, show_key, show_blob);
  if (!s.ok()) {
    fprintf(stderr, "Failed: %s\n", s.ToString().c_str());
    return -1;
  }
  return 0;
}
#else
#include <stdio.h>
// Stub entry point compiled when ROCKSDB_LITE is defined: the tool is not
// available there, so it only reports that on stderr and fails with -1.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;
  fputs("Not supported in lite mode.\n", stderr);
  return -1;
}
#endif // ROCKSDB_LITE

View File

@ -0,0 +1,255 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
#ifndef ROCKSDB_LITE
#include "utilities/blob_db/blob_dump_tool.h"
#include <inttypes.h>
#include <stdio.h>
#include <iostream>
#include <memory>
#include <string>
#include "port/port.h"
#include "rocksdb/convenience.h"
#include "rocksdb/env.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/string_util.h"
namespace rocksdb {
namespace blob_db {
// Creates a tool with no file reader attached and no read buffer allocated;
// both are set up lazily by Run() and Read().
BlobDumpTool::BlobDumpTool() : reader_(), buffer_(), buffer_size_(0) {}
// Dumps the blob log file `filename` to stdout.
//
// The file must exist and be non-empty. The blob log header is printed first,
// then the footer (if any), and finally every record located between the
// header and the footer. Records are only walked when `show_key` is not
// DisplayType::kNone. Returns the first non-OK status encountered.
Status BlobDumpTool::Run(const std::string& filename, DisplayType show_key,
                         DisplayType show_blob) {
  Env* const default_env = Env::Default();

  Status status = default_env->FileExists(filename);
  if (!status.ok()) {
    return status;
  }

  uint64_t total_bytes = 0;
  status = default_env->GetFileSize(filename, &total_bytes);
  if (!status.ok()) {
    return status;
  }

  std::unique_ptr<RandomAccessFile> raw_file;
  status = default_env->NewRandomAccessFile(filename, &raw_file, EnvOptions());
  if (!status.ok()) {
    return status;
  }
  if (total_bytes == 0) {
    return Status::Corruption("File is empty.");
  }
  reader_.reset(new RandomAccessFileReader(std::move(raw_file)));

  // `cursor` ends up just past the header; `records_end` is where the footer
  // starts, or the file size when the file has no footer.
  uint64_t cursor = 0;
  uint64_t records_end = 0;
  status = DumpBlobLogHeader(&cursor);
  if (!status.ok()) {
    return status;
  }
  status = DumpBlobLogFooter(total_bytes, &records_end);
  if (!status.ok()) {
    return status;
  }

  if (show_key == DisplayType::kNone) {
    return status;
  }
  // Stop at the first record that fails to dump.
  while (status.ok() && cursor < records_end) {
    status = DumpRecord(show_key, show_blob, &cursor);
  }
  return status;
}
// Reads exactly `size` bytes starting at `offset` of the opened file.
//
// The internal buffer is grown on demand: it starts at 4096 bytes and is
// doubled until it can hold `size` bytes. On success `*result` refers to the
// bytes that were read; it is only valid until the next call to Read().
// Returns Corruption when fewer than `size` bytes are available or when
// `size` is too large for the buffer to be grown to.
Status BlobDumpTool::Read(uint64_t offset, size_t size, Slice* result) {
  if (buffer_size_ < size) {
    const size_t kMaxSize = static_cast<size_t>(-1);
    size_t new_size = (buffer_size_ == 0) ? 4096 : buffer_size_;
    while (new_size < size) {
      // Doubling past half of the size_t range would wrap around to 0 and
      // the loop would never terminate (sizes come from untrusted headers).
      if (new_size > kMaxSize / 2) {
        return Status::Corruption("Read size is too large.");
      }
      new_size *= 2;
    }
    buffer_.reset(new char[new_size]);
    // Only record the new capacity once the allocation succeeded.
    buffer_size_ = new_size;
  }
  Status s = reader_->Read(offset, size, result, buffer_.get());
  if (!s.ok()) {
    return s;
  }
  if (result->size() != size) {
    return Status::Corruption("Reach the end of the file unexpectedly.");
  }
  return s;
}
// Reads the first BlobLogHeader::kHeaderSize bytes of the file, decodes them
// as a blob log header and prints its fields to stdout.
//
// On success `*offset` is set to BlobLogHeader::kHeaderSize, i.e. the position
// right after the header. Returns the read or decode error otherwise.
Status BlobDumpTool::DumpBlobLogHeader(uint64_t* offset) {
  Slice slice;
  Status s = Read(0, BlobLogHeader::kHeaderSize, &slice);
  if (!s.ok()) {
    return s;
  }
  BlobLogHeader header;
  s = header.DecodeFrom(slice);
  if (!s.ok()) {
    return s;
  }
  fprintf(stdout, "Blob log header:\n");
  fprintf(stdout, " Magic Number : %u\n", header.magic_number());
  // version() is unsigned, so it is printed with %u rather than %d.
  fprintf(stdout, " Version : %u\n", header.version());
  CompressionType compression = header.compression();
  std::string compression_str;
  if (!GetStringFromCompressionType(&compression_str, compression).ok()) {
    // Fall back to the numeric value when the type has no known name.
    compression_str = "Unrecognized compression type (" +
                      ToString(static_cast<int>(compression)) + ")";
  }
  fprintf(stdout, " Compression : %s\n", compression_str.c_str());
  fprintf(stdout, " TTL Range : %s\n",
          GetString(header.ttl_range()).c_str());
  fprintf(stdout, " Timestamp Range: %s\n",
          GetString(header.ts_range()).c_str());
  *offset = BlobLogHeader::kHeaderSize;
  return s;
}
// Detects, decodes and prints the blob log footer of a file of `file_size`
// bytes.
//
// A footer is assumed to be present only when the file is large enough to
// hold both a header and a footer and its last four bytes decode to
// kMagicNumber. Without a footer, "No blob log footer." is printed,
// `*footer_offset` is set to `file_size` and OK is returned. With a footer,
// `*footer_offset` is set to the position where the footer starts.
Status BlobDumpTool::DumpBlobLogFooter(uint64_t file_size,
                                       uint64_t* footer_offset) {
  Slice chunk;
  Status status;

  bool has_footer =
      !(file_size < BlobLogHeader::kHeaderSize + BlobLogFooter::kFooterSize);
  if (has_footer) {
    // Peek at the trailing magic number before trusting the footer.
    status = Read(file_size - 4, 4, &chunk);
    if (!status.ok()) {
      return status;
    }
    const uint32_t trailing_magic = DecodeFixed32(chunk.data());
    has_footer = (trailing_magic == kMagicNumber);
  }

  if (!has_footer) {
    *footer_offset = file_size;
    fprintf(stdout, "No blob log footer.\n");
    return Status::OK();
  }

  *footer_offset = file_size - BlobLogFooter::kFooterSize;
  status = Read(*footer_offset, BlobLogFooter::kFooterSize, &chunk);
  if (!status.ok()) {
    return status;
  }

  BlobLogFooter footer;
  status = footer.DecodeFrom(chunk);
  if (!status.ok()) {
    return status;
  }

  const std::string ttl_text = GetString(footer.GetTTLRange());
  const std::string time_text = GetString(footer.GetTimeRange());
  const std::string seq_text = GetString(footer.GetSNRange());
  fprintf(stdout, "Blob log footer:\n");
  fprintf(stdout, " Blob count : %" PRIu64 "\n", footer.GetBlobCount());
  fprintf(stdout, " TTL Range : %s\n", ttl_text.c_str());
  fprintf(stdout, " Time Range : %s\n", time_text.c_str());
  fprintf(stdout, " Sequence Range : %s\n", seq_text.c_str());
  return status;
}
// Dumps the record that starts at `*offset` and advances `*offset` past it.
//
// The record header is read and printed first. Key, blob and record footer
// are then read in one go and both checksums are verified before anything is
// displayed: the header checksum covers all but the last
// 2 * sizeof(uint32_t) bytes of the record header followed by the key; the
// blob checksum covers the blob bytes. The key is displayed according to
// `show_key`; the blob is only displayed when the key is displayed as well.
// Returns Corruption on a checksum mismatch or on sizes that cannot be valid.
Status BlobDumpTool::DumpRecord(DisplayType show_key, DisplayType show_blob,
                                uint64_t* offset) {
  fprintf(stdout, "Read record with offset 0x%" PRIx64 " (%" PRIu64 "):\n",
          *offset, *offset);
  Slice slice;
  Status s = Read(*offset, BlobLogRecord::kHeaderSize, &slice);
  if (!s.ok()) {
    return s;
  }
  BlobLogRecord record;
  s = record.DecodeHeaderFrom(slice);
  if (!s.ok()) {
    return s;
  }
  const uint32_t key_size = record.GetKeySize();
  const uint64_t blob_size = record.GetBlobSize();
  // key_size is unsigned, so it is printed with %u rather than %d.
  fprintf(stdout, " key size : %u\n", key_size);
  fprintf(stdout, " blob size : %" PRIu64 "\n", blob_size);
  fprintf(stdout, " TTL : %u\n", record.GetTTL());
  fprintf(stdout, " time : %" PRIu64 "\n", record.GetTimeVal());
  fprintf(stdout, " type : %d, %d\n", record.type(), record.subtype());
  fprintf(stdout, " header CRC : %u\n", record.header_checksum());
  fprintf(stdout, " CRC : %u\n", record.checksum());
  // Must be computed now: the next Read() reuses the buffer behind `slice`.
  uint32_t header_crc =
      crc32c::Extend(0, slice.data(), slice.size() - 2 * sizeof(uint32_t));
  *offset += BlobLogRecord::kHeaderSize;

  // The sizes come straight from the file. Reject values whose sum wraps
  // around; otherwise a short read would be followed by out-of-bounds
  // accesses while checksumming and dumping the blob.
  const uint64_t kMaxU64 = ~static_cast<uint64_t>(0);
  const uint64_t key_and_footer_size =
      static_cast<uint64_t>(key_size) + BlobLogRecord::kFooterSize;
  if (blob_size > kMaxU64 - key_and_footer_size) {
    return Status::Corruption("Record size is too large.");
  }
  const uint64_t payload_size = key_and_footer_size + blob_size;

  s = Read(*offset, static_cast<size_t>(payload_size), &slice);
  if (!s.ok()) {
    return s;
  }
  header_crc = crc32c::Extend(header_crc, slice.data(), key_size);
  header_crc = crc32c::Mask(header_crc);
  if (header_crc != record.header_checksum()) {
    return Status::Corruption("Record header checksum mismatch.");
  }
  uint32_t blob_crc = crc32c::Extend(0, slice.data() + key_size, blob_size);
  blob_crc = crc32c::Mask(blob_crc);
  if (blob_crc != record.checksum()) {
    return Status::Corruption("Blob checksum mismatch.");
  }
  if (show_key != DisplayType::kNone) {
    fprintf(stdout, " key : ");
    DumpSlice(Slice(slice.data(), key_size), show_key);
    if (show_blob != DisplayType::kNone) {
      fprintf(stdout, " blob : ");
      DumpSlice(Slice(slice.data() + key_size, blob_size), show_blob);
    }
  }
  // The record footer directly follows the blob.
  Slice footer_slice(slice.data() + key_size + blob_size,
                     BlobLogRecord::kFooterSize);
  s = record.DecodeFooterFrom(footer_slice);
  if (!s.ok()) {
    return s;
  }
  fprintf(stdout, " footer CRC : %u\n", record.footer_checksum());
  fprintf(stdout, " sequence : %" PRIu64 "\n", record.GetSN());
  *offset += payload_size;
  return s;
}
void BlobDumpTool::DumpSlice(const Slice s, DisplayType type) {
if (type == DisplayType::kRaw) {
fprintf(stdout, "%s\n", s.ToString().c_str());
} else if (type == DisplayType::kHex) {
fprintf(stdout, "%s\n", s.ToString(true /*hex*/).c_str());
} else if (type == DisplayType::kDetail) {
char buf[100];
for (size_t i = 0; i < s.size(); i += 16) {
memset(buf, 0, sizeof(buf));
for (size_t j = 0; j < 16 && i + j < s.size(); j++) {
unsigned char c = s[i + j];
snprintf(buf + j * 3 + 15, 2, "%x", c >> 4);
snprintf(buf + j * 3 + 16, 2, "%x", c & 0xf);
snprintf(buf + j + 65, 2, "%c", (0x20 <= c && c <= 0x7e) ? c : '.');
}
for (size_t p = 0; p < sizeof(buf) - 1; p++) {
if (buf[p] == 0) {
buf[p] = ' ';
}
}
fprintf(stdout, "%s\n", i == 0 ? buf + 15 : buf);
}
}
}
// Formats a range as "(first, second)"; a range whose two ends are both zero
// is rendered as "nil".
template <class T>
std::string BlobDumpTool::GetString(std::pair<T, T> p) {
  const bool both_zero = (p.first == 0) && (p.second == 0);
  if (both_zero) {
    return "nil";
  }
  std::string text = "(";
  text += ToString(p.first);
  text += ", ";
  text += ToString(p.second);
  text += ")";
  return text;
}
} // namespace blob_db
} // namespace rocksdb
#endif // ROCKSDB_LITE

View File

@ -0,0 +1,54 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
#pragma once
#ifndef ROCKSDB_LITE
#include <memory>
#include <string>
#include <utility>
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/file_reader_writer.h"
#include "utilities/blob_db/blob_log_format.h"
namespace rocksdb {
namespace blob_db {
// Command line helper that prints the content of a single blob log file
// (header, footer and records) to stdout.
class BlobDumpTool {
 public:
  // How a key or a blob is rendered.
  enum class DisplayType {
    kNone,    // do not display
    kRaw,     // bytes as they are
    kHex,     // hex string
    kDetail,  // hex dump with an ASCII column
  };

  BlobDumpTool();

  // Dumps the blob log file `filename`. `key_type` and `blob_type` select how
  // keys and blobs of the records are displayed. Returns a non-OK status when
  // the file cannot be read or decoded.
  Status Run(const std::string& filename, DisplayType key_type,
             DisplayType blob_type);

 private:
  std::unique_ptr<RandomAccessFileReader> reader_;
  // Read buffer, allocated with new char[]. It has to be the array form of
  // unique_ptr so that the memory is released with delete[] rather than
  // delete.
  std::unique_ptr<char[]> buffer_;
  // Capacity of buffer_ in bytes; 0 while nothing has been allocated.
  size_t buffer_size_;

  // Reads exactly `size` bytes at `offset` into the internal buffer and makes
  // `*result` refer to them.
  Status Read(uint64_t offset, size_t size, Slice* result);
  // Prints the blob log header and sets `*offset` to the position after it.
  Status DumpBlobLogHeader(uint64_t* offset);
  // Prints the blob log footer, if any, and sets `*footer_offset` to where
  // the records end.
  Status DumpBlobLogFooter(uint64_t file_size, uint64_t* footer_offset);
  // Prints the record at `*offset` and advances `*offset` past it.
  Status DumpRecord(DisplayType show_key, DisplayType show_blob,
                    uint64_t* offset);
  // Prints `s` in the representation selected by `type`.
  void DumpSlice(const Slice s, DisplayType type);

  // Formats a range as "(first, second)", or "nil" when both ends are zero.
  template <class T>
  std::string GetString(std::pair<T, T> p);
};
} // namespace blob_db
} // namespace rocksdb
#endif // ROCKSDB_LITE

View File

@ -153,7 +153,7 @@ Status BlobFile::ReadFooter(BlobLogFooter* bf) {
return Status::IOError("EOF reached before footer");
}
s = bf->DecodeFrom(&result);
s = bf->DecodeFrom(result);
return s;
}

View File

@ -32,9 +32,10 @@ BlobLogHeader& BlobLogHeader::operator=(BlobLogHeader&& in) noexcept {
BlobLogFooter::BlobLogFooter() : magic_number_(kMagicNumber), blob_count_(0) {}
Status BlobLogFooter::DecodeFrom(Slice* input) {
Status BlobLogFooter::DecodeFrom(const Slice& input) {
Slice slice(input);
uint32_t val;
if (!GetFixed32(input, &val)) {
if (!GetFixed32(&slice, &val)) {
return Status::Corruption("Invalid Blob Footer: flags");
}
@ -55,33 +56,34 @@ Status BlobLogFooter::DecodeFrom(Slice* input) {
return Status::Corruption("Invalid Blob Footer: flags_val");
}
if (!GetFixed64(input, &blob_count_)) {
if (!GetFixed64(&slice, &blob_count_)) {
return Status::Corruption("Invalid Blob Footer: blob_count");
}
ttlrange_t temp_ttl;
if (!GetFixed32(input, &temp_ttl.first) ||
!GetFixed32(input, &temp_ttl.second)) {
if (!GetFixed32(&slice, &temp_ttl.first) ||
!GetFixed32(&slice, &temp_ttl.second)) {
return Status::Corruption("Invalid Blob Footer: ttl_range");
}
if (has_ttl) {
printf("has ttl\n");
ttl_range_.reset(new ttlrange_t(temp_ttl));
}
if (!GetFixed64(input, &sn_range_.first) ||
!GetFixed64(input, &sn_range_.second)) {
if (!GetFixed64(&slice, &sn_range_.first) ||
!GetFixed64(&slice, &sn_range_.second)) {
return Status::Corruption("Invalid Blob Footer: sn_range");
}
tsrange_t temp_ts;
if (!GetFixed64(input, &temp_ts.first) ||
!GetFixed64(input, &temp_ts.second)) {
if (!GetFixed64(&slice, &temp_ts.first) ||
!GetFixed64(&slice, &temp_ts.second)) {
return Status::Corruption("Invalid Blob Footer: ts_range");
}
if (has_ts) ts_range_.reset(new tsrange_t(temp_ts));
if (has_ts) {
ts_range_.reset(new tsrange_t(temp_ts));
}
if (!GetFixed32(input, &magic_number_) || magic_number_ != kMagicNumber) {
if (!GetFixed32(&slice, &magic_number_) || magic_number_ != kMagicNumber) {
return Status::Corruption("Invalid Blob Footer: magic");
}
@ -163,18 +165,19 @@ void BlobLogHeader::EncodeTo(std::string* dst) const {
}
}
Status BlobLogHeader::DecodeFrom(Slice* input) {
if (!GetFixed32(input, &magic_number_) || magic_number_ != kMagicNumber) {
Status BlobLogHeader::DecodeFrom(const Slice& input) {
Slice slice(input);
if (!GetFixed32(&slice, &magic_number_) || magic_number_ != kMagicNumber) {
return Status::Corruption("Invalid Blob Log Header: magic");
}
// as of today, we only support 1 version
if (!GetFixed32(input, &version_) || version_ != kVersion1) {
if (!GetFixed32(&slice, &version_) || version_ != kVersion1) {
return Status::Corruption("Invalid Blob Log Header: version");
}
uint32_t val;
if (!GetFixed32(input, &val)) {
if (!GetFixed32(&slice, &val)) {
return Status::Corruption("Invalid Blob Log Header: subtype");
}
@ -196,15 +199,15 @@ Status BlobLogHeader::DecodeFrom(Slice* input) {
}
ttlrange_t temp_ttl;
if (!GetFixed32(input, &temp_ttl.first) ||
!GetFixed32(input, &temp_ttl.second)) {
if (!GetFixed32(&slice, &temp_ttl.first) ||
!GetFixed32(&slice, &temp_ttl.second)) {
return Status::Corruption("Invalid Blob Log Header: ttl");
}
if (has_ttl) set_ttl_guess(temp_ttl);
tsrange_t temp_ts;
if (!GetFixed64(input, &temp_ts.first) ||
!GetFixed64(input, &temp_ts.second)) {
if (!GetFixed64(&slice, &temp_ts.first) ||
!GetFixed64(&slice, &temp_ts.second)) {
return Status::Corruption("Invalid Blob Log Header: timestamp");
}
if (has_ts) set_ts_guess(temp_ts);

View File

@ -70,16 +70,35 @@ class BlobLogHeader {
void set_ts_guess(const tsrange_t& ts) { ts_guess_.reset(new tsrange_t(ts)); }
public:
// magic number + version + flags + ttl guess + timestamp range
// magic number + version + flags + ttl guess + timestamp range = 36
static const size_t kHeaderSize = 4 + 4 + 4 + 4 * 2 + 8 * 2;
// 32
void EncodeTo(std::string* dst) const;
Status DecodeFrom(Slice* input);
Status DecodeFrom(const Slice& input);
BlobLogHeader();
uint32_t magic_number() const { return magic_number_; }
uint32_t version() const { return version_; }
CompressionType compression() const { return compression_; }
ttlrange_t ttl_range() const {
if (!ttl_guess_) {
return {0, 0};
}
return *ttl_guess_;
}
tsrange_t ts_range() const {
if (!ts_guess_) {
return {0, 0};
}
return *ts_guess_;
}
bool HasTTL() const { return !!ttl_guess_; }
bool HasTimestamp() const { return !!ts_guess_; }
@ -97,11 +116,11 @@ class BlobLogFooter {
// EncodeTo(). Never use this constructor with DecodeFrom().
BlobLogFooter();
uint64_t magic_number() const { return magic_number_; }
uint32_t magic_number() const { return magic_number_; }
void EncodeTo(std::string* dst) const;
Status DecodeFrom(Slice* input);
Status DecodeFrom(const Slice& input);
// convert this object to a human readable form
std::string ToString() const;
@ -214,8 +233,18 @@ class BlobLogRecord {
uint64_t GetTimeVal() const { return time_val_; }
char type() const { return type_; }
char subtype() const { return subtype_; }
SequenceNumber GetSN() const { return sn_; }
uint32_t header_checksum() const { return header_cksum_; }
uint32_t checksum() const { return checksum_; }
uint32_t footer_checksum() const { return footer_cksum_; }
Status DecodeHeaderFrom(const Slice& hdrslice);
Status DecodeFooterFrom(const Slice& footerslice);

View File

@ -36,7 +36,7 @@ Status Reader::ReadHeader(BlobLogHeader* header) {
return Status::IOError("EOF reached before file header");
}
status = header->DecodeFrom(&buffer_);
status = header->DecodeFrom(buffer_);
return status;
}
@ -55,7 +55,9 @@ Status Reader::ReadRecord(BlobLogRecord* record, ReadLevel level,
}
status = record->DecodeHeaderFrom(buffer_);
if (!status.ok()) return status;
if (!status.ok()) {
return status;
}
uint32_t header_crc = 0;
uint32_t blob_crc = 0;

View File

@ -70,6 +70,8 @@ class Reader {
uint64_t GetNextByte() const { return next_byte_; }
const SequentialFileReader* file_reader() const { return file_.get(); }
private:
char* GetReadBuffer() { return &(backing_store_[0]); }