Add blob dump support to the dump command (#9881)

Summary:
This patch is the first part of adding blob dump support. It only adds blob dump support to the dump command. A follow-up patch will add blob dump support to the dump_live_files command.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9881

Reviewed By: ltamasi

Differential Revision: D35796731

Pulled By: jowlyzhang

fbshipit-source-id: 2cc5973b222d505a331ac7b969edcf992b47c5ee
This commit is contained in:
yuzhangyu 2022-04-21 20:37:07 -07:00 committed by Facebook GitHub Bot
parent d13825e586
commit fff28a7725
4 changed files with 59 additions and 6 deletions

View File

@ -70,6 +70,7 @@ class LDBCommand {
static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD; static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD;
static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE; static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE;
static const std::string ARG_DECODE_BLOB_INDEX; static const std::string ARG_DECODE_BLOB_INDEX;
static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS;
struct ParsedParams { struct ParsedParams {
std::string cmd; std::string cmd;

View File

@ -45,6 +45,7 @@
#include "util/file_checksum_helper.h" #include "util/file_checksum_helper.h"
#include "util/stderr_logger.h" #include "util/stderr_logger.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "utilities/blob_db/blob_dump_tool.h"
#include "utilities/merge_operators.h" #include "utilities/merge_operators.h"
#include "utilities/ttl/db_ttl_impl.h" #include "utilities/ttl/db_ttl_impl.h"
@ -100,6 +101,8 @@ const std::string LDBCommand::ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD =
const std::string LDBCommand::ARG_BLOB_COMPACTION_READAHEAD_SIZE = const std::string LDBCommand::ARG_BLOB_COMPACTION_READAHEAD_SIZE =
"blob_compaction_readahead_size"; "blob_compaction_readahead_size";
const std::string LDBCommand::ARG_DECODE_BLOB_INDEX = "decode_blob_index"; const std::string LDBCommand::ARG_DECODE_BLOB_INDEX = "decode_blob_index";
const std::string LDBCommand::ARG_DUMP_UNCOMPRESSED_BLOBS =
"dump_uncompressed_blobs";
const char* LDBCommand::DELIM = " ==> "; const char* LDBCommand::DELIM = " ==> ";
@ -111,6 +114,9 @@ void DumpWalFile(Options options, std::string wal_file, bool print_header,
void DumpSstFile(Options options, std::string filename, bool output_hex, void DumpSstFile(Options options, std::string filename, bool output_hex,
bool show_properties, bool decode_blob_index); bool show_properties, bool decode_blob_index);
void DumpBlobFile(const std::string& filename, bool is_key_hex,
bool is_value_hex, bool dump_uncompressed_blobs);
}; };
LDBCommand* LDBCommand::InitFromCmdLineArgs( LDBCommand* LDBCommand::InitFromCmdLineArgs(
@ -1853,12 +1859,13 @@ DBDumperCommand::DBDumperCommand(
const std::vector<std::string>& /*params*/, const std::vector<std::string>& /*params*/,
const std::map<std::string, std::string>& options, const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags) const std::vector<std::string>& flags)
: LDBCommand(options, flags, true, : LDBCommand(
BuildCmdLineOptions( options, flags, true,
{ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, BuildCmdLineOptions(
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, {ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, ARG_TO,
ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS,
ARG_TIMESTAMP, ARG_PATH, ARG_DECODE_BLOB_INDEX})), ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET, ARG_TIMESTAMP,
ARG_PATH, ARG_DECODE_BLOB_INDEX, ARG_DUMP_UNCOMPRESSED_BLOBS})),
null_from_(true), null_from_(true),
null_to_(true), null_to_(true),
max_keys_(-1), max_keys_(-1),
@ -1906,6 +1913,7 @@ DBDumperCommand::DBDumperCommand(
print_stats_ = IsFlagPresent(flags, ARG_STATS); print_stats_ = IsFlagPresent(flags, ARG_STATS);
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY); count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX); decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX);
dump_uncompressed_blobs_ = IsFlagPresent(flags, ARG_DUMP_UNCOMPRESSED_BLOBS);
if (is_key_hex_) { if (is_key_hex_) {
if (!null_from_) { if (!null_from_) {
@ -1940,6 +1948,7 @@ void DBDumperCommand::Help(std::string& ret) {
ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]"); ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
ret.append(" [--" + ARG_PATH + "=<path_to_a_file>]"); ret.append(" [--" + ARG_PATH + "=<path_to_a_file>]");
ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]"); ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]");
ret.append(" [--" + ARG_DUMP_UNCOMPRESSED_BLOBS + "]");
ret.append("\n"); ret.append("\n");
} }
@ -1984,6 +1993,10 @@ void DBDumperCommand::DoCommand() {
DumpManifestFile(options_, path_, /* verbose_ */ false, is_key_hex_, DumpManifestFile(options_, path_, /* verbose_ */ false, is_key_hex_,
/* json_ */ false); /* json_ */ false);
break; break;
case kBlobFile:
DumpBlobFile(path_, is_key_hex_, is_value_hex_,
dump_uncompressed_blobs_);
break;
default: default:
exec_state_ = LDBCommandExecuteResult::Failed( exec_state_ = LDBCommandExecuteResult::Failed(
"File type not supported: " + path_); "File type not supported: " + path_);
@ -3533,6 +3546,27 @@ void DumpSstFile(Options options, std::string filename, bool output_hex,
} }
} }
void DumpBlobFile(const std::string& filename, bool is_key_hex,
bool is_value_hex, bool dump_uncompressed_blobs) {
using ROCKSDB_NAMESPACE::blob_db::BlobDumpTool;
BlobDumpTool tool;
BlobDumpTool::DisplayType blob_type = is_value_hex
? BlobDumpTool::DisplayType::kHex
: BlobDumpTool::DisplayType::kRaw;
BlobDumpTool::DisplayType show_uncompressed_blob =
dump_uncompressed_blobs ? blob_type : BlobDumpTool::DisplayType::kNone;
BlobDumpTool::DisplayType show_blob =
dump_uncompressed_blobs ? BlobDumpTool::DisplayType::kNone : blob_type;
BlobDumpTool::DisplayType show_key = is_key_hex
? BlobDumpTool::DisplayType::kHex
: BlobDumpTool::DisplayType::kRaw;
Status s = tool.Run(filename, show_key, show_blob, show_uncompressed_blob,
/* show_summary */ true);
if (!s.ok()) {
fprintf(stderr, "Failed: %s\n", s.ToString().c_str());
}
}
} // namespace } // namespace
DBFileDumperCommand::DBFileDumperCommand( DBFileDumperCommand::DBFileDumperCommand(

View File

@ -108,6 +108,7 @@ class DBDumperCommand : public LDBCommand {
bool print_stats_; bool print_stats_;
std::string path_; std::string path_;
bool decode_blob_index_; bool decode_blob_index_;
bool dump_uncompressed_blobs_;
static const std::string ARG_COUNT_ONLY; static const std::string ARG_COUNT_ONLY;
static const std::string ARG_COUNT_DELIM; static const std::string ARG_COUNT_DELIM;

View File

@ -716,6 +716,23 @@ class LDBTestCase(unittest.TestCase):
expected_pattern, unexpected=False, expected_pattern, unexpected=False,
isPattern=True) isPattern=True)
def testBlobDump(self):
print("Running testBlobDump")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK")
self.assertRunOK("batchput --enable_blob_files x2 y2 x3 y3 \"x4 abc\" \"y4 xyz\"", "OK")
# Pattern to expect from blob file dump.
regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary"
expected_pattern = re.compile(regex)
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1)
cmd = "dump --path=%s --dump_uncompressed_blobs"
self.assertRunOKFull((cmd)
% (blob_files[0]),
expected_pattern, unexpected=False,
isPattern=True)
def testWALDump(self): def testWALDump(self):
print("Running testWALDump...") print("Running testWALDump...")