2016-08-01 21:50:19 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2016-08-01 21:50:19 +00:00
|
|
|
//
|
2016-08-03 17:41:27 +00:00
|
|
|
#ifndef __STDC_FORMAT_MACROS
|
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#endif
|
|
|
|
|
2016-08-01 21:50:19 +00:00
|
|
|
#include <cstdio>
|
2017-01-10 19:35:36 +00:00
|
|
|
#include <cstdlib>
|
2016-08-01 21:50:19 +00:00
|
|
|
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
|
|
#ifdef GFLAGS
|
|
|
|
|
|
|
|
#include <gflags/gflags.h>
|
2016-08-03 17:41:27 +00:00
|
|
|
#include <inttypes.h>
|
2016-08-01 21:50:19 +00:00
|
|
|
#include <vector>
|
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "table/block_based_table_builder.h"
|
|
|
|
#include "table/block_based_table_reader.h"
|
|
|
|
#include "table/format.h"
|
|
|
|
#include "tools/sst_dump_tool_imp.h"
|
|
|
|
#include "util/compression.h"
|
|
|
|
#include "util/stop_watch.h"
|
|
|
|
#include "utilities/col_buf_encoder.h"
|
|
|
|
#include "utilities/column_aware_encoding_util.h"
|
|
|
|
|
|
|
|
using GFLAGS::ParseCommandLineFlags;
|
|
|
|
DEFINE_string(encoded_file, "", "file to store encoded data blocks");
|
|
|
|
DEFINE_string(decoded_file, "",
|
|
|
|
"file to store decoded data blocks after encoding");
|
|
|
|
DEFINE_string(format, "col", "Output Format. Can be 'row' or 'col'");
|
|
|
|
// TODO(jhli): option `col` should be removed and replaced by general
|
|
|
|
// column specifications.
|
|
|
|
DEFINE_string(index_type, "col", "Index type. Can be 'primary' or 'secondary'");
|
|
|
|
DEFINE_string(dump_file, "",
|
|
|
|
"Dump data blocks separated by columns in human-readable format");
|
|
|
|
DEFINE_bool(decode, false, "Deocde blocks after they are encoded");
|
|
|
|
DEFINE_bool(stat, false,
|
|
|
|
"Print column distribution statistics. Cannot decode in this mode");
|
|
|
|
DEFINE_string(compression_type, "kNoCompression",
|
|
|
|
"The compression algorithm used to compress data blocks");
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class ColumnAwareEncodingExp {
|
|
|
|
public:
|
|
|
|
static void Run(const std::string& sst_file) {
|
|
|
|
bool decode = FLAGS_decode;
|
|
|
|
if (FLAGS_decoded_file.size() > 0) {
|
|
|
|
decode = true;
|
|
|
|
}
|
|
|
|
if (FLAGS_stat) {
|
|
|
|
decode = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
ColumnAwareEncodingReader reader(sst_file);
|
|
|
|
std::vector<ColDeclaration>* key_col_declarations;
|
|
|
|
std::vector<ColDeclaration>* value_col_declarations;
|
|
|
|
ColDeclaration* value_checksum_declaration;
|
|
|
|
if (FLAGS_index_type == "primary") {
|
|
|
|
ColumnAwareEncodingReader::GetColDeclarationsPrimary(
|
|
|
|
&key_col_declarations, &value_col_declarations,
|
|
|
|
&value_checksum_declaration);
|
|
|
|
} else {
|
|
|
|
ColumnAwareEncodingReader::GetColDeclarationsSecondary(
|
|
|
|
&key_col_declarations, &value_col_declarations,
|
|
|
|
&value_checksum_declaration);
|
|
|
|
}
|
|
|
|
KVPairColDeclarations kvp_cd(key_col_declarations, value_col_declarations,
|
|
|
|
value_checksum_declaration);
|
|
|
|
|
|
|
|
if (!FLAGS_dump_file.empty()) {
|
|
|
|
std::vector<KVPairBlock> kv_pair_blocks;
|
|
|
|
reader.GetKVPairsFromDataBlocks(&kv_pair_blocks);
|
|
|
|
reader.DumpDataColumns(FLAGS_dump_file, kvp_cd, kv_pair_blocks);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
std::unordered_map<std::string, CompressionType> compressions = {
|
|
|
|
{"kNoCompression", CompressionType::kNoCompression},
|
|
|
|
{"kZlibCompression", CompressionType::kZlibCompression},
|
2016-09-01 22:28:40 +00:00
|
|
|
{"kZSTD", CompressionType::kZSTD}};
|
2016-08-01 21:50:19 +00:00
|
|
|
|
|
|
|
// Find Compression
|
|
|
|
CompressionType compression_type = compressions[FLAGS_compression_type];
|
|
|
|
EnvOptions env_options;
|
|
|
|
if (CompressionTypeSupported(compression_type)) {
|
|
|
|
fprintf(stdout, "[%s]\n", FLAGS_compression_type.c_str());
|
|
|
|
unique_ptr<WritableFile> encoded_out_file;
|
|
|
|
|
|
|
|
std::unique_ptr<Env> env(NewMemEnv(Env::Default()));
|
|
|
|
if (!FLAGS_encoded_file.empty()) {
|
|
|
|
env->NewWritableFile(FLAGS_encoded_file, &encoded_out_file,
|
|
|
|
env_options);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<KVPairBlock> kv_pair_blocks;
|
|
|
|
reader.GetKVPairsFromDataBlocks(&kv_pair_blocks);
|
|
|
|
|
|
|
|
std::vector<std::string> encoded_blocks;
|
|
|
|
StopWatchNano sw(env.get(), true);
|
|
|
|
if (FLAGS_format == "col") {
|
|
|
|
reader.EncodeBlocks(kvp_cd, encoded_out_file.get(), compression_type,
|
|
|
|
kv_pair_blocks, &encoded_blocks, FLAGS_stat);
|
|
|
|
} else { // row format
|
|
|
|
reader.EncodeBlocksToRowFormat(encoded_out_file.get(), compression_type,
|
|
|
|
kv_pair_blocks, &encoded_blocks);
|
|
|
|
}
|
|
|
|
if (encoded_out_file != nullptr) {
|
|
|
|
uint64_t size = 0;
|
|
|
|
env->GetFileSize(FLAGS_encoded_file, &size);
|
2016-08-03 17:41:27 +00:00
|
|
|
fprintf(stdout, "File size: %" PRIu64 "\n", size);
|
2016-08-01 21:50:19 +00:00
|
|
|
}
|
|
|
|
uint64_t encode_time = sw.ElapsedNanosSafe(false /* reset */);
|
2016-08-03 17:41:27 +00:00
|
|
|
fprintf(stdout, "Encode time: %" PRIu64 "\n", encode_time);
|
2016-08-01 21:50:19 +00:00
|
|
|
if (decode) {
|
|
|
|
unique_ptr<WritableFile> decoded_out_file;
|
|
|
|
if (!FLAGS_decoded_file.empty()) {
|
|
|
|
env->NewWritableFile(FLAGS_decoded_file, &decoded_out_file,
|
|
|
|
env_options);
|
|
|
|
}
|
|
|
|
sw.Start();
|
|
|
|
if (FLAGS_format == "col") {
|
|
|
|
reader.DecodeBlocks(kvp_cd, decoded_out_file.get(), &encoded_blocks);
|
|
|
|
} else {
|
|
|
|
reader.DecodeBlocksFromRowFormat(decoded_out_file.get(),
|
|
|
|
&encoded_blocks);
|
|
|
|
}
|
|
|
|
uint64_t decode_time = sw.ElapsedNanosSafe(true /* reset */);
|
2016-08-03 17:41:27 +00:00
|
|
|
fprintf(stdout, "Decode time: %" PRIu64 "\n", decode_time);
|
2016-08-01 21:50:19 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "Unsupported compression type: %s.\n",
|
|
|
|
FLAGS_compression_type.c_str());
|
|
|
|
}
|
|
|
|
delete key_col_declarations;
|
|
|
|
delete value_col_declarations;
|
|
|
|
delete value_checksum_declaration;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace rocksdb
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
int arg_idx = ParseCommandLineFlags(&argc, &argv, true);
|
|
|
|
if (arg_idx >= argc) {
|
|
|
|
fprintf(stdout, "SST filename required.\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
std::string sst_file(argv[arg_idx]);
|
|
|
|
if (FLAGS_format != "row" && FLAGS_format != "col") {
|
|
|
|
fprintf(stderr, "Format must be 'row' or 'col'\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (FLAGS_index_type != "primary" && FLAGS_index_type != "secondary") {
|
|
|
|
fprintf(stderr, "Format must be 'primary' or 'secondary'\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
rocksdb::ColumnAwareEncodingExp::Run(sst_file);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
int main() {
|
|
|
|
fprintf(stderr, "Please install gflags to run rocksdb tools\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#endif // GFLAGS
|
|
|
|
#else
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
fprintf(stderr, "Not supported in lite mode.\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#endif // ROCKSDB_LITE
|