mirror of https://github.com/facebook/rocksdb.git
Augment sst_dump tool to verify num_entries in table property (#12322)
Summary: sst_dump --command=check can now compare the number of keys in a file with num_entries in the table properties and reports corruption if there is a mismatch. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12322 Test Plan: - new unit test for API `SstFileDumper::ReadSequential` - ran sst_dump on a good and a bad file: ``` sst_dump --file=./32316112.sst options.env is 0x7f68bfcb5000 Process ./32316112.sst Sst file format: block-based from [] to [] sst_dump --file=./32316115.sst options.env is 0x7f6d0d2b5000 Process ./32316115.sst Sst file format: block-based from [] to [] ./32316115.sst: Corruption: Table property has num_entries = 6050408 but scanning the table returns 6050406 records. ``` Reviewed By: jowlyzhang Differential Revision: D53320481 Pulled By: cbi42 fbshipit-source-id: d84c996346a9575a5a2ea5f5fb09a9d3ee672cd6
This commit is contained in:
parent
f9d45358ca
commit
c6b1f6d182
|
@ -460,7 +460,7 @@ Status SstFileDumper::SetOldTableOptions() {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num,
|
||||
Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num_limit,
|
||||
bool has_from, const std::string& from_key,
|
||||
bool has_to, const std::string& to_key,
|
||||
bool use_from_as_prefix) {
|
||||
|
@ -494,7 +494,7 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num,
|
|||
Slice key = iter->key();
|
||||
Slice value = iter->value();
|
||||
++i;
|
||||
if (read_num > 0 && i > read_num) {
|
||||
if (read_num_limit > 0 && i > read_num_limit) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -554,6 +554,28 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num,
|
|||
read_num_ += i;
|
||||
|
||||
Status ret = iter->status();
|
||||
|
||||
bool verify_num_entries =
|
||||
(read_num_limit == 0 ||
|
||||
read_num_limit == std::numeric_limits<uint64_t>::max()) &&
|
||||
!has_from && !has_to;
|
||||
if (verify_num_entries && ret.ok()) {
|
||||
// Compare the number of entries
|
||||
if (!table_properties_) {
|
||||
fprintf(stderr, "Table properties not available.");
|
||||
} else {
|
||||
// TODO: verify num_range_deletions
|
||||
if (i != table_properties_->num_entries -
|
||||
table_properties_->num_range_deletions) {
|
||||
ret =
|
||||
Status::Corruption("Table property has num_entries = " +
|
||||
std::to_string(table_properties_->num_entries) +
|
||||
" but scanning the table returns " +
|
||||
std::to_string(i) + " records.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete iter;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -23,7 +23,12 @@ class SstFileDumper {
|
|||
const EnvOptions& soptions = EnvOptions(),
|
||||
bool silent = false);
|
||||
|
||||
Status ReadSequential(bool print_kv, uint64_t read_num, bool has_from,
|
||||
// read_num_limit limits the total number of keys read. If read_num_limit = 0,
|
||||
// then there is no limit. If read_num_limit = 0 or
|
||||
// std::numeric_limits<uint64_t>::max(), and has_from and has_to are both false, then
|
||||
// the number of keys read is compared with the `num_entries` field (minus `num_range_deletions`) in table
|
||||
// properties. A Corruption status is returned if they do not match.
|
||||
Status ReadSequential(bool print_kv, uint64_t read_num_limit, bool has_from,
|
||||
const std::string& from_key, bool has_to,
|
||||
const std::string& to_key,
|
||||
bool use_from_as_prefix = false);
|
||||
|
|
|
@ -109,7 +109,7 @@ class SSTDumpToolTest : public testing::Test {
|
|||
}
|
||||
|
||||
void createSST(const Options& opts, const std::string& file_name,
|
||||
uint32_t wide_column_one_in = 0) {
|
||||
uint32_t wide_column_one_in = 0, bool range_del = false) {
|
||||
Env* test_env = opts.env;
|
||||
FileOptions file_options(opts);
|
||||
ReadOptions read_options;
|
||||
|
@ -139,7 +139,7 @@ class SSTDumpToolTest : public testing::Test {
|
|||
uint32_t num_keys = kNumKey;
|
||||
const char* comparator_name = ikc.user_comparator()->Name();
|
||||
if (strcmp(comparator_name, ReverseBytewiseComparator()->Name()) == 0) {
|
||||
for (int32_t i = num_keys; i >= 0; i--) {
|
||||
for (int32_t i = num_keys; i > 0; i--) {
|
||||
if (wide_column_one_in == 0 || i % wide_column_one_in != 0) {
|
||||
tb->Add(MakeKey(i), MakeValue(i));
|
||||
} else {
|
||||
|
@ -154,7 +154,12 @@ class SSTDumpToolTest : public testing::Test {
|
|||
tb->Add(MakeKeyWithTimeStamp(i, 100 + i), MakeValue(i));
|
||||
}
|
||||
} else {
|
||||
for (uint32_t i = 0; i < num_keys; i++) {
|
||||
uint32_t i = 0;
|
||||
if (range_del) {
|
||||
tb->Add(MakeKey(i, kTypeRangeDeletion), MakeValue(i + 1));
|
||||
i = 1;
|
||||
}
|
||||
for (; i < num_keys; i++) {
|
||||
if (wide_column_one_in == 0 || i % wide_column_one_in != 0) {
|
||||
tb->Add(MakeKey(i), MakeValue(i));
|
||||
} else {
|
||||
|
@ -520,6 +525,90 @@ TEST_F(SSTDumpToolTest, SstFileDumperMmapReads) {
|
|||
|
||||
cleanup(opts, file_path);
|
||||
}
|
||||
|
||||
// Checks that SstFileDumper::ReadSequential cross-checks the number of keys it
// scans against the table properties on an unbounded, full-range scan, and
// that the check is skipped when a read limit or a key bound is supplied.
TEST_F(SSTDumpToolTest, SstFileDumperVerifyNumRecords) {
  Options opts;
  opts.env = env();

  EnvOptions env_opts;
  std::string file_path = MakeFilePath("rocksdb_sst_test.sst");
  {
    // Well-formed file without range deletions: a full scan must succeed.
    createSST(opts, file_path, 10);
    SstFileDumper dumper(opts, file_path, Temperature::kUnknown,
                         1024 /*readahead_size*/, true /*verify_checksum*/,
                         false /*output_hex*/, false /*decode_blob_index*/,
                         env_opts, /*silent=*/true);
    ASSERT_OK(dumper.getStatus());
    ASSERT_OK(dumper.ReadSequential(
        /*print_kv=*/false,
        /*read_num_limit=*/std::numeric_limits<uint64_t>::max(),
        /*has_from=*/false, /*from_key=*/"",
        /*has_to=*/false, /*to_key=*/""));
    cleanup(opts, file_path);
  }

  {
    // Test with range del
    createSST(opts, file_path, 10, /*range_del=*/true);
    SstFileDumper dumper(opts, file_path, Temperature::kUnknown,
                         1024 /*readahead_size*/, true /*verify_checksum*/,
                         false /*output_hex*/, false /*decode_blob_index*/,
                         env_opts, /*silent=*/true);
    ASSERT_OK(dumper.getStatus());
    ASSERT_OK(dumper.ReadSequential(
        /*print_kv=*/false,
        /*read_num_limit=*/std::numeric_limits<uint64_t>::max(),
        /*has_from=*/false, /*from_key=*/"",
        /*has_to=*/false, /*to_key=*/""));
    cleanup(opts, file_path);
  }

  {
    // Overwrite num_entries while the file is being built so that the stored
    // property disagrees with the number of keys actually in the file.
    SyncPoint::GetInstance()->SetCallBack(
        "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
          TableProperties* props = static_cast<TableProperties*>(arg);
          props->num_entries = kNumKey + 2;
        });
    SyncPoint::GetInstance()->EnableProcessing();
    createSST(opts, file_path, 10);
    SstFileDumper dumper(opts, file_path, Temperature::kUnknown,
                         1024 /*readahead_size*/, true /*verify_checksum*/,
                         false /*output_hex*/, false /*decode_blob_index*/,
                         env_opts, /*silent=*/true);
    ASSERT_OK(dumper.getStatus());
    Status s = dumper.ReadSequential(
        /*print_kv=*/false,
        /*read_num_limit=*/std::numeric_limits<uint64_t>::max(),
        /*has_from=*/false, /*from_key=*/"",
        /*has_to=*/false, /*to_key=*/"");
    ASSERT_TRUE(s.IsCorruption());
    // The status message is the haystack and the expected text is the needle;
    // with the arguments the other way round an empty message would pass.
    // NOTE(review): the literal presumably relies on kNumKey == 1024.
    ASSERT_TRUE(s.getState() != nullptr);
    ASSERT_TRUE(std::strstr(s.getState(),
                            "Table property has num_entries = 1026 but "
                            "scanning the "
                            "table returns 1024 records.") != nullptr);

    // Validation is not performed when read_num_limit, has_from, or has_to
    // is set
    ASSERT_OK(dumper.ReadSequential(
        /*print_kv=*/false, /*read_num_limit=*/10,
        /*has_from=*/false, /*from_key=*/"",
        /*has_to=*/false, /*to_key=*/""));

    ASSERT_OK(dumper.ReadSequential(
        /*print_kv=*/false,
        /*read_num_limit=*/std::numeric_limits<uint64_t>::max(),
        /*has_from=*/true, /*from_key=*/MakeKey(100),
        /*has_to=*/false, /*to_key=*/""));

    ASSERT_OK(dumper.ReadSequential(
        /*print_kv=*/false,
        /*read_num_limit=*/std::numeric_limits<uint64_t>::max(),
        /*has_from=*/false, /*from_key=*/"",
        /*has_to=*/true, /*to_key=*/MakeKey(100)));

    // Do not leave the property-corrupting callback installed once this test
    // is done.
    SyncPoint::GetInstance()->DisableProcessing();
    SyncPoint::GetInstance()->ClearAllCallBacks();

    cleanup(opts, file_path);
  }
}
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
|
|
@ -55,6 +55,8 @@ void print_help(bool to_stderr) {
|
|||
|
||||
--command=check|scan|raw|verify|identify
|
||||
check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
|
||||
When read_num, from and to are not set, it compares the number of keys read with num_entries in table
|
||||
property and will report corruption if there is a mismatch.
|
||||
scan: Iterate over entries in files and print them to screen
|
||||
raw: Dump all the table contents to <file_name>_dump.txt
|
||||
verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
|
||||
|
|
Loading…
Reference in New Issue