Fix a bug when ingest plaintable sst file (#11969)

Summary:
Plaintable doesn't support SeekToLast. And GetIngestedFileInfo is using SeekToLast without checking the validity.

We are using IngestExternalFile or CreateColumnFamilyWithImport with some sst file in PlainTable format . But after running for a while, compaction error often happens. Such as
![image](https://github.com/facebook/rocksdb/assets/13954644/b4fa49fc-73fc-49ce-96c6-f198a30800b8)

I simply add some std::cerr log to find why.
![image](https://github.com/facebook/rocksdb/assets/13954644/2cf1d5ff-48cc-4125-b917-87090f764fcd)
It shows that the smallest key is always equal to largest key.
![image](https://github.com/facebook/rocksdb/assets/13954644/6d43e978-0be0-4306-aae3-f9e4ae366395)
Then I found the root cause is that PlainTable do not support SeekToLast, so the smallest key is always the same with the largest

I try to write an unit test. But it's not easy to reproduce this error.
(This PR is similar to https://github.com/facebook/rocksdb/pull/11266. Sorry for open another PR)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11969

Reviewed By: ajkr

Differential Revision: D50933854

Pulled By: cbi42

fbshipit-source-id: 6c6af53c1388922cbabbe64ed3be1cdc58df5431
This commit is contained in:
914022466 2023-11-02 13:45:37 -07:00 committed by Facebook GitHub Bot
parent a42910537d
commit 2648e0a747
2 changed files with 53 additions and 4 deletions

View File

@ -792,8 +792,33 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
}
file_to_ingest->smallest_internal_key.SetFrom(key);
iter->SeekToLast();
pik_status = ParseInternalKey(iter->key(), &key, allow_data_in_errors);
Slice largest;
if (strcmp(cfd_->ioptions()->table_factory->Name(), "PlainTable") == 0) {
// PlainTable iterator does not support SeekToLast().
largest = iter->key();
for (; iter->Valid(); iter->Next()) {
if (cfd_->internal_comparator().Compare(iter->key(), largest) > 0) {
largest = iter->key();
}
}
if (!iter->status().ok()) {
return iter->status();
}
} else {
iter->SeekToLast();
if (!iter->Valid()) {
if (iter->status().ok()) {
// The file contains at least 1 key since iter is valid after
// SeekToFirst().
return Status::Corruption("Can not find largest key in sst file");
} else {
return iter->status();
}
}
largest = iter->key();
}
pik_status = ParseInternalKey(largest, &key, allow_data_in_errors);
if (!pik_status.ok()) {
return Status::Corruption("Corrupted key in external file. ",
pik_status.getState());

View File

@ -366,8 +366,32 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
bool bound_set = false;
if (iter->Valid()) {
file_to_import->smallest_internal_key.DecodeFrom(iter->key());
iter->SeekToLast();
file_to_import->largest_internal_key.DecodeFrom(iter->key());
Slice largest;
if (strcmp(cfd_->ioptions()->table_factory->Name(), "PlainTable") == 0) {
// PlainTable iterator does not support SeekToLast().
largest = iter->key();
for (; iter->Valid(); iter->Next()) {
if (cfd_->internal_comparator().Compare(iter->key(), largest) > 0) {
largest = iter->key();
}
}
if (!iter->status().ok()) {
return iter->status();
}
} else {
iter->SeekToLast();
if (!iter->Valid()) {
if (iter->status().ok()) {
// The file contains at least 1 key since iter is valid after
// SeekToFirst().
return Status::Corruption("Can not find largest key in sst file");
} else {
return iter->status();
}
}
largest = iter->key();
}
file_to_import->largest_internal_key.DecodeFrom(largest);
bound_set = true;
}