Add documentation to some formatting util functions (#11674)

Summary:
As titled, this change mostly adds documentation. It also updates one usage of these util functions in the external file ingestion job, based on code inspection.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11674

Test Plan:
```
make check
```

Note that no unit test was added or updated to check that the change in the external file ingestion flow works. This is because user-defined timestamps don't support bulk loading yet. There could be other missing pieces that are needed to make this flow functional and testable. That work is separately tracked and unit tests will be added then.

Reviewed By: cbi42

Differential Revision: D48271338

Pulled By: jowlyzhang

fbshipit-source-id: c05c3440f1c08632dd0de51b563a30b44b4eb8b5
This commit is contained in:
Yu Zhang 2023-08-14 22:04:18 -07:00 committed by Facebook GitHub Bot
parent a09c141dde
commit 6a3da5635e
4 changed files with 70 additions and 3 deletions

View File

@ -88,6 +88,13 @@ void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key,
result->append(kTsMax.data(), ts_sz);
}
// `key` is a user key that already ends with a timestamp of `ts_sz` bytes.
// Appends to *result the leading `key.size() - ts_sz` bytes of `key` followed
// by `ts_sz` zero bytes (the minimum timestamp).
//
// Preconditions: `ts_sz > 0` and `key.size() >= ts_sz`. Passing a key that
// does not carry a timestamp would make the unsigned subtraction below wrap
// around and read far past the end of `key`.
void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key,
                                   size_t ts_sz) {
  assert(ts_sz > 0);
  // Guard against unsigned underflow in `key.size() - ts_sz`.
  assert(key.size() >= ts_sz);
  result->append(key.data(), key.size() - ts_sz);
  result->append(ts_sz, static_cast<unsigned char>(0));
}
void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key,
size_t ts_sz) {
assert(ts_sz > 0);

View File

@ -168,10 +168,18 @@ inline void UnPackSequenceAndType(uint64_t packed, uint64_t* seq,
EntryType GetEntryType(ValueType value_type);
// Append the serialization of "key" to *result.
//
// input [internal key]: <user_key | seqno + type>
// output before: empty
// output after: <user_key | seqno + type>
void AppendInternalKey(std::string* result, const ParsedInternalKey& key);
// Append the serialization of "key" to *result, replacing the original
// timestamp with argument ts.
//
// input [internal key]: <user_provided_key | original_ts | seqno + type>
// output before: empty
// output after: <user_provided_key | ts | seqno + type>
void AppendInternalKeyWithDifferentTimestamp(std::string* result,
const ParsedInternalKey& key,
const Slice& ts);
@ -179,37 +187,73 @@ void AppendInternalKeyWithDifferentTimestamp(std::string* result,
// Serialized internal key consists of user key followed by footer.
// This function appends the footer to *result, assuming that *result already
// contains the user key at the end.
//
// output before: <user_key>
// output after: <user_key | seqno + type>
void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
ValueType t);
// Append the key and a minimal timestamp to *result
//
// input [user key without ts]: <user_provided_key>
// output before: empty
// output after: <user_provided_key | min_ts>
void AppendKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz);
// Append the key and a maximal timestamp to *result
//
// input [user key without ts]: <user_provided_key>
// output before: empty
// output after: <user_provided_key | max_ts>
void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key,
size_t ts_sz);
// `key` is a user key with timestamp. Append the user key without timestamp
// and the minimum timestamp to *result.
//
// input [user key]: <user_provided_key | original_ts>
// output before: empty
// output after: <user_provided_key | min_ts>
void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz);
// `key` is a user key with timestamp. Append the user key without timestamp
// and the maximal timestamp to *result.
//
// input [user key]: <user_provided_key | original_ts>
// output before: empty
// output after: <user_provided_key | max_ts>
void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key,
size_t ts_sz);
// `key` is an internal key containing a user key without timestamp. Create a
// new key in *result by padding a min timestamp of size `ts_sz` to the user key
// and copying the remaining internal key bytes.
//
// input [internal key]: <user_provided_key | seqno + type>
// output before: empty
// output after: <user_provided_key | min_ts | seqno + type>
void PadInternalKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz);
// `key` is an internal key containing a user key with timestamp of size
// `ts_sz`. Create a new internal key in *result by stripping the timestamp from
// the user key and copying the remaining internal key bytes.
//
// input [internal key]: <user_provided_key | original_ts | seqno + type>
// output before: empty
// output after: <user_provided_key | seqno + type>
void StripTimestampFromInternalKey(std::string* result, const Slice& key,
size_t ts_sz);
// `key` is an internal key containing a user key with timestamp of size
// `ts_sz`. Create a new internal key in *result, replacing the original
// timestamp with the min timestamp.
//
// input [internal key]: <user_provided_key | original_ts | seqno + type>
// output before: empty
// output after: <user_provided_key | min_ts | seqno + type>
void ReplaceInternalKeyWithMinTimestamp(std::string* result, const Slice& key,
size_t ts_sz);
@ -221,11 +265,16 @@ Status ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result,
bool log_err_key);
// Returns the user key portion of an internal key, i.e. everything except the
// trailing kNumInternalBytes bytes.
//
// input [internal key]: <user_key | seqno + type>
// output: <user_key>
inline Slice ExtractUserKey(const Slice& internal_key) {
  const size_t total_size = internal_key.size();
  assert(total_size >= kNumInternalBytes);
  const size_t user_key_size = total_size - kNumInternalBytes;
  return Slice(internal_key.data(), user_key_size);
}
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <user_provided_key>
inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
size_t ts_sz) {
Slice ret = internal_key;
@ -233,17 +282,23 @@ inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
return ret;
}
// Returns `user_key` with its trailing `ts_sz` bytes (the timestamp) removed.
//
// input [user key]: <user_provided_key | ts>
// output: <user_provided_key>
inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
  Slice key_without_ts(user_key);
  key_without_ts.remove_suffix(ts_sz);
  return key_without_ts;
}
// Returns the trailing `ts_sz` bytes of `user_key`, i.e. its timestamp.
// Requires `user_key.size() >= ts_sz`.
//
// input [user key]: <user_provided_key | ts>
// output: <ts>
inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
  const size_t key_len = user_key.size();
  assert(key_len >= ts_sz);
  const char* key_end = user_key.data() + key_len;
  return Slice(key_end - ts_sz, ts_sz);
}
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <ts>
inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) {
const size_t key_size = internal_key.size();
assert(key_size >= kNumInternalBytes + ts_sz);
@ -251,12 +306,16 @@ inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) {
ts_sz);
}
// Decodes the last kNumInternalBytes bytes of an internal key with
// DecodeFixed64 and returns the result.
//
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <seqno + type>
inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
  const size_t key_size = internal_key.size();
  assert(key_size >= kNumInternalBytes);
  const char* footer_start =
      internal_key.data() + key_size - kNumInternalBytes;
  return DecodeFixed64(footer_start);
}
// input [internal key]: <user_provided_key | ts | seqno + type>
// output: <type>
inline ValueType ExtractValueType(const Slice& internal_key) {
uint64_t num = ExtractInternalKeyFooter(internal_key);
unsigned char c = num & 0xff;

View File

@ -349,7 +349,7 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
std::string end_str;
AppendUserKeyWithMaxTimestamp(
&begin_str, file_to_ingest.smallest_internal_key.user_key(), ts_sz);
AppendKeyWithMinTimestamp(
AppendUserKeyWithMinTimestamp(
&end_str, file_to_ingest.largest_internal_key.user_key(), ts_sz);
keys.emplace_back(std::move(begin_str));
keys.emplace_back(std::move(end_str));

View File

@ -1361,8 +1361,9 @@ class DB {
// the files. In this case, client could set options.change_level to true, to
// move the files back to the minimum level capable of holding the data set
// or a given level (specified by non-negative options.target_level).
// In case of user_defined timestamp, if enabled, `start` and `end` should
// point to key without timestamp part.
//
// In case of user-defined timestamp, if enabled, `begin` and `end` should
// not contain timestamp.
virtual Status CompactRange(const CompactRangeOptions& options,
ColumnFamilyHandle* column_family,
const Slice* begin, const Slice* end) = 0;