GetEntity and PutEntity Support in ldb (#11796)

Summary:
- `get_entity` and `put_entity` command support in ldb
- Input Format for `put_entity`: `ldb --db=<DB_PATH> put_entity <KEY> <COLUMN_1_NAME>:<COLUMN_1_VALUE> <COLUMN_2_NAME>:<COLUMN_2_VALUE> ...`
- Output Format for `get_entity`: `<COLUMN_1_NAME>:<COLUMN_1_VALUE> <COLUMN_2_NAME>:<COLUMN_2_VALUE>`
- If `get_entity` is called against non-wide column value (existing behavior), empty key (kDefaultWideColumnName) will be printed, appended by `:<COLUMN_VALUE>`
- If `get` is called against wide column value (existing behavior), first column value is printed if the first column name is kDefaultWideColumnName.

# Test

Checks for `put_entity` and `get_entity` added in `ldb_test.py`
```
❯ python3 tools/ldb_test.py                                                                                                                                                                                                                                                                                                                                                                    took 45s at 10:45:44 AM
Running testBlobBatchPut...
.Running testBlobDump
.Running testBlobPut...
.Running testBlobStartingLevel...
.Running testCheckConsistency...
.Running testColumnFamilies...
.Running testCountDelimDump...
.Running testCountDelimIDump...
.Running testDumpLiveFiles...
.Running testDumpLoad...
Warning: 7 bad lines ignored.
.Running testGetProperty...
.Running testHexPutGet...
.Running testIDumpBasics...
.Running testIDumpDecodeBlobIndex...
.Running testIngestExternalSst...
.Running testInvalidCmdLines...
.Running testListColumnFamilies...
.Running testListLiveFilesMetadata...
.Running testManifestDump...
.Running testMiscAdminTask...
Compacting the db...
Sequence,Count,ByteSize,Physical Offset,Key(s)
.Running testSSTDump...
.Running testSimpleStringPutGet...
.Running testStringBatchPut...
.Running testTtlPutGet...
.Running testWALDump...
.
----------------------------------------------------------------------
Ran 25 tests in 57.742s
```

Manual Test
```
# Invalid format for wide columns
❯ ./ldb --db=/tmp/test_db put_entity x4 x5
Failed: wide column format needs to be <column_name>:<column_value> (did you mean put <key> <value>?)

# empty column name (kDefaultWideColumnName)
❯ ./ldb --db=/tmp/test_db put_entity x4 :x5
OK
❯ ./ldb --db=/tmp/test_db get_entity x4
:x5
❯ ./ldb --db=/tmp/test_db get x4
x5

❯ ./ldb --db=/tmp/test_db put_entity a1 :z1 b1:c1 b2:f1
OK
❯ ./ldb --db=/tmp/test_db get_entity a1
:z1 b1:c1 b2:f1

# Keeping the existing behavior if `get` was called on wide column values
❯ ./ldb --db=/tmp/test_db get a1
z1

# Scan
❯ ./ldb --db=/tmp/test_db scan
a1 ==> b1:c1 b2:f1
x4 ==> x5
x5 ==> cn1:cv1 cn2:cv2

# Scan hex
❯ ./ldb --db=/tmp/test_db scan --hex
0x6131 ==> 0x6231:0x6331 0x6232:0x6631
0x7834 ==> 0x7835
0x7835 ==> 0x636E31:0x637631 0x636E32:0x637632

# More testing with hex values
❯ ./ldb --db=/tmp/test_db get_entity 0x6131 --hex
0x6231:0x6331 0x6232:0x6631

❯ ./ldb --db=/tmp/test_db get_entity 0x78 --hex
Failed: GetEntity failed: NotFound:

❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --hex
:0x7835

❯ ./ldb --db=/tmp/test_db put_entity 0x7834 0x6234:0x6635 --hex
OK

❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --hex
0x6234:0x6635

❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --key_hex
b4:f5

❯ ./ldb --db=/tmp/test_db get_entity x4 --value_hex
0x6234:0x6635
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11796

Reviewed By: jowlyzhang

Differential Revision: D48978141

Pulled By: jaykorean

fbshipit-source-id: 4f87c222417ed90a6dbf39bd7b0f068b01e68393
This commit is contained in:
Jay Huh 2023-09-12 16:32:40 -07:00 committed by Facebook GitHub Bot
parent ef3e289b2d
commit 4b79e8c003
3 changed files with 194 additions and 5 deletions

View File

@ -6,7 +6,7 @@
//
#include "rocksdb/utilities/ldb_cmd.h"
#include <cinttypes>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <fstream>
@ -208,9 +208,15 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) {
if (parsed_params.cmd == GetCommand::Name()) {
return new GetCommand(parsed_params.cmd_params, parsed_params.option_map,
parsed_params.flags);
} else if (parsed_params.cmd == GetEntityCommand::Name()) {
return new GetEntityCommand(parsed_params.cmd_params,
parsed_params.option_map, parsed_params.flags);
} else if (parsed_params.cmd == PutCommand::Name()) {
return new PutCommand(parsed_params.cmd_params, parsed_params.option_map,
parsed_params.flags);
} else if (parsed_params.cmd == PutEntityCommand::Name()) {
return new PutEntityCommand(parsed_params.cmd_params,
parsed_params.option_map, parsed_params.flags);
} else if (parsed_params.cmd == BatchPutCommand::Name()) {
return new BatchPutCommand(parsed_params.cmd_params,
parsed_params.option_map, parsed_params.flags);
@ -2838,6 +2844,55 @@ void GetCommand::DoCommand() {
// ----------------------------------------------------------------------------
GetEntityCommand::GetEntityCommand(
const std::vector<std::string>& params,
const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags)
: LDBCommand(
options, flags, true,
BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX})) {
if (params.size() != 1) {
exec_state_ = LDBCommandExecuteResult::Failed(
"<key> must be specified for the get_entity command");
} else {
key_ = params.at(0);
}
if (is_key_hex_) {
key_ = HexToString(key_);
}
}
void GetEntityCommand::Help(std::string& ret) {
ret.append(" ");
ret.append(GetEntityCommand::Name());
ret.append(" <key>");
ret.append(" [--" + ARG_TTL + "]");
ret.append("\n");
}
void GetEntityCommand::DoCommand() {
if (!db_) {
assert(GetExecuteState().IsFailed());
return;
}
PinnableWideColumns pinnable_wide_columns;
Status st = db_->GetEntity(ReadOptions(), GetCfHandle(), key_,
&pinnable_wide_columns);
if (st.ok()) {
std::ostringstream oss;
WideColumnsHelper::DumpWideColumns(pinnable_wide_columns.columns(), oss,
is_value_hex_);
fprintf(stdout, "%s\n", oss.str().c_str());
} else {
std::stringstream oss;
oss << "GetEntity failed: " << st.ToString();
exec_state_ = LDBCommandExecuteResult::Failed(oss.str());
}
}
// ----------------------------------------------------------------------------
ApproxSizeCommand::ApproxSizeCommand(
const std::vector<std::string>& /*params*/,
const std::map<std::string, std::string>& options,
@ -3274,6 +3329,81 @@ void PutCommand::OverrideBaseOptions() {
// ----------------------------------------------------------------------------
PutEntityCommand::PutEntityCommand(
const std::vector<std::string>& params,
const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags)
: LDBCommand(options, flags, false,
BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX,
ARG_VALUE_HEX, ARG_CREATE_IF_MISSING})) {
if (params.size() < 2) {
exec_state_ = LDBCommandExecuteResult::Failed(
"<key> and at least one column <column_name>:<column_value> must be "
"specified for the put_entity command");
} else {
auto iter = params.begin();
key_ = *iter;
if (is_key_hex_) {
key_ = HexToString(key_);
}
for (++iter; iter != params.end(); ++iter) {
auto split = StringSplit(*iter, ':');
if (split.size() != 2) {
exec_state_ = LDBCommandExecuteResult::Failed(
"wide column format needs to be <column_name>:<column_value> (did "
"you mean put <key> <value>?)");
return;
}
std::string name(split[0]);
std::string value(split[1]);
if (is_value_hex_) {
name = HexToString(name);
value = HexToString(value);
}
column_names_.push_back(name);
column_values_.push_back(value);
}
}
create_if_missing_ = IsFlagPresent(flags_, ARG_CREATE_IF_MISSING);
}
void PutEntityCommand::Help(std::string& ret) {
ret.append(" ");
ret.append(PutCommand::Name());
ret.append(
" <key> <column1_name>:<column1_value> <column2_name>:<column2_value> "
"<...>");
ret.append(" [--" + ARG_CREATE_IF_MISSING + "]");
ret.append(" [--" + ARG_TTL + "]");
ret.append("\n");
}
void PutEntityCommand::DoCommand() {
if (!db_) {
assert(GetExecuteState().IsFailed());
return;
}
assert(column_names_.size() == column_values_.size());
WideColumns columns;
for (size_t i = 0; i < column_names_.size(); i++) {
WideColumn column(column_names_[i], column_values_[i]);
columns.emplace_back(column);
}
Status st = db_->PutEntity(WriteOptions(), GetCfHandle(), key_, columns);
if (st.ok()) {
fprintf(stdout, "OK\n");
} else {
exec_state_ = LDBCommandExecuteResult::Failed(st.ToString());
}
}
void PutEntityCommand::OverrideBaseOptions() {
LDBCommand::OverrideBaseOptions();
options_.create_if_missing = create_if_missing_;
}
// ----------------------------------------------------------------------------
const char* DBQuerierCommand::HELP_CMD = "help";
const char* DBQuerierCommand::GET_CMD = "get";
const char* DBQuerierCommand::PUT_CMD = "put";

View File

@ -403,6 +403,22 @@ class GetCommand : public LDBCommand {
std::string key_;
};
class GetEntityCommand : public LDBCommand {
public:
static std::string Name() { return "get_entity"; }
GetEntityCommand(const std::vector<std::string>& params,
const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags);
void DoCommand() override;
static void Help(std::string& ret);
private:
std::string key_;
};
class ApproxSizeCommand : public LDBCommand {
public:
static std::string Name() { return "approxsize"; }
@ -530,6 +546,26 @@ class PutCommand : public LDBCommand {
std::string value_;
};
class PutEntityCommand : public LDBCommand {
public:
static std::string Name() { return "put_entity"; }
PutEntityCommand(const std::vector<std::string>& params,
const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags);
void DoCommand() override;
static void Help(std::string& ret);
void OverrideBaseOptions() override;
private:
std::string key_;
std::vector<std::string> column_names_;
std::vector<std::string> column_values_;
};
/**
* Command that starts up a REPL shell that allows
* get/put/delete.

View File

@ -121,17 +121,40 @@ class LDBTestCase(unittest.TestCase):
self.assertRunOK("get x2", "y2")
self.assertRunFAIL("get x3")
self.assertRunOK("scan --from=x1 --to=z", "x1 ==> y1\nx2 ==> y2")
self.assertRunFAIL("put_entity x4")
self.assertRunFAIL("put_entity x4 cv1")
self.assertRunOK("put_entity x4 :cv1", "OK")
self.assertRunOK("get_entity x4", ":cv1")
self.assertRunOK("put_entity x5 cn1:cv1 cn2:cv2", "OK")
self.assertRunOK("get_entity x5", "cn1:cv1 cn2:cv2")
self.assertRunOK(
"scan --from=x1 --to=z",
"x1 ==> y1\nx2 ==> y2\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK("put x3 y3", "OK")
self.assertRunOK("scan --from=x1 --to=z", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3")
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3")
self.assertRunOK("scan --from=x", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3")
self.assertRunOK(
"scan --from=x1 --to=z",
"x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK(
"scan",
"x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK(
"scan --from=x",
"x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK("scan --to=x2", "x1 ==> y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 ==> y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 ==> y1\nx2 ==> y2")
self.assertRunOK("delete x4", "OK")
self.assertRunOK("delete x5", "OK")
self.assertRunOK(
"scan --from=x1 --to=z --max_keys=3", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3"
)