rocksdb/tools/ldb_test.py
Jay Huh 4b79e8c003 GetEntity and PutEntity Support in ldb (#11796)
Summary:
- `get_entity` and `put_entity` command support in ldb
- Input Format for `put_entity`: `ldb --db=<DB_PATH> put_entity <KEY> <COLUMN_1_NAME>:<COLUMN_1_VALUE> <COLUMN_2_NAME>:<COLUMN_2_VALUE> ...`
- Output Format for `get_entity`: `<COLUMN_1_NAME>:<COLUMN_1_VALUE> <COLUMN_2_NAME>:<COLUMN_2_VALUE>`
- If `get_entity` is called against non-wide column value (existing behavior), empty key (kDefaultWideColumnName) will be printed, appended by `:<COLUMN_VALUE>`
- If `get` is called against wide column value (existing behavior), first column value is printed if the first column name is kDefaultWideColumnName.

# Test

Checks for `put_entity` and `get_entity` added in `ldb_test.py`
```
❯ python3 tools/ldb_test.py                                                                                                                                                                                                                                                                                                                                                                    took 45s at 10:45:44 AM
Running testBlobBatchPut...
.Running testBlobDump
.Running testBlobPut...
.Running testBlobStartingLevel...
.Running testCheckConsistency...
.Running testColumnFamilies...
.Running testCountDelimDump...
.Running testCountDelimIDump...
.Running testDumpLiveFiles...
.Running testDumpLoad...
Warning: 7 bad lines ignored.
.Running testGetProperty...
.Running testHexPutGet...
.Running testIDumpBasics...
.Running testIDumpDecodeBlobIndex...
.Running testIngestExternalSst...
.Running testInvalidCmdLines...
.Running testListColumnFamilies...
.Running testListLiveFilesMetadata...
.Running testManifestDump...
.Running testMiscAdminTask...
Compacting the db...
Sequence,Count,ByteSize,Physical Offset,Key(s)
.Running testSSTDump...
.Running testSimpleStringPutGet...
.Running testStringBatchPut...
.Running testTtlPutGet...
.Running testWALDump...
.
----------------------------------------------------------------------
Ran 25 tests in 57.742s
```

Manual Test
```
# Invalid format for wide columns
❯ ./ldb --db=/tmp/test_db put_entity x4 x5
Failed: wide column format needs to be <column_name>:<column_value> (did you mean put <key> <value>?)

# empty column name (kDefaultWideColumnName)
❯ ./ldb --db=/tmp/test_db put_entity x4 :x5
OK
❯ ./ldb --db=/tmp/test_db get_entity x4
:x5
❯ ./ldb --db=/tmp/test_db get x4
x5

❯ ./ldb --db=/tmp/test_db put_entity a1 :z1 b1:c1 b2:f1
OK
❯ ./ldb --db=/tmp/test_db get_entity a1
:z1 b1:c1 b2:f1

# Keeping the existing behavior if `get` was called on wide column values
❯ ./ldb --db=/tmp/test_db get a1
z1

# Scan
❯ ./ldb --db=/tmp/test_db scan
a1 ==> b1:c1 b2:f1
x4 ==> x5
x5 ==> cn1:cv1 cn2:cv2

# Scan hex
❯ ./ldb --db=/tmp/test_db scan --hex
0x6131 ==> 0x6231:0x6331 0x6232:0x6631
0x7834 ==> 0x7835
0x7835 ==> 0x636E31:0x637631 0x636E32:0x637632

# More testing with hex values
❯ ./ldb --db=/tmp/test_db get_entity 0x6131 --hex
0x6231:0x6331 0x6232:0x6631

❯ ./ldb --db=/tmp/test_db get_entity 0x78 --hex
Failed: GetEntity failed: NotFound:

❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --hex
:0x7835

❯ ./ldb --db=/tmp/test_db put_entity 0x7834 0x6234:0x6635 --hex
OK

❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --hex
0x6234:0x6635

❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --key_hex
b4:f5

❯ ./ldb --db=/tmp/test_db get_entity x4 --value_hex
0x6234:0x6635
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11796

Reviewed By: jowlyzhang

Differential Revision: D48978141

Pulled By: jaykorean

fbshipit-source-id: 4f87c222417ed90a6dbf39bd7b0f068b01e68393
2023-09-12 16:32:40 -07:00

984 lines
40 KiB
Python

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals
import glob
import os
import os.path
import re
import shutil
import subprocess
import tempfile
import time
import unittest
def my_check_output(*popenargs, **kwargs):
"""
If we had python 2.7, we should simply use subprocess.check_output.
This is a stop-gap solution for python 2.6
"""
if "stdout" in kwargs:
raise ValueError("stdout argument not allowed, it will be overridden.")
process = subprocess.Popen(
stderr=subprocess.PIPE, stdout=subprocess.PIPE, *popenargs, **kwargs
)
output, unused_err = process.communicate()
retcode = process.poll()
if retcode:
cmd = kwargs.get("args")
if cmd is None:
cmd = popenargs[0]
raise Exception("Exit code is not 0. It is %d. Command: %s" % (retcode, cmd))
return output.decode("utf-8")
def run_err_null(cmd):
return os.system(cmd + " 2>/dev/null ")
class LDBTestCase(unittest.TestCase):
def setUp(self):
self.TMP_DIR = tempfile.mkdtemp(prefix="ldb_test_")
self.DB_NAME = "testdb"
def tearDown(self):
assert (
self.TMP_DIR.strip() != "/"
and self.TMP_DIR.strip() != "/tmp"
and self.TMP_DIR.strip() != "/tmp/"
) # Just some paranoia
shutil.rmtree(self.TMP_DIR)
def dbParam(self, dbName):
return "--db=%s" % os.path.join(self.TMP_DIR, dbName)
def assertRunOKFull(
self, params, expectedOutput, unexpected=False, isPattern=False
):
"""
All command-line params must be specified.
Allows full flexibility in testing; for example: missing db param.
"""
output = my_check_output(
'./ldb %s |grep -v "Created bg thread"' % params, shell=True
)
if not unexpected:
if isPattern:
self.assertNotEqual(expectedOutput.search(output.strip()), None)
else:
self.assertEqual(output.strip(), expectedOutput.strip())
else:
if isPattern:
self.assertEqual(expectedOutput.search(output.strip()), None)
else:
self.assertNotEqual(output.strip(), expectedOutput.strip())
def assertRunFAILFull(self, params):
"""
All command-line params must be specified.
Allows full flexibility in testing; for example: missing db param.
"""
try:
my_check_output(
'./ldb %s >/dev/null 2>&1 |grep -v "Created bg \
thread"'
% params,
shell=True,
)
except Exception:
return
self.fail(
"Exception should have been raised for command with params: %s" % params
)
def assertRunOK(self, params, expectedOutput, unexpected=False):
"""
Uses the default test db.
"""
self.assertRunOKFull(
"%s %s" % (self.dbParam(self.DB_NAME), params), expectedOutput, unexpected
)
def assertRunFAIL(self, params):
"""
Uses the default test db.
"""
self.assertRunFAILFull("%s %s" % (self.dbParam(self.DB_NAME), params))
def testSimpleStringPutGet(self):
print("Running testSimpleStringPutGet...")
self.assertRunFAIL("put x1 y1")
self.assertRunOK("put --create_if_missing x1 y1", "OK")
self.assertRunOK("get x1", "y1")
self.assertRunFAIL("get x2")
self.assertRunOK("put x2 y2", "OK")
self.assertRunOK("get x1", "y1")
self.assertRunOK("get x2", "y2")
self.assertRunFAIL("get x3")
self.assertRunFAIL("put_entity x4")
self.assertRunFAIL("put_entity x4 cv1")
self.assertRunOK("put_entity x4 :cv1", "OK")
self.assertRunOK("get_entity x4", ":cv1")
self.assertRunOK("put_entity x5 cn1:cv1 cn2:cv2", "OK")
self.assertRunOK("get_entity x5", "cn1:cv1 cn2:cv2")
self.assertRunOK(
"scan --from=x1 --to=z",
"x1 ==> y1\nx2 ==> y2\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK("put x3 y3", "OK")
self.assertRunOK(
"scan --from=x1 --to=z",
"x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK(
"scan",
"x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK(
"scan --from=x",
"x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2",
)
self.assertRunOK("scan --to=x2", "x1 ==> y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 ==> y1")
self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 ==> y1\nx2 ==> y2")
self.assertRunOK("delete x4", "OK")
self.assertRunOK("delete x5", "OK")
self.assertRunOK(
"scan --from=x1 --to=z --max_keys=3", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3"
)
self.assertRunOK(
"scan --from=x1 --to=z --max_keys=4", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3"
)
self.assertRunOK("scan --from=x1 --to=x2", "x1 ==> y1")
self.assertRunOK("scan --from=x2 --to=x4", "x2 ==> y2\nx3 ==> y3")
self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL
self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo")
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3")
self.assertRunOK("delete x1", "OK")
self.assertRunOK("scan", "x2 ==> y2\nx3 ==> y3")
self.assertRunOK("delete NonExistentKey", "OK")
# It is weird that GET and SCAN raise exception for
# non-existent key, while delete does not
self.assertRunOK("checkconsistency", "OK")
def dumpDb(self, params, dumpFile):
return 0 == run_err_null("./ldb dump %s > %s" % (params, dumpFile))
def loadDb(self, params, dumpFile):
return 0 == run_err_null("cat %s | ./ldb load %s" % (dumpFile, params))
def writeExternSst(self, params, inputDumpFile, outputSst):
return 0 == run_err_null(
"cat %s | ./ldb write_extern_sst %s %s" % (inputDumpFile, outputSst, params)
)
def ingestExternSst(self, params, inputSst):
return 0 == run_err_null("./ldb ingest_extern_sst %s %s" % (inputSst, params))
def testStringBatchPut(self):
print("Running testStringBatchPut...")
self.assertRunOK("batchput x1 y1 --create_if_missing", "OK")
self.assertRunOK("scan", "x1 ==> y1")
self.assertRunOK('batchput x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK")
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 abc ==> y4 xyz")
self.assertRunFAIL("batchput")
self.assertRunFAIL("batchput k1")
self.assertRunFAIL("batchput k1 v1 k2")
def testBlobBatchPut(self):
print("Running testBlobBatchPut...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK")
self.assertRunOK("scan", "x1 ==> y1")
self.assertRunOK(
'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK"
)
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 abc ==> y4 xyz")
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1)
def testBlobPut(self):
print("Running testBlobPut...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put --create_if_missing --enable_blob_files x1 y1", "OK")
self.assertRunOK("get x1", "y1")
self.assertRunOK("put --enable_blob_files x2 y2", "OK")
self.assertRunOK("get x1", "y1")
self.assertRunOK("get x2", "y2")
self.assertRunFAIL("get x3")
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1)
def testBlobStartingLevel(self):
print("Running testBlobStartingLevel...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK(
"put --create_if_missing --enable_blob_files --blob_file_starting_level=10 x1 y1",
"OK",
)
self.assertRunOK("get x1", "y1")
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) == 0)
self.assertRunOK(
"put --enable_blob_files --blob_file_starting_level=0 x2 y2", "OK"
)
self.assertRunOK("get x1", "y1")
self.assertRunOK("get x2", "y2")
self.assertRunFAIL("get x3")
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1)
def testCountDelimDump(self):
print("Running testCountDelimDump...")
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK(
"dump --count_delim",
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK(
'dump --count_delim="."',
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK(
'dump --count_delim=","',
"x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8",
)
def testCountDelimIDump(self):
print("Running testCountDelimIDump...")
self.assertRunOK("batchput x.1 x1 --create_if_missing", "OK")
self.assertRunOK("batchput y.abc abc y.2 2 z.13c pqr", "OK")
self.assertRunOK(
"idump --count_delim",
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK(
'idump --count_delim="."',
"x => count:1\tsize:5\ny => count:2\tsize:12\nz => count:1\tsize:8",
)
self.assertRunOK("batchput x,2 x2 x,abc xabc", "OK")
self.assertRunOK(
'idump --count_delim=","',
"x => count:2\tsize:14\nx.1 => count:1\tsize:5\ny.2 => count:1\tsize:4\ny.abc => count:1\tsize:8\nz.13c => count:1\tsize:8",
)
def testInvalidCmdLines(self):
print("Running testInvalidCmdLines...")
# db not specified
self.assertRunFAILFull("put 0x6133 0x6233 --hex --create_if_missing")
# No param called he
self.assertRunFAIL("put 0x6133 0x6233 --he --create_if_missing")
# max_keys is not applicable for put
self.assertRunFAIL("put 0x6133 0x6233 --max_keys=1 --create_if_missing")
# hex has invalid boolean value
def testHexPutGet(self):
print("Running testHexPutGet...")
self.assertRunOK("put a1 b1 --create_if_missing", "OK")
self.assertRunOK("scan", "a1 ==> b1")
self.assertRunOK("scan --hex", "0x6131 ==> 0x6231")
self.assertRunFAIL("put --hex 6132 6232")
self.assertRunOK("put --hex 0x6132 0x6232", "OK")
self.assertRunOK("scan --hex", "0x6131 ==> 0x6231\n0x6132 ==> 0x6232")
self.assertRunOK("scan", "a1 ==> b1\na2 ==> b2")
self.assertRunOK("get a1", "b1")
self.assertRunOK("get --hex 0x6131", "0x6231")
self.assertRunOK("get a2", "b2")
self.assertRunOK("get --hex 0x6132", "0x6232")
self.assertRunOK("get --key_hex 0x6132", "b2")
self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232")
self.assertRunOK("get --value_hex a2", "0x6232")
self.assertRunOK(
"scan --key_hex --value_hex", "0x6131 ==> 0x6231\n0x6132 ==> 0x6232"
)
self.assertRunOK(
"scan --hex --from=0x6131 --to=0x6133",
"0x6131 ==> 0x6231\n0x6132 ==> 0x6232",
)
self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "0x6131 ==> 0x6231")
self.assertRunOK("scan --key_hex", "0x6131 ==> b1\n0x6132 ==> b2")
self.assertRunOK("scan --value_hex", "a1 ==> 0x6231\na2 ==> 0x6232")
self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK")
self.assertRunOK("scan", "a1 ==> b1\na2 ==> b2\na3 ==> b3\na4 ==> b4")
self.assertRunOK("delete --hex 0x6133", "OK")
self.assertRunOK("scan", "a1 ==> b1\na2 ==> b2\na4 ==> b4")
self.assertRunOK("checkconsistency", "OK")
def testTtlPutGet(self):
print("Running testTtlPutGet...")
self.assertRunOK("put a1 b1 --ttl --create_if_missing", "OK")
self.assertRunOK("scan --hex", "0x6131 ==> 0x6231", True)
self.assertRunOK("dump --ttl ", "a1 ==> b1", True)
self.assertRunOK("dump --hex --ttl ", "0x6131 ==> 0x6231\nKeys in range: 1")
self.assertRunOK("scan --hex --ttl", "0x6131 ==> 0x6231")
self.assertRunOK("get --value_hex a1", "0x6231", True)
self.assertRunOK("get --ttl a1", "b1")
self.assertRunOK("put a3 b3 --create_if_missing", "OK")
# fails because timstamp's length is greater than value's
self.assertRunFAIL("get --ttl a3")
self.assertRunOK("checkconsistency", "OK")
def testInvalidCmdLines(self): # noqa: F811 T25377293 Grandfathered in
print("Running testInvalidCmdLines...")
# db not specified
self.assertRunFAILFull("put 0x6133 0x6233 --hex --create_if_missing")
# No param called he
self.assertRunFAIL("put 0x6133 0x6233 --he --create_if_missing")
# max_keys is not applicable for put
self.assertRunFAIL("put 0x6133 0x6233 --max_keys=1 --create_if_missing")
# hex has invalid boolean value
self.assertRunFAIL("put 0x6133 0x6233 --hex=Boo --create_if_missing")
def testDumpLoad(self):
print("Running testDumpLoad...")
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK")
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
# Dump and load without any additional params specified
dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump1")
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4"
)
# Dump and load in hex
dumpFilePath = os.path.join(self.TMP_DIR, "dump2")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump2")
self.assertTrue(self.dumpDb("--db=%s --hex" % origDbPath, dumpFilePath))
self.assertTrue(
self.loadDb(
"--db=%s --hex --create_if_missing" % loadedDbPath, dumpFilePath
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4"
)
# Dump only a portion of the key range
dumpFilePath = os.path.join(self.TMP_DIR, "dump3")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump3")
self.assertTrue(
self.dumpDb("--db=%s --from=x1 --to=x3" % origDbPath, dumpFilePath)
)
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2")
# Dump upto max_keys rows
dumpFilePath = os.path.join(self.TMP_DIR, "dump4")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump4")
self.assertTrue(self.dumpDb("--db=%s --max_keys=3" % origDbPath, dumpFilePath))
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3"
)
# Load into an existing db, create_if_missing is not specified
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath))
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4"
)
# Dump and load with WAL disabled
dumpFilePath = os.path.join(self.TMP_DIR, "dump5")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump5")
self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath))
self.assertTrue(
self.loadDb(
"--db=%s --disable_wal --create_if_missing" % loadedDbPath, dumpFilePath
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4"
)
# Dump and load with lots of extra params specified
extraParams = " ".join(
[
"--bloom_bits=14",
"--block_size=1024",
"--auto_compaction=true",
"--write_buffer_size=4194304",
"--file_size=2097152",
]
)
dumpFilePath = os.path.join(self.TMP_DIR, "dump6")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump6")
self.assertTrue(
self.dumpDb("--db=%s %s" % (origDbPath, extraParams), dumpFilePath)
)
self.assertTrue(
self.loadDb(
"--db=%s %s --create_if_missing" % (loadedDbPath, extraParams),
dumpFilePath,
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4"
)
# Dump with count_only
dumpFilePath = os.path.join(self.TMP_DIR, "dump7")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump7")
self.assertTrue(self.dumpDb("--db=%s --count_only" % origDbPath, dumpFilePath))
self.assertTrue(
self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath)
)
# DB should have atleast one value for scan to work
self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK")
self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 ==> v1")
# Dump command fails because of typo in params
dumpFilePath = os.path.join(self.TMP_DIR, "dump8")
self.assertFalse(
self.dumpDb("--db=%s --create_if_missing" % origDbPath, dumpFilePath)
)
# Dump and load with BlobDB enabled
blobParams = " ".join(
["--enable_blob_files", "--min_blob_size=1", "--blob_file_size=2097152"]
)
dumpFilePath = os.path.join(self.TMP_DIR, "dump9")
loadedDbPath = os.path.join(self.TMP_DIR, "loaded_from_dump9")
self.assertTrue(self.dumpDb("--db=%s" % (origDbPath), dumpFilePath))
self.assertTrue(
self.loadDb(
"--db=%s %s --create_if_missing --disable_wal"
% (loadedDbPath, blobParams),
dumpFilePath,
)
)
self.assertRunOKFull(
"scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4"
)
blob_files = self.getBlobFiles(loadedDbPath)
self.assertTrue(len(blob_files) >= 1)
def testIDumpBasics(self):
print("Running testIDumpBasics...")
self.assertRunOK("put a val --create_if_missing", "OK")
self.assertRunOK("put b val", "OK")
self.assertRunOK(
"idump",
"'a' seq:1, type:1 => val\n"
"'b' seq:2, type:1 => val\nInternal keys in range: 2",
)
self.assertRunOK(
"idump --input_key_hex --from=%s --to=%s" % (hex(ord("a")), hex(ord("b"))),
"'a' seq:1, type:1 => val\nInternal keys in range: 1",
)
def testIDumpDecodeBlobIndex(self):
print("Running testIDumpDecodeBlobIndex...")
self.assertRunOK("put a val --create_if_missing", "OK")
self.assertRunOK("put b val --enable_blob_files", "OK")
# Pattern to expect from dump with decode_blob_index flag enabled.
regex = ".*\[blob ref\].*"
expected_pattern = re.compile(regex)
cmd = "idump %s --decode_blob_index"
self.assertRunOKFull(
(cmd) % (self.dbParam(self.DB_NAME)),
expected_pattern,
unexpected=False,
isPattern=True,
)
def testMiscAdminTask(self):
print("Running testMiscAdminTask...")
# These tests need to be improved; for example with asserts about
# whether compaction or level reduction actually took place.
self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK")
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertTrue(0 == run_err_null("./ldb compact --db=%s" % origDbPath))
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
self.assertTrue(
0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=2" % origDbPath)
)
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
self.assertTrue(
0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=3" % origDbPath)
)
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
self.assertTrue(
0 == run_err_null("./ldb compact --db=%s --from=x1 --to=x3" % origDbPath)
)
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
self.assertTrue(
0
== run_err_null(
"./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" % origDbPath
)
)
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
# TODO(dilip): Not sure what should be passed to WAL.Currently corrupted.
self.assertTrue(
0
== run_err_null(
"./ldb dump_wal --db=%s --walfile=%s --header"
% (origDbPath, os.path.join(origDbPath, "LOG"))
)
)
self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4")
def testCheckConsistency(self):
print("Running testCheckConsistency...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put x1 y1 --create_if_missing", "OK")
self.assertRunOK("put x2 y2", "OK")
self.assertRunOK("get x1", "y1")
self.assertRunOK("checkconsistency", "OK")
sstFilePath = my_check_output(
"ls %s" % os.path.join(dbPath, "*.sst"), shell=True
)
# Modify the file
my_check_output("echo 'evil' > %s" % sstFilePath, shell=True)
self.assertRunFAIL("checkconsistency")
# Delete the file
my_check_output("rm -f %s" % sstFilePath, shell=True)
self.assertRunFAIL("checkconsistency")
def dumpLiveFiles(self, params, dumpFile):
return 0 == run_err_null("./ldb dump_live_files %s > %s" % (params, dumpFile))
def testDumpLiveFiles(self):
print("Running testDumpLiveFiles...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put x1 y1 --create_if_missing", "OK")
self.assertRunOK("put x2 y2 --enable_blob_files", "OK")
dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath))
self.assertRunOK("delete x1", "OK")
self.assertRunOK("put x3 y3", "OK")
dumpFilePath = os.path.join(self.TMP_DIR, "dump2")
# Test that if the user provides a db path that ends with
# a slash '/', there is no double (or more!) slashes in the
# SST and manifest file names.
# Add a '/' at the end of dbPath (which normally shouldnt contain any)
if dbPath[-1] != "/":
dbPath += "/"
# Call the dump_live_files function with the edited dbPath name.
self.assertTrue(
self.dumpLiveFiles(
"--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath,
dumpFilePath,
)
)
# Investigate the output
with open(dumpFilePath, "r") as tmp:
data = tmp.read()
# Check that all the SST filenames have a correct full path (no multiple '/').
sstFileList = re.findall(r"%s.*\d+.sst" % dbPath, data)
self.assertTrue(len(sstFileList) >= 1)
for sstFilename in sstFileList:
filenumber = re.findall(r"\d+.sst", sstFilename)[0]
self.assertEqual(sstFilename, dbPath + filenumber)
# Check that all the Blob filenames have a correct full path (no multiple '/').
blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data)
self.assertTrue(len(blobFileList) >= 1)
for blobFilename in blobFileList:
filenumber = re.findall(r"\d+.blob", blobFilename)[0]
self.assertEqual(blobFilename, dbPath + filenumber)
# Check that all the manifest filenames
# have a correct full path (no multiple '/').
manifestFileList = re.findall(r"%s.*MANIFEST-\d+" % dbPath, data)
self.assertTrue(len(manifestFileList) >= 1)
for manifestFilename in manifestFileList:
filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0]
self.assertEqual(manifestFilename, dbPath + "MANIFEST-" + filenumber)
# Check that the blob file index is decoded.
decodedBlobIndex = re.findall(r"\[blob ref\]", data)
self.assertTrue(len(decodedBlobIndex) >= 1)
def listLiveFilesMetadata(self, params, dumpFile):
return 0 == run_err_null(
"./ldb list_live_files_metadata %s > %s" % (params, dumpFile)
)
def testListLiveFilesMetadata(self):
print("Running testListLiveFilesMetadata...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put x1 y1 --create_if_missing", "OK")
self.assertRunOK("put x2 y2", "OK")
# Compare the SST filename and the level of list_live_files_metadata
# with the data collected from dump_live_files.
dumpFilePath1 = os.path.join(self.TMP_DIR, "dump1")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath1))
dumpFilePath2 = os.path.join(self.TMP_DIR, "dump2")
self.assertTrue(
self.listLiveFilesMetadata(
"--sort_by_filename --db=%s" % dbPath, dumpFilePath2
)
)
# Collect SST filename and level from dump_live_files
with open(dumpFilePath1, "r") as tmp:
data = tmp.read()
filename1 = re.findall(r".*\d+\.sst", data)[0]
level1 = re.findall(r"level:\d+", data)[0].split(":")[1]
# Collect SST filename and level from list_live_files_metadata
with open(dumpFilePath2, "r") as tmp:
data = tmp.read()
filename2 = re.findall(r".*\d+\.sst", data)[0]
level2 = re.findall(r"level \d+", data)[0].split(" ")[1]
# Assert equality between filenames and levels.
self.assertEqual(filename1, filename2)
self.assertEqual(level1, level2)
# Create multiple column families and compare the output
# of list_live_files_metadata with dump_live_files once again.
# Create new CF, and insert data:
self.assertRunOK("create_column_family mycol1", "OK")
self.assertRunOK("put --column_family=mycol1 v1 v2", "OK")
self.assertRunOK("create_column_family mycol2", "OK")
self.assertRunOK("put --column_family=mycol2 h1 h2", "OK")
self.assertRunOK("put --column_family=mycol2 h3 h4", "OK")
# Call dump_live_files and list_live_files_metadata
# and pipe the output to compare them later.
dumpFilePath3 = os.path.join(self.TMP_DIR, "dump3")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath3))
dumpFilePath4 = os.path.join(self.TMP_DIR, "dump4")
self.assertTrue(
self.listLiveFilesMetadata(
"--sort_by_filename --db=%s" % dbPath, dumpFilePath4
)
)
# dump_live_files:
# parse the output and create a map:
# [key: sstFilename]->[value:[LSM level, Column Family Name]]
referenceMap = {}
with open(dumpFilePath3, "r") as tmp:
data = tmp.read()
# Note: the following regex are contingent on what the
# dump_live_files outputs.
namesAndLevels = re.findall(r"\d+.sst level:\d+", data)
cfs = re.findall(r"(?<=column family name=)\w+", data)
# re.findall should not reorder the data.
# Therefore namesAndLevels[i] matches the data from cfs[i].
for count, nameAndLevel in enumerate(namesAndLevels):
sstFilename = re.findall(r"\d+.sst", nameAndLevel)[0]
sstLevel = re.findall(r"(?<=level:)\d+", nameAndLevel)[0]
cf = cfs[count]
referenceMap[sstFilename] = [sstLevel, cf]
# list_live_files_metadata:
# parse the output and create a map:
# [key: sstFilename]->[value:[LSM level, Column Family Name]]
testMap = {}
with open(dumpFilePath4, "r") as tmp:
data = tmp.read()
# Since for each SST file, all the information is contained
# on one line, the parsing is easy to perform and relies on
# the appearance of an "00xxx.sst" pattern.
sstLines = re.findall(r".*\d+.sst.*", data)
for line in sstLines:
sstFilename = re.findall(r"\d+.sst", line)[0]
sstLevel = re.findall(r"(?<=level )\d+", line)[0]
cf = re.findall(r"(?<=column family \')\w+(?=\')", line)[0]
testMap[sstFilename] = [sstLevel, cf]
# Compare the map obtained from dump_live_files and the map
# obtained from list_live_files_metadata. Everything should match.
self.assertEqual(referenceMap, testMap)
def getManifests(self, directory):
return glob.glob(directory + "/MANIFEST-*")
def getSSTFiles(self, directory):
return glob.glob(directory + "/*.sst")
def getWALFiles(self, directory):
return glob.glob(directory + "/*.log")
def getBlobFiles(self, directory):
return glob.glob(directory + "/*.blob")
def copyManifests(self, src, dest):
return 0 == run_err_null("cp " + src + " " + dest)
def testManifestDump(self):
print("Running testManifestDump...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put 1 1 --create_if_missing", "OK")
self.assertRunOK("put 2 2", "OK")
self.assertRunOK("put 3 3", "OK")
# Pattern to expect from manifest_dump.
num = "[0-9]+"
st = ".*"
subpat = st + " seq:" + num + ", type:" + num
regex = num + ":" + num + "\[" + subpat + ".." + subpat + "\]"
expected_pattern = re.compile(regex)
cmd = "manifest_dump --db=%s"
manifest_files = self.getManifests(dbPath)
self.assertTrue(len(manifest_files) == 1)
# Test with the default manifest file in dbPath.
self.assertRunOKFull(
cmd % dbPath, expected_pattern, unexpected=False, isPattern=True
)
self.copyManifests(manifest_files[0], manifest_files[0] + "1")
manifest_files = self.getManifests(dbPath)
self.assertTrue(len(manifest_files) == 2)
# Test with multiple manifest files in dbPath.
self.assertRunFAILFull(cmd % dbPath)
# Running it with the copy we just created should pass.
self.assertRunOKFull(
(cmd + " --path=%s") % (dbPath, manifest_files[1]),
expected_pattern,
unexpected=False,
isPattern=True,
)
# Make sure that using the dump with --path will result in identical
# output as just using manifest_dump.
cmd = "dump --path=%s"
self.assertRunOKFull(
(cmd) % (manifest_files[1]),
expected_pattern,
unexpected=False,
isPattern=True,
)
# Check if null characters doesn't infer with output format.
self.assertRunOK("put a1 b1", "OK")
self.assertRunOK("put a2 b2", "OK")
self.assertRunOK("put --hex 0x12000DA0 0x80C0000B", "OK")
self.assertRunOK("put --hex 0x7200004f 0x80000004", "OK")
self.assertRunOK("put --hex 0xa000000a 0xf000000f", "OK")
self.assertRunOK("put a3 b3", "OK")
self.assertRunOK("put a4 b4", "OK")
# Verifies that all "levels" are printed out.
# There should be 66 mentions of levels.
expected_verbose_output = re.compile("matched")
# Test manifest_dump verbose and verify that key 0x7200004f
# is present. Note that we are forced to use grep here because
# an output with a non-terminating null character in it isn't piped
# correctly through the Python subprocess object.
# Also note that 0x72=r and 0x4f=O, hence the regex \'r.{2}O\'
# (we cannot use null character in the subprocess input either,
# so we have to use '.{2}')
cmd_verbose = (
"manifest_dump --verbose --db=%s | grep -aq $''r.{2}O'' && echo 'matched' || echo 'not matched'"
% dbPath
)
self.assertRunOKFull(
cmd_verbose, expected_verbose_output, unexpected=False, isPattern=True
)
def testGetProperty(self):
print("Running testGetProperty...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put 1 1 --create_if_missing", "OK")
self.assertRunOK("put 2 2", "OK")
# A "string" property
cmd = "--db=%s get_property rocksdb.estimate-num-keys"
self.assertRunOKFull(cmd % dbPath, "rocksdb.estimate-num-keys: 2")
# A "map" property
# FIXME: why doesn't this pick up two entries?
cmd = "--db=%s get_property rocksdb.aggregated-table-properties"
part = "rocksdb.aggregated-table-properties.num_entries: "
expected_pattern = re.compile(part)
self.assertRunOKFull(
cmd % dbPath, expected_pattern, unexpected=False, isPattern=True
)
# An invalid property
cmd = "--db=%s get_property rocksdb.this-property-does-not-exist"
self.assertRunFAILFull(cmd % dbPath)
def testSSTDump(self):
print("Running testSSTDump...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK")
self.assertRunOK("put sst2 sst2_val --enable_blob_files", "OK")
self.assertRunOK("get sst1", "sst1_val")
# Pattern to expect from SST dump.
regex = ".*Sst file format:.*\n.*\[blob ref\].*"
expected_pattern = re.compile(regex)
sst_files = self.getSSTFiles(dbPath)
self.assertTrue(len(sst_files) >= 1)
cmd = "dump --path=%s --decode_blob_index"
self.assertRunOKFull(
(cmd) % (sst_files[0]), expected_pattern, unexpected=False, isPattern=True
)
def testBlobDump(self):
print("Running testBlobDump")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK")
self.assertRunOK(
'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK"
)
# Pattern to expect from blob file dump.
regex = ".*Blob log header[\s\S]*Blob log footer[\s\S]*Read record[\s\S]*Summary" # noqa
expected_pattern = re.compile(regex)
blob_files = self.getBlobFiles(dbPath)
self.assertTrue(len(blob_files) >= 1)
cmd = "dump --path=%s --dump_uncompressed_blobs"
self.assertRunOKFull(
(cmd) % (blob_files[0]), expected_pattern, unexpected=False, isPattern=True
)
def testWALDump(self):
print("Running testWALDump...")
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put wal1 wal1_val --create_if_missing", "OK")
self.assertRunOK("put wal2 wal2_val", "OK")
self.assertRunOK("get wal1", "wal1_val")
# Pattern to expect from WAL dump.
regex = "^Sequence,Count,ByteSize,Physical Offset,Key\(s\).*"
expected_pattern = re.compile(regex)
wal_files = self.getWALFiles(dbPath)
self.assertTrue(len(wal_files) >= 1)
cmd = "dump --path=%s"
self.assertRunOKFull(
(cmd) % (wal_files[0]), expected_pattern, unexpected=False, isPattern=True
)
def testListColumnFamilies(self):
print("Running testListColumnFamilies...")
self.assertRunOK("put x1 y1 --create_if_missing", "OK")
cmd = 'list_column_families | grep -v "Column families"'
# Test on valid dbPath.
self.assertRunOK(cmd, "{default}")
# Test on empty path.
self.assertRunFAIL(cmd)
def testColumnFamilies(self):
print("Running testColumnFamilies...")
_ = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put cf1_1 1 --create_if_missing", "OK")
self.assertRunOK("put cf1_2 2 --create_if_missing", "OK")
self.assertRunOK("put cf1_3 3 --try_load_options", "OK")
# Given non-default column family to single CF DB.
self.assertRunFAIL("get cf1_1 --column_family=two")
self.assertRunOK("create_column_family two", "OK")
self.assertRunOK("put cf2_1 1 --create_if_missing --column_family=two", "OK")
self.assertRunOK("put cf2_2 2 --create_if_missing --column_family=two", "OK")
self.assertRunOK("delete cf1_2", "OK")
self.assertRunOK("create_column_family three", "OK")
self.assertRunOK("delete cf2_2 --column_family=two", "OK")
self.assertRunOK("put cf3_1 3 --create_if_missing --column_family=three", "OK")
self.assertRunOK("get cf1_1 --column_family=default", "1")
self.assertRunOK("dump --column_family=two", "cf2_1 ==> 1\nKeys in range: 1")
self.assertRunOK(
"dump --column_family=two --try_load_options",
"cf2_1 ==> 1\nKeys in range: 1",
)
self.assertRunOK("dump", "cf1_1 ==> 1\ncf1_3 ==> 3\nKeys in range: 2")
self.assertRunOK("get cf2_1 --column_family=two", "1")
self.assertRunOK("get cf3_1 --column_family=three", "3")
self.assertRunOK("drop_column_family three", "OK")
# non-existing column family.
self.assertRunFAIL("get cf3_1 --column_family=four")
self.assertRunFAIL("drop_column_family four")
def testIngestExternalSst(self):
print("Running testIngestExternalSst...")
# Dump, load, write external sst and ingest it in another db
dbPath = os.path.join(self.TMP_DIR, "db1")
self.assertRunOK(
"batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" % dbPath,
"OK",
)
self.assertRunOK(
"scan --db=%s" % dbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4"
)
dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
with open(dumpFilePath, "w") as f:
f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40")
externSstPath = os.path.join(self.TMP_DIR, "extern_data1.sst")
self.assertTrue(
self.writeExternSst(
"--create_if_missing --db=%s" % dbPath, dumpFilePath, externSstPath
)
)
# cannot ingest if allow_global_seqno is false
self.assertFalse(
self.ingestExternSst(
"--create_if_missing --allow_global_seqno=false --db=%s" % dbPath,
externSstPath,
)
)
self.assertTrue(
self.ingestExternSst(
"--create_if_missing --allow_global_seqno --db=%s" % dbPath,
externSstPath,
)
)
self.assertRunOKFull(
"scan --db=%s" % dbPath, "x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40"
)
if __name__ == "__main__":
unittest.main()