rocksdb/buckifier/buckify_rocksdb.py
Peter Dillinger 1aac814578 Use optimized folly DistributedMutex in LRUCache when available (#10179)
Summary:
folly DistributedMutex is faster than standard mutexes though
imposes some static obligations on usage. See
https://github.com/facebook/folly/blob/main/folly/synchronization/DistributedMutex.h
for details. Here we use this alternative for our Cache implementations
(especially LRUCache) for better locking performance, when RocksDB is
compiled with folly.

Also added information about which distributed mutex implementation is
being used to cache_bench output and to DB LOG.

Intended follow-up:
* Use DMutex in more places, perhaps improving API to support non-scoped
locking
* Fix linking with fbcode compiler (needs ROCKSDB_NO_FBCODE=1 currently)

Credit: Thanks Siying for reminding me about this line of work that was previously
left unfinished.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10179

Test Plan:
for correctness, existing tests. CircleCI config updated.
Also Meta-internal buck build updated.

For performance, ran simultaneous before & after cache_bench. Out of three
comparison runs, the middle improvement to ops/sec was +21%:

Baseline: USE_CLANG=1 DEBUG_LEVEL=0 make -j24 cache_bench (fbcode
compiler)

```
Complete in 20.201 s; Rough parallel ops/sec = 1584062
Thread ops/sec = 107176

Operation latency (ns):
Count: 32000000 Average: 9257.9421  StdDev: 122412.04
Min: 134  Median: 3623.0493  Max: 56918500
Percentiles: P50: 3623.05 P75: 10288.02 P99: 30219.35 P99.9: 683522.04 P99.99: 7302791.63
```

New: (add USE_FOLLY=1)

```
Complete in 16.674 s; Rough parallel ops/sec = 1919135  (+21%)
Thread ops/sec = 135487

Operation latency (ns):
Count: 32000000 Average: 7304.9294  StdDev: 108530.28
Min: 132  Median: 3777.6012  Max: 91030902
Percentiles: P50: 3777.60 P75: 10169.89 P99: 24504.51 P99.9: 59721.59 P99.99: 1861151.83
```

Reviewed By: anand1976

Differential Revision: D37182983

Pulled By: pdillinger

fbshipit-source-id: a17eb05f25b832b6a2c1356f5c657e831a5af8d1
2022-06-17 13:08:45 -07:00

323 lines
12 KiB
Python
Executable file

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
try:
from builtins import str
except ImportError:
from __builtin__ import str
from targets_builder import TARGETSBuilder
import json
import os
import fnmatch
import sys
from util import ColorString
# This script generates TARGETS file for Buck.
# Buck is a build tool specifying dependencies among different build targets.
# User can pass extra dependencies as a JSON object via command line, and this
# script can include these dependencies in the generate TARGETS file.
# Usage:
# $python3 buckifier/buckify_rocksdb.py
# (This generates a TARGET file without user-specified dependency for unit
# tests.)
# $python3 buckifier/buckify_rocksdb.py \
# '{"fake": {
# "extra_deps": [":test_dep", "//fakes/module:mock1"],
# "extra_compiler_flags": ["-DROCKSDB_LITE", "-Os"]
# }
# }'
# (Generated TARGETS file has test_dep and mock1 as dependencies for RocksDB
# unit tests, and will use the extra_compiler_flags to compile the unit test
# source.)
# tests to export as libraries for inclusion in other projects
_EXPORTED_TEST_LIBS = ["env_basic_test"]
# Parse src.mk files as a Dictionary of
# VAR_NAME => list of files
def parse_src_mk(repo_path):
src_mk = repo_path + "/src.mk"
src_files = {}
for line in open(src_mk):
line = line.strip()
if len(line) == 0 or line[0] == '#':
continue
if '=' in line:
current_src = line.split('=')[0].strip()
src_files[current_src] = []
elif '.c' in line:
src_path = line.split('\\')[0].strip()
src_files[current_src].append(src_path)
return src_files
# get all .cc / .c files
def get_cc_files(repo_path):
cc_files = []
for root, dirnames, filenames in os.walk(repo_path): # noqa: B007 T25377293 Grandfathered in
root = root[(len(repo_path) + 1):]
if "java" in root:
# Skip java
continue
for filename in fnmatch.filter(filenames, '*.cc'):
cc_files.append(os.path.join(root, filename))
for filename in fnmatch.filter(filenames, '*.c'):
cc_files.append(os.path.join(root, filename))
return cc_files
# Get non_parallel tests from Makefile
def get_non_parallel_tests(repo_path):
Makefile = repo_path + "/Makefile"
s = set({})
found_non_parallel_tests = False
for line in open(Makefile):
line = line.strip()
if line.startswith("NON_PARALLEL_TEST ="):
found_non_parallel_tests = True
elif found_non_parallel_tests:
if line.endswith("\\"):
# remove the trailing \
line = line[:-1]
line = line.strip()
s.add(line)
else:
# we consumed all the non_parallel tests
break
return s
# Parse extra dependencies passed by user from command line
def get_dependencies():
deps_map = {
'': {
'extra_deps': [],
'extra_compiler_flags': []
}
}
if len(sys.argv) < 2:
return deps_map
def encode_dict(data):
rv = {}
for k, v in data.items():
if isinstance(v, dict):
v = encode_dict(v)
rv[k] = v
return rv
extra_deps = json.loads(sys.argv[1], object_hook=encode_dict)
for target_alias, deps in extra_deps.items():
deps_map[target_alias] = deps
return deps_map
# Prepare TARGETS file for buck
def generate_targets(repo_path, deps_map):
print(ColorString.info("Generating TARGETS"))
# parsed src.mk file
src_mk = parse_src_mk(repo_path)
# get all .cc files
cc_files = get_cc_files(repo_path)
# get non_parallel tests from Makefile
non_parallel_tests = get_non_parallel_tests(repo_path)
if src_mk is None or cc_files is None or non_parallel_tests is None:
return False
extra_argv = ""
if len(sys.argv) >= 2:
# Heuristically quote and canonicalize whitespace for inclusion
# in how the file was generated.
extra_argv = " '{0}'".format(" ".join(sys.argv[1].split()))
TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path, extra_argv)
# rocksdb_lib
TARGETS.add_library(
"rocksdb_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=[
"//folly/container:f14_hash",
"//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task",
"//folly/synchronization:distributed_mutex"])
# rocksdb_whole_archive_lib
TARGETS.add_library(
"rocksdb_whole_archive_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=[
"//folly/container:f14_hash",
"//folly/experimental/coro:blocking_wait",
"//folly/experimental/coro:collect",
"//folly/experimental/coro:coroutine",
"//folly/experimental/coro:task",
"//folly/synchronization:distributed_mutex"],
headers=None,
extra_external_deps="",
link_whole=True)
# rocksdb_test_lib
TARGETS.add_library(
"rocksdb_test_lib",
src_mk.get("MOCK_LIB_SOURCES", []) +
src_mk.get("TEST_LIB_SOURCES", []) +
src_mk.get("EXP_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []),
[":rocksdb_lib"],
extra_test_libs=True
)
# rocksdb_tools_lib
TARGETS.add_library(
"rocksdb_tools_lib",
src_mk.get("BENCH_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []) +
["test_util/testutil.cc"],
[":rocksdb_lib"])
# rocksdb_cache_bench_tools_lib
TARGETS.add_library(
"rocksdb_cache_bench_tools_lib",
src_mk.get("CACHE_BENCH_LIB_SOURCES", []),
[":rocksdb_lib"])
# rocksdb_stress_lib
TARGETS.add_rocksdb_library(
"rocksdb_stress_lib",
src_mk.get("ANALYZER_LIB_SOURCES", [])
+ src_mk.get('STRESS_LIB_SOURCES', [])
+ ["test_util/testutil.cc"])
# db_stress binary
TARGETS.add_binary("db_stress",
["db_stress_tool/db_stress.cc"],
[":rocksdb_stress_lib"])
# bench binaries
for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0]
TARGETS.add_binary(
name,
[src],
[],
extra_bench_libs=True
)
print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
# Dictionary test executable name -> relative source file path
test_source_map = {}
# c_test.c is added through TARGETS.add_c_test(). If there
# are more than one .c test file, we need to extend
# TARGETS.add_c_test() to include other C tests too.
for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []):
if test_src != 'db/c_test.c':
print("Don't know how to deal with " + test_src)
return False
TARGETS.add_c_test()
try:
with open(f"{repo_path}/buckifier/bench.json") as json_file:
fast_fancy_bench_config_list = json.load(json_file)
for config_dict in fast_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
TARGETS.add_fancy_bench_config(config_dict['name'], clean_benchmarks, False, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
with open(f"{repo_path}/buckifier/bench-slow.json") as json_file:
slow_fancy_bench_config_list = json.load(json_file)
for config_dict in slow_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
for config_dict in slow_fancy_bench_config_list:
TARGETS.add_fancy_bench_config(config_dict['name']+"_slow", clean_benchmarks, True, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
# it is better servicelab experiments break
# than rocksdb github ci
except Exception:
pass
TARGETS.add_test_header()
for test_src in src_mk.get("TEST_MAIN_SOURCES", []):
test = test_src.split('.c')[0].strip().split('/')[-1].strip()
test_source_map[test] = test_src
print("" + test + " " + test_src)
for target_alias, deps in deps_map.items():
for test, test_src in sorted(test_source_map.items()):
if len(test) == 0:
print(ColorString.warning("Failed to get test name for %s" % test_src))
continue
test_target_name = \
test if not target_alias else test + "_" + target_alias
if test in _EXPORTED_TEST_LIBS:
test_library = "%s_lib" % test_target_name
TARGETS.add_library(test_library, [test_src], deps=[":rocksdb_test_lib"], extra_test_libs=True)
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [':'+test_library]),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
else:
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [":rocksdb_test_lib"] ),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
print(ColorString.info("Generated TARGETS Summary:"))
print(ColorString.info("- %d libs" % TARGETS.total_lib))
print(ColorString.info("- %d binarys" % TARGETS.total_bin))
print(ColorString.info("- %d tests" % TARGETS.total_test))
return True
def get_rocksdb_path():
# rocksdb = {script_dir}/..
script_dir = os.path.dirname(sys.argv[0])
script_dir = os.path.abspath(script_dir)
rocksdb_path = os.path.abspath(
os.path.join(script_dir, "../"))
return rocksdb_path
def exit_with_error(msg):
print(ColorString.error(msg))
sys.exit(1)
def main():
deps_map = get_dependencies()
# Generate TARGETS file for buck
ok = generate_targets(get_rocksdb_path(), deps_map)
if not ok:
exit_with_error("Failed to generate TARGETS files")
if __name__ == "__main__":
main()