From e637470f64d0e6844c8ac0a748948e427aef4a0b Mon Sep 17 00:00:00 2001 From: Alan Paxton Date: Mon, 25 Jul 2022 14:44:10 -0700 Subject: [PATCH] Run new benchmark script in branch. (#10303) Summary: Configure CI to run modernised benchmark script Pull Request resolved: https://github.com/facebook/rocksdb/pull/10303 Reviewed By: ramvadiv Differential Revision: D37719116 Pulled By: jay-zhuang fbshipit-source-id: 79ecb1cd0abd4d800c6906ba6673268c2adee10e --- .circleci/config.yml | 17 +-- build_tools/benchmark_log_tool.py | 4 + tools/benchmark_ci.py | 165 ++++++++++++++++++++++++++++++ tools/benchmark_compare.sh | 8 +- 4 files changed, 186 insertions(+), 8 deletions(-) create mode 100755 tools/benchmark_ci.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 8c2c7e9316..d9bf25fdf0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -190,14 +190,17 @@ commands: perform-benchmarks: steps: - run: - name: "Run basic benchmark, 5 min" - command: ./tools/benchmark.sh fillseq_enable_wal + name: "Test low-variance benchmarks" + command: ./tools/benchmark_ci.py --db_dir /tmp/rocksdb-benchmark-datadir --output_dir /tmp/benchmark-results --num_keys 10000000 environment: LD_LIBRARY_PATH: /usr/local/lib - DB_DIR: /tmp/rocksdb-benchmark-datadir - WAL_DIR: /tmp/rocksdb-benchmark-waldir - OUTPUT_DIR: /tmp/benchmark-results - NUM_KEYS: 10000000 + # How long to run parts of the test(s) + DURATION_RO: 450 + DURATION_RW: 450 + # The benchmark host has 32GB memory + # The following values are tailored to work with that + # Note, tests may not exercise the targeted issues if the memory is increased on new test hosts. 
+ post-benchmarks: steps: @@ -209,7 +212,7 @@ commands: command: | set +e set +o pipefail - ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3/_doc + ./build_tools/benchmark_log_tool.py --tsvfile /tmp/benchmark-results/report.tsv --esdocument https://search-rocksdb-bench-k2izhptfeap2hjfxteolsgsynm.us-west-2.es.amazonaws.com/bench_test3_rix/_doc true executors: diff --git a/build_tools/benchmark_log_tool.py b/build_tools/benchmark_log_tool.py index 572888eefb..2d5f962e1d 100755 --- a/build_tools/benchmark_log_tool.py +++ b/build_tools/benchmark_log_tool.py @@ -62,7 +62,11 @@ class BenchmarkUtils: def conform_opensearch(row): (dt, _) = parser.parse(row['date'], fuzzy_with_tokens=True) + # create a test_date field, which was previously what was expected + # repair the date field, which has what can be a WRONG ISO FORMAT, (no leading 0 on single-digit day-of-month) + # e.g. 2022-07-1T00:14:55 should be 2022-07-01T00:14:55 row['test_date'] = dt.isoformat() + row['date'] = dt.isoformat() return dict((key.replace('.', '_'), value) for (key, value) in row.items()) diff --git a/tools/benchmark_ci.py b/tools/benchmark_ci.py new file mode 100755 index 0000000000..efe18963ae --- /dev/null +++ b/tools/benchmark_ci.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +# Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +# This source code is licensed under both the GPLv2 (found in the +# COPYING file in the root directory) and Apache 2.0 License +# (found in the LICENSE.Apache file in the root directory). 
+ +'''Run benchmark_compare.sh on the most recent build, for CI +''' + +import argparse +import glob +import os +import re +import shutil +import subprocess +import sys +import logging + +logging.basicConfig(level=logging.INFO) + + +class Config: + def __init__(self, args): + self.version_file = './include/rocksdb/version.h' + self.data_dir = os.path.expanduser(f"{args.db_dir}") + self.results_dir = os.path.expanduser(f"{args.output_dir}") + self.benchmark_script = f"{os.getcwd()}/tools/benchmark_compare.sh" + self.benchmark_cwd = f"{os.getcwd()}/tools" + + benchmark_env_keys = ['LD_LIBRARY_PATH', + 'NUM_KEYS', + 'KEY_SIZE', + 'VALUE_SIZE', + 'CACHE_SIZE_MB', + 'DURATION_RW', + 'DURATION_RO', + 'MB_WRITE_PER_SEC', + 'NUM_THREADS', + 'COMPRESSION_TYPE', + 'MIN_LEVEL_TO_COMPRESS', + 'WRITE_BUFFER_SIZE_MB', + 'TARGET_FILE_SIZE_BASE_MB', + 'MAX_BYTES_FOR_LEVEL_BASE_MB', + 'MAX_BACKGROUND_JOBS', + 'CACHE_INDEX_AND_FILTER_BLOCKS', + 'USE_O_DIRECT', + 'STATS_INTERVAL_SECONDS', + 'SUBCOMPACTIONS', + 'COMPACTION_STYLE'] + + +def read_version(config): + majorRegex = re.compile(r'#define ROCKSDB_MAJOR\s([0-9]+)') + minorRegex = re.compile(r'#define ROCKSDB_MINOR\s([0-9]+)') + patchRegex = re.compile(r'#define ROCKSDB_PATCH\s([0-9]+)') + with open(config.version_file, 'r') as reader: + major = None + minor = None + patch = None + for line in reader: + if major is None: + major = majorRegex.match(line) + elif minor is None: + minor = minorRegex.match(line) + elif patch is None: + patch = patchRegex.match(line) + + if patch is not None: + break + + if patch is not None: + return (major.group(1), minor.group(1), patch.group(1)) + + # Didn't complete a match + return None + + +def prepare(version_str, config): + old_files = glob.glob(f"{config.results_dir}/{version_str}/**", + recursive=True) + for f in old_files: + if os.path.isfile(f): + logging.debug(f"remove file {f}") + os.remove(f) + for f in old_files: + if os.path.isdir(f): + logging.debug(f"remove dir {f}") + os.rmdir(f) 
+ + db_bench_vers = f"{config.benchmark_cwd}/db_bench.{version_str}" + + # Create a symlink to the db_bench executable + os.symlink(f"{os.getcwd()}/db_bench", db_bench_vers) + + +def results(version_str, config): + # Copy the report TSV file back to the top level of results + shutil.copyfile(f"{config.results_dir}/{version_str}/report.tsv", + f"{config.results_dir}/report.tsv") + + +def cleanup(version_str, config): + # Remove the symlink to the db_bench executable + db_bench_vers = f"{config.benchmark_cwd}/db_bench.{version_str}" + os.remove(db_bench_vers) + + +def get_benchmark_env(): + env = [] + for key in Config.benchmark_env_keys: + value = os.getenv(key) + if value is not None: + env.append((key, value)) + return env + + +def main(): + '''Tool for running benchmark_compare.sh on the most recent build, for CI + This tool will + + (1) Work out the current version of RocksDB + (2) Run benchmark_compare with that version alone + ''' + + parser = argparse.ArgumentParser( + description='benchmark_compare.sh Python wrapper for CI.') + + # --db_dir is the root of the database directory hierarchy to use + # --output_dir is the directory that receives the benchmark output + # --num_keys is the number of database keys to use in the benchmark test(s) + parser.add_argument('--db_dir', default='~/tmp/rocksdb-benchmark-datadir', + help='Database directory hierarchy to use') + parser.add_argument('--output_dir', default='~/tmp/benchmark-results', + help='Benchmark output goes here') + parser.add_argument('--num_keys', default='10000', + help='Number of database keys to use in benchmark test(s) (determines size of test job)') + args = parser.parse_args() + config = Config(args) + + version = read_version(config) + if version is None: + raise Exception( + f"Could not read RocksDB version from {config.version_file}") + version_str = f"{version[0]}.{version[1]}.{version[2]}" + logging.info(f"Run benchmark_ci with RocksDB version {version_str}") + + prepare(version_str, config) + + try: + env = get_benchmark_env() + env.append(('NUM_KEYS', args.num_keys)) 
+ cmd = [config.benchmark_script, + config.data_dir, config.results_dir, version_str] + logging.info(f"Run {cmd} env={env} cwd={config.benchmark_cwd}") + subprocess.run(cmd, env=dict(env), cwd=config.benchmark_cwd) + + results(version_str, config) + finally: + cleanup(version_str, config) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tools/benchmark_compare.sh b/tools/benchmark_compare.sh index 327f6b4f65..9adea432ce 100755 --- a/tools/benchmark_compare.sh +++ b/tools/benchmark_compare.sh @@ -9,6 +9,9 @@ odir=$2 K=1024 M=$((1024 * K)) +# Dynamic loader configuration +ld_library_path=${LD_LIBRARY_PATH:-""} + # Benchmark configuration duration_rw=${DURATION_RW:-65} duration_ro=${DURATION_RO:-65} @@ -64,8 +67,11 @@ blob_compression_type=${BLOB_COMPRESSION_TYPE:-${compression_type}} blob_gc_age_cutoff=${BLOB_GC_AGE_CUTOFF:-"0.25"} blob_gc_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1} +# Arguments for dynamic loading +base_args=( LD_LIBRARY_PATH="$ld_library_path" ) + # Arguments used for all tests -base_args=( NUM_KEYS="$num_keys" ) +base_args+=( NUM_KEYS="$num_keys" ) base_args+=( NUM_THREADS="$num_threads" ) base_args+=( KEY_SIZE="$key_size" ) base_args+=( VALUE_SIZE="$value_size" )