From 7fbee01f0ca7440bfafba5dd7fc71388d48dad63 Mon Sep 17 00:00:00 2001
From: Alan Paxton <alan.paxton@gmail.com>
Date: Thu, 25 Aug 2022 09:47:03 -0700
Subject: [PATCH] CI benchmarks refine configuration (#10514)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
CI benchmarks refine configuration

Run only “essential” benchmarks, but for longer
Fix (reduce) the NUM_KEYS to ensure cached behaviour
Reduce level size to try to ensure more levels

Refine test durations again, more time per test, but fewer tests.
In CI benchmark mode, the only read test is readrandom.
There are still 3 mostly-read tests.

The goal is to fit a complete run just inside 1 hour so that it doesn’t clash with the next run (cron-scheduled for the main branch), while still letting it run as long as possible so that the results are as credible as possible.

Reduce thread count to physical capacity, in an attempt to reduce throughput variance for write-heavy tests. See Mark Callaghan’s comments in the related documentation.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10514

Reviewed By: ajkr

Differential Revision: D38952469

Pulled By: jay-zhuang

fbshipit-source-id: 72fa6bba897cc47066ced65facd1fd36e28f30a8
---
 .circleci/config.yml       | 17 +++++++++++++----
 tools/benchmark_ci.py      |  3 ++-
 tools/benchmark_compare.sh | 13 +++++++++++--
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 36db6a3960..4697568cc4 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -105,14 +105,23 @@ commands:
     steps:
       - run:
           name: "Test low-variance benchmarks"
-          command: ./tools/benchmark_ci.py --db_dir /tmp/rocksdb-benchmark-datadir --output_dir /tmp/benchmark-results --num_keys 10000000
+          command: ./tools/benchmark_ci.py --db_dir /tmp/rocksdb-benchmark-datadir --output_dir /tmp/benchmark-results --num_keys 50000
           environment:
             LD_LIBRARY_PATH: /usr/local/lib
             # How long to run parts of the test(s)
-            DURATION_RO: 450
-            DURATION_RW: 450
+            DURATION_RO: 400
+            DURATION_RW: 700
+            # Keep threads within physical capacity of server (much lower than default)
+            NUM_THREADS: 1
+            MAX_BACKGROUND_JOBS: 3
+            # Don't run a couple of "optional" initial tests
+            CI_TESTS_ONLY: "true"
+            # Reduce configured size of levels to ensure more levels in the leveled compaction LSM tree
+            WRITE_BUFFER_SIZE_MB: 16
+            TARGET_FILE_SIZE_BASE_MB: 16
+            MAX_BYTES_FOR_LEVEL_BASE_MB: 64
             # The benchmark host has 32GB memory
-            # The folllowing values are tailored to work with that
+            # The following values are tailored to work with that
             # Note, tests may not exercise the targeted issues if the memory is increased on new test hosts.
 
 
diff --git a/tools/benchmark_ci.py b/tools/benchmark_ci.py
index efe18963ae..0a82af55e3 100755
--- a/tools/benchmark_ci.py
+++ b/tools/benchmark_ci.py
@@ -46,7 +46,8 @@ class Config:
                           'USE_O_DIRECT',
                           'STATS_INTERVAL_SECONDS',
                           'SUBCOMPACTIONS',
-                          'COMPACTION_STYLE']
+                          'COMPACTION_STYLE',
+                          'CI_TESTS_ONLY']
 
 
 def read_version(config):
diff --git a/tools/benchmark_compare.sh b/tools/benchmark_compare.sh
index 9adea432ce..964b71cf4a 100755
--- a/tools/benchmark_compare.sh
+++ b/tools/benchmark_compare.sh
@@ -20,6 +20,7 @@ num_threads=${NUM_THREADS:-16}
 key_size=${KEY_SIZE:-20}
 value_size=${VALUE_SIZE:-400}
 mb_write_per_sec=${MB_WRITE_PER_SEC:-2}
+ci_tests_only=${CI_TESTS_ONLY:-"false"}
 
 # RocksDB configuration
 compression_type=${COMPRESSION_TYPE:-lz4}
@@ -157,6 +158,7 @@ function usage {
   echo -e "\tSTATS_INTERVAL_SECONDS\t\tvalue for stats_interval_seconds"
   echo -e "\tSUBCOMPACTIONS\t\t\tvalue for subcompactions"
   echo -e "\tCOMPACTION_STYLE\t\tCompaction style to use, one of: leveled, universal, blob"
+  echo -e "\tCI_TESTS_ONLY\t\tRun a subset of tests tailored to a CI regression job, one of: true, false (default)"
   echo ""
   echo -e "\tOptions specific to leveled compaction:"
   echo -e "\t\tLEVEL0_FILE_NUM_COMPACTION_TRIGGER\tvalue for level0_file_num_compaction_trigger"
@@ -257,8 +259,15 @@ for v in "$@" ; do
   # was used during the load.
 
   env -i "${args_nolim[@]}" DURATION="$duration_ro" bash ./benchmark.sh readrandom
-  env -i "${args_nolim[@]}" DURATION="$duration_ro" bash ./benchmark.sh fwdrange
-  env -i "${args_lim[@]}"   DURATION="$duration_ro" bash ./benchmark.sh multireadrandom
+
+  # Skipped for CI - a single essential readrandom is enough to set up for other tests
+  if [ "$ci_tests_only" != "true" ]; then
+    env -i "${args_nolim[@]}" DURATION="$duration_ro" bash ./benchmark.sh fwdrange
+    env -i "${args_lim[@]}"   DURATION="$duration_ro" bash ./benchmark.sh multireadrandom
+  else
+    echo "CI_TESTS_ONLY is set, skipping optional read steps."
+  fi
+
   # Skipping --multiread_batched for now because it isn't supported on older 6.X releases
   # env "${args_lim[@]}" DURATION=$duration_ro bash ./benchmark.sh multireadrandom --multiread_batched