Rewrite compare_bench.py argument parsing.

This patch cleans up a number of issues with how compare_bench.py handled
the command line arguments.

* Use the 'argparse' Python module instead of hand-rolled parsing. This gives
  better usage messages (see the example invocations after this list).

* Add diagnostics for certain --benchmark flags that cannot or should not
  be used with compare_bench.py (e.g. --benchmark_out_format=csv).

* Don't override the user-specified --benchmark_out flag if it's provided.
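
For illustration (the benchmark paths and file names below are placeholders,
not taken from the patch), the script still accepts two inputs followed by
ordinary --benchmark flags, which are forwarded to the executables:

    compare_bench.py ./bm_old ./bm_new --benchmark_filter=BM_memcpy
    compare_bench.py run1.json run2.json

With two JSON inputs any extra --benchmark flags now produce a WARNING (they
have no effect), and a --benchmark_out_format other than json is rejected with
an ERROR, since the comparison needs JSON output.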

In the future I would like the user to be able to capture both benchmark output
files, but this change is big enough for now.

This fixes issue #313.
Commit: 2373382284
Parent: 4f8bfeae47
Author: Eric Fiselier
Date:   2016-11-18 15:42:02 -07:00

2 changed files with 82 additions and 15 deletions


--- a/compare_bench.py
+++ b/compare_bench.py
@@ -3,25 +3,63 @@
 compare_bench.py - Compare two benchmarks or their results and report the
                    difference.
 """
+import argparse
+from argparse import ArgumentParser
 import sys
 import gbench
 from gbench import util, report
+from gbench.util import *
+
+
+def check_inputs(in1, in2, flags):
+    """
+    Perform checking on the user provided inputs and diagnose any abnormalities
+    """
+    in1_kind, in1_err = classify_input_file(in1)
+    in2_kind, in2_err = classify_input_file(in2)
+    output_file = find_benchmark_flag('--benchmark_out=', flags)
+    output_type = find_benchmark_flag('--benchmark_out_format=', flags)
+    if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+        print(("WARNING: '--benchmark_out=%s' will be passed to both "
+               "benchmarks causing it to be overwritten") % output_file)
+    if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
+        print("WARNING: passing --benchmark flags has no effect since both "
+              "inputs are JSON")
+    if output_type is not None and output_type != 'json':
+        print(("ERROR: passing '--benchmark_out_format=%s' to 'compare_bench.py`"
+               " is not supported.") % output_type)
+        sys.exit(1)
+
+
 def main():
+    parser = ArgumentParser(
+        description='compare the results of two benchmarks')
+    parser.add_argument(
+        'test1', metavar='test1', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    parser.add_argument(
+        'test2', metavar='test2', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    # FIXME this is a dummy argument which will never actually match
+    # any --benchmark flags but it helps generate a better usage message
+    parser.add_argument(
+        'benchmark_options', metavar='benchmark_option', nargs='*',
+        help='Arguments to pass when running benchmark executables'
+    )
+    args, unknown_args = parser.parse_known_args()
     # Parse the command line flags
-    def usage():
-        print('compare_bench.py <test1> <test2> [benchmark options]...')
-        exit(1)
-    if '--help' in sys.argv or len(sys.argv) < 3:
-        usage()
-    tests = sys.argv[1:3]
-    bench_opts = sys.argv[3:]
-    bench_opts = list(bench_opts)
+    test1 = args.test1[0]
+    test2 = args.test2[0]
+    if args.benchmark_options:
+        print("Unrecognized positional argument arguments: '%s'"
+              % args.benchmark_options)
+        exit(1)
+    benchmark_options = unknown_args
+    check_inputs(test1, test2, benchmark_options)
     # Run the benchmarks and report the results
-    json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
-    json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
+    json1 = gbench.util.run_or_load_benchmark(test1, benchmark_options)
+    json2 = gbench.util.run_or_load_benchmark(test2, benchmark_options)
     output_lines = gbench.report.generate_difference_report(json1, json2)
-    print 'Comparing %s to %s' % (tests[0], tests[1])
+    print 'Comparing %s to %s' % (test1, test2)
     for ln in output_lines:
         print(ln)
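
The heart of the new argument handling is parse_known_args(): the two
positional inputs are consumed by the parser, while any --benchmark flags it
does not recognize are returned separately and later forwarded to the
benchmark executables. A minimal sketch of that behaviour (the paths and the
filter flag are invented for illustration):

    from argparse import ArgumentParser

    parser = ArgumentParser(description='compare the results of two benchmarks')
    parser.add_argument('test1', nargs=1)
    parser.add_argument('test2', nargs=1)
    args, unknown = parser.parse_known_args(
        ['./bm_old', './bm_new', '--benchmark_filter=BM_memcpy'])
    print(args.test1[0], args.test2[0])  # ./bm_old ./bm_new
    print(unknown)                       # ['--benchmark_filter=BM_memcpy']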


--- a/gbench/util.py
+++ b/gbench/util.py
@@ -84,6 +84,26 @@ def check_input_file(filename):
         sys.exit(1)
     return ftype
 
+def find_benchmark_flag(prefix, benchmark_flags):
+    """
+    Search the specified list of flags for a flag matching `<prefix><arg>` and
+    if it is found return the arg it specifies. If specified more than once the
+    last value is returned. If the flag is not found None is returned.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    result = None
+    for f in benchmark_flags:
+        if f.startswith(prefix):
+            result = f[len(prefix):]
+    return result
+
+def remove_benchmark_flags(prefix, benchmark_flags):
+    """
+    Return a new list containing the specified benchmark_flags except those
+    with the specified prefix.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    return [f for f in benchmark_flags if not f.startswith(prefix)]
+
 def load_benchmark_results(fname):
     """
@@ -101,16 +121,25 @@ def run_benchmark(exe_name, benchmark_flags):
     real time console output.
     RETURNS: A JSON object representing the benchmark output
     """
-    thandle, tname = tempfile.mkstemp()
-    os.close(thandle)
+    output_name = find_benchmark_flag('--benchmark_out=',
+                                      benchmark_flags)
+    is_temp_output = False
+    if output_name is None:
+        is_temp_output = True
+        thandle, output_name = tempfile.mkstemp()
+        os.close(thandle)
+        benchmark_flags = list(benchmark_flags) + \
+                          ['--benchmark_out=%s' % output_name]
+
     cmd = [exe_name] + benchmark_flags
     print("RUNNING: %s" % ' '.join(cmd))
-    exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
+    exitCode = subprocess.call(cmd)
     if exitCode != 0:
         print('TEST FAILED...')
         sys.exit(exitCode)
-    json_res = load_benchmark_results(tname)
-    os.unlink(tname)
+    json_res = load_benchmark_results(output_name)
+    if is_temp_output:
+        os.unlink(output_name)
     return json_res
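
With this change run_benchmark() respects a user-supplied output file and only
deletes files it created itself. A usage sketch (the executable path and file
name are placeholders, not from the patch):

    from gbench.util import run_benchmark

    # --benchmark_out supplied by the user: results are written to run1.json
    # and the file is left on disk after it has been parsed.
    json1 = run_benchmark('./bm_memcpy', ['--benchmark_out=run1.json'])

    # No --benchmark_out: a temporary file is created, parsed and unlinked.
    json2 = run_benchmark('./bm_memcpy', ['--benchmark_filter=BM_memcpy'])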