From 2373382284918fda13f726aefd6e2f700784797f Mon Sep 17 00:00:00 2001
From: Eric Fiselier
Date: Fri, 18 Nov 2016 15:42:02 -0700
Subject: [PATCH] Rewrite compare_bench.py argument parsing.

This patch cleans up a number of issues with how compare_bench.py
handled the command line arguments.

* Use the 'argparse' Python module instead of hand-rolled parsing.
  This gives better usage messages.

* Add diagnostics for certain --benchmark flags that cannot or should not
  be used with compare_bench.py (e.g. --benchmark_out_format=csv).

* Don't override the user-specified --benchmark_out flag if it's provided.
  In the future I would like the user to be able to capture both benchmark
  output files, but this change is big enough for now.

This fixes issue #313.
---
 tools/compare_bench.py | 58 ++++++++++++++++++++++++++++++++++--------
 tools/gbench/util.py   | 39 ++++++++++++++++++++++++----
 2 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/tools/compare_bench.py b/tools/compare_bench.py
index ed0f133e..8a7e7991 100755
--- a/tools/compare_bench.py
+++ b/tools/compare_bench.py
@@ -3,25 +3,63 @@
 compare_bench.py - Compare two benchmarks or their results and report the
                    difference.
 """
+import argparse
+from argparse import ArgumentParser
 import sys
 import gbench
 from gbench import util, report
+from gbench.util import *
+
+def check_inputs(in1, in2, flags):
+    """
+    Perform checking on the user provided inputs and diagnose any abnormalities
+    """
+    in1_kind, in1_err = classify_input_file(in1)
+    in2_kind, in2_err = classify_input_file(in2)
+    output_file = find_benchmark_flag('--benchmark_out=', flags)
+    output_type = find_benchmark_flag('--benchmark_out_format=', flags)
+    if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+        print(("WARNING: '--benchmark_out=%s' will be passed to both "
+               "benchmarks causing it to be overwritten") % output_file)
+    if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
+        print("WARNING: passing --benchmark flags has no effect since both "
+              "inputs are JSON")
+    if output_type is not None and output_type != 'json':
+        print(("ERROR: passing '--benchmark_out_format=%s' to 'compare_bench.py'"
+               " is not supported.") % output_type)
+        sys.exit(1)
+
 
 
 def main():
+    parser = ArgumentParser(
+        description='compare the results of two benchmarks')
+    parser.add_argument(
+        'test1', metavar='test1', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    parser.add_argument(
+        'test2', metavar='test2', type=str, nargs=1,
+        help='A benchmark executable or JSON output file')
+    # FIXME this is a dummy argument which will never actually match
+    # any --benchmark flags but it helps generate a better usage message
+    parser.add_argument(
+        'benchmark_options', metavar='benchmark_option', nargs='*',
+        help='Arguments to pass when running benchmark executables'
+        )
+    args, unknown_args = parser.parse_known_args()
     # Parse the command line flags
-    def usage():
-        print('compare_bench.py <test1> <test2> [benchmark options]...')
+    test1 = args.test1[0]
+    test2 = args.test2[0]
+    if args.benchmark_options:
+        print("Unrecognized positional arguments: '%s'"
+              % args.benchmark_options)
         exit(1)
-    if '--help' in sys.argv or len(sys.argv) < 3:
-        usage()
-    tests = sys.argv[1:3]
-    bench_opts = sys.argv[3:]
-    bench_opts = list(bench_opts)
+    benchmark_options = unknown_args
+    check_inputs(test1, test2, benchmark_options)
     # Run the benchmarks and report the results
-    json1 = gbench.util.run_or_load_benchmark(tests[0], bench_opts)
-    json2 = gbench.util.run_or_load_benchmark(tests[1], bench_opts)
+    json1 = gbench.util.run_or_load_benchmark(test1, benchmark_options)
+    json2 = gbench.util.run_or_load_benchmark(test2, benchmark_options)
     output_lines = gbench.report.generate_difference_report(json1, json2)
-    print 'Comparing %s to %s' % (tests[0], tests[1])
+    print 'Comparing %s to %s' % (test1, test2)
     for ln in output_lines:
         print(ln)
diff --git a/tools/gbench/util.py b/tools/gbench/util.py
index 169b71c2..67b1e4c5 100644
--- a/tools/gbench/util.py
+++ b/tools/gbench/util.py
@@ -84,6 +84,26 @@ def check_input_file(filename):
         sys.exit(1)
     return ftype
 
+def find_benchmark_flag(prefix, benchmark_flags):
+    """
+    Search the specified list of flags for a flag matching `<prefix><arg>` and
+    if it is found return the arg it specifies. If specified more than once the
+    last value is returned. If the flag is not found None is returned.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    result = None
+    for f in benchmark_flags:
+        if f.startswith(prefix):
+            result = f[len(prefix):]
+    return result
+
+def remove_benchmark_flags(prefix, benchmark_flags):
+    """
+    Return a new list containing the specified benchmark_flags except those
+    with the specified prefix.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    return [f for f in benchmark_flags if not f.startswith(prefix)]
 
 def load_benchmark_results(fname):
     """
@@ -101,16 +121,25 @@ def run_benchmark(exe_name, benchmark_flags):
     real time console output.
     RETURNS: A JSON object representing the benchmark output
     """
-    thandle, tname = tempfile.mkstemp()
-    os.close(thandle)
+    output_name = find_benchmark_flag('--benchmark_out=',
+                                      benchmark_flags)
+    is_temp_output = False
+    if output_name is None:
+        is_temp_output = True
+        thandle, output_name = tempfile.mkstemp()
+        os.close(thandle)
+        benchmark_flags = list(benchmark_flags) + \
+            ['--benchmark_out=%s' % output_name]
+
     cmd = [exe_name] + benchmark_flags
     print("RUNNING: %s" % ' '.join(cmd))
-    exitCode = subprocess.call(cmd + ['--benchmark_out=%s' % tname])
+    exitCode = subprocess.call(cmd)
    if exitCode != 0:
         print('TEST FAILED...')
         sys.exit(exitCode)
-    json_res = load_benchmark_results(tname)
-    os.unlink(tname)
+    json_res = load_benchmark_results(output_name)
+    if is_temp_output:
+        os.unlink(output_name)
     return json_res
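
Note (illustration only, not part of the patch): the sketch below shows the parsing model the
patch adopts. ArgumentParser.parse_known_args() keeps the two positional inputs and hands every
unrecognized --benchmark_* flag back untouched, and a helper mirroring find_benchmark_flag()
returns the value of the last occurrence of a repeated flag. The command line and file names
used here are hypothetical.

# Standalone sketch of the argument-handling scheme used above.
from argparse import ArgumentParser


def find_benchmark_flag(prefix, benchmark_flags):
    # Return the value of the last flag starting with `prefix`, or None.
    assert prefix.startswith('--') and prefix.endswith('=')
    result = None
    for f in benchmark_flags:
        if f.startswith(prefix):
            result = f[len(prefix):]
    return result


parser = ArgumentParser(description='compare the results of two benchmarks')
parser.add_argument('test1', nargs=1)
parser.add_argument('test2', nargs=1)

# Two inputs followed by flags that compare_bench.py does not define itself.
argv = ['./bm_old', 'run.json',
        '--benchmark_filter=BM_memcpy',
        '--benchmark_out=a.json',
        '--benchmark_out=b.json']
args, unknown = parser.parse_known_args(argv)

print(args.test1[0], args.test2[0])                      # ./bm_old run.json
print(unknown)                                           # the three --benchmark_* flags
print(find_benchmark_flag('--benchmark_out=', unknown))  # b.json (last one wins)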