From b93f5a592972b9017539cf15a5d299149c1cc2f4 Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Mon, 30 Oct 2023 16:35:37 +0100 Subject: [PATCH] Add pre-commit config and GitHub Actions job (#1688) * Add pre-commit config and GitHub Actions job Contains the following hooks: * buildifier - for formatting and linting Bazel files. * mypy, ruff, isort, black - for Python typechecking, import hygiene, static analysis, and formatting. The pylint CI job was changed to be a pre-commit CI job, where pre-commit is bootstrapped via Python. Pylint is currently no longer part of the code checks, but can be re-added if requested. The reason to drop was that it does not play nicely with pre-commit, and lots of its functionality and responsibilities are actually covered in ruff. * Add dev extra to pyproject.toml for development installs * Clarify that pre-commit contains only Python and Bazel hooks * Add one-line docstrings to Bazel modules * Apply buildifier pre-commit fixes to Bazel files * Apply pre-commit fixes to Python files * Supply --profile=black to isort to prevent conflicts * Fix nanobind build file formatting * Add tooling configs to `pyproject.toml` In particular, set line length 80 for all Python files. * Reformat all Python files to line length 80, fix return type annotations Also ignores the `tools/compare.py` and `tools/gbench/report.py` files for mypy, since they emit a barrage of errors which we can deal with later. The errors are mostly related to dynamic classmethod definition. --- .github/workflows/pre-commit.yml | 39 + .github/workflows/pylint.yml | 28 - .pre-commit-config.yaml | 26 + .ycm_extra_conf.py | 165 +- BUILD.bazel | 27 +- MODULE.bazel | 14 +- WORKSPACE | 6 +- bazel/benchmark_deps.bzl | 6 +- bindings/python/BUILD | 2 +- bindings/python/build_defs.bzl | 4 + bindings/python/google_benchmark/BUILD | 1 - bindings/python/google_benchmark/__init__.py | 18 +- bindings/python/google_benchmark/example.py | 5 +- bindings/python/nanobind.BUILD | 4 +- pyproject.toml | 39 + setup.py | 14 +- tools/BUILD.bazel | 4 +- tools/compare.py | 445 +++-- tools/gbench/__init__.py | 8 +- tools/gbench/report.py | 1624 +++++++++++------- tools/gbench/util.py | 97 +- tools/strip_asm.py | 118 +- 22 files changed, 1621 insertions(+), 1073 deletions(-) create mode 100644 .github/workflows/pre-commit.yml delete mode 100644 .github/workflows/pylint.yml create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..f78a90d8 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,39 @@ +name: python + Bazel pre-commit checks + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + pre-commit: + runs-on: ubuntu-latest + env: + MYPY_CACHE_DIR: "${{ github.workspace }}/.cache/mypy" + RUFF_CACHE_DIR: "${{ github.workspace }}/.cache/ruff" + PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pre-commit" + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + cache: 'pip' + cache-dependency-path: pyproject.toml + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + - name: Cache pre-commit tools + uses: actions/cache@v3 + with: + path: | + ${{ env.MYPY_CACHE_DIR }} + ${{ env.RUFF_CACHE_DIR }} + ${{ env.PRE_COMMIT_HOME }} + key: ${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}-linter-cache + - name: Run pre-commit checks + run: | + pre-commit run 
--all-files --verbose --show-diff-on-failure diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml deleted file mode 100644 index c6939b50..00000000 --- a/.github/workflows/pylint.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: pylint - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - pylint: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint pylint-exit conan - - - name: Run pylint - run: | - pylint `find . -name '*.py'|xargs` || pylint-exit $? diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..94ae788f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: + - repo: https://github.com/keith/pre-commit-buildifier + rev: 6.3.3.1 + hooks: + - id: buildifier + - id: buildifier-lint + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.6.1 + hooks: + - id: mypy + types_or: [ python, pyi ] + args: [ "--ignore-missing-imports", "--scripts-are-modules" ] + - repo: https://github.com/psf/black + rev: 23.10.1 + hooks: + - id: black + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: [--profile, black] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.3 + hooks: + - id: ruff + args: [ --fix, --exit-non-zero-on-fix ] \ No newline at end of file diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py index 5649ddcc..caf257f0 100644 --- a/.ycm_extra_conf.py +++ b/.ycm_extra_conf.py @@ -1,25 +1,30 @@ import os + import ycm_core # These are the compilation flags that will be used in case there's no # compilation database set (by default, one is not set). # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. flags = [ -'-Wall', -'-Werror', -'-pedantic-errors', -'-std=c++0x', -'-fno-strict-aliasing', -'-O3', -'-DNDEBUG', -# ...and the same thing goes for the magic -x option which specifies the -# language that the files to be compiled are written in. This is mostly -# relevant for c++ headers. -# For a C project, you would set this to 'c' instead of 'c++'. -'-x', 'c++', -'-I', 'include', -'-isystem', '/usr/include', -'-isystem', '/usr/local/include', + "-Wall", + "-Werror", + "-pedantic-errors", + "-std=c++0x", + "-fno-strict-aliasing", + "-O3", + "-DNDEBUG", + # ...and the same thing goes for the magic -x option which specifies the + # language that the files to be compiled are written in. This is mostly + # relevant for c++ headers. + # For a C project, you would set this to 'c' instead of 'c++'. + "-x", + "c++", + "-I", + "include", + "-isystem", + "/usr/include", + "-isystem", + "/usr/local/include", ] @@ -29,87 +34,87 @@ flags = [ # # Most projects will NOT need to set this to anything; you can just change the # 'flags' list of compilation flags. Notice that YCM itself uses that approach. 
-compilation_database_folder = '' +compilation_database_folder = "" -if os.path.exists( compilation_database_folder ): - database = ycm_core.CompilationDatabase( compilation_database_folder ) +if os.path.exists(compilation_database_folder): + database = ycm_core.CompilationDatabase(compilation_database_folder) else: - database = None + database = None + +SOURCE_EXTENSIONS = [".cc"] -SOURCE_EXTENSIONS = [ '.cc' ] def DirectoryOfThisScript(): - return os.path.dirname( os.path.abspath( __file__ ) ) + return os.path.dirname(os.path.abspath(__file__)) -def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): - if not working_directory: - return list( flags ) - new_flags = [] - make_next_absolute = False - path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] - for flag in flags: - new_flag = flag +def MakeRelativePathsInFlagsAbsolute(flags, working_directory): + if not working_directory: + return list(flags) + new_flags = [] + make_next_absolute = False + path_flags = ["-isystem", "-I", "-iquote", "--sysroot="] + for flag in flags: + new_flag = flag - if make_next_absolute: - make_next_absolute = False - if not flag.startswith( '/' ): - new_flag = os.path.join( working_directory, flag ) + if make_next_absolute: + make_next_absolute = False + if not flag.startswith("/"): + new_flag = os.path.join(working_directory, flag) - for path_flag in path_flags: - if flag == path_flag: - make_next_absolute = True - break + for path_flag in path_flags: + if flag == path_flag: + make_next_absolute = True + break - if flag.startswith( path_flag ): - path = flag[ len( path_flag ): ] - new_flag = path_flag + os.path.join( working_directory, path ) - break + if flag.startswith(path_flag): + path = flag[len(path_flag) :] + new_flag = path_flag + os.path.join(working_directory, path) + break - if new_flag: - new_flags.append( new_flag ) - return new_flags + if new_flag: + new_flags.append(new_flag) + return new_flags -def IsHeaderFile( filename ): - extension = os.path.splitext( filename )[ 1 ] - return extension in [ '.h', '.hxx', '.hpp', '.hh' ] +def IsHeaderFile(filename): + extension = os.path.splitext(filename)[1] + return extension in [".h", ".hxx", ".hpp", ".hh"] -def GetCompilationInfoForFile( filename ): - # The compilation_commands.json file generated by CMake does not have entries - # for header files. So we do our best by asking the db for flags for a - # corresponding source file, if any. If one exists, the flags for that file - # should be good enough. - if IsHeaderFile( filename ): - basename = os.path.splitext( filename )[ 0 ] - for extension in SOURCE_EXTENSIONS: - replacement_file = basename + extension - if os.path.exists( replacement_file ): - compilation_info = database.GetCompilationInfoForFile( - replacement_file ) - if compilation_info.compiler_flags_: - return compilation_info - return None - return database.GetCompilationInfoForFile( filename ) +def GetCompilationInfoForFile(filename): + # The compilation_commands.json file generated by CMake does not have entries + # for header files. So we do our best by asking the db for flags for a + # corresponding source file, if any. If one exists, the flags for that file + # should be good enough. 
+ if IsHeaderFile(filename): + basename = os.path.splitext(filename)[0] + for extension in SOURCE_EXTENSIONS: + replacement_file = basename + extension + if os.path.exists(replacement_file): + compilation_info = database.GetCompilationInfoForFile( + replacement_file + ) + if compilation_info.compiler_flags_: + return compilation_info + return None + return database.GetCompilationInfoForFile(filename) -def FlagsForFile( filename, **kwargs ): - if database: - # Bear in mind that compilation_info.compiler_flags_ does NOT return a - # python list, but a "list-like" StringVec object - compilation_info = GetCompilationInfoForFile( filename ) - if not compilation_info: - return None +def FlagsForFile(filename, **kwargs): + if database: + # Bear in mind that compilation_info.compiler_flags_ does NOT return a + # python list, but a "list-like" StringVec object + compilation_info = GetCompilationInfoForFile(filename) + if not compilation_info: + return None - final_flags = MakeRelativePathsInFlagsAbsolute( - compilation_info.compiler_flags_, - compilation_info.compiler_working_dir_ ) - else: - relative_to = DirectoryOfThisScript() - final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) + final_flags = MakeRelativePathsInFlagsAbsolute( + compilation_info.compiler_flags_, + compilation_info.compiler_working_dir_, + ) + else: + relative_to = DirectoryOfThisScript() + final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to) - return { - 'flags': final_flags, - 'do_cache': True - } + return {"flags": final_flags, "do_cache": True} diff --git a/BUILD.bazel b/BUILD.bazel index 60d31d2f..64188344 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -45,28 +45,28 @@ cc_library( "include/benchmark/benchmark.h", "include/benchmark/export.h", ], - linkopts = select({ - ":windows": ["-DEFAULTLIB:shlwapi.lib"], - "//conditions:default": ["-pthread"], - }), copts = select({ ":windows": [], "//conditions:default": ["-Werror=old-style-cast"], }), - strip_include_prefix = "include", - visibility = ["//visibility:public"], - # Only static linking is allowed; no .so will be produced. - # Using `defines` (i.e. not `local_defines`) means that no - # dependent rules need to bother about defining the macro. - linkstatic = True, defines = [ "BENCHMARK_STATIC_DEFINE", ] + select({ ":perfcounters": ["HAVE_LIBPFM"], "//conditions:default": [], }), + linkopts = select({ + ":windows": ["-DEFAULTLIB:shlwapi.lib"], + "//conditions:default": ["-pthread"], + }), + # Only static linking is allowed; no .so will be produced. + # Using `defines` (i.e. not `local_defines`) means that no + # dependent rules need to bother about defining the macro. 
+ linkstatic = True, + strip_include_prefix = "include", + visibility = ["//visibility:public"], deps = select({ - ":perfcounters": ["@libpfm//:libpfm"], + ":perfcounters": ["@libpfm"], "//conditions:default": [], }), ) @@ -74,7 +74,10 @@ cc_library( cc_library( name = "benchmark_main", srcs = ["src/benchmark_main.cc"], - hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"], + hdrs = [ + "include/benchmark/benchmark.h", + "include/benchmark/export.h", + ], strip_include_prefix = "include", visibility = ["//visibility:public"], deps = [":benchmark"], diff --git a/MODULE.bazel b/MODULE.bazel index a8930590..8dd3d831 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,11 +1,16 @@ -module(name = "google_benchmark", version="1.8.3") +module( + name = "google_benchmark", + version = "1.8.3", +) bazel_dep(name = "bazel_skylib", version = "1.4.2") bazel_dep(name = "platforms", version = "0.0.6") bazel_dep(name = "rules_foreign_cc", version = "0.9.0") bazel_dep(name = "rules_cc", version = "0.0.6") + bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True) -bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True) +bazel_dep(name = "googletest", version = "1.12.1", dev_dependency = True, repo_name = "com_google_googletest") + bazel_dep(name = "libpfm", version = "4.11.0") # Register a toolchain for Python 3.9 to be able to build numpy. Python @@ -18,7 +23,8 @@ python.toolchain(python_version = "3.9") pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True) pip.parse( - hub_name="tools_pip_deps", + hub_name = "tools_pip_deps", python_version = "3.9", - requirements_lock="//tools:requirements.txt") + requirements_lock = "//tools:requirements.txt", +) use_repo(pip, "tools_pip_deps") diff --git a/WORKSPACE b/WORKSPACE index 833590f2..a9cf5b37 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -8,11 +8,11 @@ load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_depende rules_foreign_cc_dependencies() -load("@rules_python//python:pip.bzl", pip3_install="pip_install") +load("@rules_python//python:pip.bzl", pip3_install = "pip_install") pip3_install( - name = "tools_pip_deps", - requirements = "//tools:requirements.txt", + name = "tools_pip_deps", + requirements = "//tools:requirements.txt", ) new_local_repository( diff --git a/bazel/benchmark_deps.bzl b/bazel/benchmark_deps.bzl index 8fda0131..91a36742 100644 --- a/bazel/benchmark_deps.bzl +++ b/bazel/benchmark_deps.bzl @@ -1,5 +1,9 @@ -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +""" +This file contains the Bazel build dependencies for Google Benchmark (both C++ source and Python bindings). +""" + load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") def benchmark_deps(): """Loads dependencies required to build Google Benchmark.""" diff --git a/bindings/python/BUILD b/bindings/python/BUILD index 9559a76b..d61dcb12 100644 --- a/bindings/python/BUILD +++ b/bindings/python/BUILD @@ -1,3 +1,3 @@ exports_files(glob(["*.BUILD"])) -exports_files(["build_defs.bzl"]) +exports_files(["build_defs.bzl"]) diff --git a/bindings/python/build_defs.bzl b/bindings/python/build_defs.bzl index 009820af..b0c1b0f5 100644 --- a/bindings/python/build_defs.bzl +++ b/bindings/python/build_defs.bzl @@ -1,3 +1,7 @@ +""" +This file contains some build definitions for C++ extensions used in the Google Benchmark Python bindings. 
+""" + _SHARED_LIB_SUFFIX = { "//conditions:default": ".so", "//:windows": ".dll", diff --git a/bindings/python/google_benchmark/BUILD b/bindings/python/google_benchmark/BUILD index 89ec76e0..f516a693 100644 --- a/bindings/python/google_benchmark/BUILD +++ b/bindings/python/google_benchmark/BUILD @@ -37,4 +37,3 @@ py_test( ":google_benchmark", ], ) - diff --git a/bindings/python/google_benchmark/__init__.py b/bindings/python/google_benchmark/__init__.py index 642d78a7..63b4f661 100644 --- a/bindings/python/google_benchmark/__init__.py +++ b/bindings/python/google_benchmark/__init__.py @@ -32,23 +32,22 @@ from absl import app from google_benchmark import _benchmark from google_benchmark._benchmark import ( Counter, - kNanosecond, + State, kMicrosecond, kMillisecond, + kNanosecond, kSecond, - oNone, o1, - oN, - oNSquared, - oNCubed, - oLogN, - oNLogN, oAuto, oLambda, - State, + oLogN, + oN, + oNCubed, + oNLogN, + oNone, + oNSquared, ) - __all__ = [ "register", "main", @@ -97,7 +96,6 @@ class __OptionMaker: # The function that get returned on @option.range(start=0, limit=1<<5). def __builder_method(*args, **kwargs): - # The decorator that get called, either with the benchmared function # or the previous Options def __decorator(func_or_options): diff --git a/bindings/python/google_benchmark/example.py b/bindings/python/google_benchmark/example.py index d95a0438..b5b2f88f 100644 --- a/bindings/python/google_benchmark/example.py +++ b/bindings/python/google_benchmark/example.py @@ -38,6 +38,7 @@ def sum_million(state): while state: sum(range(1_000_000)) + @benchmark.register def pause_timing(state): """Pause timing every iteration.""" @@ -85,7 +86,9 @@ def custom_counters(state): # Set a counter as a rate. state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate) # Set a counter as an inverse of rate. - state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert) + state.counters["foo_inv_rate"] = Counter( + num_foo, Counter.kIsRate | Counter.kInvert + ) # Set a counter as a thread-average quantity. 
state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads) # There's also a combined flag: diff --git a/bindings/python/nanobind.BUILD b/bindings/python/nanobind.BUILD index c6fa1c6d..9874b80d 100644 --- a/bindings/python/nanobind.BUILD +++ b/bindings/python/nanobind.BUILD @@ -1,9 +1,9 @@ +load("@bazel_skylib//lib:selects.bzl", "selects") + licenses(["notice"]) package(default_visibility = ["//visibility:public"]) -load("@bazel_skylib//lib:selects.bzl", "selects") - config_setting( name = "msvc_compiler", flag_values = {"@bazel_tools//tools/cpp:compiler": "msvc-cl"}, diff --git a/pyproject.toml b/pyproject.toml index 2db11fcb..0bac140b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,11 @@ dependencies = [ "absl-py>=0.7.1", ] +[project.optional-dependencies] +dev = [ + "pre-commit>=3.3.3", +] + [project.urls] Homepage = "https://github.com/google/benchmark" Documentation = "https://github.com/google/benchmark/tree/main/docs" @@ -49,3 +54,37 @@ where = ["bindings/python"] [tool.setuptools.dynamic] version = { attr = "google_benchmark.__version__" } readme = { file = "README.md", content-type = "text/markdown" } + +[tool.black] +# Source https://github.com/psf/black#configuration-format +include = "\\.pyi?$" +line-length = 80 +target-version = ["py311"] + +# Black-compatible settings for isort +# See https://black.readthedocs.io/en/stable/ +[tool.isort] +line_length = "80" +profile = "black" + +[tool.mypy] +check_untyped_defs = true +disallow_incomplete_defs = true +pretty = true +python_version = "3.11" +strict_optional = false +warn_unreachable = true + +[[tool.mypy.overrides]] +module = ["yaml"] +ignore_missing_imports = true + +[tool.ruff] +# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. +select = ["E", "F", "W"] +ignore = [ + # whitespace before colon (:), rely on black for formatting (in particular, allow spaces before ":" in list/array slices) + "E203", + # line too long, rely on black for reformatting of these, since sometimes URLs or comments can be longer + "E501", +] diff --git a/setup.py b/setup.py index 0593bb9c..f4700a02 100644 --- a/setup.py +++ b/setup.py @@ -4,11 +4,11 @@ import platform import shutil import sysconfig from pathlib import Path +from typing import Generator import setuptools from setuptools.command import build_ext - PYTHON_INCLUDE_PATH_PLACEHOLDER = "" IS_WINDOWS = platform.system() == "Windows" @@ -16,14 +16,14 @@ IS_MAC = platform.system() == "Darwin" @contextlib.contextmanager -def temp_fill_include_path(fp: str): +def temp_fill_include_path(fp: str) -> Generator[None, None, None]: """Temporarily set the Python include path in a file.""" with open(fp, "r+") as f: try: content = f.read() replaced = content.replace( PYTHON_INCLUDE_PATH_PLACEHOLDER, - Path(sysconfig.get_paths()['include']).as_posix(), + Path(sysconfig.get_paths()["include"]).as_posix(), ) f.seek(0) f.write(replaced) @@ -57,7 +57,7 @@ class BuildBazelExtension(build_ext.build_ext): # explicitly call `bazel shutdown` for graceful exit self.spawn(["bazel", "shutdown"]) - def bazel_build(self, ext: BazelExtension): + def bazel_build(self, ext: BazelExtension) -> None: """Runs the bazel build to create the package.""" with temp_fill_include_path("WORKSPACE"): temp_path = Path(self.build_temp) @@ -93,9 +93,11 @@ class BuildBazelExtension(build_ext.build_ext): self.spawn(bazel_argv) - shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' + shared_lib_suffix = ".dll" if IS_WINDOWS else ".so" ext_name = ext.target_name + shared_lib_suffix - ext_bazel_bin_path = 
temp_path / 'bazel-bin' / ext.relpath / ext_name + ext_bazel_bin_path = ( + temp_path / "bazel-bin" / ext.relpath / ext_name + ) ext_dest_path = Path(self.get_ext_fullpath(ext.name)) shutil.copyfile(ext_bazel_bin_path, ext_dest_path) diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index d25caa79..0e364728 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -4,8 +4,8 @@ py_library( name = "gbench", srcs = glob(["gbench/*.py"]), deps = [ - requirement("numpy"), - requirement("scipy"), + requirement("numpy"), + requirement("scipy"), ], ) diff --git a/tools/compare.py b/tools/compare.py index e5eeb247..3cc9e5eb 100755 --- a/tools/compare.py +++ b/tools/compare.py @@ -1,17 +1,20 @@ #!/usr/bin/env python3 -import unittest +# type: ignore + """ compare.py - versatile benchmark output compare tool """ import argparse -from argparse import ArgumentParser import json -import sys import os +import sys +import unittest +from argparse import ArgumentParser + import gbench -from gbench import util, report +from gbench import report, util def check_inputs(in1, in2, flags): @@ -20,163 +23,203 @@ def check_inputs(in1, in2, flags): """ in1_kind, in1_err = util.classify_input_file(in1) in2_kind, in2_err = util.classify_input_file(in2) - output_file = util.find_benchmark_flag('--benchmark_out=', flags) - output_type = util.find_benchmark_flag('--benchmark_out_format=', flags) - if in1_kind == util.IT_Executable and in2_kind == util.IT_Executable and output_file: - print(("WARNING: '--benchmark_out=%s' will be passed to both " - "benchmarks causing it to be overwritten") % output_file) + output_file = util.find_benchmark_flag("--benchmark_out=", flags) + output_type = util.find_benchmark_flag("--benchmark_out_format=", flags) + if ( + in1_kind == util.IT_Executable + and in2_kind == util.IT_Executable + and output_file + ): + print( + ( + "WARNING: '--benchmark_out=%s' will be passed to both " + "benchmarks causing it to be overwritten" + ) + % output_file + ) if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON: # When both sides are JSON the only supported flag is # --benchmark_filter= - for flag in util.remove_benchmark_flags('--benchmark_filter=', flags): - print("WARNING: passing %s has no effect since both " - "inputs are JSON" % flag) - if output_type is not None and output_type != 'json': - print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" - " is not supported.") % output_type) + for flag in util.remove_benchmark_flags("--benchmark_filter=", flags): + print( + "WARNING: passing %s has no effect since both " + "inputs are JSON" % flag + ) + if output_type is not None and output_type != "json": + print( + ( + "ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" + " is not supported." + ) + % output_type + ) sys.exit(1) def create_parser(): parser = ArgumentParser( - description='versatile benchmark output compare tool') - - parser.add_argument( - '-a', - '--display_aggregates_only', - dest='display_aggregates_only', - action="store_true", - help="If there are repetitions, by default, we display everything - the" - " actual runs, and the aggregates computed. Sometimes, it is " - "desirable to only view the aggregates. E.g. when there are a lot " - "of repetitions. Do note that only the display is affected. " - "Internally, all the actual runs are still used, e.g. 
for U test.") - - parser.add_argument( - '--no-color', - dest='color', - default=True, - action="store_false", - help="Do not use colors in the terminal output" + description="versatile benchmark output compare tool" ) parser.add_argument( - '-d', - '--dump_to_json', - dest='dump_to_json', - help="Additionally, dump benchmark comparison output to this file in JSON format.") + "-a", + "--display_aggregates_only", + dest="display_aggregates_only", + action="store_true", + help="If there are repetitions, by default, we display everything - the" + " actual runs, and the aggregates computed. Sometimes, it is " + "desirable to only view the aggregates. E.g. when there are a lot " + "of repetitions. Do note that only the display is affected. " + "Internally, all the actual runs are still used, e.g. for U test.", + ) + + parser.add_argument( + "--no-color", + dest="color", + default=True, + action="store_false", + help="Do not use colors in the terminal output", + ) + + parser.add_argument( + "-d", + "--dump_to_json", + dest="dump_to_json", + help="Additionally, dump benchmark comparison output to this file in JSON format.", + ) utest = parser.add_argument_group() utest.add_argument( - '--no-utest', - dest='utest', + "--no-utest", + dest="utest", default=True, action="store_false", - help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS)) + help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format( + report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS + ), + ) alpha_default = 0.05 utest.add_argument( "--alpha", - dest='utest_alpha', + dest="utest_alpha", default=alpha_default, type=float, - help=("significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") % - alpha_default) + help=( + "significance level alpha. 
if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)" + ) + % alpha_default, + ) subparsers = parser.add_subparsers( - help='This tool has multiple modes of operation:', - dest='mode') + help="This tool has multiple modes of operation:", dest="mode" + ) parser_a = subparsers.add_parser( - 'benchmarks', - help='The most simple use-case, compare all the output of these two benchmarks') - baseline = parser_a.add_argument_group( - 'baseline', 'The benchmark baseline') + "benchmarks", + help="The most simple use-case, compare all the output of these two benchmarks", + ) + baseline = parser_a.add_argument_group("baseline", "The benchmark baseline") baseline.add_argument( - 'test_baseline', - metavar='test_baseline', - type=argparse.FileType('r'), + "test_baseline", + metavar="test_baseline", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) contender = parser_a.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') + "contender", "The benchmark that will be compared against the baseline" + ) contender.add_argument( - 'test_contender', - metavar='test_contender', - type=argparse.FileType('r'), + "test_contender", + metavar="test_contender", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) parser_a.add_argument( - 'benchmark_options', - metavar='benchmark_options', + "benchmark_options", + metavar="benchmark_options", nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') + help="Arguments to pass when running benchmark executables", + ) parser_b = subparsers.add_parser( - 'filters', help='Compare filter one with the filter two of benchmark') - baseline = parser_b.add_argument_group( - 'baseline', 'The benchmark baseline') + "filters", help="Compare filter one with the filter two of benchmark" + ) + baseline = parser_b.add_argument_group("baseline", "The benchmark baseline") baseline.add_argument( - 'test', - metavar='test', - type=argparse.FileType('r'), + "test", + metavar="test", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) baseline.add_argument( - 'filter_baseline', - metavar='filter_baseline', + "filter_baseline", + metavar="filter_baseline", type=str, nargs=1, - help='The first filter, that will be used as baseline') + help="The first filter, that will be used as baseline", + ) contender = parser_b.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') + "contender", "The benchmark that will be compared against the baseline" + ) contender.add_argument( - 'filter_contender', - metavar='filter_contender', + "filter_contender", + metavar="filter_contender", type=str, nargs=1, - help='The second filter, that will be compared against the baseline') + help="The second filter, that will be compared against the baseline", + ) parser_b.add_argument( - 'benchmark_options', - metavar='benchmark_options', + "benchmark_options", + metavar="benchmark_options", nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') + help="Arguments to pass when running benchmark executables", + ) parser_c = subparsers.add_parser( - 'benchmarksfiltered', - help='Compare 
filter one of first benchmark with filter two of the second benchmark') - baseline = parser_c.add_argument_group( - 'baseline', 'The benchmark baseline') + "benchmarksfiltered", + help="Compare filter one of first benchmark with filter two of the second benchmark", + ) + baseline = parser_c.add_argument_group("baseline", "The benchmark baseline") baseline.add_argument( - 'test_baseline', - metavar='test_baseline', - type=argparse.FileType('r'), + "test_baseline", + metavar="test_baseline", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) baseline.add_argument( - 'filter_baseline', - metavar='filter_baseline', + "filter_baseline", + metavar="filter_baseline", type=str, nargs=1, - help='The first filter, that will be used as baseline') + help="The first filter, that will be used as baseline", + ) contender = parser_c.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') + "contender", "The benchmark that will be compared against the baseline" + ) contender.add_argument( - 'test_contender', - metavar='test_contender', - type=argparse.FileType('r'), + "test_contender", + metavar="test_contender", + type=argparse.FileType("r"), nargs=1, - help='The second benchmark executable or JSON output file, that will be compared against the baseline') + help="The second benchmark executable or JSON output file, that will be compared against the baseline", + ) contender.add_argument( - 'filter_contender', - metavar='filter_contender', + "filter_contender", + metavar="filter_contender", type=str, nargs=1, - help='The second filter, that will be compared against the baseline') + help="The second filter, that will be compared against the baseline", + ) parser_c.add_argument( - 'benchmark_options', - metavar='benchmark_options', + "benchmark_options", + metavar="benchmark_options", nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') + help="Arguments to pass when running benchmark executables", + ) return parser @@ -191,16 +234,16 @@ def main(): assert not unknown_args benchmark_options = args.benchmark_options - if args.mode == 'benchmarks': + if args.mode == "benchmarks": test_baseline = args.test_baseline[0].name test_contender = args.test_contender[0].name - filter_baseline = '' - filter_contender = '' + filter_baseline = "" + filter_contender = "" # NOTE: if test_baseline == test_contender, you are analyzing the stdev - description = 'Comparing %s to %s' % (test_baseline, test_contender) - elif args.mode == 'filters': + description = "Comparing %s to %s" % (test_baseline, test_contender) + elif args.mode == "filters": test_baseline = args.test[0].name test_contender = args.test[0].name filter_baseline = args.filter_baseline[0] @@ -209,9 +252,12 @@ def main(): # NOTE: if filter_baseline == filter_contender, you are analyzing the # stdev - description = 'Comparing %s to %s (from %s)' % ( - filter_baseline, filter_contender, args.test[0].name) - elif args.mode == 'benchmarksfiltered': + description = "Comparing %s to %s (from %s)" % ( + filter_baseline, + filter_contender, + args.test[0].name, + ) + elif args.mode == "benchmarksfiltered": test_baseline = args.test_baseline[0].name test_contender = args.test_contender[0].name filter_baseline = args.filter_baseline[0] @@ -220,8 +266,12 @@ def main(): # NOTE: if test_baseline == test_contender and # filter_baseline == filter_contender, you are analyzing the stdev - description = 
'Comparing %s (from %s) to %s (from %s)' % ( - filter_baseline, test_baseline, filter_contender, test_contender) + description = "Comparing %s (from %s) to %s (from %s)" % ( + filter_baseline, + test_baseline, + filter_contender, + test_contender, + ) else: # should never happen print("Unrecognized mode of operation: '%s'" % args.mode) @@ -231,199 +281,240 @@ def main(): check_inputs(test_baseline, test_contender, benchmark_options) if args.display_aggregates_only: - benchmark_options += ['--benchmark_display_aggregates_only=true'] + benchmark_options += ["--benchmark_display_aggregates_only=true"] options_baseline = [] options_contender = [] if filter_baseline and filter_contender: - options_baseline = ['--benchmark_filter=%s' % filter_baseline] - options_contender = ['--benchmark_filter=%s' % filter_contender] + options_baseline = ["--benchmark_filter=%s" % filter_baseline] + options_contender = ["--benchmark_filter=%s" % filter_contender] # Run the benchmarks and report the results - json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( - test_baseline, benchmark_options + options_baseline)) - json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( - test_contender, benchmark_options + options_contender)) + json1 = json1_orig = gbench.util.sort_benchmark_results( + gbench.util.run_or_load_benchmark( + test_baseline, benchmark_options + options_baseline + ) + ) + json2 = json2_orig = gbench.util.sort_benchmark_results( + gbench.util.run_or_load_benchmark( + test_contender, benchmark_options + options_contender + ) + ) # Now, filter the benchmarks so that the difference report can work if filter_baseline and filter_contender: - replacement = '[%s vs. %s]' % (filter_baseline, filter_contender) + replacement = "[%s vs. 
%s]" % (filter_baseline, filter_contender) json1 = gbench.report.filter_benchmark( - json1_orig, filter_baseline, replacement) + json1_orig, filter_baseline, replacement + ) json2 = gbench.report.filter_benchmark( - json2_orig, filter_contender, replacement) + json2_orig, filter_contender, replacement + ) - diff_report = gbench.report.get_difference_report( - json1, json2, args.utest) + diff_report = gbench.report.get_difference_report(json1, json2, args.utest) output_lines = gbench.report.print_difference_report( diff_report, args.display_aggregates_only, - args.utest, args.utest_alpha, args.color) + args.utest, + args.utest_alpha, + args.color, + ) print(description) for ln in output_lines: print(ln) # Optionally, diff and output to JSON if args.dump_to_json is not None: - with open(args.dump_to_json, 'w') as f_json: + with open(args.dump_to_json, "w") as f_json: json.dump(diff_report, f_json) + class TestParser(unittest.TestCase): def setUp(self): self.parser = create_parser() testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'gbench', - 'Inputs') - self.testInput0 = os.path.join(testInputs, 'test1_run1.json') - self.testInput1 = os.path.join(testInputs, 'test1_run2.json') + os.path.dirname(os.path.realpath(__file__)), "gbench", "Inputs" + ) + self.testInput0 = os.path.join(testInputs, "test1_run1.json") + self.testInput1 = os.path.join(testInputs, "test1_run2.json") def test_benchmarks_basic(self): parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1]) + ["benchmarks", self.testInput0, self.testInput1] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_without_utest(self): parsed = self.parser.parse_args( - ['--no-utest', 'benchmarks', self.testInput0, self.testInput1]) + ["--no-utest", "benchmarks", self.testInput0, self.testInput1] + ) self.assertFalse(parsed.display_aggregates_only) self.assertFalse(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.05) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_display_aggregates_only(self): parsed = self.parser.parse_args( - ['-a', 'benchmarks', self.testInput0, self.testInput1]) + ["-a", "benchmarks", self.testInput0, self.testInput1] + ) self.assertTrue(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_with_utest_alpha(self): parsed = self.parser.parse_args( - ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + ["--alpha=0.314", "benchmarks", self.testInput0, self.testInput1] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") 
self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_without_utest_with_utest_alpha(self): parsed = self.parser.parse_args( - ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + [ + "--no-utest", + "--alpha=0.314", + "benchmarks", + self.testInput0, + self.testInput1, + ] + ) self.assertFalse(parsed.display_aggregates_only) self.assertFalse(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_with_remainder(self): parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1, 'd']) + ["benchmarks", self.testInput0, self.testInput1, "d"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ['d']) + self.assertEqual(parsed.benchmark_options, ["d"]) def test_benchmarks_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1, '--', 'e']) + ["benchmarks", self.testInput0, self.testInput1, "--", "e"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ['e']) + self.assertEqual(parsed.benchmark_options, ["e"]) def test_filters_basic(self): - parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd']) + parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d"]) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_contender[0], "d") self.assertFalse(parsed.benchmark_options) def test_filters_with_remainder(self): parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd', 'e']) + ["filters", self.testInput0, "c", "d", "e"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') - self.assertEqual(parsed.benchmark_options, ['e']) + self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_contender[0], "d") + self.assertEqual(parsed.benchmark_options, ["e"]) def test_filters_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd', '--', 'f']) 
+ ["filters", self.testInput0, "c", "d", "--", "f"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') - self.assertEqual(parsed.benchmark_options, ['f']) + self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_contender[0], "d") + self.assertEqual(parsed.benchmark_options, ["f"]) def test_benchmarksfiltered_basic(self): parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e']) + ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.filter_contender[0], "e") self.assertFalse(parsed.benchmark_options) def test_benchmarksfiltered_with_remainder(self): parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f']) + [ + "benchmarksfiltered", + self.testInput0, + "c", + self.testInput1, + "e", + "f", + ] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') - self.assertEqual(parsed.benchmark_options[0], 'f') + self.assertEqual(parsed.filter_contender[0], "e") + self.assertEqual(parsed.benchmark_options[0], "f") def test_benchmarksfiltered_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g']) + [ + "benchmarksfiltered", + self.testInput0, + "c", + self.testInput1, + "e", + "--", + "g", + ] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') - self.assertEqual(parsed.benchmark_options[0], 'g') + self.assertEqual(parsed.filter_contender[0], "e") + self.assertEqual(parsed.benchmark_options[0], "g") -if __name__ == '__main__': +if __name__ == "__main__": # unittest.main() main() diff --git a/tools/gbench/__init__.py b/tools/gbench/__init__.py index fce1a1ac..92125688 100644 --- a/tools/gbench/__init__.py +++ b/tools/gbench/__init__.py @@ -1,8 +1,8 @@ """Google Benchmark tooling""" -__author__ = 'Eric Fiselier' -__email__ = 'eric@efcs.ca' +__author__ = "Eric Fiselier" +__email__ = 
"eric@efcs.ca" __versioninfo__ = (0, 5, 0) -__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' +__version__ = ".".join(str(v) for v in __versioninfo__) + "dev" -__all__ = [] +__all__ = [] # type: ignore diff --git a/tools/gbench/report.py b/tools/gbench/report.py index b2bbfb9f..10e6b508 100644 --- a/tools/gbench/report.py +++ b/tools/gbench/report.py @@ -1,14 +1,17 @@ -"""report.py - Utilities for reporting statistics about benchmark results +# type: ignore + +""" +report.py - Utilities for reporting statistics about benchmark results """ -import unittest -import os -import re import copy +import os import random +import re +import unittest -from scipy.stats import mannwhitneyu, gmean from numpy import array +from scipy.stats import gmean, mannwhitneyu class BenchmarkColor(object): @@ -17,26 +20,25 @@ class BenchmarkColor(object): self.code = code def __repr__(self): - return '%s%r' % (self.__class__.__name__, - (self.name, self.code)) + return "%s%r" % (self.__class__.__name__, (self.name, self.code)) def __format__(self, format): return self.code # Benchmark Colors Enumeration -BC_NONE = BenchmarkColor('NONE', '') -BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') -BC_CYAN = BenchmarkColor('CYAN', '\033[96m') -BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') -BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') -BC_HEADER = BenchmarkColor('HEADER', '\033[92m') -BC_WARNING = BenchmarkColor('WARNING', '\033[93m') -BC_WHITE = BenchmarkColor('WHITE', '\033[97m') -BC_FAIL = BenchmarkColor('FAIL', '\033[91m') -BC_ENDC = BenchmarkColor('ENDC', '\033[0m') -BC_BOLD = BenchmarkColor('BOLD', '\033[1m') -BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') +BC_NONE = BenchmarkColor("NONE", "") +BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m") +BC_CYAN = BenchmarkColor("CYAN", "\033[96m") +BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m") +BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m") +BC_HEADER = BenchmarkColor("HEADER", "\033[92m") +BC_WARNING = BenchmarkColor("WARNING", "\033[93m") +BC_WHITE = BenchmarkColor("WHITE", "\033[97m") +BC_FAIL = BenchmarkColor("FAIL", "\033[91m") +BC_ENDC = BenchmarkColor("ENDC", "\033[0m") +BC_BOLD = BenchmarkColor("BOLD", "\033[1m") +BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m") UTEST_MIN_REPETITIONS = 2 UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. 
@@ -59,10 +61,14 @@ def color_format(use_color, fmt_str, *args, **kwargs): """ assert use_color is True or use_color is False if not use_color: - args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE - for arg in args] - kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE - for key, arg in kwargs.items()} + args = [ + arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for arg in args + ] + kwargs = { + key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for key, arg in kwargs.items() + } return fmt_str.format(*args, **kwargs) @@ -73,8 +79,8 @@ def find_longest_name(benchmark_list): """ longest_name = 1 for bc in benchmark_list: - if len(bc['name']) > longest_name: - longest_name = len(bc['name']) + if len(bc["name"]) > longest_name: + longest_name = len(bc["name"]) return longest_name @@ -95,13 +101,13 @@ def filter_benchmark(json_orig, family, replacement=""): """ regex = re.compile(family) filtered = {} - filtered['benchmarks'] = [] - for be in json_orig['benchmarks']: - if not regex.search(be['name']): + filtered["benchmarks"] = [] + for be in json_orig["benchmarks"]: + if not regex.search(be["name"]): continue filteredbench = copy.deepcopy(be) # Do NOT modify the old name! - filteredbench['name'] = regex.sub(replacement, filteredbench['name']) - filtered['benchmarks'].append(filteredbench) + filteredbench["name"] = regex.sub(replacement, filteredbench["name"]) + filtered["benchmarks"].append(filteredbench) return filtered @@ -110,9 +116,11 @@ def get_unique_benchmark_names(json): While *keeping* the order, give all the unique 'names' used for benchmarks. """ seen = set() - uniqued = [x['name'] for x in json['benchmarks'] - if x['name'] not in seen and - (seen.add(x['name']) or True)] + uniqued = [ + x["name"] + for x in json["benchmarks"] + if x["name"] not in seen and (seen.add(x["name"]) or True) + ] return uniqued @@ -125,7 +133,7 @@ def intersect(list1, list2): def is_potentially_comparable_benchmark(x): - return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) + return "time_unit" in x and "real_time" in x and "cpu_time" in x def partition_benchmarks(json1, json2): @@ -142,18 +150,24 @@ def partition_benchmarks(json1, json2): time_unit = None # Pick the time unit from the first entry of the lhs benchmark. # We should be careful not to crash with unexpected input. - for x in json1['benchmarks']: - if (x['name'] == name and is_potentially_comparable_benchmark(x)): - time_unit = x['time_unit'] + for x in json1["benchmarks"]: + if x["name"] == name and is_potentially_comparable_benchmark(x): + time_unit = x["time_unit"] break if time_unit is None: continue # Filter by name and time unit. # All the repetitions are assumed to be comparable. - lhs = [x for x in json1['benchmarks'] if x['name'] == name and - x['time_unit'] == time_unit] - rhs = [x for x in json2['benchmarks'] if x['name'] == name and - x['time_unit'] == time_unit] + lhs = [ + x + for x in json1["benchmarks"] + if x["name"] == name and x["time_unit"] == time_unit + ] + rhs = [ + x + for x in json2["benchmarks"] + if x["name"] == name and x["time_unit"] == time_unit + ] partitions.append([lhs, rhs]) return partitions @@ -164,7 +178,7 @@ def get_timedelta_field_as_seconds(benchmark, field_name): time_unit, as time in seconds. 
""" timedelta = benchmark[field_name] - time_unit = benchmark.get('time_unit', 's') + time_unit = benchmark.get("time_unit", "s") return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit) @@ -174,11 +188,15 @@ def calculate_geomean(json): and calculate their geomean. """ times = [] - for benchmark in json['benchmarks']: - if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': + for benchmark in json["benchmarks"]: + if "run_type" in benchmark and benchmark["run_type"] == "aggregate": continue - times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), - get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) + times.append( + [ + get_timedelta_field_as_seconds(benchmark, "real_time"), + get_timedelta_field_as_seconds(benchmark, "cpu_time"), + ] + ) return gmean(times) if times else array([]) @@ -190,19 +208,23 @@ def extract_field(partition, field_name): def calc_utest(timings_cpu, timings_time): - min_rep_cnt = min(len(timings_time[0]), - len(timings_time[1]), - len(timings_cpu[0]), - len(timings_cpu[1])) + min_rep_cnt = min( + len(timings_time[0]), + len(timings_time[1]), + len(timings_cpu[0]), + len(timings_cpu[1]), + ) # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? if min_rep_cnt < UTEST_MIN_REPETITIONS: return False, None, None time_pvalue = mannwhitneyu( - timings_time[0], timings_time[1], alternative='two-sided').pvalue + timings_time[0], timings_time[1], alternative="two-sided" + ).pvalue cpu_pvalue = mannwhitneyu( - timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue + timings_cpu[0], timings_cpu[1], alternative="two-sided" + ).pvalue return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue @@ -212,38 +234,46 @@ def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): return BC_FAIL if pval >= utest_alpha else BC_OKGREEN # Check if we failed miserably with minimum required repetitions for utest - if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: + if ( + not utest["have_optimal_repetitions"] + and utest["cpu_pvalue"] is None + and utest["time_pvalue"] is None + ): return [] dsc = "U Test, Repetitions: {} vs {}".format( - utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) + utest["nr_of_repetitions"], utest["nr_of_repetitions_other"] + ) dsc_color = BC_OKGREEN # We still got some results to show but issue a warning about it. - if not utest['have_optimal_repetitions']: + if not utest["have_optimal_repetitions"]: dsc_color = BC_WARNING dsc += ". WARNING: Results unreliable! 
{}+ repetitions recommended.".format( - UTEST_OPTIMAL_REPETITIONS) + UTEST_OPTIMAL_REPETITIONS + ) special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" - return [color_format(use_color, - special_str, - BC_HEADER, - "{}{}".format(bc_name, UTEST_COL_NAME), - first_col_width, - get_utest_color( - utest['time_pvalue']), utest['time_pvalue'], - get_utest_color( - utest['cpu_pvalue']), utest['cpu_pvalue'], - dsc_color, dsc, - endc=BC_ENDC)] + return [ + color_format( + use_color, + special_str, + BC_HEADER, + "{}{}".format(bc_name, UTEST_COL_NAME), + first_col_width, + get_utest_color(utest["time_pvalue"]), + utest["time_pvalue"], + get_utest_color(utest["cpu_pvalue"]), + utest["cpu_pvalue"], + dsc_color, + dsc, + endc=BC_ENDC, + ) + ] -def get_difference_report( - json1, - json2, - utest=False): +def get_difference_report(json1, json2, utest=False): """ Calculate and report the difference between each test of two benchmarks runs specified as 'json1' and 'json2'. Output is another json containing @@ -254,37 +284,44 @@ def get_difference_report( diff_report = [] partitions = partition_benchmarks(json1, json2) for partition in partitions: - benchmark_name = partition[0][0]['name'] - label = partition[0][0]['label'] if 'label' in partition[0][0] else '' - time_unit = partition[0][0]['time_unit'] + benchmark_name = partition[0][0]["name"] + label = partition[0][0]["label"] if "label" in partition[0][0] else "" + time_unit = partition[0][0]["time_unit"] measurements = [] utest_results = {} # Careful, we may have different repetition count. for i in range(min(len(partition[0]), len(partition[1]))): bn = partition[0][i] other_bench = partition[1][i] - measurements.append({ - 'real_time': bn['real_time'], - 'cpu_time': bn['cpu_time'], - 'real_time_other': other_bench['real_time'], - 'cpu_time_other': other_bench['cpu_time'], - 'time': calculate_change(bn['real_time'], other_bench['real_time']), - 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) - }) + measurements.append( + { + "real_time": bn["real_time"], + "cpu_time": bn["cpu_time"], + "real_time_other": other_bench["real_time"], + "cpu_time_other": other_bench["cpu_time"], + "time": calculate_change( + bn["real_time"], other_bench["real_time"] + ), + "cpu": calculate_change( + bn["cpu_time"], other_bench["cpu_time"] + ), + } + ) # After processing the whole partition, if requested, do the U test. if utest: - timings_cpu = extract_field(partition, 'cpu_time') - timings_time = extract_field(partition, 'real_time') + timings_cpu = extract_field(partition, "cpu_time") + timings_time = extract_field(partition, "real_time") have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest( - timings_cpu, timings_time) + timings_cpu, timings_time + ) if cpu_pvalue and time_pvalue: utest_results = { - 'have_optimal_repetitions': have_optimal_repetitions, - 'cpu_pvalue': cpu_pvalue, - 'time_pvalue': time_pvalue, - 'nr_of_repetitions': len(timings_cpu[0]), - 'nr_of_repetitions_other': len(timings_cpu[1]) + "have_optimal_repetitions": have_optimal_repetitions, + "cpu_pvalue": cpu_pvalue, + "time_pvalue": time_pvalue, + "nr_of_repetitions": len(timings_cpu[0]), + "nr_of_repetitions_other": len(timings_cpu[1]), } # Store only if we had any measurements for given benchmark. @@ -292,47 +329,63 @@ def get_difference_report( # time units which are not compatible with other time units in the # benchmark suite. 
if measurements: - run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' - aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' - diff_report.append({ - 'name': benchmark_name, - 'label': label, - 'measurements': measurements, - 'time_unit': time_unit, - 'run_type': run_type, - 'aggregate_name': aggregate_name, - 'utest': utest_results - }) + run_type = ( + partition[0][0]["run_type"] + if "run_type" in partition[0][0] + else "" + ) + aggregate_name = ( + partition[0][0]["aggregate_name"] + if run_type == "aggregate" + and "aggregate_name" in partition[0][0] + else "" + ) + diff_report.append( + { + "name": benchmark_name, + "label": label, + "measurements": measurements, + "time_unit": time_unit, + "run_type": run_type, + "aggregate_name": aggregate_name, + "utest": utest_results, + } + ) lhs_gmean = calculate_geomean(json1) rhs_gmean = calculate_geomean(json2) if lhs_gmean.any() and rhs_gmean.any(): - diff_report.append({ - 'name': 'OVERALL_GEOMEAN', - 'label': '', - 'measurements': [{ - 'real_time': lhs_gmean[0], - 'cpu_time': lhs_gmean[1], - 'real_time_other': rhs_gmean[0], - 'cpu_time_other': rhs_gmean[1], - 'time': calculate_change(lhs_gmean[0], rhs_gmean[0]), - 'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1]) - }], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - }) + diff_report.append( + { + "name": "OVERALL_GEOMEAN", + "label": "", + "measurements": [ + { + "real_time": lhs_gmean[0], + "cpu_time": lhs_gmean[1], + "real_time_other": rhs_gmean[0], + "cpu_time_other": rhs_gmean[1], + "time": calculate_change(lhs_gmean[0], rhs_gmean[0]), + "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]), + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + } + ) return diff_report def print_difference_report( - json_diff_report, - include_aggregates_only=False, - utest=False, - utest_alpha=0.05, - use_color=True): + json_diff_report, + include_aggregates_only=False, + utest=False, + utest_alpha=0.05, + use_color=True, +): """ Calculate and report the difference between each test of two benchmarks runs specified as 'json1' and 'json2'. @@ -348,44 +401,53 @@ def print_difference_report( return BC_CYAN first_col_width = find_longest_name(json_diff_report) - first_col_width = max( - first_col_width, - len('Benchmark')) + first_col_width = max(first_col_width, len("Benchmark")) first_col_width += len(UTEST_COL_NAME) first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( - 'Benchmark', 12 + first_col_width) - output_strs = [first_line, '-' * len(first_line)] + "Benchmark", 12 + first_col_width + ) + output_strs = [first_line, "-" * len(first_line)] fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" for benchmark in json_diff_report: # *If* we were asked to only include aggregates, # and if it is non-aggregate, then don't print it. 
- if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': - for measurement in benchmark['measurements']: - output_strs += [color_format(use_color, - fmt_str, - BC_HEADER, - benchmark['name'], - first_col_width, - get_color(measurement['time']), - measurement['time'], - get_color(measurement['cpu']), - measurement['cpu'], - measurement['real_time'], - measurement['real_time_other'], - measurement['cpu_time'], - measurement['cpu_time_other'], - endc=BC_ENDC)] + if ( + not include_aggregates_only + or "run_type" not in benchmark + or benchmark["run_type"] == "aggregate" + ): + for measurement in benchmark["measurements"]: + output_strs += [ + color_format( + use_color, + fmt_str, + BC_HEADER, + benchmark["name"], + first_col_width, + get_color(measurement["time"]), + measurement["time"], + get_color(measurement["cpu"]), + measurement["cpu"], + measurement["real_time"], + measurement["real_time_other"], + measurement["cpu_time"], + measurement["cpu_time_other"], + endc=BC_ENDC, + ) + ] # After processing the measurements, if requested and # if applicable (e.g. u-test exists for given benchmark), # print the U test. - if utest and benchmark['utest']: - output_strs += print_utest(benchmark['name'], - benchmark['utest'], - utest_alpha=utest_alpha, - first_col_width=first_col_width, - use_color=use_color) + if utest and benchmark["utest"]: + output_strs += print_utest( + benchmark["name"], + benchmark["utest"], + utest_alpha=utest_alpha, + first_col_width=first_col_width, + use_color=use_color, + ) return output_strs @@ -397,21 +459,21 @@ def print_difference_report( class TestGetUniqueBenchmarkNames(unittest.TestCase): def load_results(self): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test3_run0.json') - with open(testOutput, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput = os.path.join(testInputs, "test3_run0.json") + with open(testOutput, "r") as f: json = json.load(f) return json def test_basic(self): expect_lines = [ - 'BM_One', - 'BM_Two', - 'short', # These two are not sorted - 'medium', # These two are not sorted + "BM_One", + "BM_Two", + "short", # These two are not sorted + "medium", # These two are not sorted ] json = self.load_results() output_lines = get_unique_benchmark_names(json) @@ -427,15 +489,15 @@ class TestReportDifference(unittest.TestCase): def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test1_run1.json') - testOutput2 = os.path.join(testInputs, 'test1_run2.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test1_run1.json") + testOutput2 = os.path.join(testInputs, "test1_run2.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 @@ -444,171 +506,323 @@ class TestReportDifference(unittest.TestCase): def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], - ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], - ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], - ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], - 
['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], - ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], - ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], - ['BM_100xSlower', '+99.0000', '+99.0000', - '100', '10000', '100', '10000'], - ['BM_100xFaster', '-0.9900', '-0.9900', - '10000', '100', '10000', '100'], - ['BM_10PercentCPUToTime', '+0.1000', - '-0.1000', '100', '110', '100', '90'], - ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], - ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], - ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'], - ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0'] + ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"], + ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"], + ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"], + [ + "BM_1PercentFaster", + "-0.0100", + "-0.0100", + "100", + "99", + "100", + "99", + ], + [ + "BM_1PercentSlower", + "+0.0100", + "+0.0100", + "100", + "101", + "100", + "101", + ], + [ + "BM_10PercentFaster", + "-0.1000", + "-0.1000", + "100", + "90", + "100", + "90", + ], + [ + "BM_10PercentSlower", + "+0.1000", + "+0.1000", + "100", + "110", + "100", + "110", + ], + [ + "BM_100xSlower", + "+99.0000", + "+99.0000", + "100", + "10000", + "100", + "10000", + ], + [ + "BM_100xFaster", + "-0.9900", + "-0.9900", + "10000", + "100", + "10000", + "100", + ], + [ + "BM_10PercentCPUToTime", + "+0.1000", + "-0.1000", + "100", + "110", + "100", + "90", + ], + ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"], + ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"], + ["BM_hasLabel", "+0.0000", "+0.0000", "1", "1", "1", "1"], + ["OVERALL_GEOMEAN", "-0.8113", "-0.7779", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False) + self.json_diff_report, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report_output(self): expected_output = [ { - 'name': 'BM_SameTimes', - 'label': '', - 'measurements': [{'time': 0.0000, 'cpu': 0.0000, - 'real_time': 10, 'real_time_other': 10, - 'cpu_time': 10, 'cpu_time_other': 10}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_SameTimes", + "label": "", + "measurements": [ + { + "time": 0.0000, + "cpu": 0.0000, + "real_time": 10, + "real_time_other": 10, + "cpu_time": 10, + "cpu_time_other": 10, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_2xFaster', - 'label': '', - 'measurements': [{'time': -0.5000, 'cpu': -0.5000, - 'real_time': 50, 'real_time_other': 25, - 'cpu_time': 50, 'cpu_time_other': 25}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_2xFaster", + "label": "", + "measurements": [ + { + "time": -0.5000, + "cpu": -0.5000, + "real_time": 50, + "real_time_other": 25, + "cpu_time": 50, + "cpu_time_other": 25, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_2xSlower', - 'label': '', - 'measurements': [{'time': 1.0000, 'cpu': 1.0000, - 'real_time': 50, 'real_time_other': 100, - 'cpu_time': 50, 'cpu_time_other': 100}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_2xSlower", 
+ "label": "", + "measurements": [ + { + "time": 1.0000, + "cpu": 1.0000, + "real_time": 50, + "real_time_other": 100, + "cpu_time": 50, + "cpu_time_other": 100, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_1PercentFaster', - 'label': '', - 'measurements': [{'time': -0.0100, 'cpu': -0.0100, - 'real_time': 100, 'real_time_other': 98.9999999, - 'cpu_time': 100, 'cpu_time_other': 98.9999999}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_1PercentFaster", + "label": "", + "measurements": [ + { + "time": -0.0100, + "cpu": -0.0100, + "real_time": 100, + "real_time_other": 98.9999999, + "cpu_time": 100, + "cpu_time_other": 98.9999999, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_1PercentSlower', - 'label': '', - 'measurements': [{'time': 0.0100, 'cpu': 0.0100, - 'real_time': 100, 'real_time_other': 101, - 'cpu_time': 100, 'cpu_time_other': 101}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_1PercentSlower", + "label": "", + "measurements": [ + { + "time": 0.0100, + "cpu": 0.0100, + "real_time": 100, + "real_time_other": 101, + "cpu_time": 100, + "cpu_time_other": 101, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_10PercentFaster', - 'label': '', - 'measurements': [{'time': -0.1000, 'cpu': -0.1000, - 'real_time': 100, 'real_time_other': 90, - 'cpu_time': 100, 'cpu_time_other': 90}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_10PercentFaster", + "label": "", + "measurements": [ + { + "time": -0.1000, + "cpu": -0.1000, + "real_time": 100, + "real_time_other": 90, + "cpu_time": 100, + "cpu_time_other": 90, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_10PercentSlower', - 'label': '', - 'measurements': [{'time': 0.1000, 'cpu': 0.1000, - 'real_time': 100, 'real_time_other': 110, - 'cpu_time': 100, 'cpu_time_other': 110}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_10PercentSlower", + "label": "", + "measurements": [ + { + "time": 0.1000, + "cpu": 0.1000, + "real_time": 100, + "real_time_other": 110, + "cpu_time": 100, + "cpu_time_other": 110, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_100xSlower', - 'label': '', - 'measurements': [{'time': 99.0000, 'cpu': 99.0000, - 'real_time': 100, 'real_time_other': 10000, - 'cpu_time': 100, 'cpu_time_other': 10000}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_100xSlower", + "label": "", + "measurements": [ + { + "time": 99.0000, + "cpu": 99.0000, + "real_time": 100, + "real_time_other": 10000, + "cpu_time": 100, + "cpu_time_other": 10000, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_100xFaster', - 'label': '', - 'measurements': [{'time': -0.9900, 'cpu': -0.9900, - 'real_time': 10000, 'real_time_other': 100, - 'cpu_time': 10000, 'cpu_time_other': 100}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_100xFaster", + "label": "", + "measurements": [ + { + "time": -0.9900, + "cpu": -0.9900, + "real_time": 10000, + "real_time_other": 100, + "cpu_time": 10000, + "cpu_time_other": 100, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_10PercentCPUToTime', - 'label': '', - 'measurements': [{'time': 0.1000, 'cpu': -0.1000, - 'real_time': 100, 'real_time_other': 110, - 'cpu_time': 100, 'cpu_time_other': 90}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_10PercentCPUToTime", + "label": "", + "measurements": [ + { + "time": 0.1000, + "cpu": -0.1000, + "real_time": 100, + "real_time_other": 110, + "cpu_time": 100, + "cpu_time_other": 90, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_ThirdFaster', - 
'label': '', - 'measurements': [{'time': -0.3333, 'cpu': -0.3334, - 'real_time': 100, 'real_time_other': 67, - 'cpu_time': 100, 'cpu_time_other': 67}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_ThirdFaster", + "label": "", + "measurements": [ + { + "time": -0.3333, + "cpu": -0.3334, + "real_time": 100, + "real_time_other": 67, + "cpu_time": 100, + "cpu_time_other": 67, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_NotBadTimeUnit', - 'label': '', - 'measurements': [{'time': -0.9000, 'cpu': 0.2000, - 'real_time': 0.4, 'real_time_other': 0.04, - 'cpu_time': 0.5, 'cpu_time_other': 0.6}], - 'time_unit': 's', - 'utest': {} + "name": "BM_NotBadTimeUnit", + "label": "", + "measurements": [ + { + "time": -0.9000, + "cpu": 0.2000, + "real_time": 0.4, + "real_time_other": 0.04, + "cpu_time": 0.5, + "cpu_time_other": 0.6, + } + ], + "time_unit": "s", + "utest": {}, }, { - 'name': 'BM_hasLabel', - 'label': 'a label', - 'measurements': [{'time': 0.0000, 'cpu': 0.0000, - 'real_time': 1, 'real_time_other': 1, - 'cpu_time': 1, 'cpu_time_other': 1}], - 'time_unit': 's', - 'utest': {} + "name": "BM_hasLabel", + "label": "a label", + "measurements": [ + { + "time": 0.0000, + "cpu": 0.0000, + "real_time": 1, + "real_time_other": 1, + "cpu_time": 1, + "cpu_time_other": 1, + } + ], + "time_unit": "s", + "utest": {}, }, { - 'name': 'OVERALL_GEOMEAN', - 'label': '', - 'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06, - 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07, - 'time': -0.8112976497120911, 'cpu': -0.7778551721181174}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', 'utest': {} + "name": "OVERALL_GEOMEAN", + "label": "", + "measurements": [ + { + "real_time": 3.1622776601683826e-06, + "cpu_time": 3.2130844755623912e-06, + "real_time_other": 1.9768988699420897e-07, + "cpu_time_other": 2.397447755209533e-07, + "time": -0.8112976497120911, + "cpu": -0.7778551721181174, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['label'], expected['label']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["label"], expected["label"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -618,12 +832,12 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): def setUpClass(cls): def load_result(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test2_run.json') - with open(testOutput, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput = os.path.join(testInputs, "test2_run.json") + with open(testOutput, "r") as f: json = json.load(f) return json @@ -634,65 +848,108 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], - ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], - ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], - 
['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], - ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0'] + [".", "-0.5000", "-0.5000", "10", "5", "10", "5"], + ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"], + ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"], + ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"], + ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False) + self.json_diff_report, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'.', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}], - 'time_unit': 'ns', - 'utest': {} + "name": ".", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 10, + "real_time_other": 5, + "cpu_time": 10, + "cpu_time_other": 5, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': u'./4', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}], - 'time_unit': 'ns', - 'utest': {}, + "name": "./4", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 40, + "real_time_other": 20, + "cpu_time": 40, + "cpu_time_other": 20, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': u'Prefix/.', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}], - 'time_unit': 'ns', - 'utest': {} + "name": "Prefix/.", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 20, + "real_time_other": 10, + "cpu_time": 20, + "cpu_time_other": 10, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': u'Prefix/./3', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], - 'time_unit': 'ns', - 'utest': {} + "name": "Prefix/./3", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 30, + "real_time_other": 15, + "cpu_time": 30, + "cpu_time_other": 15, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'OVERALL_GEOMEAN', - 'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08, - 'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08, - 'time': -0.5000000000000009, 'cpu': -0.5000000000000009}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - } + "name": "OVERALL_GEOMEAN", + "measurements": [ + { + "real_time": 2.213363839400641e-08, + "cpu_time": 2.213363839400641e-08, + "real_time_other": 1.1066819197003185e-08, + "cpu_time_other": 1.1066819197003185e-08, + "time": -0.5000000000000009, + "cpu": -0.5000000000000009, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], 
expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -702,424 +959,489 @@ class TestReportDifferenceWithUTest(unittest.TestCase): def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test3_run0.json") + testOutput2 = os.path.join(testInputs, "test3_run1.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) + cls.json_diff_report = get_difference_report(json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', - '1.0000', - '0.6667', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.2000', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], - ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], + ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"], + ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"], + [ + "BM_Two_pvalue", + "1.0000", + "0.6667", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "2.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], + ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], + [ + "short_pvalue", + "0.7671", + "0.2000", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "3.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"], + ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report_pretty_printing_aggregates_only(self): expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - 
['BM_Two_pvalue', - '1.0000', - '0.6667', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.2000', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], + [ + "BM_Two_pvalue", + "1.0000", + "0.6667", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "2.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], + ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], + [ + "short_pvalue", + "0.7671", + "0.2000", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "3.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) + self.json_diff_report, + include_aggregates_only=True, + utest=True, + utest_alpha=0.05, + use_color=False, + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'BM_One', - 'measurements': [ - {'time': -0.1, - 'cpu': 0.1, - 'real_time': 10, - 'real_time_other': 9, - 'cpu_time': 100, - 'cpu_time_other': 110} + "name": "BM_One", + "measurements": [ + { + "time": -0.1, + "cpu": 0.1, + "real_time": 10, + "real_time_other": 9, + "cpu_time": 100, + "cpu_time_other": 110, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, }, { - 'name': u'BM_Two', - 'measurements': [ - {'time': 0.1111111111111111, - 'cpu': -0.011111111111111112, - 'real_time': 9, - 'real_time_other': 10, - 'cpu_time': 90, - 'cpu_time_other': 89}, - {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, - 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + "name": "BM_Two", + "measurements": [ + { + "time": 0.1111111111111111, + "cpu": -0.011111111111111112, + "real_time": 9, + "real_time_other": 10, + "cpu_time": 90, + "cpu_time_other": 89, + }, + { + "time": -0.125, + "cpu": -0.16279069767441862, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 86, + "cpu_time_other": 72, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.6666666666666666, + "time_pvalue": 1.0, + }, }, { - 'name': u'short', - 'measurements': [ - {'time': -0.125, - 'cpu': -0.0625, - 'real_time': 8, - 'real_time_other': 7, - 'cpu_time': 80, - 'cpu_time_other': 75}, - {'time': -0.4325, - 'cpu': -0.13506493506493514, - 'real_time': 8, - 'real_time_other': 4.54, - 'cpu_time': 77, - 'cpu_time_other': 66.6} + "name": "short", + "measurements": [ + { + "time": -0.125, + 
"cpu": -0.0625, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 80, + "cpu_time_other": 75, + }, + { + "time": -0.4325, + "cpu": -0.13506493506493514, + "real_time": 8, + "real_time_other": 4.54, + "cpu_time": 77, + "cpu_time_other": 66.6, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.2, + "time_pvalue": 0.7670968684102772, + }, }, { - 'name': u'medium', - 'measurements': [ - {'time': -0.375, - 'cpu': -0.3375, - 'real_time': 8, - 'real_time_other': 5, - 'cpu_time': 80, - 'cpu_time_other': 53} + "name": "medium", + "measurements": [ + { + "time": -0.375, + "cpu": -0.3375, + "real_time": 8, + "real_time_other": 5, + "cpu_time": 80, + "cpu_time_other": 53, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, }, { - 'name': 'OVERALL_GEOMEAN', - 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, - 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, - 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - } + "name": "OVERALL_GEOMEAN", + "measurements": [ + { + "real_time": 8.48528137423858e-09, + "cpu_time": 8.441336246629233e-08, + "real_time_other": 2.2405267593145244e-08, + "cpu_time_other": 2.5453661413660466e-08, + "time": 1.6404861082353634, + "cpu": -0.6984640740519662, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - unittest.TestCase): + unittest.TestCase +): @classmethod def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test3_run0.json") + testOutput2 = os.path.join(testInputs, "test3_run1.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) + cls.json_diff_report = get_difference_report(json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', - '1.0000', - '0.6667', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 
'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.2000', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], - ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], + ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"], + ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"], + [ + "BM_Two_pvalue", + "1.0000", + "0.6667", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "2.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], + ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], + [ + "short_pvalue", + "0.7671", + "0.2000", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "3.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"], + ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, - utest=True, utest_alpha=0.05, use_color=False) + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'BM_One', - 'measurements': [ - {'time': -0.1, - 'cpu': 0.1, - 'real_time': 10, - 'real_time_other': 9, - 'cpu_time': 100, - 'cpu_time_other': 110} + "name": "BM_One", + "measurements": [ + { + "time": -0.1, + "cpu": 0.1, + "real_time": 10, + "real_time_other": 9, + "cpu_time": 100, + "cpu_time_other": 110, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, }, { - 'name': u'BM_Two', - 'measurements': [ - {'time': 0.1111111111111111, - 'cpu': -0.011111111111111112, - 'real_time': 9, - 'real_time_other': 10, - 'cpu_time': 90, - 'cpu_time_other': 89}, - {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, - 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + "name": "BM_Two", + "measurements": [ + { + "time": 0.1111111111111111, + "cpu": -0.011111111111111112, + "real_time": 9, + "real_time_other": 10, + "cpu_time": 90, + "cpu_time_other": 89, + }, + { + "time": -0.125, + "cpu": -0.16279069767441862, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 86, + "cpu_time_other": 72, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.6666666666666666, + "time_pvalue": 1.0, + }, }, { - 'name': u'short', - 'measurements': [ - {'time': -0.125, - 'cpu': -0.0625, - 'real_time': 8, - 'real_time_other': 7, - 'cpu_time': 80, - 'cpu_time_other': 75}, - {'time': -0.4325, - 'cpu': -0.13506493506493514, - 'real_time': 8, - 'real_time_other': 4.54, - 'cpu_time': 77, - 'cpu_time_other': 66.6} + "name": "short", + "measurements": [ + { + "time": -0.125, + "cpu": 
-0.0625, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 80, + "cpu_time_other": 75, + }, + { + "time": -0.4325, + "cpu": -0.13506493506493514, + "real_time": 8, + "real_time_other": 4.54, + "cpu_time": 77, + "cpu_time_other": 66.6, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.2, + "time_pvalue": 0.7670968684102772, + }, }, { - 'name': u'medium', - 'measurements': [ - {'real_time_other': 5, - 'cpu_time': 80, - 'time': -0.375, - 'real_time': 8, - 'cpu_time_other': 53, - 'cpu': -0.3375 - } + "name": "medium", + "measurements": [ + { + "real_time_other": 5, + "cpu_time": 80, + "time": -0.375, + "real_time": 8, + "cpu_time_other": 53, + "cpu": -0.3375, + } ], - 'utest': {}, - 'time_unit': u'ns', - 'aggregate_name': '' + "utest": {}, + "time_unit": "ns", + "aggregate_name": "", }, { - 'name': 'OVERALL_GEOMEAN', - 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, - 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, - 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - } + "name": "OVERALL_GEOMEAN", + "measurements": [ + { + "real_time": 8.48528137423858e-09, + "cpu_time": 8.441336246629233e-08, + "real_time_other": 2.2405267593145244e-08, + "cpu_time_other": 2.5453661413660466e-08, + "time": 1.6404861082353634, + "cpu": -0.6984640740519662, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) -class TestReportDifferenceForPercentageAggregates( - unittest.TestCase): +class TestReportDifferenceForPercentageAggregates(unittest.TestCase): @classmethod def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test4_run0.json') - testOutput2 = os.path.join(testInputs, 'test4_run1.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test4_run0.json") + testOutput2 = os.path.join(testInputs, "test4_run1.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) + cls.json_diff_report = get_difference_report(json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0'] - ] + expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]] output_lines_with_header = print_difference_report( - self.json_diff_report, - utest=True, utest_alpha=0.05, use_color=False) + 
self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'whocares', - 'measurements': [ - {'time': -0.5, - 'cpu': 0.5, - 'real_time': 0.01, - 'real_time_other': 0.005, - 'cpu_time': 0.10, - 'cpu_time_other': 0.15} + "name": "whocares", + "measurements": [ + { + "time": -0.5, + "cpu": 0.5, + "real_time": 0.01, + "real_time_other": 0.005, + "cpu_time": 0.10, + "cpu_time_other": 0.15, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -1129,12 +1451,12 @@ class TestReportSorting(unittest.TestCase): def setUpClass(cls): def load_result(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test4_run.json') - with open(testOutput, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput = os.path.join(testInputs, "test4_run.json") + with open(testOutput, "r") as f: json = json.load(f) return json @@ -1155,45 +1477,47 @@ class TestReportSorting(unittest.TestCase): "91 family 1 instance 0 aggregate", "90 family 1 instance 1 repetition 0", "89 family 1 instance 1 repetition 1", - "88 family 1 instance 1 aggregate" + "88 family 1 instance 1 aggregate", ] - for n in range(len(self.json['benchmarks']) ** 2): - random.shuffle(self.json['benchmarks']) + for n in range(len(self.json["benchmarks"]) ** 2): + random.shuffle(self.json["benchmarks"]) sorted_benchmarks = util.sort_benchmark_results(self.json)[ - 'benchmarks'] + "benchmarks" + ] self.assertEqual(len(expected_names), len(sorted_benchmarks)) for out, expected in zip(sorted_benchmarks, expected_names): - self.assertEqual(out['name'], expected) + self.assertEqual(out["name"], expected) def assert_utest(unittest_instance, lhs, rhs): - if lhs['utest']: + if lhs["utest"]: unittest_instance.assertAlmostEqual( - lhs['utest']['cpu_pvalue'], - rhs['utest']['cpu_pvalue']) + lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"] + ) unittest_instance.assertAlmostEqual( - lhs['utest']['time_pvalue'], - rhs['utest']['time_pvalue']) + lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"] + ) unittest_instance.assertEqual( - lhs['utest']['have_optimal_repetitions'], - rhs['utest']['have_optimal_repetitions']) + lhs["utest"]["have_optimal_repetitions"], + rhs["utest"]["have_optimal_repetitions"], + ) else: # lhs is empty. assert if rhs is not. 
- unittest_instance.assertEqual(lhs['utest'], rhs['utest']) + unittest_instance.assertEqual(lhs["utest"], rhs["utest"]) def assert_measurements(unittest_instance, lhs, rhs): - for m1, m2 in zip(lhs['measurements'], rhs['measurements']): - unittest_instance.assertEqual(m1['real_time'], m2['real_time']) - unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) + for m1, m2 in zip(lhs["measurements"], rhs["measurements"]): + unittest_instance.assertEqual(m1["real_time"], m2["real_time"]) + unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"]) # m1['time'] and m1['cpu'] hold values which are being calculated, # and therefore we must use almost-equal pattern. - unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4) - unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4) + unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4) + unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 diff --git a/tools/gbench/util.py b/tools/gbench/util.py index 5e79da8f..84747d10 100644 --- a/tools/gbench/util.py +++ b/tools/gbench/util.py @@ -7,13 +7,12 @@ import subprocess import sys import tempfile - # Input file type enumeration IT_Invalid = 0 IT_JSON = 1 IT_Executable = 2 -_num_magic_bytes = 2 if sys.platform.startswith('win') else 4 +_num_magic_bytes = 2 if sys.platform.startswith("win") else 4 def is_executable_file(filename): @@ -24,21 +23,21 @@ def is_executable_file(filename): """ if not os.path.isfile(filename): return False - with open(filename, mode='rb') as f: + with open(filename, mode="rb") as f: magic_bytes = f.read(_num_magic_bytes) - if sys.platform == 'darwin': + if sys.platform == "darwin": return magic_bytes in [ - b'\xfe\xed\xfa\xce', # MH_MAGIC - b'\xce\xfa\xed\xfe', # MH_CIGAM - b'\xfe\xed\xfa\xcf', # MH_MAGIC_64 - b'\xcf\xfa\xed\xfe', # MH_CIGAM_64 - b'\xca\xfe\xba\xbe', # FAT_MAGIC - b'\xbe\xba\xfe\xca' # FAT_CIGAM + b"\xfe\xed\xfa\xce", # MH_MAGIC + b"\xce\xfa\xed\xfe", # MH_CIGAM + b"\xfe\xed\xfa\xcf", # MH_MAGIC_64 + b"\xcf\xfa\xed\xfe", # MH_CIGAM_64 + b"\xca\xfe\xba\xbe", # FAT_MAGIC + b"\xbe\xba\xfe\xca", # FAT_CIGAM ] - elif sys.platform.startswith('win'): - return magic_bytes == b'MZ' + elif sys.platform.startswith("win"): + return magic_bytes == b"MZ" else: - return magic_bytes == b'\x7FELF' + return magic_bytes == b"\x7FELF" def is_json_file(filename): @@ -47,7 +46,7 @@ def is_json_file(filename): 'False' otherwise. """ try: - with open(filename, 'r') as f: + with open(filename, "r") as f: json.load(f) return True except BaseException: @@ -72,7 +71,10 @@ def classify_input_file(filename): elif is_json_file(filename): ftype = IT_JSON else: - err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename + err_msg = ( + "'%s' does not name a valid benchmark executable or JSON file" + % filename + ) return ftype, err_msg @@ -95,11 +97,11 @@ def find_benchmark_flag(prefix, benchmark_flags): if it is found return the arg it specifies. If specified more than once the last value is returned. If the flag is not found None is returned. 
""" - assert prefix.startswith('--') and prefix.endswith('=') + assert prefix.startswith("--") and prefix.endswith("=") result = None for f in benchmark_flags: if f.startswith(prefix): - result = f[len(prefix):] + result = f[len(prefix) :] return result @@ -108,7 +110,7 @@ def remove_benchmark_flags(prefix, benchmark_flags): Return a new list containing the specified benchmark_flags except those with the specified prefix. """ - assert prefix.startswith('--') and prefix.endswith('=') + assert prefix.startswith("--") and prefix.endswith("=") return [f for f in benchmark_flags if not f.startswith(prefix)] @@ -124,36 +126,54 @@ def load_benchmark_results(fname, benchmark_filter): REQUIRES: 'fname' names a file containing JSON benchmark output. """ + def benchmark_wanted(benchmark): if benchmark_filter is None: return True - name = benchmark.get('run_name', None) or benchmark['name'] + name = benchmark.get("run_name", None) or benchmark["name"] if re.search(benchmark_filter, name): return True return False - with open(fname, 'r') as f: + with open(fname, "r") as f: results = json.load(f) - if 'benchmarks' in results: - results['benchmarks'] = list(filter(benchmark_wanted, - results['benchmarks'])) + if "benchmarks" in results: + results["benchmarks"] = list( + filter(benchmark_wanted, results["benchmarks"]) + ) return results def sort_benchmark_results(result): - benchmarks = result['benchmarks'] + benchmarks = result["benchmarks"] # From inner key to the outer key! benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1) + benchmarks, + key=lambda benchmark: benchmark["repetition_index"] + if "repetition_index" in benchmark + else -1, + ) benchmarks = sorted( - benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0) + benchmarks, + key=lambda benchmark: 1 + if "run_type" in benchmark and benchmark["run_type"] == "aggregate" + else 0, + ) benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1) + benchmarks, + key=lambda benchmark: benchmark["per_family_instance_index"] + if "per_family_instance_index" in benchmark + else -1, + ) benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1) + benchmarks, + key=lambda benchmark: benchmark["family_index"] + if "family_index" in benchmark + else -1, + ) - result['benchmarks'] = benchmarks + result["benchmarks"] = benchmarks return result @@ -164,21 +184,21 @@ def run_benchmark(exe_name, benchmark_flags): real time console output. 
RETURNS: A JSON object representing the benchmark output """ - output_name = find_benchmark_flag('--benchmark_out=', - benchmark_flags) + output_name = find_benchmark_flag("--benchmark_out=", benchmark_flags) is_temp_output = False if output_name is None: is_temp_output = True thandle, output_name = tempfile.mkstemp() os.close(thandle) - benchmark_flags = list(benchmark_flags) + \ - ['--benchmark_out=%s' % output_name] + benchmark_flags = list(benchmark_flags) + [ + "--benchmark_out=%s" % output_name + ] cmd = [exe_name] + benchmark_flags - print("RUNNING: %s" % ' '.join(cmd)) + print("RUNNING: %s" % " ".join(cmd)) exitCode = subprocess.call(cmd) if exitCode != 0: - print('TEST FAILED...') + print("TEST FAILED...") sys.exit(exitCode) json_res = load_benchmark_results(output_name, None) if is_temp_output: @@ -195,9 +215,10 @@ def run_or_load_benchmark(filename, benchmark_flags): """ ftype = check_input_file(filename) if ftype == IT_JSON: - benchmark_filter = find_benchmark_flag('--benchmark_filter=', - benchmark_flags) + benchmark_filter = find_benchmark_flag( + "--benchmark_filter=", benchmark_flags + ) return load_benchmark_results(filename, benchmark_filter) if ftype == IT_Executable: return run_benchmark(filename, benchmark_flags) - raise ValueError('Unknown file type %s' % ftype) + raise ValueError("Unknown file type %s" % ftype) diff --git a/tools/strip_asm.py b/tools/strip_asm.py index d131dc71..bc3a774a 100755 --- a/tools/strip_asm.py +++ b/tools/strip_asm.py @@ -4,48 +4,49 @@ strip_asm.py - Cleanup ASM output for the specified file """ -from argparse import ArgumentParser -import sys import os import re +import sys +from argparse import ArgumentParser + def find_used_labels(asm): found = set() - label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") - for l in asm.splitlines(): - m = label_re.match(l) + label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") + for line in asm.splitlines(): + m = label_re.match(line) if m: - found.add('.L%s' % m.group(1)) + found.add(".L%s" % m.group(1)) return found def normalize_labels(asm): decls = set() label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") - for l in asm.splitlines(): - m = label_decl.match(l) + for line in asm.splitlines(): + m = label_decl.match(line) if m: decls.add(m.group(0)) if len(decls) == 0: return asm - needs_dot = next(iter(decls))[0] != '.' + needs_dot = next(iter(decls))[0] != "." if not needs_dot: return asm for ld in decls: - asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm) + asm = re.sub(r"(^|\s+)" + ld + r"(?=:|\s)", "\\1." 
+ ld, asm) return asm def transform_labels(asm): asm = normalize_labels(asm) used_decls = find_used_labels(asm) - new_asm = '' - label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") - for l in asm.splitlines(): - m = label_decl.match(l) + new_asm = "" + label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") + for line in asm.splitlines(): + m = label_decl.match(line) if not m or m.group(0) in used_decls: - new_asm += l - new_asm += '\n' + new_asm += line + new_asm += "\n" return new_asm @@ -53,29 +54,34 @@ def is_identifier(tk): if len(tk) == 0: return False first = tk[0] - if not first.isalpha() and first != '_': + if not first.isalpha() and first != "_": return False for i in range(1, len(tk)): c = tk[i] - if not c.isalnum() and c != '_': + if not c.isalnum() and c != "_": return False return True -def process_identifiers(l): + +def process_identifiers(line): """ process_identifiers - process all identifiers and modify them to have consistent names across all platforms; specifically across ELF and MachO. For example, MachO inserts an additional understore at the beginning of names. This function removes that. """ - parts = re.split(r'([a-zA-Z0-9_]+)', l) - new_line = '' + parts = re.split(r"([a-zA-Z0-9_]+)", line) + new_line = "" for tk in parts: if is_identifier(tk): - if tk.startswith('__Z'): + if tk.startswith("__Z"): tk = tk[1:] - elif tk.startswith('_') and len(tk) > 1 and \ - tk[1].isalpha() and tk[1] != 'Z': + elif ( + tk.startswith("_") + and len(tk) > 1 + and tk[1].isalpha() + and tk[1] != "Z" + ): tk = tk[1:] new_line += tk return new_line @@ -85,65 +91,71 @@ def process_asm(asm): """ Strip the ASM of unwanted directives and lines """ - new_contents = '' + new_contents = "" asm = transform_labels(asm) # TODO: Add more things we want to remove discard_regexes = [ - re.compile("\s+\..*$"), # directive - re.compile("\s*#(NO_APP|APP)$"), #inline ASM - re.compile("\s*#.*$"), # comment line - re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive - re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"), - ] - keep_regexes = [ - + re.compile(r"\s+\..*$"), # directive + re.compile(r"\s*#(NO_APP|APP)$"), # inline ASM + re.compile(r"\s*#.*$"), # comment line + re.compile( + r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)" + ), # global directive + re.compile( + r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)" + ), ] + keep_regexes: list[re.Pattern] = [] fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:") - for l in asm.splitlines(): + for line in asm.splitlines(): # Remove Mach-O attribute - l = l.replace('@GOTPCREL', '') + line = line.replace("@GOTPCREL", "") add_line = True for reg in discard_regexes: - if reg.match(l) is not None: + if reg.match(line) is not None: add_line = False break for reg in keep_regexes: - if reg.match(l) is not None: + if reg.match(line) is not None: add_line = True break if add_line: - if fn_label_def.match(l) and len(new_contents) != 0: - new_contents += '\n' - l = process_identifiers(l) - new_contents += l - new_contents += '\n' + if fn_label_def.match(line) and len(new_contents) != 0: + new_contents += "\n" + line = process_identifiers(line) + new_contents += line + new_contents += "\n" return new_contents + def main(): - parser = ArgumentParser( - description='generate a stripped assembly file') + parser = ArgumentParser(description="generate a stripped assembly file") parser.add_argument( - 'input', metavar='input', type=str, nargs=1, - help='An input assembly 
file') + "input", + metavar="input", + type=str, + nargs=1, + help="An input assembly file", + ) parser.add_argument( - 'out', metavar='output', type=str, nargs=1, - help='The output file') + "out", metavar="output", type=str, nargs=1, help="The output file" + ) args, unknown_args = parser.parse_known_args() input = args.input[0] output = args.out[0] if not os.path.isfile(input): - print(("ERROR: input file '%s' does not exist") % input) + print("ERROR: input file '%s' does not exist" % input) sys.exit(1) - contents = None - with open(input, 'r') as f: + + with open(input, "r") as f: contents = f.read() new_contents = process_asm(contents) - with open(output, 'w') as f: + with open(output, "w") as f: f.write(new_contents) -if __name__ == '__main__': +if __name__ == "__main__": main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4