Add pre-commit config and GitHub Actions job (#1688)

* Add pre-commit config and GitHub Actions job

Contains the following hooks:
* buildifier - for formatting and linting Bazel files.
* mypy, ruff, isort, black - for Python type checking, static analysis,
  import hygiene, and formatting.

The pylint CI job was replaced with a pre-commit CI job, where pre-commit
is bootstrapped via pip (through the new `dev` extra).

Pylint is no longer part of the code checks, but can be re-added if
requested. It was dropped because it does not play nicely with pre-commit,
and much of its functionality is already covered by ruff (see the short
illustration after this message).

* Add dev extra to pyproject.toml for development installs

* Clarify that pre-commit contains only Python and Bazel hooks

* Add one-line docstrings to Bazel modules

* Apply buildifier pre-commit fixes to Bazel files

* Apply pre-commit fixes to Python files

* Supply --profile=black to isort to prevent conflicts with black

* Fix nanobind build file formatting

* Add tooling configs to `pyproject.toml`

In particular, set line length 80 for all Python files.

* Reformat all Python files to line length 80, fix return type annotations

Also ignores the `tools/compare.py` and `tools/gbench/report.py` files
for mypy, since they emit a barrage of errors that we can deal with
later. The errors are mostly related to dynamic classmethod definitions.
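
As a rough illustration of that overlap (hypothetical snippet, not taken
from the repository): the ruff configuration added to pyproject.toml below
selects the pycodestyle ("E", "W") and Pyflakes ("F") rule families, which
catch many of the problems pylint used to report, for example:

import os  # F401: `os` imported but unused


def classify(value, table):
    if value == None:  # E711: comparison to None should use `is None`
        unused = "missing"  # F841: local variable assigned but never used
    return table[value]
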
Authored by Nicholas Junge on 2023-10-30 16:35:37 +01:00, committed by GitHub.
parent b219e18b91
commit b93f5a5929
22 changed files with 1621 additions and 1073 deletions

.github/workflows/pre-commit.yml (new file)

@ -0,0 +1,39 @@
name: python + Bazel pre-commit checks
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
pre-commit:
runs-on: ubuntu-latest
env:
MYPY_CACHE_DIR: "${{ github.workspace }}/.cache/mypy"
RUFF_CACHE_DIR: "${{ github.workspace }}/.cache/ruff"
PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pre-commit"
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.11
cache: 'pip'
cache-dependency-path: pyproject.toml
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[dev]"
- name: Cache pre-commit tools
uses: actions/cache@v3
with:
path: |
${{ env.MYPY_CACHE_DIR }}
${{ env.RUFF_CACHE_DIR }}
${{ env.PRE_COMMIT_HOME }}
key: ${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}-linter-cache
- name: Run pre-commit checks
run: |
pre-commit run --all-files --verbose --show-diff-on-failure


@ -1,28 +0,0 @@
name: pylint
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
pylint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint pylint-exit conan
- name: Run pylint
run: |
pylint `find . -name '*.py'|xargs` || pylint-exit $?

.pre-commit-config.yaml (new file)

@ -0,0 +1,26 @@
repos:
- repo: https://github.com/keith/pre-commit-buildifier
rev: 6.3.3.1
hooks:
- id: buildifier
- id: buildifier-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.6.1
hooks:
- id: mypy
types_or: [ python, pyi ]
args: [ "--ignore-missing-imports", "--scripts-are-modules" ]
- repo: https://github.com/psf/black
rev: 23.10.1
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
args: [--profile, black]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.3
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]


@ -1,25 +1,30 @@
import os
import ycm_core
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
flags = [
'-Wall',
'-Werror',
'-pedantic-errors',
'-std=c++0x',
'-fno-strict-aliasing',
'-O3',
'-DNDEBUG',
"-Wall",
"-Werror",
"-pedantic-errors",
"-std=c++0x",
"-fno-strict-aliasing",
"-O3",
"-DNDEBUG",
# ...and the same thing goes for the magic -x option which specifies the
# language that the files to be compiled are written in. This is mostly
# relevant for c++ headers.
# For a C project, you would set this to 'c' instead of 'c++'.
'-x', 'c++',
'-I', 'include',
'-isystem', '/usr/include',
'-isystem', '/usr/local/include',
"-x",
"c++",
"-I",
"include",
"-isystem",
"/usr/include",
"-isystem",
"/usr/local/include",
]
@ -29,14 +34,15 @@ flags = [
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags. Notice that YCM itself uses that approach.
compilation_database_folder = ''
compilation_database_folder = ""
if os.path.exists(compilation_database_folder):
database = ycm_core.CompilationDatabase(compilation_database_folder)
else:
database = None
SOURCE_EXTENSIONS = [ '.cc' ]
SOURCE_EXTENSIONS = [".cc"]
def DirectoryOfThisScript():
return os.path.dirname(os.path.abspath(__file__))
@ -47,13 +53,13 @@ def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
return list(flags)
new_flags = []
make_next_absolute = False
path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
path_flags = ["-isystem", "-I", "-iquote", "--sysroot="]
for flag in flags:
new_flag = flag
if make_next_absolute:
make_next_absolute = False
if not flag.startswith( '/' ):
if not flag.startswith("/"):
new_flag = os.path.join(working_directory, flag)
for path_flag in path_flags:
@ -73,7 +79,7 @@ def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
def IsHeaderFile(filename):
extension = os.path.splitext(filename)[1]
return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
return extension in [".h", ".hxx", ".hpp", ".hh"]
def GetCompilationInfoForFile(filename):
@ -87,7 +93,8 @@ def GetCompilationInfoForFile( filename ):
replacement_file = basename + extension
if os.path.exists(replacement_file):
compilation_info = database.GetCompilationInfoForFile(
replacement_file )
replacement_file
)
if compilation_info.compiler_flags_:
return compilation_info
return None
@ -104,12 +111,10 @@ def FlagsForFile( filename, **kwargs ):
final_flags = MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_ )
compilation_info.compiler_working_dir_,
)
else:
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to)
return {
'flags': final_flags,
'do_cache': True
}
return {"flags": final_flags, "do_cache": True}


@ -45,28 +45,28 @@ cc_library(
"include/benchmark/benchmark.h",
"include/benchmark/export.h",
],
linkopts = select({
":windows": ["-DEFAULTLIB:shlwapi.lib"],
"//conditions:default": ["-pthread"],
}),
copts = select({
":windows": [],
"//conditions:default": ["-Werror=old-style-cast"],
}),
strip_include_prefix = "include",
visibility = ["//visibility:public"],
# Only static linking is allowed; no .so will be produced.
# Using `defines` (i.e. not `local_defines`) means that no
# dependent rules need to bother about defining the macro.
linkstatic = True,
defines = [
"BENCHMARK_STATIC_DEFINE",
] + select({
":perfcounters": ["HAVE_LIBPFM"],
"//conditions:default": [],
}),
linkopts = select({
":windows": ["-DEFAULTLIB:shlwapi.lib"],
"//conditions:default": ["-pthread"],
}),
# Only static linking is allowed; no .so will be produced.
# Using `defines` (i.e. not `local_defines`) means that no
# dependent rules need to bother about defining the macro.
linkstatic = True,
strip_include_prefix = "include",
visibility = ["//visibility:public"],
deps = select({
":perfcounters": ["@libpfm//:libpfm"],
":perfcounters": ["@libpfm"],
"//conditions:default": [],
}),
)
@ -74,7 +74,10 @@ cc_library(
cc_library(
name = "benchmark_main",
srcs = ["src/benchmark_main.cc"],
hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"],
hdrs = [
"include/benchmark/benchmark.h",
"include/benchmark/export.h",
],
strip_include_prefix = "include",
visibility = ["//visibility:public"],
deps = [":benchmark"],


@ -1,11 +1,16 @@
module(name = "google_benchmark", version="1.8.3")
module(
name = "google_benchmark",
version = "1.8.3",
)
bazel_dep(name = "bazel_skylib", version = "1.4.2")
bazel_dep(name = "platforms", version = "0.0.6")
bazel_dep(name = "rules_foreign_cc", version = "0.9.0")
bazel_dep(name = "rules_cc", version = "0.0.6")
bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.12.1", dev_dependency = True, repo_name = "com_google_googletest")
bazel_dep(name = "libpfm", version = "4.11.0")
# Register a toolchain for Python 3.9 to be able to build numpy. Python
@ -20,5 +25,6 @@ pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_depen
pip.parse(
hub_name = "tools_pip_deps",
python_version = "3.9",
requirements_lock="//tools:requirements.txt")
requirements_lock = "//tools:requirements.txt",
)
use_repo(pip, "tools_pip_deps")


@ -1,5 +1,9 @@
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
"""
This file contains the Bazel build dependencies for Google Benchmark (both C++ source and Python bindings).
"""
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
def benchmark_deps():
"""Loads dependencies required to build Google Benchmark."""


@ -1,3 +1,3 @@
exports_files(glob(["*.BUILD"]))
exports_files(["build_defs.bzl"])
exports_files(["build_defs.bzl"])


@ -1,3 +1,7 @@
"""
This file contains some build definitions for C++ extensions used in the Google Benchmark Python bindings.
"""
_SHARED_LIB_SUFFIX = {
"//conditions:default": ".so",
"//:windows": ".dll",


@ -37,4 +37,3 @@ py_test(
":google_benchmark",
],
)


@ -32,23 +32,22 @@ from absl import app
from google_benchmark import _benchmark
from google_benchmark._benchmark import (
Counter,
kNanosecond,
State,
kMicrosecond,
kMillisecond,
kNanosecond,
kSecond,
oNone,
o1,
oN,
oNSquared,
oNCubed,
oLogN,
oNLogN,
oAuto,
oLambda,
State,
oLogN,
oN,
oNCubed,
oNLogN,
oNone,
oNSquared,
)
__all__ = [
"register",
"main",
@ -97,7 +96,6 @@ class __OptionMaker:
# The function that get returned on @option.range(start=0, limit=1<<5).
def __builder_method(*args, **kwargs):
# The decorator that get called, either with the benchmared function
# or the previous Options
def __decorator(func_or_options):


@ -38,6 +38,7 @@ def sum_million(state):
while state:
sum(range(1_000_000))
@benchmark.register
def pause_timing(state):
"""Pause timing every iteration."""
@ -85,7 +86,9 @@ def custom_counters(state):
# Set a counter as a rate.
state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate)
# Set a counter as an inverse of rate.
state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert)
state.counters["foo_inv_rate"] = Counter(
num_foo, Counter.kIsRate | Counter.kInvert
)
# Set a counter as a thread-average quantity.
state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads)
# There's also a combined flag:


@ -1,9 +1,9 @@
load("@bazel_skylib//lib:selects.bzl", "selects")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
load("@bazel_skylib//lib:selects.bzl", "selects")
config_setting(
name = "msvc_compiler",
flag_values = {"@bazel_tools//tools/cpp:compiler": "msvc-cl"},


@ -33,6 +33,11 @@ dependencies = [
"absl-py>=0.7.1",
]
[project.optional-dependencies]
dev = [
"pre-commit>=3.3.3",
]
[project.urls]
Homepage = "https://github.com/google/benchmark"
Documentation = "https://github.com/google/benchmark/tree/main/docs"
@ -49,3 +54,37 @@ where = ["bindings/python"]
[tool.setuptools.dynamic]
version = { attr = "google_benchmark.__version__" }
readme = { file = "README.md", content-type = "text/markdown" }
[tool.black]
# Source https://github.com/psf/black#configuration-format
include = "\\.pyi?$"
line-length = 80
target-version = ["py311"]
# Black-compatible settings for isort
# See https://black.readthedocs.io/en/stable/
[tool.isort]
line_length = "80"
profile = "black"
[tool.mypy]
check_untyped_defs = true
disallow_incomplete_defs = true
pretty = true
python_version = "3.11"
strict_optional = false
warn_unreachable = true
[[tool.mypy.overrides]]
module = ["yaml"]
ignore_missing_imports = true
[tool.ruff]
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F", "W"]
ignore = [
# whitespace before colon (:), rely on black for formatting (in particular, allow spaces before ":" in list/array slices)
"E203",
# line too long, rely on black for reformatting of these, since sometimes URLs or comments can be longer
"E501",
]
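
As a sketch of what the black and isort settings above mean in practice
(hypothetical function and file names): isort's black profile keeps import
sorting compatible with black's output, and black wraps any statement that
would exceed 80 columns into a parenthesized block, e.g.:

def run_comparison(baseline, contender, benchmark_filter, utest_alpha):
    """Hypothetical helper, only to illustrate wrapping at line length 80."""
    return (baseline, contender, benchmark_filter, utest_alpha)


# A call longer than 80 columns is reformatted by black into:
result = run_comparison(
    "test1_run1.json", "test1_run2.json", "BM_StringCreation", utest_alpha=0.05
)
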


@ -4,11 +4,11 @@ import platform
import shutil
import sysconfig
from pathlib import Path
from typing import Generator
import setuptools
from setuptools.command import build_ext
PYTHON_INCLUDE_PATH_PLACEHOLDER = "<PYTHON_INCLUDE_PATH>"
IS_WINDOWS = platform.system() == "Windows"
@ -16,14 +16,14 @@ IS_MAC = platform.system() == "Darwin"
@contextlib.contextmanager
def temp_fill_include_path(fp: str):
def temp_fill_include_path(fp: str) -> Generator[None, None, None]:
"""Temporarily set the Python include path in a file."""
with open(fp, "r+") as f:
try:
content = f.read()
replaced = content.replace(
PYTHON_INCLUDE_PATH_PLACEHOLDER,
Path(sysconfig.get_paths()['include']).as_posix(),
Path(sysconfig.get_paths()["include"]).as_posix(),
)
f.seek(0)
f.write(replaced)
@ -57,7 +57,7 @@ class BuildBazelExtension(build_ext.build_ext):
# explicitly call `bazel shutdown` for graceful exit
self.spawn(["bazel", "shutdown"])
def bazel_build(self, ext: BazelExtension):
def bazel_build(self, ext: BazelExtension) -> None:
"""Runs the bazel build to create the package."""
with temp_fill_include_path("WORKSPACE"):
temp_path = Path(self.build_temp)
@ -93,9 +93,11 @@ class BuildBazelExtension(build_ext.build_ext):
self.spawn(bazel_argv)
shared_lib_suffix = '.dll' if IS_WINDOWS else '.so'
shared_lib_suffix = ".dll" if IS_WINDOWS else ".so"
ext_name = ext.target_name + shared_lib_suffix
ext_bazel_bin_path = temp_path / 'bazel-bin' / ext.relpath / ext_name
ext_bazel_bin_path = (
temp_path / "bazel-bin" / ext.relpath / ext_name
)
ext_dest_path = Path(self.get_ext_fullpath(ext.name))
shutil.copyfile(ext_bazel_bin_path, ext_dest_path)


@ -1,17 +1,20 @@
#!/usr/bin/env python3
import unittest
# type: ignore
"""
compare.py - versatile benchmark output compare tool
"""
import argparse
from argparse import ArgumentParser
import json
import sys
import os
import sys
import unittest
from argparse import ArgumentParser
import gbench
from gbench import util, report
from gbench import report, util
def check_inputs(in1, in2, flags):
@ -20,163 +23,203 @@ def check_inputs(in1, in2, flags):
"""
in1_kind, in1_err = util.classify_input_file(in1)
in2_kind, in2_err = util.classify_input_file(in2)
output_file = util.find_benchmark_flag('--benchmark_out=', flags)
output_type = util.find_benchmark_flag('--benchmark_out_format=', flags)
if in1_kind == util.IT_Executable and in2_kind == util.IT_Executable and output_file:
print(("WARNING: '--benchmark_out=%s' will be passed to both "
"benchmarks causing it to be overwritten") % output_file)
output_file = util.find_benchmark_flag("--benchmark_out=", flags)
output_type = util.find_benchmark_flag("--benchmark_out_format=", flags)
if (
in1_kind == util.IT_Executable
and in2_kind == util.IT_Executable
and output_file
):
print(
(
"WARNING: '--benchmark_out=%s' will be passed to both "
"benchmarks causing it to be overwritten"
)
% output_file
)
if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON:
# When both sides are JSON the only supported flag is
# --benchmark_filter=
for flag in util.remove_benchmark_flags('--benchmark_filter=', flags):
print("WARNING: passing %s has no effect since both "
"inputs are JSON" % flag)
if output_type is not None and output_type != 'json':
print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
" is not supported.") % output_type)
for flag in util.remove_benchmark_flags("--benchmark_filter=", flags):
print(
"WARNING: passing %s has no effect since both "
"inputs are JSON" % flag
)
if output_type is not None and output_type != "json":
print(
(
"ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
" is not supported."
)
% output_type
)
sys.exit(1)
def create_parser():
parser = ArgumentParser(
description='versatile benchmark output compare tool')
description="versatile benchmark output compare tool"
)
parser.add_argument(
'-a',
'--display_aggregates_only',
dest='display_aggregates_only',
"-a",
"--display_aggregates_only",
dest="display_aggregates_only",
action="store_true",
help="If there are repetitions, by default, we display everything - the"
" actual runs, and the aggregates computed. Sometimes, it is "
"desirable to only view the aggregates. E.g. when there are a lot "
"of repetitions. Do note that only the display is affected. "
"Internally, all the actual runs are still used, e.g. for U test.")
parser.add_argument(
'--no-color',
dest='color',
default=True,
action="store_false",
help="Do not use colors in the terminal output"
"Internally, all the actual runs are still used, e.g. for U test.",
)
parser.add_argument(
'-d',
'--dump_to_json',
dest='dump_to_json',
help="Additionally, dump benchmark comparison output to this file in JSON format.")
"--no-color",
dest="color",
default=True,
action="store_false",
help="Do not use colors in the terminal output",
)
parser.add_argument(
"-d",
"--dump_to_json",
dest="dump_to_json",
help="Additionally, dump benchmark comparison output to this file in JSON format.",
)
utest = parser.add_argument_group()
utest.add_argument(
'--no-utest',
dest='utest',
"--no-utest",
dest="utest",
default=True,
action="store_false",
help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS))
help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(
report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS
),
)
alpha_default = 0.05
utest.add_argument(
"--alpha",
dest='utest_alpha',
dest="utest_alpha",
default=alpha_default,
type=float,
help=("significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") %
alpha_default)
help=(
"significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)"
)
% alpha_default,
)
subparsers = parser.add_subparsers(
help='This tool has multiple modes of operation:',
dest='mode')
help="This tool has multiple modes of operation:", dest="mode"
)
parser_a = subparsers.add_parser(
'benchmarks',
help='The most simple use-case, compare all the output of these two benchmarks')
baseline = parser_a.add_argument_group(
'baseline', 'The benchmark baseline')
"benchmarks",
help="The most simple use-case, compare all the output of these two benchmarks",
)
baseline = parser_a.add_argument_group("baseline", "The benchmark baseline")
baseline.add_argument(
'test_baseline',
metavar='test_baseline',
type=argparse.FileType('r'),
"test_baseline",
metavar="test_baseline",
type=argparse.FileType("r"),
nargs=1,
help='A benchmark executable or JSON output file')
help="A benchmark executable or JSON output file",
)
contender = parser_a.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
"contender", "The benchmark that will be compared against the baseline"
)
contender.add_argument(
'test_contender',
metavar='test_contender',
type=argparse.FileType('r'),
"test_contender",
metavar="test_contender",
type=argparse.FileType("r"),
nargs=1,
help='A benchmark executable or JSON output file')
help="A benchmark executable or JSON output file",
)
parser_a.add_argument(
'benchmark_options',
metavar='benchmark_options',
"benchmark_options",
metavar="benchmark_options",
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
help="Arguments to pass when running benchmark executables",
)
parser_b = subparsers.add_parser(
'filters', help='Compare filter one with the filter two of benchmark')
baseline = parser_b.add_argument_group(
'baseline', 'The benchmark baseline')
"filters", help="Compare filter one with the filter two of benchmark"
)
baseline = parser_b.add_argument_group("baseline", "The benchmark baseline")
baseline.add_argument(
'test',
metavar='test',
type=argparse.FileType('r'),
"test",
metavar="test",
type=argparse.FileType("r"),
nargs=1,
help='A benchmark executable or JSON output file')
help="A benchmark executable or JSON output file",
)
baseline.add_argument(
'filter_baseline',
metavar='filter_baseline',
"filter_baseline",
metavar="filter_baseline",
type=str,
nargs=1,
help='The first filter, that will be used as baseline')
help="The first filter, that will be used as baseline",
)
contender = parser_b.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
"contender", "The benchmark that will be compared against the baseline"
)
contender.add_argument(
'filter_contender',
metavar='filter_contender',
"filter_contender",
metavar="filter_contender",
type=str,
nargs=1,
help='The second filter, that will be compared against the baseline')
help="The second filter, that will be compared against the baseline",
)
parser_b.add_argument(
'benchmark_options',
metavar='benchmark_options',
"benchmark_options",
metavar="benchmark_options",
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
help="Arguments to pass when running benchmark executables",
)
parser_c = subparsers.add_parser(
'benchmarksfiltered',
help='Compare filter one of first benchmark with filter two of the second benchmark')
baseline = parser_c.add_argument_group(
'baseline', 'The benchmark baseline')
"benchmarksfiltered",
help="Compare filter one of first benchmark with filter two of the second benchmark",
)
baseline = parser_c.add_argument_group("baseline", "The benchmark baseline")
baseline.add_argument(
'test_baseline',
metavar='test_baseline',
type=argparse.FileType('r'),
"test_baseline",
metavar="test_baseline",
type=argparse.FileType("r"),
nargs=1,
help='A benchmark executable or JSON output file')
help="A benchmark executable or JSON output file",
)
baseline.add_argument(
'filter_baseline',
metavar='filter_baseline',
"filter_baseline",
metavar="filter_baseline",
type=str,
nargs=1,
help='The first filter, that will be used as baseline')
help="The first filter, that will be used as baseline",
)
contender = parser_c.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
"contender", "The benchmark that will be compared against the baseline"
)
contender.add_argument(
'test_contender',
metavar='test_contender',
type=argparse.FileType('r'),
"test_contender",
metavar="test_contender",
type=argparse.FileType("r"),
nargs=1,
help='The second benchmark executable or JSON output file, that will be compared against the baseline')
help="The second benchmark executable or JSON output file, that will be compared against the baseline",
)
contender.add_argument(
'filter_contender',
metavar='filter_contender',
"filter_contender",
metavar="filter_contender",
type=str,
nargs=1,
help='The second filter, that will be compared against the baseline')
help="The second filter, that will be compared against the baseline",
)
parser_c.add_argument(
'benchmark_options',
metavar='benchmark_options',
"benchmark_options",
metavar="benchmark_options",
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
help="Arguments to pass when running benchmark executables",
)
return parser
@ -191,16 +234,16 @@ def main():
assert not unknown_args
benchmark_options = args.benchmark_options
if args.mode == 'benchmarks':
if args.mode == "benchmarks":
test_baseline = args.test_baseline[0].name
test_contender = args.test_contender[0].name
filter_baseline = ''
filter_contender = ''
filter_baseline = ""
filter_contender = ""
# NOTE: if test_baseline == test_contender, you are analyzing the stdev
description = 'Comparing %s to %s' % (test_baseline, test_contender)
elif args.mode == 'filters':
description = "Comparing %s to %s" % (test_baseline, test_contender)
elif args.mode == "filters":
test_baseline = args.test[0].name
test_contender = args.test[0].name
filter_baseline = args.filter_baseline[0]
@ -209,9 +252,12 @@ def main():
# NOTE: if filter_baseline == filter_contender, you are analyzing the
# stdev
description = 'Comparing %s to %s (from %s)' % (
filter_baseline, filter_contender, args.test[0].name)
elif args.mode == 'benchmarksfiltered':
description = "Comparing %s to %s (from %s)" % (
filter_baseline,
filter_contender,
args.test[0].name,
)
elif args.mode == "benchmarksfiltered":
test_baseline = args.test_baseline[0].name
test_contender = args.test_contender[0].name
filter_baseline = args.filter_baseline[0]
@ -220,8 +266,12 @@ def main():
# NOTE: if test_baseline == test_contender and
# filter_baseline == filter_contender, you are analyzing the stdev
description = 'Comparing %s (from %s) to %s (from %s)' % (
filter_baseline, test_baseline, filter_contender, test_contender)
description = "Comparing %s (from %s) to %s (from %s)" % (
filter_baseline,
test_baseline,
filter_contender,
test_contender,
)
else:
# should never happen
print("Unrecognized mode of operation: '%s'" % args.mode)
@ -231,199 +281,240 @@ def main():
check_inputs(test_baseline, test_contender, benchmark_options)
if args.display_aggregates_only:
benchmark_options += ['--benchmark_display_aggregates_only=true']
benchmark_options += ["--benchmark_display_aggregates_only=true"]
options_baseline = []
options_contender = []
if filter_baseline and filter_contender:
options_baseline = ['--benchmark_filter=%s' % filter_baseline]
options_contender = ['--benchmark_filter=%s' % filter_contender]
options_baseline = ["--benchmark_filter=%s" % filter_baseline]
options_contender = ["--benchmark_filter=%s" % filter_contender]
# Run the benchmarks and report the results
json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
test_baseline, benchmark_options + options_baseline))
json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
test_contender, benchmark_options + options_contender))
json1 = json1_orig = gbench.util.sort_benchmark_results(
gbench.util.run_or_load_benchmark(
test_baseline, benchmark_options + options_baseline
)
)
json2 = json2_orig = gbench.util.sort_benchmark_results(
gbench.util.run_or_load_benchmark(
test_contender, benchmark_options + options_contender
)
)
# Now, filter the benchmarks so that the difference report can work
if filter_baseline and filter_contender:
replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
replacement = "[%s vs. %s]" % (filter_baseline, filter_contender)
json1 = gbench.report.filter_benchmark(
json1_orig, filter_baseline, replacement)
json1_orig, filter_baseline, replacement
)
json2 = gbench.report.filter_benchmark(
json2_orig, filter_contender, replacement)
json2_orig, filter_contender, replacement
)
diff_report = gbench.report.get_difference_report(
json1, json2, args.utest)
diff_report = gbench.report.get_difference_report(json1, json2, args.utest)
output_lines = gbench.report.print_difference_report(
diff_report,
args.display_aggregates_only,
args.utest, args.utest_alpha, args.color)
args.utest,
args.utest_alpha,
args.color,
)
print(description)
for ln in output_lines:
print(ln)
# Optionally, diff and output to JSON
if args.dump_to_json is not None:
with open(args.dump_to_json, 'w') as f_json:
with open(args.dump_to_json, "w") as f_json:
json.dump(diff_report, f_json)
class TestParser(unittest.TestCase):
def setUp(self):
self.parser = create_parser()
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'gbench',
'Inputs')
self.testInput0 = os.path.join(testInputs, 'test1_run1.json')
self.testInput1 = os.path.join(testInputs, 'test1_run2.json')
os.path.dirname(os.path.realpath(__file__)), "gbench", "Inputs"
)
self.testInput0 = os.path.join(testInputs, "test1_run1.json")
self.testInput1 = os.path.join(testInputs, "test1_run2.json")
def test_benchmarks_basic(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1])
["benchmarks", self.testInput0, self.testInput1]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.mode, "benchmarks")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_without_utest(self):
parsed = self.parser.parse_args(
['--no-utest', 'benchmarks', self.testInput0, self.testInput1])
["--no-utest", "benchmarks", self.testInput0, self.testInput1]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertFalse(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.05)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.mode, "benchmarks")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_display_aggregates_only(self):
parsed = self.parser.parse_args(
['-a', 'benchmarks', self.testInput0, self.testInput1])
["-a", "benchmarks", self.testInput0, self.testInput1]
)
self.assertTrue(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.mode, "benchmarks")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_with_utest_alpha(self):
parsed = self.parser.parse_args(
['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
["--alpha=0.314", "benchmarks", self.testInput0, self.testInput1]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.314)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.mode, "benchmarks")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_without_utest_with_utest_alpha(self):
parsed = self.parser.parse_args(
['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
[
"--no-utest",
"--alpha=0.314",
"benchmarks",
self.testInput0,
self.testInput1,
]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertFalse(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.314)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.mode, "benchmarks")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_with_remainder(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1, 'd'])
["benchmarks", self.testInput0, self.testInput1, "d"]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.mode, "benchmarks")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.benchmark_options, ['d'])
self.assertEqual(parsed.benchmark_options, ["d"])
def test_benchmarks_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1, '--', 'e'])
["benchmarks", self.testInput0, self.testInput1, "--", "e"]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.mode, "benchmarks")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.benchmark_options, ['e'])
self.assertEqual(parsed.benchmark_options, ["e"])
def test_filters_basic(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd'])
parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d"])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.mode, "filters")
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertEqual(parsed.filter_baseline[0], "c")
self.assertEqual(parsed.filter_contender[0], "d")
self.assertFalse(parsed.benchmark_options)
def test_filters_with_remainder(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd', 'e'])
["filters", self.testInput0, "c", "d", "e"]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.mode, "filters")
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertEqual(parsed.benchmark_options, ['e'])
self.assertEqual(parsed.filter_baseline[0], "c")
self.assertEqual(parsed.filter_contender[0], "d")
self.assertEqual(parsed.benchmark_options, ["e"])
def test_filters_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd', '--', 'f'])
["filters", self.testInput0, "c", "d", "--", "f"]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.mode, "filters")
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertEqual(parsed.benchmark_options, ['f'])
self.assertEqual(parsed.filter_baseline[0], "c")
self.assertEqual(parsed.filter_contender[0], "d")
self.assertEqual(parsed.benchmark_options, ["f"])
def test_benchmarksfiltered_basic(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e'])
["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e"]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.mode, "benchmarksfiltered")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_baseline[0], "c")
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertEqual(parsed.filter_contender[0], "e")
self.assertFalse(parsed.benchmark_options)
def test_benchmarksfiltered_with_remainder(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f'])
[
"benchmarksfiltered",
self.testInput0,
"c",
self.testInput1,
"e",
"f",
]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.mode, "benchmarksfiltered")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_baseline[0], "c")
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertEqual(parsed.benchmark_options[0], 'f')
self.assertEqual(parsed.filter_contender[0], "e")
self.assertEqual(parsed.benchmark_options[0], "f")
def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g'])
[
"benchmarksfiltered",
self.testInput0,
"c",
self.testInput1,
"e",
"--",
"g",
]
)
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.mode, "benchmarksfiltered")
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_baseline[0], "c")
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertEqual(parsed.benchmark_options[0], 'g')
self.assertEqual(parsed.filter_contender[0], "e")
self.assertEqual(parsed.benchmark_options[0], "g")
if __name__ == '__main__':
if __name__ == "__main__":
# unittest.main()
main()


@ -1,8 +1,8 @@
"""Google Benchmark tooling"""
__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__author__ = "Eric Fiselier"
__email__ = "eric@efcs.ca"
__versioninfo__ = (0, 5, 0)
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
__version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
__all__ = []
__all__ = [] # type: ignore

File diff suppressed because it is too large.


@ -7,13 +7,12 @@ import subprocess
import sys
import tempfile
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
_num_magic_bytes = 2 if sys.platform.startswith("win") else 4
def is_executable_file(filename):
@ -24,21 +23,21 @@ def is_executable_file(filename):
"""
if not os.path.isfile(filename):
return False
with open(filename, mode='rb') as f:
with open(filename, mode="rb") as f:
magic_bytes = f.read(_num_magic_bytes)
if sys.platform == 'darwin':
if sys.platform == "darwin":
return magic_bytes in [
b'\xfe\xed\xfa\xce', # MH_MAGIC
b'\xce\xfa\xed\xfe', # MH_CIGAM
b'\xfe\xed\xfa\xcf', # MH_MAGIC_64
b'\xcf\xfa\xed\xfe', # MH_CIGAM_64
b'\xca\xfe\xba\xbe', # FAT_MAGIC
b'\xbe\xba\xfe\xca' # FAT_CIGAM
b"\xfe\xed\xfa\xce", # MH_MAGIC
b"\xce\xfa\xed\xfe", # MH_CIGAM
b"\xfe\xed\xfa\xcf", # MH_MAGIC_64
b"\xcf\xfa\xed\xfe", # MH_CIGAM_64
b"\xca\xfe\xba\xbe", # FAT_MAGIC
b"\xbe\xba\xfe\xca", # FAT_CIGAM
]
elif sys.platform.startswith('win'):
return magic_bytes == b'MZ'
elif sys.platform.startswith("win"):
return magic_bytes == b"MZ"
else:
return magic_bytes == b'\x7FELF'
return magic_bytes == b"\x7FELF"
def is_json_file(filename):
@ -47,7 +46,7 @@ def is_json_file(filename):
'False' otherwise.
"""
try:
with open(filename, 'r') as f:
with open(filename, "r") as f:
json.load(f)
return True
except BaseException:
@ -72,7 +71,10 @@ def classify_input_file(filename):
elif is_json_file(filename):
ftype = IT_JSON
else:
err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
err_msg = (
"'%s' does not name a valid benchmark executable or JSON file"
% filename
)
return ftype, err_msg
@ -95,7 +97,7 @@ def find_benchmark_flag(prefix, benchmark_flags):
if it is found return the arg it specifies. If specified more than once the
last value is returned. If the flag is not found None is returned.
"""
assert prefix.startswith('--') and prefix.endswith('=')
assert prefix.startswith("--") and prefix.endswith("=")
result = None
for f in benchmark_flags:
if f.startswith(prefix):
@ -108,7 +110,7 @@ def remove_benchmark_flags(prefix, benchmark_flags):
Return a new list containing the specified benchmark_flags except those
with the specified prefix.
"""
assert prefix.startswith('--') and prefix.endswith('=')
assert prefix.startswith("--") and prefix.endswith("=")
return [f for f in benchmark_flags if not f.startswith(prefix)]
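
For illustration only (assuming the gbench package is importable, as in
compare.py), the two flag helpers above behave roughly like this according
to their docstrings:

from gbench import util

flags = [
    "--benchmark_filter=BM_Foo",
    "--benchmark_out=run1.json",
    "--benchmark_out=run2.json",
]

# The last occurrence wins; None is returned when the flag is absent.
util.find_benchmark_flag("--benchmark_out=", flags)  # -> "run2.json"
util.find_benchmark_flag("--benchmark_repetitions=", flags)  # -> None

# Drop every flag with the given prefix.
util.remove_benchmark_flags("--benchmark_out=", flags)
# -> ["--benchmark_filter=BM_Foo"]
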
@ -124,36 +126,54 @@ def load_benchmark_results(fname, benchmark_filter):
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
def benchmark_wanted(benchmark):
if benchmark_filter is None:
return True
name = benchmark.get('run_name', None) or benchmark['name']
name = benchmark.get("run_name", None) or benchmark["name"]
if re.search(benchmark_filter, name):
return True
return False
with open(fname, 'r') as f:
with open(fname, "r") as f:
results = json.load(f)
if 'benchmarks' in results:
results['benchmarks'] = list(filter(benchmark_wanted,
results['benchmarks']))
if "benchmarks" in results:
results["benchmarks"] = list(
filter(benchmark_wanted, results["benchmarks"])
)
return results
def sort_benchmark_results(result):
benchmarks = result['benchmarks']
benchmarks = result["benchmarks"]
# From inner key to the outer key!
benchmarks = sorted(
benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1)
benchmarks,
key=lambda benchmark: benchmark["repetition_index"]
if "repetition_index" in benchmark
else -1,
)
benchmarks = sorted(
benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0)
benchmarks,
key=lambda benchmark: 1
if "run_type" in benchmark and benchmark["run_type"] == "aggregate"
else 0,
)
benchmarks = sorted(
benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1)
benchmarks,
key=lambda benchmark: benchmark["per_family_instance_index"]
if "per_family_instance_index" in benchmark
else -1,
)
benchmarks = sorted(
benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1)
benchmarks,
key=lambda benchmark: benchmark["family_index"]
if "family_index" in benchmark
else -1,
)
result['benchmarks'] = benchmarks
result["benchmarks"] = benchmarks
return result
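
The chained sorts above rely on Python's stable sort, ordering by the
innermost key (repetition index) first and the outermost key (family index)
last. A single-pass equivalent, shown here only as a sketch, would use one
composite key:

def benchmark_sort_key(benchmark):
    # Outermost criterion first, mirroring the chained stable sorts above.
    return (
        benchmark.get("family_index", -1),
        benchmark.get("per_family_instance_index", -1),
        1 if benchmark.get("run_type") == "aggregate" else 0,
        benchmark.get("repetition_index", -1),
    )


# result["benchmarks"].sort(key=benchmark_sort_key)
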
@ -164,21 +184,21 @@ def run_benchmark(exe_name, benchmark_flags):
real time console output.
RETURNS: A JSON object representing the benchmark output
"""
output_name = find_benchmark_flag('--benchmark_out=',
benchmark_flags)
output_name = find_benchmark_flag("--benchmark_out=", benchmark_flags)
is_temp_output = False
if output_name is None:
is_temp_output = True
thandle, output_name = tempfile.mkstemp()
os.close(thandle)
benchmark_flags = list(benchmark_flags) + \
['--benchmark_out=%s' % output_name]
benchmark_flags = list(benchmark_flags) + [
"--benchmark_out=%s" % output_name
]
cmd = [exe_name] + benchmark_flags
print("RUNNING: %s" % ' '.join(cmd))
print("RUNNING: %s" % " ".join(cmd))
exitCode = subprocess.call(cmd)
if exitCode != 0:
print('TEST FAILED...')
print("TEST FAILED...")
sys.exit(exitCode)
json_res = load_benchmark_results(output_name, None)
if is_temp_output:
@ -195,9 +215,10 @@ def run_or_load_benchmark(filename, benchmark_flags):
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
benchmark_filter = find_benchmark_flag('--benchmark_filter=',
benchmark_flags)
benchmark_filter = find_benchmark_flag(
"--benchmark_filter=", benchmark_flags
)
return load_benchmark_results(filename, benchmark_filter)
if ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
raise ValueError('Unknown file type %s' % ftype)
raise ValueError("Unknown file type %s" % ftype)


@ -4,48 +4,49 @@
strip_asm.py - Cleanup ASM output for the specified file
"""
from argparse import ArgumentParser
import sys
import os
import re
import sys
from argparse import ArgumentParser
def find_used_labels(asm):
found = set()
label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
for l in asm.splitlines():
m = label_re.match(l)
label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
for line in asm.splitlines():
m = label_re.match(line)
if m:
found.add('.L%s' % m.group(1))
found.add(".L%s" % m.group(1))
return found
def normalize_labels(asm):
decls = set()
label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
for l in asm.splitlines():
m = label_decl.match(l)
for line in asm.splitlines():
m = label_decl.match(line)
if m:
decls.add(m.group(0))
if len(decls) == 0:
return asm
needs_dot = next(iter(decls))[0] != '.'
needs_dot = next(iter(decls))[0] != "."
if not needs_dot:
return asm
for ld in decls:
asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm)
asm = re.sub(r"(^|\s+)" + ld + r"(?=:|\s)", "\\1." + ld, asm)
return asm
def transform_labels(asm):
asm = normalize_labels(asm)
used_decls = find_used_labels(asm)
new_asm = ''
label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
for l in asm.splitlines():
m = label_decl.match(l)
new_asm = ""
label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
for line in asm.splitlines():
m = label_decl.match(line)
if not m or m.group(0) in used_decls:
new_asm += l
new_asm += '\n'
new_asm += line
new_asm += "\n"
return new_asm
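
To illustrate the label handling above (a sketch, assuming strip_asm.py is
importable from its directory): declarations of labels that no jump
instruction references are removed, while referenced ones survive:

import strip_asm  # assumes the script's directory is on sys.path

asm = "foo:\n jmp .L2\n.L1:\n ret\n.L2:\n ret\n"
stripped = strip_asm.transform_labels(asm)
# ".L1:" is dropped (nothing jumps to it); "foo:" and ".L2:" are kept.
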
@ -53,29 +54,34 @@ def is_identifier(tk):
if len(tk) == 0:
return False
first = tk[0]
if not first.isalpha() and first != '_':
if not first.isalpha() and first != "_":
return False
for i in range(1, len(tk)):
c = tk[i]
if not c.isalnum() and c != '_':
if not c.isalnum() and c != "_":
return False
return True
def process_identifiers(l):
def process_identifiers(line):
"""
process_identifiers - process all identifiers and modify them to have
consistent names across all platforms; specifically across ELF and MachO.
For example, MachO inserts an additional understore at the beginning of
names. This function removes that.
"""
parts = re.split(r'([a-zA-Z0-9_]+)', l)
new_line = ''
parts = re.split(r"([a-zA-Z0-9_]+)", line)
new_line = ""
for tk in parts:
if is_identifier(tk):
if tk.startswith('__Z'):
if tk.startswith("__Z"):
tk = tk[1:]
elif tk.startswith('_') and len(tk) > 1 and \
tk[1].isalpha() and tk[1] != 'Z':
elif (
tk.startswith("_")
and len(tk) > 1
and tk[1].isalpha()
and tk[1] != "Z"
):
tk = tk[1:]
new_line += tk
return new_line
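
For illustration (again assuming strip_asm.py is importable), the
Mach-O-specific leading underscore is stripped so that symbol names match
their ELF spellings:

import strip_asm

strip_asm.process_identifiers("callq __ZN9benchmark5StateC1Ev")
# -> "callq _ZN9benchmark5StateC1Ev"
strip_asm.process_identifiers("callq _main")  # -> "callq main"
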
@ -85,65 +91,71 @@ def process_asm(asm):
"""
Strip the ASM of unwanted directives and lines
"""
new_contents = ''
new_contents = ""
asm = transform_labels(asm)
# TODO: Add more things we want to remove
discard_regexes = [
re.compile("\s+\..*$"), # directive
re.compile("\s*#(NO_APP|APP)$"), #inline ASM
re.compile("\s*#.*$"), # comment line
re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive
re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
]
keep_regexes = [
re.compile(r"\s+\..*$"), # directive
re.compile(r"\s*#(NO_APP|APP)$"), # inline ASM
re.compile(r"\s*#.*$"), # comment line
re.compile(
r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
), # global directive
re.compile(
r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
),
]
keep_regexes: list[re.Pattern] = []
fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
for l in asm.splitlines():
for line in asm.splitlines():
# Remove Mach-O attribute
l = l.replace('@GOTPCREL', '')
line = line.replace("@GOTPCREL", "")
add_line = True
for reg in discard_regexes:
if reg.match(l) is not None:
if reg.match(line) is not None:
add_line = False
break
for reg in keep_regexes:
if reg.match(l) is not None:
if reg.match(line) is not None:
add_line = True
break
if add_line:
if fn_label_def.match(l) and len(new_contents) != 0:
new_contents += '\n'
l = process_identifiers(l)
new_contents += l
new_contents += '\n'
if fn_label_def.match(line) and len(new_contents) != 0:
new_contents += "\n"
line = process_identifiers(line)
new_contents += line
new_contents += "\n"
return new_contents
def main():
parser = ArgumentParser(
description='generate a stripped assembly file')
parser = ArgumentParser(description="generate a stripped assembly file")
parser.add_argument(
'input', metavar='input', type=str, nargs=1,
help='An input assembly file')
"input",
metavar="input",
type=str,
nargs=1,
help="An input assembly file",
)
parser.add_argument(
'out', metavar='output', type=str, nargs=1,
help='The output file')
"out", metavar="output", type=str, nargs=1, help="The output file"
)
args, unknown_args = parser.parse_known_args()
input = args.input[0]
output = args.out[0]
if not os.path.isfile(input):
print(("ERROR: input file '%s' does not exist") % input)
print("ERROR: input file '%s' does not exist" % input)
sys.exit(1)
contents = None
with open(input, 'r') as f:
with open(input, "r") as f:
contents = f.read()
new_contents = process_asm(contents)
with open(output, 'w') as f:
with open(output, "w") as f:
f.write(new_contents)
if __name__ == '__main__':
if __name__ == "__main__":
main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4