fix: improve execution requirements of all copy files/directory rules for better perf (#79)

This commit is contained in:
Greg Magolan 2022-04-14 10:48:44 -07:00 committed by GitHub
parent 48b6f29367
commit d59ca6092f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 79 additions and 33 deletions

View File

@ -5,17 +5,27 @@ exports_files(
visibility = ["//docs:__pkg__"],
)
bzl_library(
name = "copy_common",
srcs = ["copy_common.bzl"],
visibility = ["//lib:__subpackages__"],
)
bzl_library(
name = "copy_file",
srcs = ["copy_file.bzl"],
visibility = ["//lib:__subpackages__"],
deps = [":directory_path"],
deps = [
":copy_common",
":directory_path",
],
)
bzl_library(
name = "copy_directory",
srcs = ["copy_directory.bzl"],
visibility = ["//lib:__subpackages__"],
deps = [":copy_common"],
)
bzl_library(
@ -23,6 +33,7 @@ bzl_library(
srcs = ["copy_to_directory.bzl"],
visibility = ["//lib:__subpackages__"],
deps = [
":copy_common",
":directory_path",
":output_files",
":paths",

View File

@ -0,0 +1,59 @@
"Helpers for copy rules"
# Hints for Bazel spawn strategy
COPY_EXECUTION_REQUIREMENTS = {
# ----------------+-----------------------------------------------------------------------------
# no-remote | Prevents the action or test from being executed remotely or cached remotely.
# | This is equivalent to using both `no-remote-cache` and `no-remote-exec`.
# ----------------+-----------------------------------------------------------------------------
# no-remote-cache | Results in the action or test never being cached remotely (but it may
# | be cached locally; it may also be executed remotely). Note: for the purposes
# | of this tag, the disk-cache is considered a local cache, whereas the http
# | and gRPC caches are considered remote. If a combined cache is specified
# | (i.e. a cache with local and remote components), it's treated as a remote
# | cache and disabled entirely unless --incompatible_remote_results_ignore_disk
# | is set in which case the local components will be used.
# ----------------+-----------------------------------------------------------------------------
# no-remote-exec | Results in the action or test never being executed remotely (but it may be
# | cached remotely).
# ----------------+-----------------------------------------------------------------------------
# no-cache | Results in the action or test never being cached (remotely or locally)
# ----------------+-----------------------------------------------------------------------------
# no-sandbox | Results in the action or test never being sandboxed; it can still be cached
# | or run remotely - use no-cache or no-remote to prevent either or both of
# | those.
# ----------------+-----------------------------------------------------------------------------
# local | Precludes the action or test from being remotely cached, remotely executed,
# | or run inside the sandbox. For genrules and tests, marking the rule with the
# | local = True attribute has the same effect.
# ----------------+-----------------------------------------------------------------------------
# See https://bazel.google.cn/reference/be/common-definitions?hl=en&authuser=0#common-attributes
#
# Copying file & directories is entirely IO-bound and there is no point doing this work
# remotely.
#
# Also, remote-execution does not allow source directory inputs, see
# https://github.com/bazelbuild/bazel/commit/c64421bc35214f0414e4f4226cc953e8c55fa0d2 So we must
# not attempt to execute remotely in that case.
#
# There is also no point pulling the output file or directory from the remote cache since the
# bytes to copy are already available locally. Conversely, no point in writing to the cache if
# no one has any reason to check it for this action.
#
# Read and writing to disk cache is disabled as well primarily to reduce disk usage on the local
# machine. A disk cache hit of a directory copy could be slghtly faster than a copy since the
# disk cache stores the directory artifact as a single entry, but the slight performance bump
# comes at the cost of heavy disk cache usage, which is an unmanaged directory that grow beyond
# the bounds of the physical disk.
# TODO: run benchmarks to measure the impact on copy_directory
#
# Sandboxing for this action is wasteful as well since there is a 1:1 mapping of input
# file/directory to output file/directory and no room for non-hermetic inputs to sneak in to the
# input.
"no-remote": "1",
"no-remote-cache": "1",
"no-remote-exec": "1",
"no-cache": "1",
"no-sandbox": "1",
"local": "1",
}

View File

@ -4,14 +4,7 @@ This rule copies a directory to another location using Bash (on Linux/macOS) or
cmd.exe (on Windows).
"""
# Hints for Bazel spawn strategy
_execution_requirements = {
# Copying files is entirely IO-bound and there is no point doing this work remotely.
# Also, remote-execution does not allow source directory inputs, see
# https://github.com/bazelbuild/bazel/commit/c64421bc35214f0414e4f4226cc953e8c55fa0d2
# So we must not attempt to execute remotely in that case.
"no-remote-exec": "1",
}
load(":copy_common.bzl", _COPY_EXECUTION_REQUIREMENTS = "COPY_EXECUTION_REQUIREMENTS")
def _copy_cmd(ctx, src, dst):
# Most Windows binaries built with MSVC use a certain argument quoting
@ -49,7 +42,7 @@ def _copy_cmd(ctx, src, dst):
mnemonic = mnemonic,
progress_message = progress_message,
use_default_shell_env = True,
execution_requirements = _execution_requirements,
execution_requirements = _COPY_EXECUTION_REQUIREMENTS,
)
def _copy_bash(ctx, src, dst):
@ -65,7 +58,7 @@ def _copy_bash(ctx, src, dst):
mnemonic = mnemonic,
progress_message = progress_message,
use_default_shell_env = True,
execution_requirements = _execution_requirements,
execution_requirements = _COPY_EXECUTION_REQUIREMENTS,
)
def copy_directory_action(ctx, src, dst, is_windows = False):

View File

@ -24,17 +24,9 @@ cmd.exe (on Windows). `_copy_xfile` marks the resulting file executable,
`_copy_file` does not.
"""
load(":copy_common.bzl", _COPY_EXECUTION_REQUIREMENTS = "COPY_EXECUTION_REQUIREMENTS")
load(":directory_path.bzl", "DirectoryPathInfo")
# Hints for Bazel spawn strategy
_execution_requirements = {
# Copying files is entirely IO-bound and there is no point doing this work remotely.
# Also, remote-execution does not allow source directory inputs, see
# https://github.com/bazelbuild/bazel/commit/c64421bc35214f0414e4f4226cc953e8c55fa0d2
# So we must not attempt to execute remotely in that case.
"no-remote-exec": "1",
}
def _copy_cmd(ctx, src, src_path, dst):
# Most Windows binaries built with MSVC use a certain argument quoting
# scheme. Bazel uses that scheme too to quote arguments. However,
@ -70,7 +62,7 @@ def _copy_cmd(ctx, src, src_path, dst):
mnemonic = mnemonic,
progress_message = progress_message,
use_default_shell_env = True,
execution_requirements = _execution_requirements,
execution_requirements = _COPY_EXECUTION_REQUIREMENTS,
)
def _copy_bash(ctx, src, src_path, dst):
@ -86,7 +78,7 @@ def _copy_bash(ctx, src, src_path, dst):
mnemonic = mnemonic,
progress_message = progress_message,
use_default_shell_env = True,
execution_requirements = _execution_requirements,
execution_requirements = _COPY_EXECUTION_REQUIREMENTS,
)
def copy_file_action(ctx, src, dst, dir_path = None, is_windows = False):

View File

@ -1,6 +1,7 @@
"copy_to_directory implementation"
load("@bazel_skylib//lib:paths.bzl", skylib_paths = "paths")
load(":copy_common.bzl", _COPY_EXECUTION_REQUIREMENTS = "COPY_EXECUTION_REQUIREMENTS")
load(":paths.bzl", "paths")
load(":directory_path.bzl", "DirectoryPathInfo")
@ -13,16 +14,6 @@ _copy_to_directory_attr = {
"is_windows": attr.bool(mandatory = True),
}
# Hints for Bazel spawn strategy
_execution_requirements = {
# Copying files is entirely IO-bound and there is no point doing this work
# remotely. Also, remote-execution does not allow source directory inputs,
# see
# https://github.com/bazelbuild/bazel/commit/c64421bc35214f0414e4f4226cc953e8c55fa0d2
# So we must not attempt to execute remotely in that case.
"no-remote-exec": "1",
}
def _longest_match(subject, tests, allow_partial = False):
match = None
high_score = 0
@ -101,7 +92,7 @@ fi
mnemonic = "CopyToDirectory",
progress_message = "Copying files to directory",
use_default_shell_env = True,
execution_requirements = _execution_requirements,
execution_requirements = _COPY_EXECUTION_REQUIREMENTS,
)
def _copy_to_dir_cmd(ctx, copy_paths, dst_dir):