mirror of
https://github.com/bazel-contrib/bazel-lib
synced 2024-11-25 11:32:33 +00:00
cc956d8589
* fix(tar): append slash to top-level directory mtree entries

  bsdtar's mtree format has a quirk wherein entries without "/" in their first word are treated as "relative" entries, and "relative" directories will cause tar to "change directory" into the declared directory entry. If such a directory is followed by a "relative" entry, then the file will be created within the directory, instead of at top-level as expected.

  To mitigate, we append a slash to top-level directory entries.

  Fixes #851.

* chore: golden files have BINDIR placeholder

---------

Co-authored-by: Alex Eagle <alex@aspect.dev>
271 lines
9.5 KiB
Python
271 lines
9.5 KiB
Python
"Implementation of tar rule"
|
|
|
|
load("//lib:paths.bzl", "to_repository_relative_path")
|
|
|
|
# Label of the toolchain type for the tar rule. _tar_impl looks it up in
# ctx.toolchains to obtain the binary it executes, and the `tar` rule lists it
# in its `toolchains`.
TAR_TOOLCHAIN_TYPE = "@aspect_bazel_lib//lib:tar_toolchain_type"
|
|
|
|
# File-name suffixes recognised as tar archives, grouped by compression scheme.
# Taken from the suffix table in the GNU tar manual:
# https://www.gnu.org/software/tar/manual/html_section/Compression.html
_ACCEPTED_EXTENSIONS = [
    # uncompressed
    ".tar",
    # gzip
    ".gz",
    ".tgz",
    ".taz",
    # compress
    ".Z",
    ".taZ",
    # bzip2
    ".bz2",
    ".tz2",
    ".tbz2",
    ".tbz",
    # lzip
    ".lz",
    # lzma
    ".lzma",
    ".tlz",
    # lzop
    ".lzo",
    # xz
    ".xz",
    # zstd
    ".zst",
    ".tzst",
]
|
|
|
|
# Maps each value accepted by the `compress` attribute to the file extension
# given to the default output when the user does not set `out`.
_COMPRESSION_TO_EXTENSION = {
    "bzip2": ".tar.bz2",
    "compress": ".tar.Z",
    "gzip": ".tar.gz",
    "lrzip": ".tar.lrz",
    "lz4": ".tar.lz4",
    "lzma": ".tar.lzma",
    "lzop": ".tar.lzo",
    "xz": ".tar.xz",
    "zstd": ".tar.zst",
}
|
|
|
|
# Compression algorithms the `compress` attribute accepts: exactly the keys of
# _COMPRESSION_TO_EXTENSION, so the two can never drift apart.
# See https://www.gnu.org/software/tar/manual/html_section/Compression.html
_ACCEPTED_COMPRESSION_TYPES = _COMPRESSION_TO_EXTENSION.keys()
|
|
|
|
# Attribute schema of the `tar` rule.
_tar_attrs = {
    # Extra command-line flags; _tar_impl places them right after the mode flag.
    "args": attr.string_list(
        doc = "Additional flags permitted by BSD tar; see the man page.",
    ),
    # Archive inputs; runfiles of these targets are added as action inputs too.
    "srcs": attr.label_list(
        allow_files = True,
        doc = """\
        Files, directories, or other targets whose default outputs are placed into the tar.

        If any of the srcs are binaries with runfiles, those are copied into the resulting tar as well.
        """,
    ),
    # Turned into a `--<mode>` flag by _tar_impl.
    "mode": attr.string(
        default = "create",
        values = ["create"],  # TODO: support other modes: ["append", "list", "update", "extract"]
        doc = """A mode indicator from the following list, copied from the tar manpage:

       - create: Create a new archive containing the specified items.
       - append: Like `create`, but new entries are appended to the archive.
            Note that this only works on uncompressed archives stored in regular files.
            The -f option is required.
       - list: List archive contents to stdout.
       - update: Like `append`, but new entries are added only if they have a
            modification date newer than the corresponding entry in the archive.
            Note that this only works on uncompressed archives stored in
            regular files. The -f option is required.
       - extract: Extract to disk from the archive. If a file with the same name
            appears more than once in the archive, each copy will be extracted,
            with later copies overwriting (replacing) earlier copies.
        """,
    ),
    "mtree": attr.label(
        # Mandatory since it's the only way to set constant timestamps
        mandatory = True,
        allow_single_file = True,
        doc = "An mtree specification file",
    ),
    "out": attr.output(
        doc = "Resulting tar file to write. If absent, `[name].tar` is written.",
    ),
    "compress": attr.string(
        values = _ACCEPTED_COMPRESSION_TYPES,
        doc = "Compress the archive file with a supported algorithm.",
    ),
}
|
|
|
|
# Attribute schema consumed by _mtree_impl, which writes an mtree
# specification describing `srcs`.
_mtree_attrs = {
    "srcs": attr.label_list(
        allow_files = True,
        doc = "Files that are placed into the tar",
    ),
    "out": attr.output(
        doc = "Resulting specification file to write",
    ),
}
|
|
|
|
def _add_compression_args(compress, args):
    """Append the tar flag selecting the requested compression algorithm.

    Args:
        compress: name of the compression algorithm; an empty or unrecognised
            value adds nothing.
        args: object with an `add` method (an `Args` in rule code) that
            receives the flag.
    """
    flag = {
        "bzip2": "--bzip2",
        "compress": "--compress",
        "gzip": "--gzip",
        "lrzip": "--lrzip",
        "lzma": "--lzma",
        "lz4": "--lz4",
        "lzop": "--lzop",
        "xz": "--xz",
        "zstd": "--zstd",
    }.get(compress)
    if flag:
        args.add(flag)
|
|
|
|
def _calculate_runfiles_dir(default_info):
    """Derive the runfiles directory short path from the runfiles manifest path.

    Newer versions of Bazel put the manifest beside the runfiles directory,
    named `<binary>.runfiles_manifest`; older versions put a file named
    `MANIFEST` inside the `<binary>.runfiles` directory. See similar logic:
    https://github.com/aspect-build/rules_js/blob/c50bd3f797c501fb229cf9ab58e0e4fc11464a2f/js/private/bash.bzl#L63

    Args:
        default_info: provider whose `files_to_run.runfiles_manifest` is set.

    Returns:
        The manifest short path with the manifest-specific suffix removed.
    """
    manifest_path = default_info.files_to_run.runfiles_manifest.short_path

    # Both suffixes are nine characters long, so stripping either one leaves
    # the path of the `.runfiles` directory.
    for suffix in ("_manifest", "/MANIFEST"):
        if manifest_path.endswith(suffix):
            return manifest_path[:-len(suffix)]

    fail("manifest path {} seems malformed".format(manifest_path))
|
|
|
|
def _tar_impl(ctx):
    """Implementation of the `tar` rule: run the toolchain's tar binary once.

    The command line is, in order: the mode flag, the user-supplied args, the
    compression flag (if any), `--file <output>`, and `@<mtree file>`.

    Args:
        ctx: rule context.

    Returns:
        DefaultInfo whose files and runfiles both contain the archive.
    """
    toolchain = ctx.toolchains[TAR_TOOLCHAIN_TYPE]
    compress = ctx.attr.compress

    # The default output name carries an extension matching the compression.
    if compress:
        extension = _COMPRESSION_TO_EXTENSION[compress]
    else:
        extension = ".tar"
    archive = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + extension)

    args = ctx.actions.args()

    # Set mode
    args.add(ctx.attr.mode, format = "--%s")

    # User-provided args first
    args.add_all(ctx.attr.args)

    # Compression args
    _add_compression_args(compress, args)

    args.add("--file", archive)
    args.add(ctx.file.mtree, format = "@%s")

    # Direct inputs are the srcs followed by the mtree spec; the toolchain
    # files and every src's runfiles come in as transitive depsets.
    direct_inputs = ctx.files.srcs + [ctx.file.mtree]
    runfiles_of_srcs = [
        src[DefaultInfo].default_runfiles.files
        for src in ctx.attr.srcs
    ]

    ctx.actions.run(
        mnemonic = "Tar",
        executable = toolchain.tarinfo.binary,
        arguments = [args],
        inputs = depset(
            direct = direct_inputs,
            transitive = [toolchain.default.files] + runfiles_of_srcs,
        ),
        outputs = [archive],
    )

    return DefaultInfo(files = depset([archive]), runfiles = ctx.runfiles([archive]))
|
|
|
|
def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "1672560000", mode = "0755"):
    """Format a single mtree entry.

    Args:
        file: entry path, written verbatim as the first word of the line.
        type: value of the `type=` keyword (callers here pass "file" or "dir").
        content: optional value of the `content=` keyword; omitted when falsy.
        uid: value of the `uid=` keyword.
        gid: value of the `gid=` keyword.
        time: value of the `time=` keyword.
        mode: value of the `mode=` keyword.

    Returns:
        The path followed by space-separated `keyword=value` pairs.
    """
    keywords = [
        ("uid", uid),
        ("gid", gid),
        ("time", time),
        ("mode", mode),
        ("type", type),
    ]
    if content:
        keywords.append(("content", content))
    return " ".join([file] + [key + "=" + value for key, value in keywords])
|
|
|
|
# This function is exactly the same as the one from "@aspect_bazel_lib//lib:paths.bzl",
# except that it takes the workspace name directly instead of the ctx object.
# The reason is the performance of Args.add_all closures, where this function is used;
# see `allow_closure` in https://bazel.build/rules/lib/builtins/Args#add_all.
def _to_rlocation_path(file, workspace):
    """Compute the path of `file` relative to a runfiles root.

    Args:
        file: object exposing a `short_path` string.
        workspace: name of the main workspace.

    Returns:
        The short path without its leading "../" when it has one, otherwise
        the short path prefixed with `workspace` and a slash.
    """
    short_path = file.short_path
    if not short_path.startswith("../"):
        return workspace + "/" + short_path
    return short_path[3:]
|
|
|
|
def _vis_encode(filename):
    """Escape characters in a path that would otherwise corrupt an mtree line.

    An mtree line is whitespace-separated and uses backslash as its escape
    introducer, so these characters are written as three-digit octal escapes.

    Args:
        filename: path to escape.

    Returns:
        The path with backslash, space, tab, newline and carriage return
        replaced by `\\134`, `\\040`, `\\011`, `\\012` and `\\015`.
    """

    # TODO(#794): correctly encode all filenames by using vis(3) (or porting it);
    # non-ASCII bytes and the remaining control characters are not handled yet.

    # The backslash must be handled first, otherwise the escapes produced
    # below would themselves be escaped a second time.
    encoded = filename.replace("\\", "\\134")
    for raw, octal in [
        (" ", "\\040"),
        ("\t", "\\011"),
        ("\n", "\\012"),
        ("\r", "\\015"),
    ]:
        encoded = encoded.replace(raw, octal)
    return encoded
|
|
|
|
def _expand(file, expander, transform = to_repository_relative_path):
    """Produce the mtree lines for one input, including its parent directories.

    Used as a `map_each` callback of `Args.add_all`.

    Args:
        file: the item being mapped.
        expander: object whose `expand(file)` yields the files to list.
        transform: function turning each expanded file into its path inside
            the archive.

    Returns:
        A list of mtree lines: for every expanded file, one "dir" line per
        ancestor directory followed by one "file" line whose `content=` is the
        file's `path`. Duplicate directory lines are not removed here; the
        callers in this file pass `uniquify = True` to `add_all`.
    """
    lines = []
    for e in expander.expand(file):
        path = transform(e)
        segments = path.split("/")
        for i in range(1, len(segments)):
            parent = "/".join(segments[:i])

            # NOTE: The mtree format treats file paths without slashes as "relative" entries.
            # If a relative entry is a directory, then it will "change directory" to that
            # directory, and any subsequent "relative" entries will be created inside that
            # directory. This causes issues when there is a top-level directory that is
            # followed by a top-level file, as the file will be created inside the directory.
            # To avoid this, we append a slash to the directory path to make it a "full" entry.
            if i == 1:
                parent += "/"

            # Directory names need the same escaping as file names: an
            # unescaped space would split the path into two words and make
            # the line unparsable.
            lines.append(_mtree_line(_vis_encode(parent), "dir"))

        lines.append(_mtree_line(_vis_encode(path), "file", content = _vis_encode(e.path)))
    return lines
|
|
|
|
def _mtree_impl(ctx):
    """Write an mtree specification covering `srcs` and their runfiles.

    The spec first lists every file in `srcs`. Then, for each src that has a
    runfiles manifest, it adds a directory entry for the runfiles directory
    followed by that src's runfiles, each prefixed with the runfiles directory.

    Args:
        ctx: rule context.

    Returns:
        DefaultInfo whose files and runfiles both contain the spec file.
    """
    out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".spec")

    content = ctx.actions.args()
    content.set_param_file_format("multiline")
    content.add_all(
        ctx.files.srcs,
        map_each = _expand,
        expand_directories = True,
        uniquify = True,
    )

    # copy workspace name here just in case to prevent ctx
    # to be transferred to execution phase.
    # It does not depend on the src, so it is computed once outside the loop.
    workspace_name = str(ctx.workspace_name)

    for s in ctx.attr.srcs:
        default_info = s[DefaultInfo]
        if not default_info.files_to_run.runfiles_manifest:
            continue

        # Escape the runfiles directory like every other path in the spec; it
        # is used both as an entry of its own and as the prefix of each
        # runfiles line, and an unescaped space would break all of them.
        runfiles_dir = _vis_encode(_calculate_runfiles_dir(default_info))

        content.add(_mtree_line(runfiles_dir, type = "dir"))
        content.add_all(
            s.default_runfiles.files,
            expand_directories = True,
            uniquify = True,
            format_each = "{}/%s".format(runfiles_dir),
            # be careful about what you pass to _expand as it will carry the data structures over to execution phase.
            map_each = lambda f, e: _expand(f, e, lambda f: _to_rlocation_path(f, workspace_name)),
            allow_closure = True,
        )

    ctx.actions.write(out, content = content)

    return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out]))
|
|
|
|
# Bundle of this file's building blocks, exposed as a single struct.
tar_lib = struct(
    # `tar` rule pieces
    implementation = _tar_impl,
    attrs = _tar_attrs,
    toolchain_type = TAR_TOOLCHAIN_TYPE,
    # mtree spec pieces
    mtree_implementation = _mtree_impl,
    mtree_attrs = _mtree_attrs,
    # Compression-related tables and helper
    common = struct(
        add_compression_args = _add_compression_args,
        compression_to_extension = _COMPRESSION_TO_EXTENSION,
        accepted_compression_types = _ACCEPTED_COMPRESSION_TYPES,
        accepted_tar_extensions = _ACCEPTED_EXTENSIONS,
    ),
)
|
|
|
|
# Rule definition assembled from the pieces collected in `tar_lib`.
tar = rule(
    implementation = tar_lib.implementation,
    attrs = tar_lib.attrs,
    toolchains = [tar_lib.toolchain_type],
    doc = "Rule that executes BSD `tar`. Most users should use the [`tar`](#tar) macro, rather than load this directly.",
)
|