2
0
Fork 0
mirror of https://github.com/bazel-contrib/bazel-lib synced 2024-11-25 11:32:33 +00:00
bazel-lib/lib/private/tar.bzl
Synchronization Acknowledgement cc956d8589
fix(tar): append slash to top-level directory mtree entries (#852)
* fix(tar): append slash to top-level directory mtree entries

bsdtar's mtree format has a quirk wherein entries without "/" in their
first word are treated as "relative" entries, and "relative" directories
will cause tar to "change directory" into the declared directory entry.
If such a directory is followed by a "relative" entry, then the file
will be created within the directory, instead of at top-level as
expected. To mitigate, we append a slash to top-level directory entries.

Fixes #851.

* chore: golden files have BINDIR placeholder

---------

Co-authored-by: Alex Eagle <alex@aspect.dev>
2024-07-02 09:27:06 -07:00

271 lines
9.5 KiB
Python

"Implementation of tar rule"
load("//lib:paths.bzl", "to_repository_relative_path")
# Label of the toolchain type that provides the tar binary. _tar_impl looks it up in
# ctx.toolchains, and it is re-exported as tar_lib.toolchain_type.
TAR_TOOLCHAIN_TYPE = "@aspect_bazel_lib//lib:tar_toolchain_type"
# Archive file-name suffixes that are accepted, grouped by the compressor each one implies.
# See https://www.gnu.org/software/tar/manual/html_section/Compression.html
_ACCEPTED_EXTENSIONS = [
    # uncompressed
    ".tar",
    # gzip
    ".gz",
    ".tgz",
    ".taz",
    # compress
    ".Z",
    ".taZ",
    # bzip2
    ".bz2",
    ".tz2",
    ".tbz2",
    ".tbz",
    # lzip
    ".lz",
    # lzma
    ".lzma",
    ".tlz",
    # lzop
    ".lzo",
    # xz
    ".xz",
    # zstd
    ".zst",
    ".tzst",
]
# Maps the name of each supported compression algorithm to the extension given to an
# archive compressed with it. _tar_impl uses this to name the default output.
_COMPRESSION_TO_EXTENSION = {
    "bzip2": ".tar.bz2",
    "compress": ".tar.Z",
    "gzip": ".tar.gz",
    "lrzip": ".tar.lrz",
    "lz4": ".tar.lz4",
    "lzma": ".tar.lzma",
    "lzop": ".tar.lzo",
    "xz": ".tar.xz",
    "zstd": ".tar.zst",
}

# Names of the compression algorithms that may be requested; these are exactly the keys above.
# See https://www.gnu.org/software/tar/manual/html_section/Compression.html
_ACCEPTED_COMPRESSION_TYPES = _COMPRESSION_TO_EXTENSION.keys()
# Attribute schema of the `tar` rule; exported as tar_lib.attrs.
_tar_attrs = {
    # Extra command-line flags, forwarded right after the mode flag.
    "args": attr.string_list(
        doc = "Additional flags permitted by BSD tar; see the man page.",
    ),
    # Everything that should end up in the archive.
    "srcs": attr.label_list(
        doc = """\
Files, directories, or other targets whose default outputs are placed into the tar.
If any of the srcs are binaries with runfiles, those are copied into the resulting tar as well.
""",
        allow_files = True,
    ),
    # Operation to perform; turned into a `--<mode>` flag.
    "mode": attr.string(
        doc = """A mode indicator from the following list, copied from the tar manpage:
- create: Create a new archive containing the specified items.
- append: Like `create`, but new entries are appended to the archive.
Note that this only works on uncompressed archives stored in regular files.
The -f option is required.
- list: List archive contents to stdout.
- update: Like `append`, but new entries are added only if they have a
modification date newer than the corresponding entry in the archive.
Note that this only works on uncompressed archives stored in
regular files. The -f option is required.
- extract: Extract to disk from the archive. If a file with the same name
appears more than once in the archive, each copy will be extracted,
with later copies overwriting (replacing) earlier copies.
""",
        # TODO: support other modes: ["append", "list", "update", "extract"]
        values = ["create"],
        default = "create",
    ),
    # The mtree specification is required: it is the only way to set constant timestamps.
    "mtree": attr.label(
        doc = "An mtree specification file",
        allow_single_file = True,
        mandatory = True,
    ),
    "out": attr.output(
        doc = "Resulting tar file to write. If absent, `[name].tar` is written.",
    ),
    # Optional compression; limited to the known algorithm names.
    "compress": attr.string(
        doc = "Compress the archive file with a supported algorithm.",
        values = _ACCEPTED_COMPRESSION_TYPES,
    ),
}
# Attribute schema for the rule that generates an mtree specification; exported as
# tar_lib.mtree_attrs.
_mtree_attrs = {
    "srcs": attr.label_list(
        doc = "Files that are placed into the tar",
        allow_files = True,
    ),
    "out": attr.output(
        doc = "Resulting specification file to write",
    ),
}
def _add_compression_args(compress, args):
    """Adds the tar flag that selects the requested compression algorithm.

    Args:
        compress: name of the compression algorithm. Any value outside the
            known set (for example an empty string) adds nothing.
        args: object with an `add` method that receives the flag.
    """

    # Every known algorithm is selected by the long option carrying its own name.
    known_algorithms = (
        "bzip2",
        "compress",
        "gzip",
        "lrzip",
        "lzma",
        "lz4",
        "lzop",
        "xz",
        "zstd",
    )
    if compress in known_algorithms:
        args.add("--" + compress)
def _calculate_runfiles_dir(default_info):
    """Derives the runfiles directory path from the location of the runfiles manifest.

    Newer versions of Bazel place the manifest next to the runfiles directory, named with the
    `_manifest` suffix (`my_binary.runfiles` -> `my_binary.runfiles_manifest`). Older versions
    place a file named `MANIFEST` inside the runfiles directory. Either way, stripping the
    suffix from the manifest path leaves the runfiles directory.

    See similar logic:
    https://github.com/aspect-build/rules_js/blob/c50bd3f797c501fb229cf9ab58e0e4fc11464a2f/js/private/bash.bzl#L63

    Args:
        default_info: provider whose `files_to_run.runfiles_manifest` is set.

    Returns:
        The short path of the manifest with the recognised suffix removed.
    """
    manifest_path = default_info.files_to_run.runfiles_manifest.short_path
    for suffix in ("_manifest", "/MANIFEST"):
        if manifest_path.endswith(suffix):
            return manifest_path[:-len(suffix)]
    fail("manifest path {} seems malformed".format(manifest_path))
def _tar_impl(ctx):
    """Implementation of the tar rule: runs the toolchain's tar binary in one action.

    The command line is assembled in a fixed order: the mode flag, the user-supplied flags,
    the compression flag, `--file <output>` and finally `@<mtree file>`.

    Args:
        ctx: rule context.

    Returns:
        A DefaultInfo exposing the archive both as a default output and as a runfile.
    """
    toolchain = ctx.toolchains[TAR_TOOLCHAIN_TYPE]
    compression = ctx.attr.compress

    # Without an explicit `out`, the archive is named after the target, with an extension
    # matching the chosen compression.
    if compression:
        suffix = _COMPRESSION_TO_EXTENSION[compression]
    else:
        suffix = ".tar"
    archive = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + suffix)

    cmdline = ctx.actions.args()

    # Mode comes first, then the user-provided flags, then compression.
    cmdline.add(ctx.attr.mode, format = "--%s")
    cmdline.add_all(ctx.attr.args)
    _add_compression_args(compression, cmdline)
    cmdline.add("--file", archive)
    cmdline.add(ctx.file.mtree, format = "@%s")

    # The action reads the srcs, the mtree file, the toolchain files and the runfiles of every src.
    src_runfiles = [
        src[DefaultInfo].default_runfiles.files
        for src in ctx.attr.srcs
    ]
    ctx.actions.run(
        executable = toolchain.tarinfo.binary,
        inputs = depset(
            direct = ctx.files.srcs + [ctx.file.mtree],
            transitive = [toolchain.default.files] + src_runfiles,
        ),
        outputs = [archive],
        arguments = [cmdline],
        mnemonic = "Tar",
    )

    return DefaultInfo(files = depset([archive]), runfiles = ctx.runfiles([archive]))
def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "1672560000", mode = "0755"):
    """Formats a single entry of an mtree specification.

    Args:
        file: path of the entry; emitted as the first word of the line.
        type: value of the `type=` keyword, e.g. "file" or "dir".
        content: value of the `content=` keyword; the keyword is left out when this is falsy.
        uid: value of the `uid=` keyword.
        gid: value of the `gid=` keyword.
        time: value of the `time=` keyword.
        mode: value of the `mode=` keyword.

    Returns:
        The entry path followed by its space-separated `keyword=value` pairs.
    """
    keywords = [
        ("uid", uid),
        ("gid", gid),
        ("time", time),
        ("mode", mode),
        ("type", type),
    ]
    words = [file] + [key + "=" + value for key, value in keywords]
    if content:
        words.append("content=" + content)
    return " ".join(words)
# This function is exactly the same as the one from "@aspect_bazel_lib//lib:paths.bzl",
# except that it takes the workspace name directly instead of the ctx object.
# The reason is the performance of the Args.add_all closures in which this function is used;
# see `allow_closure` in https://bazel.build/rules/lib/builtins/Args#add_all.
def _to_rlocation_path(file, workspace):
    """Returns the rlocation path of a file.

    Args:
        file: object with a `short_path` attribute.
        workspace: workspace name prepended to short paths that do not start with "../".

    Returns:
        `short_path` without its leading "../" when it has one, otherwise
        `workspace + "/" + short_path`.
    """
    short_path = file.short_path
    parent_prefix = "../"
    if not short_path.startswith(parent_prefix):
        return workspace + "/" + short_path
    return short_path[len(parent_prefix):]
def _vis_encode(filename):
    """Escapes characters of a path that would otherwise corrupt an mtree line.

    mtree entries are whitespace-delimited and the reader treats a backslash as the start of
    an escape sequence, so these characters are replaced with three-digit octal escapes in
    the style of vis(3).

    Args:
        filename: path to encode.

    Returns:
        The path with backslash, space, tab, newline and carriage return replaced by
        `\\134`, `\\040`, `\\011`, `\\012` and `\\015` respectively.
    """

    # TODO(#794): correctly encode all filenames by using vis(3) (or porting it).
    # Other control characters and non-ASCII bytes are still passed through unchanged.
    replacements = [
        # The backslash must come first so that the escapes inserted below are not
        # themselves escaped a second time.
        ("\\", "\\134"),
        (" ", "\\040"),
        ("\t", "\\011"),
        ("\n", "\\012"),
        ("\r", "\\015"),
    ]
    encoded = filename
    for raw, escaped in replacements:
        encoded = encoded.replace(raw, escaped)
    return encoded
def _expand(file, expander, transform = to_repository_relative_path):
    """Produces the mtree lines for one source, including its parent directories.

    Args:
        file: the item handed to `expander.expand`.
        expander: object whose `expand(file)` returns the entries to emit; presumably the
            directory expander Bazel passes to `map_each` callbacks (confirm against caller).
        transform: function mapping each expanded entry to its path inside the archive.

    Returns:
        A list of mtree lines: for every expanded entry, one "dir" line per ancestor
        directory followed by one "file" line whose content is the entry's `path`.
    """
    lines = []
    for e in expander.expand(file):
        path = transform(e)
        segments = path.split("/")
        for depth in range(1, len(segments)):
            parent = "/".join(segments[:depth])

            # NOTE: The mtree format treats file paths without slashes as "relative" entries.
            #       If a relative entry is a directory, then it will "change directory" to that
            #       directory, and any subsequent "relative" entries will be created inside that
            #       directory. This causes issues when there is a top-level directory that is
            #       followed by a top-level file, as the file will be created inside the directory.
            #       To avoid this, we append a slash to the directory path to make it a "full" entry.
            if depth == 1:
                parent += "/"

            # Directory names need the same escaping as file names; an unescaped space would
            # split the path across two words of the mtree line.
            lines.append(_mtree_line(_vis_encode(parent), "dir"))

        lines.append(_mtree_line(_vis_encode(path), "file", content = _vis_encode(e.path)))
    return lines
def _mtree_impl(ctx):
    """Implementation of the rule that writes an mtree specification for the srcs.

    The specification lists every file in `srcs` and, for each src that has a runfiles
    manifest, its runfiles directory followed by the runfiles placed underneath it.

    Args:
        ctx: rule context.

    Returns:
        A DefaultInfo exposing the specification file both as a default output and as a runfile.
    """
    spec = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".spec")

    # Copy the workspace name into its own string, just in case, so that the closure below
    # captures this value and ctx is not transferred to the execution phase.
    workspace_name = str(ctx.workspace_name)

    lines = ctx.actions.args()
    lines.set_param_file_format("multiline")
    lines.add_all(
        ctx.files.srcs,
        map_each = _expand,
        expand_directories = True,
        uniquify = True,
    )

    for src in ctx.attr.srcs:
        info = src[DefaultInfo]

        # Only srcs with a runfiles manifest contribute a runfiles tree.
        if info.files_to_run.runfiles_manifest:
            runfiles_dir = _calculate_runfiles_dir(info)
            lines.add(_mtree_line(runfiles_dir, type = "dir"))
            lines.add_all(
                src.default_runfiles.files,
                expand_directories = True,
                uniquify = True,
                format_each = "{}/%s".format(runfiles_dir),
                # Be careful about what is captured by this closure and passed to _expand,
                # as it will carry the data structures over to the execution phase.
                map_each = lambda item, expander: _expand(
                    item,
                    expander,
                    lambda leaf: _to_rlocation_path(leaf, workspace_name),
                ),
                allow_closure = True,
            )

    ctx.actions.write(spec, content = lines)
    return DefaultInfo(files = depset([spec]), runfiles = ctx.runfiles([spec]))
# Bundle of the building blocks defined in this file, so that they can be reused
# through a single symbol.
tar_lib = struct(
    # The tar rule itself.
    attrs = _tar_attrs,
    implementation = _tar_impl,
    # The mtree specification generator.
    mtree_attrs = _mtree_attrs,
    mtree_implementation = _mtree_impl,
    toolchain_type = TAR_TOOLCHAIN_TYPE,
    # Compression-related constants and helpers.
    common = struct(
        accepted_tar_extensions = _ACCEPTED_EXTENSIONS,
        accepted_compression_types = _ACCEPTED_COMPRESSION_TYPES,
        compression_to_extension = _COMPRESSION_TO_EXTENSION,
        add_compression_args = _add_compression_args,
    ),
)
# The rule is assembled entirely from the pieces exposed on tar_lib.
tar = rule(
    implementation = tar_lib.implementation,
    attrs = tar_lib.attrs,
    toolchains = [tar_lib.toolchain_type],
    doc = "Rule that executes BSD `tar`. Most users should use the [`tar`](#tar) macro, rather than load this directly.",
)