From e8d14e88010c74c148a2d3adafc76f8b2d0d8449 Mon Sep 17 00:00:00 2001
From: thesayyn
Date: Wed, 16 Oct 2024 10:10:53 -0700
Subject: [PATCH] wip preserve symlinks

---
 lib/private/modify_mtree.awk        | 16 +++++++++
 lib/tar.bzl                         |  9 +++++
 lib/tests/tar/BUILD.bazel           | 20 +++++++++++
 lib/tests/tar/node_modules_tree.bzl | 51 +++++++++++++++++++++++++++++
 4 files changed, 96 insertions(+)
 create mode 100644 lib/tests/tar/node_modules_tree.bzl

diff --git a/lib/private/modify_mtree.awk b/lib/private/modify_mtree.awk
index 346321c..6eb3556 100644
--- a/lib/private/modify_mtree.awk
+++ b/lib/private/modify_mtree.awk
@@ -1,5 +1,21 @@
 # Edits mtree files. See the modify_mtree macro in /lib/tar.bzl.
 {
+    if (preserve_symlinks != "") {
+        # By default Bazel reports symlinks as regular file/dir therefore mtree_spec has no way of knowing that a file
+        # is a symlink. This is a problem when we want to preserve symlinks especially for symlink sensitive applications
+        # such as nodejs with pnpm. To work around this we need to determine if a file is a symlink and if so, we need to
+        # determine where the symlink points to by calling readlink repeatedly until we get the final destination.
+        #
+        # We then need to decide if it's a symlink based on how many times we had to call readlink and where we ended up.
+        #
+        # Unlike Bazel's own symlinks, which point out of the sandbox, symlinks created by ctx.actions.symlink
+        # stay within the Bazel sandbox, so it's possible to detect those.
+        #
+        # See https://github.com/bazelbuild/rules_pkg/pull/609
+        if ($0 ~ /type=file/) {
+            # TODO: not implemented yet -- resolve the entry with readlink as described above.
+        }
+    }
     if (strip_prefix != "") {
         if ($1 == strip_prefix) {
             # this line declares the directory which is now the root. It may be discarded.
diff --git a/lib/tar.bzl b/lib/tar.bzl
index ee0e00e..00dfc2a 100644
--- a/lib/tar.bzl
+++ b/lib/tar.bzl
@@ -137,6 +137,8 @@ def tar(name, mtree = "auto", stamp = 0, **kwargs):
 def mtree_mutate(
         name,
         mtree,
+        srcs = None,
+        preserve_symlinks = False,
         strip_prefix = None,
         package_dir = None,
         mtime = None,
@@ -148,6 +150,8 @@ def mtree_mutate(
 
     Args:
         name: name of the target, output will be `[name].mtree`.
         mtree: input mtree file, typically created by `mtree_spec`.
+        srcs: source files to be used when resolving symlinks. Required if `preserve_symlinks` is set to True.
+        preserve_symlinks: if True, pass `preserve_symlinks=1` to the awk script; fails unless `srcs` is also set.
         strip_prefix: prefix to remove from all paths in the tar. Files and directories not under this prefix are dropped.
         package_dir: directory prefix to add to all paths in the tar.
@@ -155,6 +159,7 @@ def mtree_mutate(
         owner: new uid for all entries.
         ownername: new uname for all entries.
         awk_script: may be overridden to change the script containing the modification logic.
+        **kwargs: additional named parameters to genrule.
     """
 
     vars = []
@@ -168,6 +173,10 @@ def mtree_mutate(
         vars.append("-v owner='{}'".format(owner))
     if ownername:
         vars.append("-v ownername='{}'".format(ownername))
+    if preserve_symlinks:
+        if not srcs:
+            fail("preserve_symlinks requires srcs to be set in order to resolve symlinks")
+        vars.append("-v preserve_symlinks=1")
 
     native.genrule(
         name = name,
diff --git a/lib/tests/tar/BUILD.bazel b/lib/tests/tar/BUILD.bazel
index d2b4996..b53d541 100644
--- a/lib/tests/tar/BUILD.bazel
+++ b/lib/tests/tar/BUILD.bazel
@@ -4,6 +4,7 @@ load("@aspect_bazel_lib//lib:tar.bzl", "mtree_mutate", "mtree_spec", "tar")
 load("@aspect_bazel_lib//lib:testing.bzl", "assert_archive_contains")
 load("@bazel_skylib//rules:write_file.bzl", "write_file")
 load(":asserts.bzl", "assert_tar_listing", "assert_unused_listing")
+load(":node_modules_tree.bzl", "node_modules_tree")
 
 # The examples below work with both source files and generated files.
# Here we generate a file to use in the examples.
@@ -465,3 +466,22 @@ assert_unused_listing(
         "lib/tests/tar/unused/space in name.txt",
     ],
 )
+
+#############
+# Example 16: mtree_mutate preserves symlinks
+node_modules_tree(
+    name = "e16_node_modules",
+)
+
+mtree_spec(
+    name = "mtree16",
+    srcs = [
+        ":e16_node_modules",
+    ],
+)
+
+assert_tar_listing(
+    name = "test_16_before_processing",
+    actual = ":mtree16",
+    expected = [],
+)
diff --git a/lib/tests/tar/node_modules_tree.bzl b/lib/tests/tar/node_modules_tree.bzl
new file mode 100644
index 0000000..6460db7
--- /dev/null
+++ b/lib/tests/tar/node_modules_tree.bzl
@@ -0,0 +1,51 @@
+# Symlink fixture for the tar tests. See https://github.com/bazelbuild/rules_pkg/pull/609
+def _impl(ctx):
+    # Lay out a pnpm-style store with two package directories:
+    # - a
+    # - b, which depends on a (expressed as a directory symlink to a)
+    store_a = ctx.actions.declare_directory("node_modules/.pnpm/a@0.0.0/node_modules/a")
+    store_b = ctx.actions.declare_directory("node_modules/.pnpm/b@0.0.0/node_modules/b")
+
+    ctx.actions.run_shell(
+        outputs = [store_a, store_b],
+        command = "echo 'test' > %s/package.json" % store_a.path,
+    )
+
+    dep_symlink_b_to_a = ctx.actions.declare_directory("node_modules/.pnpm/b@0.0.0/node_modules/a")
+
+    ctx.actions.symlink(
+        output = dep_symlink_b_to_a,
+        target_file = store_a,
+    )
+
+    node_modules_a = ctx.actions.declare_directory("node_modules/a")
+    ctx.actions.symlink(
+        output = node_modules_a,
+        target_file = store_a,
+    )
+
+    # A regular file (dir/a) and a file symlink (dir/b) that targets it.
+    a = ctx.actions.declare_file("dir/a")
+    ctx.actions.run_shell(
+        outputs = [a],
+        command = "echo 'test' > %s" % a.path,
+    )
+
+    b = ctx.actions.declare_file("dir/b")
+    ctx.actions.symlink(
+        output = b,
+        target_file = a,
+    )
+
+    return DefaultInfo(files = depset([
+        store_a,
+        store_b,
+        dep_symlink_b_to_a,
+        node_modules_a,
+        a,
+        b,
+    ]))
+
+node_modules_tree = rule(
+    implementation = _impl,
+)