feat: add glob_match starlark utility function that supports "*", "**" and "?" expressions and optionally allows "*" and "?" to match path separators (#194)
This commit is contained in:
parent
61c7cc209b
commit
fd03cf0ecb
|
@ -0,0 +1,140 @@
|
|||
"""
|
||||
Basic glob match implementation for starlark.
|
||||
|
||||
This was originally developed by @jbedard for use in rules_js
|
||||
(https://github.com/aspect-build/rules_js/blob/6ca32d5199ddc0bf19bd704f591030dc1468ca5f/npm/private/pkg_glob.bzl)
|
||||
to support the pnpm public-hoist-pattern option (https://pnpm.io/npmrc#public-hoist-pattern). The pnpm
|
||||
implementation and tests were used as a reference implementation:
|
||||
https://github.com/pnpm/pnpm/blob/v7.4.0-2/packages/matcher/src/index.ts
|
||||
https://github.com/pnpm/pnpm/blob/v7.4.0-2/packages/matcher/test/index.ts
|
||||
"""
|
||||
|
||||
def _split_on(expr, splits):
    """Splits an expression on the tokens in splits but keeps the tokens in the result.

    Tokens are tried in the order given at every position, so a token such as `**` must
    come before `*`. Every position is checked against the tokens, including the position
    directly after a matched token, so back-to-back tokens such as `*?` are each emitted
    as their own entry instead of being folded into the following literal text.

    Args:
        expr: the string to split
        splits: the tokens to split on, in matching priority order; an empty token is an error

    Returns:
        A list holding the matched tokens and the non-empty runs of literal text between
        them, in the order they appear in expr.
    """
    result = []
    accumulator = ""
    pos = 0

    # Starlark has no `while` loop. Each pass consumes at least one character of expr,
    # so len(expr) passes are always enough and the cursor check below ends the loop early.
    for _ in range(len(expr)):
        if pos >= len(expr):
            break

        token = None
        for split in splits:
            if not split:
                fail("empty split token")
            if expr.startswith(split, pos):
                token = split
                break

        if token == None:
            # Plain character: extend the current literal run.
            accumulator = accumulator + expr[pos]
            pos = pos + 1
        else:
            # Flush the literal run collected so far, then emit the token itself.
            if accumulator:
                result.append(accumulator)
                accumulator = ""
            result.append(token)
            pos = pos + len(token)

    if accumulator:
        result.append(accumulator)
    return result
|
||||
|
||||
def glob_match(expr, path, match_path_separator = False):
    """Test if the passed path matches the glob expression.

    `*` A single asterisk stands for zero or more arbitrary characters except for the path separator `/` if `match_path_separator` is False

    `?` The question mark stands for exactly one character except for the path separator `/` if `match_path_separator` is False

    `**` A double asterisk stands for an arbitrary sequence of 0 or more characters. It is only allowed when preceded by either the beginning of the string or a slash. Likewise it must be followed by a slash or the end of the pattern.

    Fails if the expression contains `***` or a `**` that is not delimited as described above.

    Args:
        expr: the glob expression
        path: the path against which to match the glob expression
        match_path_separator: whether or not to match the path separator '/' when matching `*` and `?` expressions

    Returns:
        True if the path matches the glob expression
    """

    expr_i = 0
    path_i = 0

    if expr.find("***") != -1:
        fail("glob_match: invalid *** pattern found in glob expression")

    expr_parts = _split_on(expr, ["**", "*", "?"])

    # Validate globstar placement up front so the matching loop can assume it.
    for i, expr_part in enumerate(expr_parts):
        if expr_part == "**":
            if i > 0 and not expr_parts[i - 1].endswith("/"):
                msg = "glob_match: `**` globstar in expression `{}` must be at the start of the expression or preceeded by `/`".format(expr)
                fail(msg)
            if i != len(expr_parts) - 1 and not expr_parts[i + 1].startswith("/"):
                msg = "glob_match: `**` globstar in expression `{}` must be at the end of the expression or followed by `/`".format(expr)
                fail(msg)

    # Locations a * was terminated that can be rolled back to.
    # Each entry is [expr index of the wildcard, path index to resume consuming from].
    branches = []

    # Loop "forever" (2^30).
    for _ in range(1073741824):
        # None (rather than "") marks "nothing left", for both the path and the expression.
        subpath = path[path_i:] if path_i < len(path) else None
        subexpr = expr_parts[expr_i] if expr_i < len(expr_parts) else None

        # The next part of the expression.
        next_pp = expr_parts[expr_i + 1] if expr_i + 1 < len(expr_parts) else None

        stop_at_leading_path_separator = not match_path_separator and subpath != None and subpath.startswith("/")
        stop_at_contained_path_separator = not match_path_separator and subpath != None and subpath.find("/") != -1

        if (subexpr == "*" and subpath != None and not stop_at_leading_path_separator) or (subexpr == "**" and subpath != None):
            # A wildcard or globstar in the expression and something to consume.
            if next_pp == None and not stop_at_contained_path_separator:
                # This wildcard is the last and matches everything beyond here.
                return True

            # If the next part of the expression can start at the current subpath then
            # advance past the wildcard and try that next part. A following `?` token is
            # a pattern rather than literal text, so it is always worth trying here.
            if next_pp != None and (next_pp == "?" or subpath.startswith(next_pp)):
                # Persist the alternative of using the wildcard instead of advancing.
                branches.append([expr_i, path_i + 1])
                expr_i = expr_i + 1
            else:
                # Otherwise consume the next character.
                path_i = path_i + 1

        elif subexpr == "*" and subpath != None and stop_at_leading_path_separator and next_pp != None and subpath.startswith(next_pp):
            # A wildcard that has hit a path separator it is not allowed to consume.
            # The only option is to end the wildcard here, so no rollback branch is
            # recorded: resuming the wildcard at path_i + 1 would let `*` match `/`.
            expr_i = expr_i + 1

        elif subexpr == "?" and subpath != None and not stop_at_leading_path_separator:
            # The string matches a ? wildcard at the current location in the path.
            expr_i = expr_i + 1
            path_i = path_i + 1

        elif subexpr and subpath != None and subpath.startswith(subexpr):
            # The string matches the current location in the path.
            expr_i = expr_i + 1
            path_i = path_i + len(subexpr)

        elif subpath == None and expr_i == len(expr_parts) - 1 and (subexpr == "*" or subexpr == "**"):
            # Reached the end of the path on a final "*" or "**", which may match nothing.
            return True

        elif len(branches) > 0:
            # The string does not match, backup to the previous branch.
            [restored_pattern_i, restored_path_i] = branches.pop()

            path_i = restored_path_i
            expr_i = restored_pattern_i

        else:
            # The string does not match, with no branches to rollback to, there is no match.
            return False

        if path_i == len(path) and expr_i == len(expr_parts):
            # Reached the end of both the expression and the path.
            return True

    fail("glob_match: reached the end of the (in)finite loop")
|
|
@ -6,6 +6,7 @@ load("//lib:expand_make_vars.bzl", "expand_template")
|
|||
load(":expand_make_vars_test.bzl", "expand_make_vars_test_suite")
|
||||
load(":utils_test.bzl", "utils_test_suite")
|
||||
load(":paths_test.bzl", "paths_test_suite")
|
||||
load(":glob_match_test.bzl", "glob_match_test_suite")
|
||||
|
||||
expand_make_vars_test_suite()
|
||||
|
||||
|
@ -13,6 +14,8 @@ paths_test_suite()
|
|||
|
||||
utils_test_suite()
|
||||
|
||||
glob_match_test_suite()
|
||||
|
||||
write_file(
|
||||
name = "gen_template",
|
||||
out = "template.txt",
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
"""unit tests for glob_match"""
|
||||
|
||||
load("@bazel_skylib//lib:unittest.bzl", "asserts", "unittest")
|
||||
load("//lib:glob_match.bzl", "glob_match")
|
||||
|
||||
def _glob_match_test(ctx, expr, matches, non_matches, mps_matches = None, mps_non_matches = None):
    """Asserts which paths `expr` matches, both with and without `match_path_separator`.

    `mps` is short for `match_path_separator`.

    Args:
        ctx: the test context passed on to unittest.begin
        expr: the glob expression under test
        matches: paths that must match with the default match_path_separator
        non_matches: paths that must not match with the default match_path_separator
        mps_matches: paths that must match with match_path_separator = True; falls back to matches when None
        mps_non_matches: paths that must not match with match_path_separator = True; falls back to non_matches when None

    Returns:
        The value of unittest.end for this test's environment
    """
    env = unittest.begin(ctx)

    accepted_with_mps = matches if mps_matches == None else mps_matches
    rejected_with_mps = non_matches if mps_non_matches == None else mps_non_matches

    for candidate in matches:
        outcome = glob_match(expr, candidate)
        asserts.equals(env, True, outcome, "Expected expr '{}' to match on path '{}'".format(expr, candidate))

    for candidate in non_matches:
        outcome = glob_match(expr, candidate)
        asserts.equals(env, False, outcome, "Expected expr '{}' to _not_ match on path '{}'".format(expr, candidate))

    for candidate in accepted_with_mps:
        outcome = glob_match(expr, candidate, match_path_separator = True)
        asserts.equals(env, True, outcome, "Expected expr '{}' with match_path_separator to match on path '{}'".format(expr, candidate))

    for candidate in rejected_with_mps:
        outcome = glob_match(expr, candidate, match_path_separator = True)
        asserts.equals(env, False, outcome, "Expected expr '{}' with match_path_separator to _not_ match on path '{}'".format(expr, candidate))

    return unittest.end(env)
|
||||
|
||||
def _star(ctx):
    """A lone `*` is expected to cross `/` only when match_path_separator is enabled."""
    return _glob_match_test(
        ctx,
        "*",
        ["express"],
        ["@eslint/plugin-foo"],
        ["express", "@eslint/plugin-foo"],
        [],
    )

star_test = unittest.make(_star)
|
||||
|
||||
def _globstar(ctx):
    """A lone `**` is expected to match paths with and without separators."""
    return _glob_match_test(
        ctx,
        "**",
        matches = ["@eslint/plugin-foo", "express"],
        non_matches = [],
    )

globstar_test = unittest.make(_globstar)
|
||||
|
||||
def _qmark(ctx):
    """A lone `?` is expected to match exactly one character, and `/` only with match_path_separator."""
    return _glob_match_test(
        ctx,
        "?",
        ["a", "b"],
        ["/", "aa", "bb"],
        ["a", "b", "/"],
        ["aa", "bb"],
    )

qmark_test = unittest.make(_qmark)
|
||||
|
||||
def _qmark_qmark(ctx):
    """`??` is expected to match two-character names and reject single characters."""
    two_chars = ["aa", "ba"]
    one_char = ["/", "a", "b"]
    return _glob_match_test(ctx, "??", two_chars, one_char)

qmark_qmark_test = unittest.make(_qmark_qmark)
|
||||
|
||||
def _wrapped_qmark(ctx):
    """`f?n`: a `?` between literals; `f/n` is expected to match only with match_path_separator."""
    return _glob_match_test(
        ctx,
        "f?n",
        ["fun", "fin"],
        ["funny", "fit", "bob", "f/n"],
        ["fun", "fin", "f/n"],
        ["funny", "fit", "bob"],
    )

wrapped_qmark_test = unittest.make(_wrapped_qmark)
|
||||
|
||||
def _mixed_wrapped_qmark(ctx):
    """`f?n*`: a `?` between literals followed by a trailing `*`."""
    return _glob_match_test(
        ctx,
        "f?n*",
        ["fun", "fin", "funny"],
        ["fit", "bob", "f/n", "f/n/uny"],
        ["fun", "fin", "f/n", "funny", "f/n/uny"],
        ["fit", "bob"],
    )

mixed_wrapped_qmark_test = unittest.make(_mixed_wrapped_qmark)
|
||||
|
||||
def _ending_star(ctx):
    """`eslint-*`: a literal prefix followed by a trailing `*`."""
    return _glob_match_test(
        ctx,
        "eslint-*",
        matches = ["eslint-plugin-foo"],
        non_matches = ["@eslint/plugin-foo", "express", "eslint", "-eslint"],
    )

ending_star_test = unittest.make(_ending_star)
|
||||
|
||||
def _wrapping_star(ctx):
    """`*plugin*`: a literal surrounded by `*` on both sides."""
    return _glob_match_test(
        ctx,
        "*plugin*",
        ["eslint-plugin-foo"],
        ["@eslint/plugin-foo", "express"],
        ["eslint-plugin-foo", "@eslint/plugin-foo"],
        ["express"],
    )

wrapping_star_test = unittest.make(_wrapping_star)
|
||||
|
||||
def _wrapped_star(ctx):
    """`a*c`: a `*` between two literals, including inputs that need backtracking."""
    return _glob_match_test(
        ctx,
        "a*c",
        matches = ["ac", "abc", "accc", "acacac", "a1234c", "a12c34c"],
        non_matches = ["abcd"],
    )

wrapped_star_test = unittest.make(_wrapped_star)
|
||||
|
||||
def _starting_star(ctx):
    """`*-positive`: a leading `*` followed by a literal suffix."""
    return _glob_match_test(
        ctx,
        "*-positive",
        matches = ["is-positive"],
        non_matches = ["is-positive-not"],
    )

starting_star_test = unittest.make(_starting_star)
|
||||
|
||||
def _mixed_trailing_globstar(ctx):
    """`foo*/**`: a `*` segment followed by a trailing globstar."""
    accepted = ["foo/fum/bar", "foostar/fum/bar"]
    rejected = ["fo/fum/bar", "fostar/fum/bar", "foo", "foostar"]
    return _glob_match_test(ctx, "foo*/**", accepted, rejected)

mixed_trailing_globstar_test = unittest.make(_mixed_trailing_globstar)
|
||||
|
||||
def _mixed_leading_globstar(ctx):
    """`**/foo*`: a leading globstar followed by a `*` segment."""
    accepted = ["fum/bar/foo", "fum/bar/foostar"]
    rejected = ["fum/bar/fo", "fum/bar/fostar", "foo", "foostar"]
    return _glob_match_test(ctx, "**/foo*", accepted, rejected)

mixed_leading_globstar_test = unittest.make(_mixed_leading_globstar)
|
||||
|
||||
def _mixed_wrapping_globstar(ctx):
    """`**/foo*/**`: a `*` segment with a globstar on either side."""
    accepted = ["fum/bar/foo/fum/bar", "fum/bar/foostar/fum/bar"]
    rejected = ["fum/bar/fo/fum/bar", "fum/bar/fostar/fum/bar", "foo", "foostar"]
    return _glob_match_test(ctx, "**/foo*/**", accepted, rejected)

mixed_wrapper_globstar_test = unittest.make(_mixed_wrapping_globstar)
|
||||
|
||||
def glob_match_test_suite():
    """Registers every glob_match unit test under the `glob_match_tests` suite."""
    suite_tests = [
        star_test,
        globstar_test,
        qmark_test,
        qmark_qmark_test,
        wrapped_qmark_test,
        mixed_wrapped_qmark_test,
        ending_star_test,
        wrapping_star_test,
        wrapped_star_test,
        starting_star_test,
        mixed_trailing_globstar_test,
        mixed_leading_globstar_test,
        mixed_wrapper_globstar_test,
    ]
    unittest.suite("glob_match_tests", *suite_tests)
|
Loading…
Reference in New Issue