Add is_normalized and starts_with to paths module (#514)

* Add is_normalized and starts_with to paths module.

* Update docs
This commit is contained in:
Ivo List 2024-05-29 15:40:38 +02:00 committed by GitHub
parent f351bedd9a
commit 0e485c80b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 202 additions and 0 deletions

View File

@ -83,6 +83,33 @@ Returns `True` if `path` is an absolute path.
`True` if `path` is an absolute path. `True` if `path` is an absolute path.
<a id="paths.is_normalized"></a>
## paths.is_normalized
<pre>
paths.is_normalized(<a href="#paths.is_normalized-str">str</a>, <a href="#paths.is_normalized-look_for_same_level_references">look_for_same_level_references</a>)
</pre>
Returns true if the passed path doesn't contain uplevel references "..".
Also checks for single-dot references "." if look_for_same_level_references
is `True`.
**PARAMETERS**
| Name | Description | Default Value |
| :------------- | :------------- | :------------- |
| <a id="paths.is_normalized-str"></a>str | The path string to check. | none |
| <a id="paths.is_normalized-look_for_same_level_references"></a>look_for_same_level_references | If True checks if path doesn't contain uplevel references ".." or single-dot references ".". | `True` |
**RETURNS**
True if the path is normalized, False otherwise.
<a id="paths.join"></a> <a id="paths.join"></a>
## paths.join ## paths.join
@ -239,3 +266,24 @@ the leading dot). The returned tuple always satisfies the relationship
`root + ext == p`. `root + ext == p`.
<a id="paths.starts_with"></a>
## paths.starts_with
<pre>
paths.starts_with(<a href="#paths.starts_with-path_a">path_a</a>, <a href="#paths.starts_with-path_b">path_b</a>)
</pre>
Returns True if and only if path_b is an ancestor of path_a.
Does not handle OS dependent case-insensitivity.
**PARAMETERS**
| Name | Description | Default Value |
| :------------- | :------------- | :------------- |
| <a id="paths.starts_with-path_a"></a>path_a | The path to inspect. | none |
| <a id="paths.starts_with-path_b"></a>path_b | The candidate ancestor (leading path) of `path_a`. | none |

View File

@ -153,6 +153,67 @@ def _normalize(path):
return path or "." return path or "."
# States of the scanner used by _is_normalized. Each one describes what has
# been read so far in the current path segment.
_BASE = 0  # The segment contains something other than just "." or "..".
_SEPARATOR = 1  # The previous character was "/" (or nothing was read yet).
_DOT = 2  # The segment so far is exactly ".".
_DOTDOT = 3  # The segment so far is exactly "..".

def _is_normalized(str, look_for_same_level_references = True):
    """Returns true if the passed path doesn't contain uplevel references "..".

    Also checks for single-dot references "." if look_for_same_level_references
    is `True`.

    The path is scanned one character at a time without being split. Only
    whole segments count: names such as ".foo", "..foo" or "..." are accepted.
    Repeated and trailing separators are not reported.

    Args:
      str: The path string to check.
      look_for_same_level_references: If True checks if path doesn't contain
        uplevel references ".." or single-dot references ".".

    Returns:
      True if the path is normalized, False otherwise.
    """

    # Start as if a separator had just been read, so that a leading "." or
    # ".." is recognised as a complete segment.
    current = _SEPARATOR
    for ch in str.elems():
        if ch == "/":
            # A separator closes the segment that was being read.
            if current == _DOTDOT:
                return False
            if current == _DOT and look_for_same_level_references:
                # "." segment found.
                return False
            current = _SEPARATOR
        elif ch == "." and current == _SEPARATOR:
            current = _DOT
        elif ch == "." and current == _DOT:
            current = _DOTDOT
        else:
            # Any other character (including a third dot) makes this an
            # ordinary segment.
            current = _BASE

    # The end of the string closes the last segment as well.
    if current == _DOTDOT:
        return False
    if current == _DOT and look_for_same_level_references:
        # "." segment found.
        return False
    return True
def _relativize(path, start): def _relativize(path, start):
"""Returns the portion of `path` that is relative to `start`. """Returns the portion of `path` that is relative to `start`.
@ -230,13 +291,30 @@ def _split_extension(p):
dot_distance_from_end = len(b) - last_dot_in_basename dot_distance_from_end = len(b) - last_dot_in_basename
return (p[:-dot_distance_from_end], p[-dot_distance_from_end:]) return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])
def _starts_with(path_a, path_b):
    """Returns True if and only if path_b is an ancestor of path_a.

    Both paths are passed through `_normalize` before being compared, and the
    match has to end on a path segment boundary: "foo/bar" starts with "foo"
    but not with "fo". A path also starts with itself and with the empty
    string.

    Does not handle OS dependent case-insensitivity.

    Args:
      path_a: The path to inspect.
      path_b: The candidate ancestor (leading path) of path_a.

    Returns:
      True if path_a starts with path_b, False otherwise.
    """
    if not path_b:
        # all paths start with the empty string
        return True
    norm_a = _normalize(path_a)
    norm_b = _normalize(path_b)

    # startswith() is already False when norm_b is longer than norm_a, so no
    # separate length check is needed.
    if not norm_a.startswith(norm_b):
        return False
    if len(norm_a) == len(norm_b):
        return True

    # When the prefix itself ends with a separator (e.g. the root "/",
    # presumably left untouched by _normalize) the match already stops on a
    # segment boundary. Looking at the next character would wrongly reject
    # paths such as "/foo" against "/".
    if norm_b.endswith("/"):
        return True

    # Otherwise the character right after the prefix must be a separator, so
    # that "fo" is not taken for an ancestor of "foo/bar".
    return norm_a[len(norm_b)] == "/"
# Public API of this module: every field maps an exported name to the private
# function implementing it.
paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    join = _join,
    normalize = _normalize,
    is_normalized = _is_normalized,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
    starts_with = _starts_with,
)

View File

@ -180,6 +180,55 @@ def _normalize_test(ctx):
normalize_test = unittest.make(_normalize_test) normalize_test = unittest.make(_normalize_test)
def _is_normalized_test(ctx):
    """Unit tests for paths.is_normalized."""
    env = unittest.begin(ctx)

    # Try the most basic cases.
    asserts.true(env, paths.is_normalized(""))
    asserts.false(env, paths.is_normalized("."))
    asserts.false(env, paths.is_normalized(".."))
    asserts.true(env, paths.is_normalized("/"))
    asserts.true(env, paths.is_normalized("/tmp"))
    asserts.true(env, paths.is_normalized("tmp"))
    asserts.true(env, paths.is_normalized("c:/"))
    asserts.false(env, paths.is_normalized("../a"))
    asserts.false(env, paths.is_normalized("a/.."))

    # Adjacent slashes are not reported: only "." and ".." segments are.
    asserts.true(env, paths.is_normalized("foo//bar"))
    asserts.true(env, paths.is_normalized("foo////bar"))

    # "." segments are rejected wherever they appear.
    asserts.false(env, paths.is_normalized("foo/./bar"))
    asserts.false(env, paths.is_normalized("./foo/bar"))
    asserts.false(env, paths.is_normalized("foo/bar/."))
    asserts.false(env, paths.is_normalized("/."))

    # ".." segments are rejected wherever they appear.
    asserts.false(env, paths.is_normalized("foo/../bar"))
    asserts.false(env, paths.is_normalized("foo/bar/.."))
    asserts.false(env, paths.is_normalized("foo/.."))
    asserts.false(env, paths.is_normalized("foo/bar/../.."))
    asserts.false(env, paths.is_normalized("foo/../.."))
    asserts.false(env, paths.is_normalized("/foo/../.."))
    asserts.false(env, paths.is_normalized("a/b/../../../../c/d/.."))

    # Names that merely begin with dots are ordinary segments.
    asserts.true(env, paths.is_normalized(".hidden"))
    asserts.true(env, paths.is_normalized("..."))
    asserts.true(env, paths.is_normalized("foo/..bar"))

    # Any number of leading slashes is accepted.
    asserts.true(env, paths.is_normalized("/foo"))
    asserts.true(env, paths.is_normalized("//foo"))
    asserts.true(env, paths.is_normalized("///foo"))

    # Trailing slashes are accepted as well.
    asserts.true(env, paths.is_normalized("/"))
    asserts.true(env, paths.is_normalized("foo/"))
    asserts.true(env, paths.is_normalized("foo/bar/"))

    # With look_for_same_level_references disabled, "." segments are tolerated
    # while ".." segments are still rejected.
    asserts.true(env, paths.is_normalized(".", look_for_same_level_references = False))
    asserts.true(env, paths.is_normalized("./foo/bar", look_for_same_level_references = False))
    asserts.true(env, paths.is_normalized("foo/./bar", look_for_same_level_references = False))
    asserts.true(env, paths.is_normalized("foo/bar/.", look_for_same_level_references = False))
    asserts.false(env, paths.is_normalized("..", look_for_same_level_references = False))
    asserts.false(env, paths.is_normalized("./..", look_for_same_level_references = False))
    asserts.false(env, paths.is_normalized("foo/../bar", look_for_same_level_references = False))

    return unittest.end(env)

is_normalized_test = unittest.make(_is_normalized_test)
def _relativize_test(ctx): def _relativize_test(ctx):
"""Unit tests for paths.relativize.""" """Unit tests for paths.relativize."""
env = unittest.begin(ctx) env = unittest.begin(ctx)
@ -276,6 +325,31 @@ def _split_extension_test(ctx):
split_extension_test = unittest.make(_split_extension_test) split_extension_test = unittest.make(_split_extension_test)
def _starts_with_test(ctx):
    """Unit tests for paths.starts_with."""
    env = unittest.begin(ctx)

    # Each entry is (path_a, path_b, expected result of starts_with).
    # The entries are checked in the order listed.
    cases = [
        # The empty prefix matches any path; "." is not given that treatment.
        ("foo", "", True),
        ("foo", ".", False),

        # Regular cases: the prefix has to end on a segment boundary.
        ("foo/bar", "foo", True),
        ("foo/bar", "fo", False),
        ("foo/bar/baz", "foo/bar", True),
        ("foo/bar/baz", "foo", True),

        # A parent directory reference that is normalized away.
        ("foo/bar/../baz", "foo", True),

        # Relative paths work, as long as they share a common start.
        ("../foo/bar/baz/file", "../foo/bar/baz", True),
        ("../foo/bar/baz/file", "../foo/bar", True),
    ]
    for path_a, path_b, expected in cases:
        outcome = paths.starts_with(path_a, path_b)
        if expected:
            asserts.true(env, outcome)
        else:
            asserts.false(env, outcome)

    return unittest.end(env)

starts_with_test = unittest.make(_starts_with_test)
def paths_test_suite(): def paths_test_suite():
"""Creates the test targets and test suite for paths.bzl tests.""" """Creates the test targets and test suite for paths.bzl tests."""
unittest.suite( unittest.suite(
@ -285,7 +359,9 @@ def paths_test_suite():
is_absolute_test, is_absolute_test,
join_test, join_test,
normalize_test, normalize_test,
is_normalized_test,
relativize_test, relativize_test,
replace_extension_test, replace_extension_test,
split_extension_test, split_extension_test,
starts_with_test,
) )