mirror of
https://github.com/bazelbuild/bazel-skylib
synced 2024-11-28 08:43:51 +00:00
0e485c80b7
* Add is_normalized and starts_with to paths module. * Update docs
321 lines
10 KiB
Python
321 lines
10 KiB
Python
# Copyright 2017 The Bazel Authors. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Skylib module containing file path manipulation functions.
|
|
|
|
NOTE: The functions in this module currently only support paths with Unix-style
|
|
path separators (forward slash, "/"); they do not handle Windows-style paths
|
|
with backslash separators or drive letters.
|
|
"""
|
|
|
|
def _basename(p):
    """Returns the final component (the file portion) of a path.

    Everything after the last "/" is returned. A path that ends with a slash
    therefore yields an empty string, which is what Python's
    `os.path.basename` does; the Unix `basename` command would instead return
    the segment before that trailing slash.

    Args:
      p: The path whose basename should be returned.

    Returns:
      The basename of the path, including any extension.
    """

    # rfind() yields -1 when there is no slash, so the slice starts at 0 and
    # the whole string is returned.
    last_slash = p.rfind("/")
    return p[last_slash + 1:]
|
|
|
|
def _dirname(p):
    """Returns the dirname of a path.

    The dirname is the portion of `p` up to but not including the file portion
    (i.e., the basename). Any slashes immediately preceding the basename are not
    included, unless omitting them would make the dirname empty. In that case
    (for example "/foo" or "//foo") the leading slashes are returned unchanged,
    as Python's `os.path.dirname` does, so the dirname of an absolute path is
    never the empty string.

    Args:
      p: The path whose dirname should be returned.

    Returns:
      The dirname of the path. This is the empty string when `p` contains no
      slash at all.
    """
    prefix, sep, _ = p.rpartition("/")

    # Everything up to and including the last slash ("" when there is none).
    head = prefix + sep

    # Strip the slashes that separate the dirname from the basename (possibly
    # several consecutive ones). If nothing is left, the head consisted only
    # of slashes and must be kept as is.
    stripped = head.rstrip("/")
    return stripped or head
|
|
|
|
def _is_absolute(path):
    """Returns `True` if `path` is an absolute path.

    A path is treated as absolute when it begins with "/", or when it is longer
    than two characters and its second character is ":" (for example a
    drive-letter prefix such as "C:/foo").

    Args:
      path: A path (which is a string).

    Returns:
      `True` if `path` is an absolute path.
    """
    if path.startswith("/"):
        return True

    # The length check also guards the index below against short strings.
    return len(path) > 2 and path[1] == ":"
|
|
|
|
def _join(path, *others):
    """Joins one or more path components intelligently.

    The behavior follows Python's `os.path.join` on POSIX platforms: `path` and
    each member of `others` are concatenated, with a "/" inserted before every
    component after the first. No separator is inserted when the path built so
    far is empty or already ends with a separator.

    An absolute component discards everything that was joined before it.

    Args:
      path: A path segment.
      *others: Additional path segments.

    Returns:
      A string containing the joined paths.
    """
    joined = path

    for segment in others:
        if _is_absolute(segment):
            # Restart from the absolute component.
            joined = segment
            continue

        # Only add a separator between two non-empty pieces that are not
        # already separated.
        if joined and not joined.endswith("/"):
            joined += "/"
        joined += segment

    return joined
|
|
|
|
def _normalize(path):
    """Normalizes a path, eliminating double slashes and other redundant segments.

    The result follows Python's `os.path.normpath` on POSIX platforms:

    - An empty path becomes ".".
    - "." segments are dropped, unless nothing else remains, in which case the
      result is ".".
    - Trailing slashes are dropped, unless the path consists solely of slashes.
    - A ".." segment cancels the preceding segment when there is one to cancel;
      otherwise it is kept as a leading ".." of a relative path, and dropped
      for an absolute path.
    - One or two leading slashes are preserved; three or more leading slashes
      collapse into a single one.
    - Runs of adjacent internal slashes collapse into a single slash.

    Args:
      path: A path.

    Returns:
      The normalized path.
    """
    if not path:
        return "."

    # Work out which leading-slash prefix survives normalization. Exactly two
    # leading slashes are kept; any other number of leading slashes becomes one.
    if not path.startswith("/"):
        root = ""
    elif path.startswith("//") and not path.startswith("///"):
        root = "//"
    else:
        root = "/"

    kept = []
    for segment in path.split("/"):
        if segment == "" or segment == ".":
            # Produced by repeated, leading or trailing slashes, or a no-op ".".
            continue
        if segment != "..":
            kept.append(segment)
        elif kept and kept[-1] != "..":
            # Cancel the previous real segment.
            kept.pop()
        elif not root:
            # Nothing left to cancel: relative paths keep the leading "..",
            # absolute paths silently drop it.
            kept.append(segment)

    return (root + "/".join(kept)) or "."
|
|
|
|
# States of the character scanner in `_is_normalized`.

# Inside a segment that is neither "." nor "..".
_BASE = 0

# At the start of the path or just after a "/".
_SEPARATOR = 1

# The current segment so far is exactly ".".
_DOT = 2

# The current segment so far is exactly "..".
_DOTDOT = 3
|
|
|
|
def _is_normalized(str, look_for_same_level_references = True):
    """Returns true if the passed path doesn't contain uplevel references "..".

    Also checks for single-dot references "." if look_for_same_level_references
    is `True`.

    The path is scanned one character at a time; a segment counts as a
    reference only when it is exactly "." or "..", so names such as "..foo" or
    "a.b" are accepted.

    Args:
      str: The path string to check.
      look_for_same_level_references: If True checks if path doesn't contain
        uplevel references ".." or single-dot references ".".

    Returns:
      True if the path is normalized, False otherwise.
    """
    current = _SEPARATOR
    for ch in str.elems():
        at_separator = (ch == "/")
        at_dot = (ch == ".")

        if current == _DOTDOT:
            if at_separator:
                # A complete ".." segment was found.
                return False
            current = _BASE
        elif current == _DOT:
            if at_separator:
                if look_for_same_level_references:
                    # A complete "." segment was found.
                    return False
                current = _SEPARATOR
            elif at_dot:
                current = _DOTDOT
            else:
                current = _BASE
        elif current == _SEPARATOR:
            if at_dot:
                current = _DOT
            elif not at_separator:
                current = _BASE
        elif at_separator:
            # current == _BASE: the ordinary segment ends here.
            current = _SEPARATOR

    # The path may end in the middle of a "." or ".." segment.
    if current == _DOTDOT:
        return False
    if current == _DOT and look_for_same_level_references:
        return False
    return True
|
|
|
|
def _relativize(path, start):
    """Returns the portion of `path` that is relative to `start`.

    Because we do not have access to the underlying file system, this
    implementation differs slightly from Python's `os.path.relpath` in that it
    will fail if `path` is not beneath `start` (rather than use parent segments to
    walk up to the common file system root).

    Relativizing paths that start with parent directory references only works if
    both paths start with the same initial parent references.

    Both paths are normalized before they are compared. `path` and `start` must
    either both be absolute or both be relative.

    Args:
      path: The path to relativize.
      start: The ancestor path against which to relativize.

    Returns:
      The portion of `path` that is relative to `start`, or "." if the two
      paths are the same after normalization.
    """
    normalized_path = _normalize(path)
    normalized_start = _normalize(start)

    if path.startswith("/") != start.startswith("/"):
        fail("Path '%s' is not beneath '%s'" % (path, start))

    if not normalized_start.strip("/"):
        # `start` is a root ("/" or "//"). Splitting it on "/" would produce a
        # spurious trailing empty segment that never matches the first real
        # segment of `path`, so handle it by prefix instead.
        if not normalized_path.startswith(normalized_start):
            fail("Path '%s' is not beneath '%s'" % (path, start))
        return normalized_path.lstrip("/") or "."

    segments = normalized_path.split("/")
    start_segments = normalized_start.split("/")
    if start_segments == ["."]:
        # The current directory contributes no segments to strip.
        start_segments = []
    start_length = len(start_segments)

    if len(segments) < start_length:
        fail("Path '%s' is not beneath '%s'" % (path, start))

    for ancestor_segment, segment in zip(start_segments, segments):
        if ancestor_segment != segment:
            fail("Path '%s' is not beneath '%s'" % (path, start))

    # Slice from the front: a negative-index slice such as segments[-0:] would
    # return the whole list when nothing remains after `start`.
    return "/".join(segments[start_length:]) or "."
|
|
|
|
def _replace_extension(p, new_extension):
    """Replaces the extension of the file at the end of a path.

    The extension is whatever `_split_extension` reports for `p`. When the path
    has no extension, `new_extension` is simply appended.

    Args:
      p: The path whose extension should be replaced.
      new_extension: The new extension for the file. It is appended verbatim,
        so it should begin with a dot if the new filename is meant to have one.

    Returns:
      The path with the extension replaced (or added, if it did not have one).
    """
    root, _ = _split_extension(p)
    return root + new_extension
|
|
|
|
def _split_extension(p):
    """Splits the path `p` into a tuple containing the root and extension.

    Leading periods on the basename are ignored, so
    `path.split_extension(".bashrc")` returns `(".bashrc", "")`. The same holds
    for a basename made of periods only, such as the parent reference "..",
    which is returned whole with an empty extension (as Python's
    `os.path.splitext` does).

    Args:
      p: The path whose root and extension should be split.

    Returns:
      A tuple `(root, ext)` such that the root is the path without the file
      extension, and `ext` is the file extension (which, if non-empty, contains
      the leading dot). The returned tuple always satisfies the relationship
      `root + ext == p`.
    """
    b = _basename(p)
    last_dot_in_basename = b.rfind(".")

    # There is no extension if the basename has no dot at all, or if the last
    # dot is preceded by nothing but other dots (".bashrc", "..", "..foo").
    if last_dot_in_basename < 0 or not b[:last_dot_in_basename].strip("."):
        return (p, "")

    # The basename is a suffix of `p`, so the extension is found by counting
    # back from the end of `p`. The distance is always at least 1.
    dot_distance_from_end = len(b) - last_dot_in_basename
    return (p[:-dot_distance_from_end], p[-dot_distance_from_end:])
|
|
|
|
def _starts_with(path_a, path_b):
    """Returns True if and only if path_b is an ancestor of path_a.

    Both paths are normalized first, and the comparison is made on whole path
    segments, so "foo/bar" starts with "foo" but not with "fo". A path is
    considered to start with itself, and every path starts with the empty
    string. A root `path_b` ("/") is an ancestor of every absolute path.

    Does not handle OS dependent case-insensitivity.

    Args:
      path_a: The path to test.
      path_b: The candidate ancestor path.

    Returns:
      True if `path_a` is equal to or located beneath `path_b`, False
      otherwise.
    """
    if not path_b:
        # all paths start with the empty string
        return True

    norm_a = _normalize(path_a)
    norm_b = _normalize(path_b)
    if not norm_a.startswith(norm_b):
        return False

    # A normalized path ends with "/" only when it is a root, in which case the
    # prefix match alone already ends on a segment boundary.
    if len(norm_a) == len(norm_b) or norm_b.endswith("/"):
        return True

    # Otherwise the prefix must be followed by a separator, so that "fo" is not
    # taken for an ancestor of "foo/bar".
    return norm_a[len(norm_b)] == "/"
|
|
|
|
# Public entry point of this module: each field exposes the private function
# of the same name defined in this file.
paths = struct(
    basename = _basename,
    dirname = _dirname,
    is_absolute = _is_absolute,
    is_normalized = _is_normalized,
    join = _join,
    normalize = _normalize,
    relativize = _relativize,
    replace_extension = _replace_extension,
    split_extension = _split_extension,
    starts_with = _starts_with,
)
|