pkg_zip: Some unicode file handling fixes and basic tests (#641)

Add primitive zip unicode filename handling tests
- Just make sure we can create an archive with non-ASCII file names.
This commit is contained in:
aiuto 2023-02-13 23:25:09 -05:00 committed by GitHub
parent f9cf7959cd
commit 7c2983c869
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 97 additions and 16 deletions

View File

@ -47,6 +47,7 @@ win_tests: &win_tests
# https://github.com/bazelbuild/bazel/issues?q=is%3Aissue+is%3Aopen+%2Bunicode+%2Bwindows+
- "-//tests/mappings:utf8_manifest_test"
- "-//tests/mappings/filter_directory/..."
- "-//tests/zip:unicode_test"
# See #387
- "-//tests/install/..."
# rpmbuild(8) is not supported on Windows

View File

@ -87,6 +87,8 @@ organization will commit to maintaining that feature and responding to issues.
- We use Python 3. We are not trying to support Python 2 at all.
- Always import with a full path from the root of the source tree.
- Compatibility back to python 3.6 (for CentOS 7 and Ubuntu 18.04 LTS) is
required.
### Dependencies

View File

@ -49,9 +49,16 @@ class ManifestEntry(object):
def read_entries_from(fh):
"""Return a list of ManifestEntry's from `fh`"""
raw_entries = json.load(fh)
# Subtle: decode the content with read() rather than in json.load() because
# the load in older python releases (< 3.7?) does not know how to decode.
raw_entries = json.loads(fh.read())
return [ManifestEntry(**entry) for entry in raw_entries]
def read_entries_from_file(manifest_path):
"""Return a list of ManifestEntry's from the manifest file at `path`"""
with open(manifest_path, 'r', encoding='utf-8') as fh:
return read_entries_from(fh)
def entry_type_to_string(et):
"""Entry type stringifier"""
if et == ENTRY_IS_FILE:

View File

@ -142,8 +142,9 @@ class ZipWriter(object):
if entry_type == manifest.ENTRY_IS_FILE:
entry_info.compress_type = zipfile.ZIP_DEFLATED
with open(src, 'rb') as src:
self.zip_file.writestr(entry_info, src.read())
# Using utf-8 for the file names is for python <3.7 compatibility.
with open(src.encode('utf-8'), 'rb') as src_content:
self.zip_file.writestr(entry_info, src_content.read())
elif entry_type == manifest.ENTRY_IS_DIR:
entry_info.compress_type = zipfile.ZIP_STORED
# Set directory bits
@ -153,7 +154,7 @@ class ZipWriter(object):
entry_info.compress_type = zipfile.ZIP_STORED
# Set directory bits
entry_info.external_attr |= (UNIX_SYMLINK_BIT << 16)
self.zip_file.writestr(entry_info, src)
self.zip_file.writestr(entry_info, src.encode('utf-8'))
elif entry_type == manifest.ENTRY_IS_TREE:
self.add_tree(src, dst_path, mode)
elif entry_type == manifest.ENTRY_IS_EMPTY_FILE:
@ -226,9 +227,10 @@ class ZipWriter(object):
entry_info.external_attr |= (UNIX_DIR_BIT << 16) | MSDOS_DIR_BIT
self.zip_file.writestr(entry_info, '')
def _load_manifest(prefix, manifest_fp):
def _load_manifest(prefix, manifest_path):
manifest_map = {}
for entry in manifest.read_entries_from(manifest_fp):
for entry in manifest.read_entries_from_file(manifest_path):
entry.dest = _combine_paths(prefix, entry.dest)
manifest_map[entry.dest] = entry
@ -263,12 +265,11 @@ def main(args):
if args.mode:
default_mode = int(args.mode, 8)
with open(args.manifest, 'r') as manifest_fp:
manifest = _load_manifest(args.directory, manifest_fp)
with ZipWriter(
args.output, time_stamp=ts, default_mode=default_mode) as zip_out:
for entry in manifest:
zip_out.add_manifest_entry(entry)
manifest = _load_manifest(args.directory, args.manifest)
with ZipWriter(
args.output, time_stamp=ts, default_mode=default_mode) as zip_out:
for entry in manifest:
zip_out.add_manifest_entry(entry)
if __name__ == '__main__':

View File

@ -14,11 +14,10 @@
# -*- coding: utf-8 -*-
load("@rules_python//python:defs.bzl", "py_test")
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load(":my_package_name.bzl", "my_package_naming")
load(":path_test.bzl", "path_tests")
load("//pkg:deb.bzl", "pkg_deb")
load("//pkg:mappings.bzl", "pkg_attributes", "pkg_mkdirs")
load("//pkg:mappings.bzl", "pkg_attributes", "pkg_files", "strip_prefix")
load("//pkg:tar.bzl", "pkg_tar")
load("//pkg:zip.bzl", "pkg_zip")
@ -53,6 +52,23 @@ filegroup(
srcs = glob(["**/*.txt"]),
)
#
# Data source for Unicode handling tests
#
pkg_files(
name = "utf8_files",
srcs = [
"//tests/testdata/utf8:files",
],
# Note: This is temporary. We need to fix a latent bug where
# we are using 555 as the default for many things. That was the
# Google internal behavior.
# See https://github.com/bazelbuild/rules_pkg/issues/302 for thoughts.
attributes = pkg_attributes(mode = "0o555"),
strip_prefix = strip_prefix.from_pkg(),
visibility = ["//tests:__subpackages__"],
)
py_test(
name = "archive_test",
srcs = [

View File

@ -109,6 +109,7 @@ pkg_files(
"//tests/testdata/utf8:files",
],
strip_prefix = strip_prefix.from_pkg(),
visibility = ["//tests:__subpackages__"],
)
write_content_manifest(

View File

@ -256,7 +256,6 @@ py_test(
python_version = "PY3",
deps = [
":zip_test_lib",
"@bazel_tools//tools/python/runfiles",
],
)
@ -282,3 +281,22 @@ py_test(
":zip_test_lib",
],
)
pkg_zip(
name = "unicode_names",
srcs = ["//tests:utf8_files"]
)
py_test(
name = "unicode_test",
srcs = [
"unicode_test.py",
],
data = [
":unicode_names",
],
python_version = "PY3",
deps = [
":zip_test_lib",
],
)

34
tests/zip/unicode_test.py Normal file
View File

@ -0,0 +1,34 @@
# Copyright 2022 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from tests.zip import zip_test_lib
class UnicodeHandlingTests(zip_test_lib.ZipContentsTestBase):
def test_unicode_file_names(self):
self.assertZipFileContent("unicode_names.zip", [
{"filename": "1-a"},
{"filename": "2-λ"},
{"filename": "3-世"},
{"filename": "BUILD"},
{"filename": "sübdir/", "isdir": True},
{"filename": "sübdir/2-λ"},
{"filename": "sübdir/hello"},
])
if __name__ == "__main__":
unittest.main()

View File

@ -91,4 +91,5 @@ class ZipContentsTestBase(ZipTest):
# legacy rule implementation.
attr = 0o555
self.assertEqual(oct((info.external_attr >> 16) & UNIX_RWX_BITS),
oct(attr))
oct(attr),
msg = info.filename)