fix: correctly split quoted args (#909)

This commit is contained in:
Greg Magolan 2024-08-19 16:36:41 -04:00 committed by GitHub
parent 62b2fd06aa
commit 73d021fb36
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 159 additions and 36 deletions

2
docs/jq.md generated
View File

@ -144,7 +144,7 @@ Invoke jq with a filter on a set of json input files.
| <a id="jq-args"></a>args | Additional args to pass to jq | `[]` | | <a id="jq-args"></a>args | Additional args to pass to jq | `[]` |
| <a id="jq-out"></a>out | Name of the output json file; defaults to the rule name plus ".json" | `None` | | <a id="jq-out"></a>out | Name of the output json file; defaults to the rule name plus ".json" | `None` |
| <a id="jq-data"></a>data | List of additional files. May be empty. | `[]` | | <a id="jq-data"></a>data | List of additional files. May be empty. | `[]` |
| <a id="jq-expand_args"></a>expand_args | Run bazel's location-expansion on the args. | `False` | | <a id="jq-expand_args"></a>expand_args | Run bazel's location and make variable expansion on the args. | `False` |
| <a id="jq-kwargs"></a>kwargs | Other common named parameters such as `tags` or `visibility` | none | | <a id="jq-kwargs"></a>kwargs | Other common named parameters such as `tags` or `visibility` | none |

26
docs/strings.md generated
View File

@ -80,3 +80,29 @@ Unicode replacement character, U+FFFD.
codepoint of `c` argument. codepoint of `c` argument.
<a id="split_args"></a>
## split_args
<pre>
split_args(<a href="#split_args-s">s</a>)
</pre>
Split a string into a list of space-separated arguments.
Unlike the naive `.split(" ")`, this function takes quoted strings
and escapes into account.
**PARAMETERS**
| Name | Description | Default Value |
| :------------- | :------------- | :------------- |
| <a id="split_args-s"></a>s | input string | none |
**RETURNS**
A list of strings, one for each argument found in the input string.

View File

@ -144,7 +144,7 @@ def jq(name, srcs, filter = None, filter_file = None, args = [], out = None, dat
filter_file: File containing filter expression (alternative to `filter`) filter_file: File containing filter expression (alternative to `filter`)
args: Additional args to pass to jq args: Additional args to pass to jq
expand_args: Run bazel's location-expansion on the args. expand_args: Run bazel's location and make variable expansion on the args.
out: Name of the output json file; defaults to the rule name plus ".json" out: Name of the output json file; defaults to the rule name plus ".json"
**kwargs: Other common named parameters such as `tags` or `visibility` **kwargs: Other common named parameters such as `tags` or `visibility`
""" """

View File

@ -160,6 +160,8 @@ bzl_library(
srcs = ["jq.bzl"], srcs = ["jq.bzl"],
visibility = ["//lib:__subpackages__"], visibility = ["//lib:__subpackages__"],
deps = [ deps = [
":expand_variables",
":strings",
"//lib:stamping", "//lib:stamping",
], ],
) )
@ -182,6 +184,10 @@ bzl_library(
name = "params_file", name = "params_file",
srcs = ["params_file.bzl"], srcs = ["params_file.bzl"],
visibility = ["//lib:__subpackages__"], visibility = ["//lib:__subpackages__"],
deps = [
":expand_variables",
":strings",
],
) )
bzl_library( bzl_library(
@ -204,6 +210,7 @@ bzl_library(
visibility = ["//lib:__subpackages__"], visibility = ["//lib:__subpackages__"],
deps = [ deps = [
":expand_variables", ":expand_variables",
":strings",
"//lib:stamping", "//lib:stamping",
"@bazel_skylib//lib:dicts", "@bazel_skylib//lib:dicts",
], ],

View File

@ -41,7 +41,7 @@ def _bats_test_impl(ctx):
for (key, value) in ctx.attr.env.items(): for (key, value) in ctx.attr.env.items():
envs.append(_ENV_SET.format( envs.append(_ENV_SET.format(
key = key, key = key,
value = " ".join([expand_variables(ctx, exp, attribute_name = "env") for exp in ctx.expand_location(value, targets = ctx.attr.data).split(" ")]), value = expand_variables(ctx, ctx.expand_location(value, targets = ctx.attr.data), attribute_name = "env"),
)) ))
# See https://www.msys2.org/wiki/Porting/: # See https://www.msys2.org/wiki/Porting/:

View File

@ -2,15 +2,12 @@
load("@bazel_skylib//lib:dicts.bzl", "dicts") load("@bazel_skylib//lib:dicts.bzl", "dicts")
load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp") load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp")
load(":expand_variables.bzl", _expand_variables = "expand_variables") load(":expand_variables.bzl", "expand_variables")
def _expand_substitutions(ctx, output, substitutions): def _expand_substitutions(ctx, output, substitutions):
result = {} result = {}
for k, v in substitutions.items(): for k, v in substitutions.items():
result[k] = " ".join([ result[k] = expand_variables(ctx, ctx.expand_location(v, targets = ctx.attr.data), outs = [output], attribute_name = "substitutions")
_expand_variables(ctx, e, outs = [output], attribute_name = "substitutions")
for e in ctx.expand_location(v, targets = ctx.attr.data).split(" ")
])
return result return result
def _expand_template_impl(ctx): def _expand_template_impl(ctx):

View File

@ -1,6 +1,8 @@
"""Implementation for jq rule""" """Implementation for jq rule"""
load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp") load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp")
load(":expand_variables.bzl", "expand_variables")
load(":strings.bzl", "split_args")
_jq_attrs = dict({ _jq_attrs = dict({
"srcs": attr.label_list( "srcs": attr.label_list(
@ -22,12 +24,6 @@ _jq_attrs = dict({
), ),
}, **STAMP_ATTRS) }, **STAMP_ATTRS)
def _expand_locations(ctx, s):
# `.split(" ")` is a work-around https://github.com/bazelbuild/bazel/issues/10309
# TODO: If the string has intentional spaces or if one or more of the expanded file
# locations has a space in the name, we will incorrectly split it into multiple arguments
return ctx.expand_location(s, targets = ctx.attr.data).split(" ")
def _jq_impl(ctx): def _jq_impl(ctx):
jq_bin = ctx.toolchains["@aspect_bazel_lib//lib:jq_toolchain_type"].jqinfo.bin jq_bin = ctx.toolchains["@aspect_bazel_lib//lib:jq_toolchain_type"].jqinfo.bin
@ -35,7 +31,7 @@ def _jq_impl(ctx):
if ctx.attr.expand_args: if ctx.attr.expand_args:
args = [] args = []
for a in ctx.attr.args: for a in ctx.attr.args:
args += _expand_locations(ctx, a) args += split_args(expand_variables(ctx, ctx.expand_location(a, targets = ctx.attr.data), outs = [out]))
else: else:
args = ctx.attr.args args = ctx.attr.args
@ -52,7 +48,7 @@ def _jq_impl(ctx):
args = args + ["--null-input"] args = args + ["--null-input"]
if ctx.attr.filter_file: if ctx.attr.filter_file:
args = args + ["--from-file '%s'" % ctx.file.filter_file.path] args = args + ["--from-file", ctx.file.filter_file.path]
inputs.append(ctx.file.filter_file) inputs.append(ctx.file.filter_file)
stamp = maybe_stamp(ctx) stamp = maybe_stamp(ctx)
@ -76,9 +72,16 @@ def _jq_impl(ctx):
args = args + ["--slurpfile", "STAMP", stamp_json.path] args = args + ["--slurpfile", "STAMP", stamp_json.path]
# quote args that contain spaces
quoted_args = []
for a in args:
if " " in a:
a = "'{}'".format(a)
quoted_args.append(a)
cmd = "{jq} {args} {filter} {sources} > {out}".format( cmd = "{jq} {args} {filter} {sources} > {out}".format(
jq = jq_bin.path, jq = jq_bin.path,
args = " ".join(args), args = " ".join(quoted_args),
filter = "'%s'" % ctx.attr.filter if ctx.attr.filter else "", filter = "'%s'" % ctx.attr.filter if ctx.attr.filter else "",
sources = " ".join(["'%s'" % file.path for file in ctx.files.srcs]), sources = " ".join(["'%s'" % file.path for file in ctx.files.srcs]),
out = out.path, out = out.path,

View File

@ -1,5 +1,8 @@
"params_file rule" "params_file rule"
load(":expand_variables.bzl", "expand_variables")
load(":strings.bzl", "split_args")
_ATTRS = { _ATTRS = {
"args": attr.string_list(), "args": attr.string_list(),
"data": attr.label_list(allow_files = True), "data": attr.label_list(allow_files = True),
@ -11,12 +14,6 @@ _ATTRS = {
"_windows_constraint": attr.label(default = "@platforms//os:windows"), "_windows_constraint": attr.label(default = "@platforms//os:windows"),
} }
def _expand_locations(ctx, s):
# `.split(" ")` is a work-around https://github.com/bazelbuild/bazel/issues/10309
# TODO: If the string has intentional spaces or if one or more of the expanded file
# locations has a space in the name, we will incorrectly split it into multiple arguments
return ctx.expand_location(s, targets = ctx.attr.data).split(" ")
def _params_file_impl(ctx): def _params_file_impl(ctx):
is_windows = ctx.target_platform_has_constraint(ctx.attr._windows_constraint[platform_common.ConstraintValueInfo]) is_windows = ctx.target_platform_has_constraint(ctx.attr._windows_constraint[platform_common.ConstraintValueInfo])
@ -29,12 +26,9 @@ def _params_file_impl(ctx):
expanded_args = [] expanded_args = []
# First expand predefined source/output path variables # Expand predefined source/output path && predefined variables & custom variables
for a in ctx.attr.args: for a in ctx.attr.args:
expanded_args += _expand_locations(ctx, a) expanded_args += split_args(expand_variables(ctx, ctx.expand_location(a, targets = ctx.attr.data), outs = [ctx.outputs.out]))
# Next expand predefined variables & custom variables
expanded_args = [ctx.expand_make_variables("args", e, {}) for e in expanded_args]
# ctx.actions.write creates a FileWriteAction which uses UTF-8 encoding. # ctx.actions.write creates a FileWriteAction which uses UTF-8 encoding.
ctx.actions.write( ctx.actions.write(

View File

@ -17,6 +17,7 @@
load("@bazel_skylib//lib:dicts.bzl", "dicts") load("@bazel_skylib//lib:dicts.bzl", "dicts")
load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp") load("//lib:stamping.bzl", "STAMP_ATTRS", "maybe_stamp")
load(":expand_variables.bzl", "expand_variables") load(":expand_variables.bzl", "expand_variables")
load(":strings.bzl", "split_args")
def _run_binary_impl(ctx): def _run_binary_impl(ctx):
args = ctx.actions.args() args = ctx.actions.args()
@ -46,15 +47,11 @@ Possible fixes:
rule_kind = str(ctx.attr.tool.label), rule_kind = str(ctx.attr.tool.label),
)) ))
# `expand_locations(...).split(" ")` is a work-around https://github.com/bazelbuild/bazel/issues/10309
# _expand_locations returns an array of args to support $(execpaths) expansions.
# TODO: If the string has intentional spaces or if one or more of the expanded file
# locations has a space in the name, we will incorrectly split it into multiple arguments
for a in ctx.attr.args: for a in ctx.attr.args:
args.add_all([expand_variables(ctx, e, outs = outputs) for e in ctx.expand_location(a, targets = ctx.attr.srcs).split(" ")]) args.add_all(split_args(expand_variables(ctx, ctx.expand_location(a, targets = ctx.attr.srcs), outs = outputs)))
envs = {} envs = {}
for k, v in ctx.attr.env.items(): for k, v in ctx.attr.env.items():
envs[k] = " ".join([expand_variables(ctx, e, outs = outputs, attribute_name = "env") for e in ctx.expand_location(v, targets = ctx.attr.srcs).split(" ")]) envs[k] = expand_variables(ctx, ctx.expand_location(v, targets = ctx.attr.srcs), outs = outputs, attribute_name = "env")
stamp = maybe_stamp(ctx) stamp = maybe_stamp(ctx)
if stamp: if stamp:

View File

@ -583,3 +583,73 @@ def hex(number):
hex_string = "0" hex_string = "0"
return "{}0x{}".format("-" if is_signed else "", hex_string) return "{}0x{}".format("-" if is_signed else "", hex_string)
def split_args(s):
    """Split a string into a list of space-separated arguments.

    Unlike the naive `.split(" ")`, this function takes quoted strings
    and backslash escapes into account:

    * Text inside single or double quotes is kept within one argument and the
      enclosing quotes are removed. A quote character of the other kind
      inside a quoted section is an ordinary character.
    * A backslash escapes exactly the next character, so an escaped quote or
      an escaped backslash is taken literally.
    * A backslash followed by a space, or left dangling at the end of the
      input, is not an escape: the backslash is kept and the space does not
      split the argument.

    Args:
        s: input string

    Returns:
        list of strings, one per argument found in the input string;
        empty arguments are dropped
    """
    args = []
    arg = ""
    single_quote = False
    double_quote = False
    escape = False

    # Starlark strings are not iterable, so walk the string by index.
    for i in range(len(s)):
        c = s[i]

        if escape:
            # This character follows a backslash. An escape covers exactly one
            # character, so the flag is always cleared here; leaving it set
            # after an escaped space would corrupt everything that follows.
            escape = False
            if c == " ":
                # a dangling escape is not an escape, put the backslash back
                arg = arg + "\\"
            arg = arg + c
            continue

        if c == "\\":
            # start of an escape; the backslash itself is not emitted
            escape = True
            continue

        if c == "'" and not double_quote:
            # start or end of a single-quoted section
            single_quote = not single_quote
            continue

        if c == "\"" and not single_quote:
            # start or end of a double-quoted section
            double_quote = not double_quote
            continue

        if c == " " and not single_quote and not double_quote:
            # splitting space; runs of spaces never produce empty arguments
            if arg != "":
                args.append(arg)
                arg = ""
            continue

        arg = arg + c

    # a backslash at the very end has nothing to escape, keep it
    if escape:
        arg = arg + "\\"

    # final arg?
    if arg != "":
        args.append(arg)
    return args

View File

@ -1,7 +1,8 @@
"Utilities for strings" "Utilities for strings"
load("//lib/private:strings.bzl", _chr = "chr", _hex = "hex", _ord = "ord") load("//lib/private:strings.bzl", _chr = "chr", _hex = "hex", _ord = "ord", _split_args = "split_args")
chr = _chr chr = _chr
ord = _ord ord = _ord
hex = _hex hex = _hex
split_args = _split_args

View File

@ -2,7 +2,7 @@
load("@bazel_skylib//lib:partial.bzl", "partial") load("@bazel_skylib//lib:partial.bzl", "partial")
load("@bazel_skylib//lib:unittest.bzl", "asserts", "unittest") load("@bazel_skylib//lib:unittest.bzl", "asserts", "unittest")
load("//lib/private:strings.bzl", "chr", "hex", "ord") load("//lib/private:strings.bzl", "chr", "hex", "ord", "split_args")
def _ord_test_impl(ctx): def _ord_test_impl(ctx):
env = unittest.begin(ctx) env = unittest.begin(ctx)
@ -56,10 +56,38 @@ def _hex_test_impl(ctx):
hex_test = unittest.make(_hex_test_impl) hex_test = unittest.make(_hex_test_impl)
def _split_args_test_impl(ctx):
    """Checks split_args against plain, quoted and escaped inputs."""
    env = unittest.begin(ctx)

    # Each entry pairs the expected argument list with the raw input string.
    cases = [
        # plain space-separated words
        (["a", "b", "c", "d"], "a b c d"),
        # single quotes
        (["a", "b c", "d"], "a 'b c' d"),
        # double quotes
        (["a", "b c", "d"], "a \"b c\" d"),
        # escaped single quotes
        (["a", "'b", "c'", "d"], "a \\'b c\\' d"),
        # escaped double quotes
        (["a", "\"b", "c\"", "d"], "a \\\"b c\\\" d"),
        # single quotes containing escaped quotes
        (["a", "b'\" c", "d"], "a 'b\\'\\\" c' d"),
        # double quotes containing escaped quotes
        (["a", "b'\" c", "d"], "a \"b\\'\\\" c\" d"),
    ]
    for expected, raw in cases:
        asserts.equals(env, expected, split_args(raw))

    return unittest.end(env)

split_args_test = unittest.make(_split_args_test_impl)
def strings_test_suite(): def strings_test_suite():
unittest.suite( unittest.suite(
"strings_tests", "strings_tests",
partial.make(ord_test, timeout = "short"), partial.make(ord_test, timeout = "short"),
partial.make(chr_test, timeout = "short"), partial.make(chr_test, timeout = "short"),
partial.make(hex_test, timeout = "short"), partial.make(hex_test, timeout = "short"),
partial.make(split_args_test, timeout = "short"),
) )