2
0
Fork 0
mirror of https://github.com/bazel-contrib/bazel-lib synced 2024-11-30 01:41:21 +00:00
bazel-lib/lib/private/strings.bzl
2024-08-19 16:36:41 -04:00

656 lines
11 KiB
Python

"String utilities"
# Lookup table from a one-character string to its integer value (0-255).
# Keys are written as octal escapes (e.g. "\101" has the value 65) so that
# every value, including the non-printable ones, can be spelled in source.
# `ord` reads this table; `INT_TO_CHAR` holds the same strings indexed by value.
CHAR_TO_INT = {
    "\0": 0,
    "\1": 1,
    "\2": 2,
    "\3": 3,
    "\4": 4,
    "\5": 5,
    "\6": 6,
    "\7": 7,
    "\10": 8,
    "\11": 9,
    "\12": 10,
    "\13": 11,
    "\14": 12,
    "\15": 13,
    "\16": 14,
    "\17": 15,
    "\20": 16,
    "\21": 17,
    "\22": 18,
    "\23": 19,
    "\24": 20,
    "\25": 21,
    "\26": 22,
    "\27": 23,
    "\30": 24,
    "\31": 25,
    "\32": 26,
    "\33": 27,
    "\34": 28,
    "\35": 29,
    "\36": 30,
    "\37": 31,
    "\40": 32,
    "\41": 33,
    "\42": 34,
    "\43": 35,
    "\44": 36,
    "\45": 37,
    "\46": 38,
    "\47": 39,
    "\50": 40,
    "\51": 41,
    "\52": 42,
    "\53": 43,
    "\54": 44,
    "\55": 45,
    "\56": 46,
    "\57": 47,
    "\60": 48,
    "\61": 49,
    "\62": 50,
    "\63": 51,
    "\64": 52,
    "\65": 53,
    "\66": 54,
    "\67": 55,
    "\70": 56,
    "\71": 57,
    "\72": 58,
    "\73": 59,
    "\74": 60,
    "\75": 61,
    "\76": 62,
    "\77": 63,
    "\100": 64,
    "\101": 65,
    "\102": 66,
    "\103": 67,
    "\104": 68,
    "\105": 69,
    "\106": 70,
    "\107": 71,
    "\110": 72,
    "\111": 73,
    "\112": 74,
    "\113": 75,
    "\114": 76,
    "\115": 77,
    "\116": 78,
    "\117": 79,
    "\120": 80,
    "\121": 81,
    "\122": 82,
    "\123": 83,
    "\124": 84,
    "\125": 85,
    "\126": 86,
    "\127": 87,
    "\130": 88,
    "\131": 89,
    "\132": 90,
    "\133": 91,
    "\134": 92,
    "\135": 93,
    "\136": 94,
    "\137": 95,
    "\140": 96,
    "\141": 97,
    "\142": 98,
    "\143": 99,
    "\144": 100,
    "\145": 101,
    "\146": 102,
    "\147": 103,
    "\150": 104,
    "\151": 105,
    "\152": 106,
    "\153": 107,
    "\154": 108,
    "\155": 109,
    "\156": 110,
    "\157": 111,
    "\160": 112,
    "\161": 113,
    "\162": 114,
    "\163": 115,
    "\164": 116,
    "\165": 117,
    "\166": 118,
    "\167": 119,
    "\170": 120,
    "\171": 121,
    "\172": 122,
    "\173": 123,
    "\174": 124,
    "\175": 125,
    "\176": 126,
    "\177": 127,
    "\200": 128,
    "\201": 129,
    "\202": 130,
    "\203": 131,
    "\204": 132,
    "\205": 133,
    "\206": 134,
    "\207": 135,
    "\210": 136,
    "\211": 137,
    "\212": 138,
    "\213": 139,
    "\214": 140,
    "\215": 141,
    "\216": 142,
    "\217": 143,
    "\220": 144,
    "\221": 145,
    "\222": 146,
    "\223": 147,
    "\224": 148,
    "\225": 149,
    "\226": 150,
    "\227": 151,
    "\230": 152,
    "\231": 153,
    "\232": 154,
    "\233": 155,
    "\234": 156,
    "\235": 157,
    "\236": 158,
    "\237": 159,
    "\240": 160,
    "\241": 161,
    "\242": 162,
    "\243": 163,
    "\244": 164,
    "\245": 165,
    "\246": 166,
    "\247": 167,
    "\250": 168,
    "\251": 169,
    "\252": 170,
    "\253": 171,
    "\254": 172,
    "\255": 173,
    "\256": 174,
    "\257": 175,
    "\260": 176,
    "\261": 177,
    "\262": 178,
    "\263": 179,
    "\264": 180,
    "\265": 181,
    "\266": 182,
    "\267": 183,
    "\270": 184,
    "\271": 185,
    "\272": 186,
    "\273": 187,
    "\274": 188,
    "\275": 189,
    "\276": 190,
    "\277": 191,
    "\300": 192,
    "\301": 193,
    "\302": 194,
    "\303": 195,
    "\304": 196,
    "\305": 197,
    "\306": 198,
    "\307": 199,
    "\310": 200,
    "\311": 201,
    "\312": 202,
    "\313": 203,
    "\314": 204,
    "\315": 205,
    "\316": 206,
    "\317": 207,
    "\320": 208,
    "\321": 209,
    "\322": 210,
    "\323": 211,
    "\324": 212,
    "\325": 213,
    "\326": 214,
    "\327": 215,
    "\330": 216,
    "\331": 217,
    "\332": 218,
    "\333": 219,
    "\334": 220,
    "\335": 221,
    "\336": 222,
    "\337": 223,
    "\340": 224,
    "\341": 225,
    "\342": 226,
    "\343": 227,
    "\344": 228,
    "\345": 229,
    "\346": 230,
    "\347": 231,
    "\350": 232,
    "\351": 233,
    "\352": 234,
    "\353": 235,
    "\354": 236,
    "\355": 237,
    "\356": 238,
    "\357": 239,
    "\360": 240,
    "\361": 241,
    "\362": 242,
    "\363": 243,
    "\364": 244,
    "\365": 245,
    "\366": 246,
    "\367": 247,
    "\370": 248,
    "\371": 249,
    "\372": 250,
    "\373": 251,
    "\374": 252,
    "\375": 253,
    "\376": 254,
    "\377": 255,
}
# Lookup table from an integer value (0-255) to its one-character string:
# the element at index `i` is the string written as the octal escape of `i`.
# `chr` reads this table; `CHAR_TO_INT` maps the same strings back to values.
INT_TO_CHAR = [
    "\0",
    "\1",
    "\2",
    "\3",
    "\4",
    "\5",
    "\6",
    "\7",
    "\10",
    "\11",
    "\12",
    "\13",
    "\14",
    "\15",
    "\16",
    "\17",
    "\20",
    "\21",
    "\22",
    "\23",
    "\24",
    "\25",
    "\26",
    "\27",
    "\30",
    "\31",
    "\32",
    "\33",
    "\34",
    "\35",
    "\36",
    "\37",
    "\40",
    "\41",
    "\42",
    "\43",
    "\44",
    "\45",
    "\46",
    "\47",
    "\50",
    "\51",
    "\52",
    "\53",
    "\54",
    "\55",
    "\56",
    "\57",
    "\60",
    "\61",
    "\62",
    "\63",
    "\64",
    "\65",
    "\66",
    "\67",
    "\70",
    "\71",
    "\72",
    "\73",
    "\74",
    "\75",
    "\76",
    "\77",
    "\100",
    "\101",
    "\102",
    "\103",
    "\104",
    "\105",
    "\106",
    "\107",
    "\110",
    "\111",
    "\112",
    "\113",
    "\114",
    "\115",
    "\116",
    "\117",
    "\120",
    "\121",
    "\122",
    "\123",
    "\124",
    "\125",
    "\126",
    "\127",
    "\130",
    "\131",
    "\132",
    "\133",
    "\134",
    "\135",
    "\136",
    "\137",
    "\140",
    "\141",
    "\142",
    "\143",
    "\144",
    "\145",
    "\146",
    "\147",
    "\150",
    "\151",
    "\152",
    "\153",
    "\154",
    "\155",
    "\156",
    "\157",
    "\160",
    "\161",
    "\162",
    "\163",
    "\164",
    "\165",
    "\166",
    "\167",
    "\170",
    "\171",
    "\172",
    "\173",
    "\174",
    "\175",
    "\176",
    "\177",
    "\200",
    "\201",
    "\202",
    "\203",
    "\204",
    "\205",
    "\206",
    "\207",
    "\210",
    "\211",
    "\212",
    "\213",
    "\214",
    "\215",
    "\216",
    "\217",
    "\220",
    "\221",
    "\222",
    "\223",
    "\224",
    "\225",
    "\226",
    "\227",
    "\230",
    "\231",
    "\232",
    "\233",
    "\234",
    "\235",
    "\236",
    "\237",
    "\240",
    "\241",
    "\242",
    "\243",
    "\244",
    "\245",
    "\246",
    "\247",
    "\250",
    "\251",
    "\252",
    "\253",
    "\254",
    "\255",
    "\256",
    "\257",
    "\260",
    "\261",
    "\262",
    "\263",
    "\264",
    "\265",
    "\266",
    "\267",
    "\270",
    "\271",
    "\272",
    "\273",
    "\274",
    "\275",
    "\276",
    "\277",
    "\300",
    "\301",
    "\302",
    "\303",
    "\304",
    "\305",
    "\306",
    "\307",
    "\310",
    "\311",
    "\312",
    "\313",
    "\314",
    "\315",
    "\316",
    "\317",
    "\320",
    "\321",
    "\322",
    "\323",
    "\324",
    "\325",
    "\326",
    "\327",
    "\330",
    "\331",
    "\332",
    "\333",
    "\334",
    "\335",
    "\336",
    "\337",
    "\340",
    "\341",
    "\342",
    "\343",
    "\344",
    "\345",
    "\346",
    "\347",
    "\350",
    "\351",
    "\352",
    "\353",
    "\354",
    "\355",
    "\356",
    "\357",
    "\360",
    "\361",
    "\362",
    "\363",
    "\364",
    "\365",
    "\366",
    "\367",
    "\370",
    "\371",
    "\372",
    "\373",
    "\374",
    "\375",
    "\376",
    "\377",
]
def ord(c):
    """Return the integer value of a single-character string.

    The value is looked up in `CHAR_TO_INT`, which lists the characters
    with values 0 through 255. A one-character string that is not listed
    in that table yields `None`.

    Fails if `c` does not have a length of exactly one.

    Args:
        c: string of length one whose integer value is to be returned.

    Returns:
        integer value of `c`, or `None` if `c` is not in `CHAR_TO_INT`.
    """
    if len(c) != 1:
        fail("expected a string with a single character")

    value = CHAR_TO_INT.get(c)
    return value
def chr(i):
    """Return the one-character string for an integer value.

    The string is read from `INT_TO_CHAR`, so only values from 0 through
    255 are supported; any other value makes the function fail.

    Args:
        i: integer between 0 and 255 (inclusive).

    Returns:
        the one-character string stored at index `i` of `INT_TO_CHAR`.
    """
    if i > 255 or i < 0:
        fail("expected a int between 0 and 255 (inclusive)")

    char = INT_TO_CHAR[i]
    return char
def _to_char(n):
    # Map a digit value (0-15) to its lowercase hexadecimal character.
    return "0123456789abcdef"[n]
def hex(number):
    """Render an integer in hexadecimal notation.

    The result uses lowercase digits behind a `0x` prefix, with a leading
    `-` for negative numbers: `255` gives `0xff`, `-255` gives `-0xff`
    and `0` gives `0x0`.

    Args:
        number: integer to format

    Returns:
        hexadecimal representation of the number argument
    """
    negative = number < 0
    remaining = -number if negative else number

    # Digits are collected least-significant first and reversed at the end.
    digits = []

    # Starlark has no `while` loop, so iterate over a large fixed range and
    # leave as soon as every digit has been consumed.
    for _ in range(1000000):
        if remaining <= 0:
            break
        digits.append(_to_char(remaining % 16))
        remaining = remaining // 16

    sign = "-" if negative else ""
    magnitude = "".join(reversed(digits)) if digits else "0"
    return "{}0x{}".format(sign, magnitude)
def split_args(s):
    """Split a string into a list of space-separated arguments.

    Unlike the naive `.split(" ")`, this function takes quoted strings
    and backslash escapes into account:

    - Unquoted spaces separate arguments. Runs of spaces, as well as
      leading and trailing spaces, never produce empty arguments.
    - Single and double quotes group characters (spaces included) into one
      argument. The delimiting quotes are dropped; a quote of the other
      kind inside a quoted section is kept as an ordinary character.
    - A backslash makes the single character after it literal, so an
      escaped quote neither opens nor closes a quoted section and an
      escaped space does not split. The backslash itself is dropped,
      except in front of a space, where backslash and space are both kept.

    Arguments that end up empty (for example `''`) are omitted.

    Args:
        s: input string

    Returns:
        list of strings, one for each argument found in the input string
    """
    args = []
    arg = ""
    single_quote = False
    double_quote = False
    escape = False
    for c in s.elems():
        if c == "\\":
            escape = True
            continue

        if escape:
            # The escape covers this one character only, so always clear the
            # flag. Leaving it set after an escaped space would make the
            # following spaces and quotes count as escaped as well.
            escape = False
            if c == " ":
                # a backslash in front of a space is kept: put the backslash back
                arg = arg + "\\"
        elif c == "'":
            if not double_quote:
                # start or end of a single-quoted section; the quote is dropped
                single_quote = not single_quote
                continue
            # inside double quotes a single quote is just a char
        elif c == "\"":
            if not single_quote:
                # start or end of a double-quoted section; the quote is dropped
                double_quote = not double_quote
                continue
            # inside single quotes a double quote is just a char
        elif c == " " and not single_quote and not double_quote:
            # splitting space: close the current argument, if there is one
            if arg != "":
                args.append(arg)
                arg = ""
            continue

        arg = arg + c

    # final arg?
    if arg != "":
        args.append(arg)

    return args