Introduce new_sets.bzl (#32)

This version is hash-based (implemented on top of a dictionary) and doesn't suffer from the performance problems of the current version. It will eventually replace the old one after a deprecation period.
This commit is contained in:
Nicholas Titcombe 2018-04-20 14:44:25 -07:00 committed by Tony Allevato
parent 59fba13160
commit 0b40ea7b13
8 changed files with 530 additions and 0 deletions

1
BUILD
View File

@ -24,6 +24,7 @@ skylark_library(
deps = [
"//lib:collections",
"//lib:dicts",
"//lib:new_sets",
"//lib:partial",
"//lib:paths",
"//lib:selects",

View File

@ -16,6 +16,7 @@
load("//lib:collections.bzl", "collections")
load("//lib:dicts.bzl", "dicts")
load("//lib:new_sets.bzl", new_sets="sets")
load("//lib:partial.bzl", "partial")
load("//lib:paths.bzl", "paths")
load("//lib:selects.bzl", "selects")

View File

@ -41,6 +41,11 @@ skylark_library(
srcs = ["sets.bzl"],
)
# Library target for the hash-based set implementation in new_sets.bzl.
skylark_library(
name = "new_sets",
srcs = ["new_sets.bzl"],
)
skylark_library(
name = "shell",
srcs = ["shell.bzl"],

233
lib/new_sets.bzl Normal file
View File

@ -0,0 +1,233 @@
# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Skylib module containing common hash-set algorithms.
An empty set can be created using: `sets.make()`, or it can be created with some starting values
if you pass it a sequence: `sets.make([1, 2, 3])`. This returns a struct containing all of the
values as keys in a dictionary - this means that all passed in values must be hashable. The
values in the set can be retrieved using `sets.to_list(my_set)`.
"""
load(":dicts.bzl", "dicts")
def _make(elements=None):
    """Builds a new set, optionally seeded with initial elements.

    Each element becomes a key of the backing dictionary, so every element
    must be hashable. Duplicate elements collapse into a single entry.

    Args:
      elements: Optional sequence of values to place in the set. A missing or
        empty value produces an empty set.

    Returns:
      A struct whose `_values` dictionary has one key per distinct element.
    """
    values = {}
    for element in (elements or []):
        values[element] = None
    return struct(_values = values)
def _copy(s):
    """Returns an independent duplicate of a set.

    The backing dictionary is duplicated, so inserting into or removing from
    the result leaves `s` untouched.

    Args:
      s: A set, as returned by `sets.make()`.

    Returns:
      A new set holding the same elements as `s`.
    """
    duplicated_values = dict(s._values)
    return struct(_values = duplicated_values)
def _to_list(a):
    """Returns the elements of a set.

    Args:
      a: A set, as returned by `sets.make()`.

    Returns:
      The keys of the set's backing dictionary, i.e. the values that were
      inserted into the set.
    """
    backing = a._values
    return backing.keys()
def _insert(s, e):
    """Adds an element to a set in place.

    The element must be hashable. The original set is mutated; no copy is
    made. Inserting an element that is already present leaves the set
    unchanged.

    Args:
      s: A set, as returned by `sets.make()`.
      e: The element to be inserted.

    Returns:
      The same set `s`, now including `e`.
    """
    backing = s._values
    backing[e] = None
    return s
def _remove(s, e):
    """Deletes an element from a set in place.

    The element must be hashable. The original set is mutated; no copy is
    made. The element is popped from the backing dictionary without a default
    value, so removing an element that is not in the set is an error.

    Args:
      s: A set, as returned by `sets.make()`.
      e: The element to be removed.

    Returns:
      The same set `s`, with `e` removed.
    """
    backing = s._values
    backing.pop(e)
    return s
def _contains(a, e):
    """Tests whether an element is a member of a set.

    Args:
      a: A set, as returned by `sets.make()`.
      e: The element to look for.

    Returns:
      True if `e` is in the set, False otherwise.
    """
    members = a._values
    return e in members
def _get_shorter_and_longer(a, b):
    """Orders two sets by size, smaller one first.

    Args:
      a: A set, as returned by `sets.make()`.
      b: A set, as returned by `sets.make()`.

    Returns:
      `a`, `b` if `a` has strictly fewer elements than `b`; otherwise `b`, `a`
      (which is also the result when both sets have the same size).
    """
    if _length(b) <= _length(a):
        return b, a
    return a, b
def _is_equal(a, b):
    """Tests two sets for equality.

    The comparison is performed on the backing dictionaries of the two sets.

    Args:
      a: A set, as returned by `sets.make()`.
      b: A set, as returned by `sets.make()`.

    Returns:
      True if `a` is equal to `b`, False otherwise.
    """
    left = a._values
    right = b._values
    return left == right
def _is_subset(a, b):
    """Tests whether every element of `a` is also in `b`.

    Args:
      a: A set, as returned by `sets.make()`.
      b: A set, as returned by `sets.make()`.

    Returns:
      True if `a` is a subset of `b`, False otherwise. An empty `a` is a
      subset of any `b`.
    """
    superset = b._values
    for candidate in a._values.keys():
        if candidate in superset:
            continue

        # Found an element of `a` that `b` lacks; no need to look further.
        return False
    return True
def _disjoint(a, b):
    """Tests whether two sets share no elements.

    Only the smaller of the two sets is iterated; each of its elements is
    looked up in the larger one.

    Args:
      a: A set, as returned by `sets.make()`.
      b: A set, as returned by `sets.make()`.

    Returns:
      True if `a` and `b` are disjoint, False otherwise.
    """
    smaller, larger = _get_shorter_and_longer(a, b)
    lookup = larger._values
    for candidate in smaller._values.keys():
        if candidate in lookup:
            # A single shared element is enough to rule out disjointness.
            return False
    return True
def _intersection(a, b):
    """Computes the elements common to two sets.

    Only the smaller of the two sets is iterated; each of its elements is
    looked up in the larger one.

    Args:
      a: A set, as returned by `sets.make()`.
      b: A set, as returned by `sets.make()`.

    Returns:
      A new set containing the elements that are in both `a` and `b`.
    """
    smaller, larger = _get_shorter_and_longer(a, b)
    lookup = larger._values
    common = {}
    for candidate in smaller._values.keys():
        if candidate in lookup:
            common[candidate] = None
    return struct(_values = common)
def _union(*args):
    """Returns the union of several sets.

    Args:
      *args: An arbitrary number of sets, each as returned by `sets.make()`.
        Plain lists are not accepted because the `_values` field of every
        argument is read; wrap a list with `sets.make()` first.

    Returns:
      A set containing every element that appears in at least one of the sets
      in `*args`.
    """

    # The backing dictionaries are merged with `dicts.add`; every value is
    # None, so only the combined key set matters.
    return struct(_values = dicts.add(*[s._values for s in args]))
def _difference(a, b):
    """Computes the elements of `a` that do not appear in `b`.

    Args:
      a: A set, as returned by `sets.make()`.
      b: A set, as returned by `sets.make()`.

    Returns:
      A new set containing the elements that are in `a` but not in `b`.
    """
    excluded = b._values
    remaining = {}
    for candidate in a._values.keys():
        if candidate not in excluded:
            remaining[candidate] = None
    return struct(_values = remaining)
def _length(s):
    """Counts the elements of a set.

    Args:
      s: A set, as returned by `sets.make()`.

    Returns:
      An integer giving the number of elements in the set.
    """
    backing = s._values
    return len(backing)
# Public API of this module: each field exposes the private `_`-prefixed
# function of the same name, so callers write e.g. `sets.make(...)` or
# `sets.union(a, b)`.
sets = struct(
make = _make,
copy = _copy,
to_list = _to_list,
insert = _insert,
contains = _contains,
is_equal = _is_equal,
is_subset = _is_subset,
disjoint = _disjoint,
intersection = _intersection,
union = _union,
difference = _difference,
length = _length,
remove = _remove,
)

View File

@ -48,6 +48,7 @@ def _is_equal(a, b):
Args:
a: A depset or a list.
b: A depset or a list.
Returns:
True if `a` is equal to `b`, False otherwise.
"""

View File

@ -20,6 +20,7 @@ assertions used to within tests.
"""
load(":sets.bzl", "sets")
load(":new_sets.bzl", new_sets="sets")
def _make(impl, attrs=None):
@ -253,11 +254,29 @@ def _assert_set_equals(env, expected, actual, msg=None):
full_msg = expectation_msg
_fail(env, full_msg)
def _assert_new_set_equals(env, expected, actual, msg=None):
    """Asserts that two sets created by new_sets.bzl are equal.

    Equality is decided by `new_sets.is_equal`. On a mismatch a failure is
    recorded on `env` through `_fail`.

    Args:
      env: The test environment returned by `unittest.begin`.
      expected: The expected set resulting from some computation.
      actual: The actual set returned by some computation.
      msg: An optional message that will be printed that describes the failure.
        If omitted, a default will be used.
    """
    if new_sets.is_equal(expected, actual):
        return

    expectation_msg = "Expected %r, but got %r" % (expected, actual)

    # A caller-supplied message is shown first, with the expectation appended
    # in parentheses.
    full_msg = ("%s (%s)" % (msg, expectation_msg)) if msg else expectation_msg
    _fail(env, full_msg)
# Assertion helpers exported to test files. `new_set_equals` is the
# counterpart of `set_equals` for sets created by new_sets.bzl.
asserts = struct(
equals=_assert_equals,
false=_assert_false,
set_equals=_assert_set_equals,
new_set_equals = _assert_new_set_equals,
true=_assert_true,
)

View File

@ -4,6 +4,7 @@ load(":partial_tests.bzl", "partial_test_suite")
load(":paths_tests.bzl", "paths_test_suite")
load(":selects_tests.bzl", "selects_test_suite")
load(":sets_tests.bzl", "sets_test_suite")
load(":new_sets_tests.bzl", "new_sets_test_suite")
load(":shell_tests.bzl", "shell_test_suite")
load(":structs_tests.bzl", "structs_test_suite")
load(":versions_tests.bzl", "versions_test_suite")
@ -22,6 +23,8 @@ selects_test_suite()
sets_test_suite()
new_sets_test_suite()
shell_test_suite()
structs_test_suite()

267
tests/new_sets_tests.bzl Normal file
View File

@ -0,0 +1,267 @@
# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for new_sets.bzl."""
load("//:lib.bzl", "new_sets", "asserts", "unittest")
def _is_equal_test(ctx):
    """Unit tests for new_sets.is_equal.

    Note that if this test fails, the results for the other `sets` tests will be
    inconclusive because they use `asserts.new_set_equals`, which in turn calls
    `new_sets.is_equal`.
    """
    env = unittest.begin(ctx)

    asserts.true(env, new_sets.is_equal(new_sets.make(), new_sets.make()))
    asserts.false(env, new_sets.is_equal(new_sets.make(), new_sets.make([1])))
    asserts.false(env, new_sets.is_equal(new_sets.make([1]), new_sets.make()))
    asserts.true(env, new_sets.is_equal(new_sets.make([1]), new_sets.make([1])))
    asserts.false(env, new_sets.is_equal(new_sets.make([1]), new_sets.make([1, 2])))
    asserts.false(env, new_sets.is_equal(new_sets.make([1]), new_sets.make([2])))

    # Mirror of the strict-subset case above: the larger set is on the left.
    asserts.false(env, new_sets.is_equal(new_sets.make([1, 2]), new_sets.make([1])))

    # Sets built from separate depsets with the same contents compare equal.
    asserts.true(env, new_sets.is_equal(new_sets.make(depset([1])), new_sets.make(depset([1]))))

    # If passing a list, verify that duplicate elements are ignored.
    asserts.true(env, new_sets.is_equal(new_sets.make([1, 1]), new_sets.make([1])))

    unittest.end(env)

is_equal_test = unittest.make(_is_equal_test)
def _is_subset_test(ctx):
    """Unit tests for new_sets.is_subset."""
    env = unittest.begin(ctx)
    make_set = new_sets.make

    # Each case is (expected result, candidate subset, candidate superset).
    cases = [
        (True, make_set(), make_set()),
        (True, make_set(), make_set([1])),
        (False, make_set([1]), make_set()),
        (True, make_set([1]), make_set([1])),
        (True, make_set([1]), make_set([1, 2])),
        (False, make_set([1]), make_set([2])),
        (True, make_set([1]), make_set(depset([1, 2]))),
        # If passing a list, verify that duplicate elements are ignored.
        (True, make_set([1, 1]), make_set([1, 2])),
    ]
    for expected, subset, superset in cases:
        outcome = new_sets.is_subset(subset, superset)
        if expected:
            asserts.true(env, outcome)
        else:
            asserts.false(env, outcome)

    unittest.end(env)

is_subset_test = unittest.make(_is_subset_test)
def _disjoint_test(ctx):
    """Unit tests for new_sets.disjoint."""
    env = unittest.begin(ctx)
    make_set = new_sets.make

    # Each case is (expected result, first set, second set).
    cases = [
        (True, make_set(), make_set()),
        (True, make_set(), make_set([1])),
        (True, make_set([1]), make_set()),
        (False, make_set([1]), make_set([1])),
        (False, make_set([1]), make_set([1, 2])),
        (True, make_set([1]), make_set([2])),
        (True, make_set([1]), make_set(depset([2]))),
        # If passing a list, verify that duplicate elements are ignored.
        (False, make_set([1, 1]), make_set([1, 2])),
    ]
    for expected, first, second in cases:
        outcome = new_sets.disjoint(first, second)
        if expected:
            asserts.true(env, outcome)
        else:
            asserts.false(env, outcome)

    unittest.end(env)

disjoint_test = unittest.make(_disjoint_test)
def _intersection_test(ctx):
    """Unit tests for new_sets.intersection."""
    env = unittest.begin(ctx)
    make_set = new_sets.make

    # Each case is (expected intersection, first set, second set).
    cases = [
        (make_set(), make_set(), make_set()),
        (make_set(), make_set(), make_set([1])),
        (make_set(), make_set([1]), make_set()),
        (make_set([1]), make_set([1]), make_set([1])),
        (make_set([1]), make_set([1]), make_set([1, 2])),
        (make_set(), make_set([1]), make_set([2])),
        (make_set([1]), make_set([1]), make_set(depset([1]))),
        # If passing a list, verify that duplicate elements are ignored.
        (make_set([1]), make_set([1, 1]), make_set([1, 2])),
    ]
    for expected, first, second in cases:
        asserts.new_set_equals(env, expected, new_sets.intersection(first, second))

    unittest.end(env)

intersection_test = unittest.make(_intersection_test)
def _union_test(ctx):
    """Unit tests for new_sets.union."""
    env = unittest.begin(ctx)
    make_set = new_sets.make

    # Each case is (expected union, list of operand sets passed to union).
    cases = [
        (make_set(), []),
        (make_set([1]), [make_set([1])]),
        (make_set(), [make_set(), make_set()]),
        (make_set([1]), [make_set(), make_set([1])]),
        (make_set([1]), [make_set([1]), make_set()]),
        (make_set([1]), [make_set([1]), make_set([1])]),
        (make_set([1, 2]), [make_set([1]), make_set([1, 2])]),
        (make_set([1, 2]), [make_set([1]), make_set([2])]),
        (make_set([1]), [make_set([1]), make_set(depset([1]))]),
        # If passing a list, verify that duplicate elements are ignored.
        (make_set([1, 2]), [make_set([1, 1]), make_set([1, 2])]),
    ]
    for expected, operands in cases:
        asserts.new_set_equals(env, expected, new_sets.union(*operands))

    unittest.end(env)

union_test = unittest.make(_union_test)
def _difference_test(ctx):
    """Unit tests for new_sets.difference."""
    env = unittest.begin(ctx)
    make_set = new_sets.make

    # Each case is (expected difference, minuend set, subtrahend set).
    cases = [
        (make_set(), make_set(), make_set()),
        (make_set(), make_set(), make_set([1])),
        (make_set([1]), make_set([1]), make_set()),
        (make_set(), make_set([1]), make_set([1])),
        (make_set(), make_set([1]), make_set([1, 2])),
        (make_set([1]), make_set([1]), make_set([2])),
        (make_set(), make_set([1]), make_set(depset([1]))),
        # If passing a list, verify that duplicate elements are ignored.
        (make_set([2]), make_set([1, 2]), make_set([1, 1])),
    ]
    for expected, minuend, subtrahend in cases:
        asserts.new_set_equals(env, expected, new_sets.difference(minuend, subtrahend))

    unittest.end(env)

difference_test = unittest.make(_difference_test)
def _to_list_test(ctx):
    """Unit tests for new_sets.to_list."""
    env = unittest.begin(ctx)

    # Each case is (expected list, set to convert).
    cases = [
        ([], new_sets.make()),
        ([1], new_sets.make([1, 1, 1])),
        ([1, 2, 3], new_sets.make([1, 2, 3])),
    ]
    for expected, subject in cases:
        asserts.equals(env, expected, new_sets.to_list(subject))

    unittest.end(env)

to_list_test = unittest.make(_to_list_test)
def _make_test(ctx):
    """Unit tests for new_sets.make."""
    env = unittest.begin(ctx)

    # No arguments: the backing dictionary is empty.
    empty = new_sets.make()
    asserts.equals(env, {}, empty._values)

    # Duplicates in the input collapse to one key each.
    deduplicated = new_sets.make([1, 1, 2, 2, 3, 3])
    asserts.equals(env, {1: None, 2: None, 3: None}, deduplicated._values)

    # A depset is accepted as the input sequence.
    from_depset = new_sets.make(depset([1, 2]))
    asserts.equals(env, {1: None, 2: None}, from_depset._values)

    unittest.end(env)

make_test = unittest.make(_make_test)
def _copy_test(ctx):
    """Unit tests for new_sets.copy."""
    env = unittest.begin(ctx)

    empty_copy = new_sets.copy(new_sets.make())
    asserts.new_set_equals(env, empty_copy, new_sets.make())

    populated_copy = new_sets.copy(new_sets.make([1, 2, 3]))
    asserts.new_set_equals(env, populated_copy, new_sets.make([1, 2, 3]))

    # Ensure mutating the copy does not mutate the original: after adding an
    # element to the duplicate only, the two sets must differ.
    original = new_sets.make([1, 2, 3])
    duplicate = new_sets.copy(original)
    duplicate._values[5] = None
    asserts.false(env, new_sets.is_equal(original, duplicate))

    unittest.end(env)

copy_test = unittest.make(_copy_test)
def _insert_test(ctx):
    """Unit tests for new_sets.insert."""
    env = unittest.begin(ctx)

    expected = new_sets.make([1, 2, 3])
    grown = new_sets.insert(new_sets.make([1, 2]), 3)
    asserts.new_set_equals(env, expected, grown)

    # Ensure insert mutates the set it was given: the returned set and the
    # original must hold the same elements afterwards.
    original = new_sets.make([1, 2, 3])
    after_insert = new_sets.insert(original, 4)
    asserts.new_set_equals(
        env,
        original,
        after_insert,
        msg = "Insert creates a new set which is an O(n) operation, insert should be O(1).",
    )

    unittest.end(env)

insert_test = unittest.make(_insert_test)
def _contains_test(ctx):
    """Unit tests for new_sets.contains."""
    env = unittest.begin(ctx)

    # Each case is (expected result, set searched for the element 1).
    cases = [
        (False, new_sets.make()),
        (True, new_sets.make([1])),
        (True, new_sets.make([1, 2])),
        (False, new_sets.make([2, 3])),
    ]
    for expected, subject in cases:
        found = new_sets.contains(subject, 1)
        if expected:
            asserts.true(env, found)
        else:
            asserts.false(env, found)

    unittest.end(env)

contains_test = unittest.make(_contains_test)
def _length_test(ctx):
    """Unit test for new_sets.length."""
    env = unittest.begin(ctx)

    # Each case is (expected element count, set to measure).
    cases = [
        (0, new_sets.make()),
        (1, new_sets.make([1])),
        (2, new_sets.make([1, 2])),
    ]
    for expected, subject in cases:
        asserts.equals(env, expected, new_sets.length(subject))

    unittest.end(env)

length_test = unittest.make(_length_test)
def _remove_test(ctx):
    """Unit test for new_sets.remove."""
    env = unittest.begin(ctx)

    expected = new_sets.make([1, 2])
    shrunk = new_sets.remove(new_sets.make([1, 2, 3]), 3)
    asserts.new_set_equals(env, expected, shrunk)

    # Ensure remove mutates the set it was given: the returned set and the
    # original must hold the same elements afterwards.
    original = new_sets.make([1, 2, 3])
    after_removal = new_sets.remove(original, 3)
    asserts.new_set_equals(env, original, after_removal)

    unittest.end(env)

remove_test = unittest.make(_remove_test)
def new_sets_test_suite():
    """Creates the test targets and test suite for new_sets.bzl tests."""

    # The order is kept exactly as before; it is passed through unchanged as
    # the positional arguments of `unittest.suite`.
    test_rules = [
        disjoint_test,
        intersection_test,
        is_equal_test,
        is_subset_test,
        difference_test,
        union_test,
        to_list_test,
        make_test,
        copy_test,
        insert_test,
        contains_test,
        length_test,
        remove_test,
    ]
    unittest.suite("new_sets_tests", *test_rules)