2795: accept any iterator for `PySet::new` and `PyFrozenSet::new` r=birkenfeld a=davidhewitt

Closes #2789 

Looks like it's about 20% faster in the case I benchmarked to use `PySet_Add` directly without the intermediate list.

Co-authored-by: David Hewitt <1939362+davidhewitt@users.noreply.github.com>
This commit is contained in:
bors[bot] 2022-12-09 14:54:51 +00:00 committed by GitHub
commit 389a662506
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 114 additions and 53 deletions

View File

@ -4,6 +4,20 @@ use pyo3::prelude::*;
use pyo3::types::PySet;
use std::collections::{BTreeSet, HashSet};
fn set_new(b: &mut Bencher<'_>) {
Python::with_gil(|py| {
const LEN: usize = 100_000;
// Create Python objects up-front, so that the benchmark doesn't need to include
// the cost of allocating LEN Python integers
let elements: Vec<PyObject> = (0..LEN).into_iter().map(|i| i.into_py(py)).collect();
b.iter(|| {
let pool = unsafe { py.new_pool() };
PySet::new(py, &elements).unwrap();
drop(pool);
});
});
}
fn iter_set(b: &mut Bencher<'_>) {
Python::with_gil(|py| {
const LEN: usize = 100_000;
@ -44,6 +58,7 @@ fn extract_hashbrown_set(b: &mut Bencher<'_>) {
}
fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("set_new", set_new);
c.bench_function("iter_set", iter_set);
c.bench_function("extract_hashset", extract_hashset);
c.bench_function("extract_btreeset", extract_btreeset);

View File

@ -0,0 +1 @@
Accept any iterator in `PySet::new` and `PyFrozenSet::new`.

View File

@ -22,6 +22,7 @@
//! Note that you must use compatible versions of hashbrown and PyO3.
//! The required hashbrown version may vary based on the version of PyO3.
use crate::{
types::set::new_from_iter,
types::{IntoPyDict, PyDict, PySet},
FromPyObject, IntoPy, PyAny, PyErr, PyObject, PyResult, Python, ToPyObject,
};
@ -73,13 +74,9 @@ where
T: hash::Hash + Eq + ToPyObject,
{
fn to_object(&self, py: Python<'_>) -> PyObject {
let set = PySet::new::<T>(py, &[]).expect("Failed to construct empty set");
{
for val in self {
set.add(val).expect("Failed to add to set");
}
}
set.into()
new_from_iter(py, self)
.expect("Failed to create Python set from hashbrown::HashSet")
.into()
}
}
@ -89,13 +86,9 @@ where
S: hash::BuildHasher + Default,
{
fn into_py(self, py: Python<'_>) -> PyObject {
let set = PySet::empty(py).expect("Failed to construct empty set");
{
for val in self {
set.add(val.into_py(py)).expect("Failed to add to set");
}
}
set.into()
new_from_iter(py, self.into_iter().map(|item| item.into_py(py)))
.expect("Failed to create Python set from hashbrown::HashSet")
.into()
}
}

View File

@ -1,8 +1,8 @@
use std::{cmp, collections, hash};
use crate::{
inspect::types::TypeInfo, types::PySet, FromPyObject, IntoPy, PyAny, PyObject, PyResult,
Python, ToPyObject,
inspect::types::TypeInfo, types::set::new_from_iter, types::PySet, FromPyObject, IntoPy, PyAny,
PyObject, PyResult, Python, ToPyObject,
};
impl<T, S> ToPyObject for collections::HashSet<T, S>
@ -11,13 +11,9 @@ where
S: hash::BuildHasher + Default,
{
fn to_object(&self, py: Python<'_>) -> PyObject {
let set = PySet::new::<T>(py, &[]).expect("Failed to construct empty set");
{
for val in self {
set.add(val).expect("Failed to add to set");
}
}
set.into()
new_from_iter(py, self)
.expect("Failed to create Python set from HashSet")
.into()
}
}
@ -26,13 +22,9 @@ where
T: hash::Hash + Eq + ToPyObject,
{
fn to_object(&self, py: Python<'_>) -> PyObject {
let set = PySet::new::<T>(py, &[]).expect("Failed to construct empty set");
{
for val in self {
set.add(val).expect("Failed to add to set");
}
}
set.into()
new_from_iter(py, self)
.expect("Failed to create Python set from BTreeSet")
.into()
}
}
@ -42,13 +34,9 @@ where
S: hash::BuildHasher + Default,
{
fn into_py(self, py: Python<'_>) -> PyObject {
let set = PySet::empty(py).expect("Failed to construct empty set");
{
for val in self {
set.add(val.into_py(py)).expect("Failed to add to set");
}
}
set.into()
new_from_iter(py, self.into_iter().map(|item| item.into_py(py)))
.expect("Failed to create Python set from HashSet")
.into()
}
fn type_output() -> TypeInfo {
@ -76,13 +64,9 @@ where
K: IntoPy<PyObject> + cmp::Ord,
{
fn into_py(self, py: Python<'_>) -> PyObject {
let set = PySet::empty(py).expect("Failed to construct empty set");
{
for val in self {
set.add(val.into_py(py)).expect("Failed to add to set");
}
}
set.into()
new_from_iter(py, self.into_iter().map(|item| item.into_py(py)))
.expect("Failed to create Python set from BTreeSet")
.into()
}
fn type_output() -> TypeInfo {

View File

@ -1,9 +1,12 @@
// Copyright (c) 2017-present PyO3 Project and Contributors
//
use crate::err::{PyErr, PyResult};
#[cfg(Py_LIMITED_API)]
use crate::types::PyIterator;
use crate::{
err::{self, PyErr, PyResult},
IntoPyPointer, Py, PyObject,
};
use crate::{ffi, AsPyPointer, PyAny, Python, ToPyObject};
use std::ptr;
@ -31,9 +34,12 @@ impl PyFrozenSet {
/// Creates a new frozenset.
///
/// May panic when running out of memory.
pub fn new<'p, T: ToPyObject>(py: Python<'p>, elements: &[T]) -> PyResult<&'p PyFrozenSet> {
let list = elements.to_object(py);
unsafe { py.from_owned_ptr_or_err(ffi::PyFrozenSet_New(list.as_ptr())) }
#[inline]
pub fn new<'a, 'p, T: ToPyObject + 'a>(
py: Python<'p>,
elements: impl IntoIterator<Item = &'a T>,
) -> PyResult<&'p PyFrozenSet> {
new_from_iter(py, elements).map(|set| set.into_ref(py))
}
/// Creates a new empty frozen set
@ -157,6 +163,34 @@ mod impl_ {
pub use impl_::*;
#[inline]
pub(crate) fn new_from_iter<T: ToPyObject>(
py: Python<'_>,
elements: impl IntoIterator<Item = T>,
) -> PyResult<Py<PyFrozenSet>> {
fn new_from_iter_inner(
py: Python<'_>,
elements: &mut dyn Iterator<Item = PyObject>,
) -> PyResult<Py<PyFrozenSet>> {
let set: Py<PyFrozenSet> = unsafe {
// We create the `Py` pointer because its Drop cleans up the set if user code panics.
Py::from_owned_ptr_or_err(py, ffi::PyFrozenSet_New(std::ptr::null_mut()))?
};
let ptr = set.as_ptr();
for obj in elements {
unsafe {
err::error_on_minusone(py, ffi::PySet_Add(ptr, obj.into_ptr()))?;
}
}
Ok(set)
}
let mut iter = elements.into_iter().map(|e| e.to_object(py));
new_from_iter_inner(py, &mut iter)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -286,7 +286,7 @@ mod num;
#[cfg(not(PyPy))]
mod pysuper;
mod sequence;
mod set;
pub(crate) mod set;
mod slice;
mod string;
mod traceback;

View File

@ -1,9 +1,12 @@
// Copyright (c) 2017-present PyO3 Project and Contributors
//
use crate::err::{self, PyErr, PyResult};
#[cfg(Py_LIMITED_API)]
use crate::types::PyIterator;
use crate::{
err::{self, PyErr, PyResult},
IntoPyPointer, Py,
};
use crate::{ffi, AsPyPointer, PyAny, PyObject, Python, ToPyObject};
use std::ptr;
@ -30,9 +33,12 @@ impl PySet {
/// Creates a new set with elements from the given slice.
///
/// Returns an error if some element is not hashable.
pub fn new<'p, T: ToPyObject>(py: Python<'p>, elements: &[T]) -> PyResult<&'p PySet> {
let list = elements.to_object(py);
unsafe { py.from_owned_ptr_or_err(ffi::PySet_New(list.as_ptr())) }
#[inline]
pub fn new<'a, 'p, T: ToPyObject + 'a>(
py: Python<'p>,
elements: impl IntoIterator<Item = &'a T>,
) -> PyResult<&'p PySet> {
new_from_iter(py, elements).map(|set| set.into_ref(py))
}
/// Creates a new empty set.
@ -230,6 +236,34 @@ mod impl_ {
pub use impl_::*;
#[inline]
pub(crate) fn new_from_iter<T: ToPyObject>(
py: Python<'_>,
elements: impl IntoIterator<Item = T>,
) -> PyResult<Py<PySet>> {
fn new_from_iter_inner(
py: Python<'_>,
elements: &mut dyn Iterator<Item = PyObject>,
) -> PyResult<Py<PySet>> {
let set: Py<PySet> = unsafe {
// We create the `Py` pointer because its Drop cleans up the set if user code panics.
Py::from_owned_ptr_or_err(py, ffi::PySet_New(std::ptr::null_mut()))?
};
let ptr = set.as_ptr();
for obj in elements {
unsafe {
err::error_on_minusone(py, ffi::PySet_Add(ptr, obj.into_ptr()))?;
}
}
Ok(set)
}
let mut iter = elements.into_iter().map(|e| e.to_object(py));
new_from_iter_inner(py, &mut iter)
}
#[cfg(test)]
mod tests {
use super::PySet;