Merge #2944

2944: optimize sequence conversion for list and tuple r=adamreichold a=davidhewitt closes #2943 Avoid using `PyObject_IsInstance` for checking if lists or tuples are sequences, as we know they are always sequences. Co-authored-by: David Hewitt <1939362+davidhewitt@users.noreply.github.com>
2023-02-15 23:16:35 +00:00 · 2023-02-15 23:16:35 +00:00 · c858ced8d4
parent 18e7094ccc 4a41917365
commit c858ced8d4
4 changed files with 40 additions and 19 deletions
--- a/benches/bench_list.rs
+++ b/benches/bench_list.rs
@ -1,7 +1,7 @@
 use criterion::{criterion_group, criterion_main, Bencher, Criterion};

 use pyo3::prelude::*;
-use pyo3::types::PyList;
+use pyo3::types::{PyList, PySequence};

 fn iter_list(b: &mut Bencher<'_>) {
    Python::with_gil(|py| {
@ -53,12 +53,23 @@ fn list_get_item_unchecked(b: &mut Bencher<'_>) {
    });
 }

+fn sequence_from_list(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        const LEN: usize = 50_000;
+        let list = PyList::new(py, 0..LEN).to_object(py);
+        b.iter(|| {
+            let _: &PySequence = list.extract(py).unwrap();
+        });
+    });
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("iter_list", iter_list);
    c.bench_function("list_new", list_new);
    c.bench_function("list_get_item", list_get_item);
    #[cfg(not(Py_LIMITED_API))]
    c.bench_function("list_get_item_unchecked", list_get_item_unchecked);
+    c.bench_function("sequence_from_list", sequence_from_list);
 }

 criterion_group!(benches, criterion_benchmark);
--- a/benches/bench_tuple.rs
+++ b/benches/bench_tuple.rs
@ -1,7 +1,7 @@
 use criterion::{criterion_group, criterion_main, Bencher, Criterion};

 use pyo3::prelude::*;
-use pyo3::types::PyTuple;
+use pyo3::types::{PySequence, PyTuple};

 fn iter_tuple(b: &mut Bencher<'_>) {
    Python::with_gil(|py| {
@ -53,12 +53,21 @@ fn tuple_get_item_unchecked(b: &mut Bencher<'_>) {
    });
 }

+fn sequence_from_tuple(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        const LEN: usize = 50_000;
+        let tuple = PyTuple::new(py, 0..LEN).to_object(py);
+        b.iter(|| tuple.extract::<&PySequence>(py).unwrap());
+    });
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("iter_tuple", iter_tuple);
    c.bench_function("tuple_new", tuple_new);
    c.bench_function("tuple_get_item", tuple_get_item);
    #[cfg(not(any(Py_LIMITED_API, PyPy)))]
    c.bench_function("tuple_get_item_unchecked", tuple_get_item_unchecked);
+    c.bench_function("sequence_from_tuple", sequence_from_tuple);
 }

 criterion_group!(benches, criterion_benchmark);
--- a/newsfragments/2944.changed.md
+++ b/newsfragments/2944.changed.md
@ -0,0 +1 @@
+Optimize `PySequence` conversion for `list` and `tuple` inputs.
--- a/src/types/sequence.rs
+++ b/src/types/sequence.rs
@ -8,11 +8,9 @@ use crate::once_cell::GILOnceCell;
 use crate::type_object::PyTypeInfo;
 use crate::types::{PyAny, PyList, PyString, PyTuple, PyType};
 use crate::{ffi, PyNativeType, ToPyObject};
-use crate::{AsPyPointer, IntoPy, IntoPyPointer, Py, Python};
+use crate::{AsPyPointer, IntoPyPointer, Py, Python};
 use crate::{FromPyObject, PyTryFrom};

-static SEQUENCE_ABC: GILOnceCell<PyResult<Py<PyType>>> = GILOnceCell::new();
-
 /// Represents a reference to a Python object supporting the sequence protocol.
 #[repr(transparent)]
 pub struct PySequence(PyAny);
@ -318,17 +316,14 @@ where
    Ok(v)
 }

-fn get_sequence_abc(py: Python<'_>) -> Result<&PyType, PyErr> {
+static SEQUENCE_ABC: GILOnceCell<Py<PyType>> = GILOnceCell::new();
+
+fn get_sequence_abc(py: Python<'_>) -> PyResult<&PyType> {
    SEQUENCE_ABC
-        .get_or_init(py, || {
-            Ok(py
-                .import("collections.abc")?
-                .getattr("Sequence")?
-                .downcast::<PyType>()?
-                .into_py(py))
+        .get_or_try_init(py, || {
+            py.import("collections.abc")?.getattr("Sequence")?.extract()
        })
-        .as_ref()
-        .map_or_else(|e| Err(e.clone_ref(py)), |t| Ok(t.as_ref(py)))
+        .map(|ty| ty.as_ref(py))
 }

 impl<'v> PyTryFrom<'v> for PySequence {
@ -338,11 +333,16 @@ impl<'v> PyTryFrom<'v> for PySequence {
    fn try_from<V: Into<&'v PyAny>>(value: V) -> Result<&'v PySequence, PyDowncastError<'v>> {
        let value = value.into();

-        // TODO: surface specific errors in this chain to the user
-        if let Ok(abc) = get_sequence_abc(value.py()) {
-            if value.is_instance(abc).unwrap_or(false) {
-                unsafe { return Ok(value.downcast_unchecked::<PySequence>()) }
-            }
+        // Using `is_instance` for `collections.abc.Sequence` is slow, so provide
+        // optimized cases for list and tuples as common well-known sequences
+        if PyList::is_type_of(value)
+            || PyTuple::is_type_of(value)
+            || get_sequence_abc(value.py())
+                .and_then(|abc| value.is_instance(abc))
+                // TODO: surface errors in this chain to the user
+                .unwrap_or(false)
+        {
+            unsafe { return Ok(value.downcast_unchecked::<PySequence>()) }
        }

        Err(PyDowncastError::new(value, "Sequence"))
				`@ -0,0 +1 @@`
				Optimize `PySequence` conversion for `list` and `tuple` inputs.