Make benchmarks more comparable

2020-06-05 14:33:15 +02:00 · 2020-06-05 14:33:15 +02:00 · eb73105625
parent be1b7045af
commit eb73105625
3 changed files with 34 additions and 59 deletions
--- a/examples/word-count/src/lib.rs
+++ b/examples/word-count/src/lib.rs
@@ -4,45 +4,24 @@
 use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
 use rayon::prelude::*;
-use std::fs;
-use std::path::PathBuf;

-/// Represents a file that can be searched
-#[pyclass(module = "word_count")]
-struct WordCounter {
-    path: PathBuf,
+/// Sums `count_line(line, &search)` over every line of `contents`, parallelized by rayon
+#[pyfunction]
+fn search(py: Python<'_>, contents: &str, search: String) -> PyResult<usize> {
+    let count = py.allow_threads(move || { // per PyO3, the GIL is released while this closure runs
+        contents
+            .par_lines()
+            .map(|line| count_line(line, &search))
+            .sum()
+    });
+    Ok(count)
 }

-#[pymethods]
-impl WordCounter {
-    #[new]
-    fn new(path: String) -> Self {
-        WordCounter {
-            path: PathBuf::from(path),
-        }
-    }
-
-    /// Searches for the word, parallelized by rayon
-    fn search(&self, py: Python<'_>, search: String) -> PyResult<usize> {
-        let contents = fs::read_to_string(&self.path)?;
-
-        let count = py.allow_threads(move || {
-            contents
-                .par_lines()
-                .map(|line| count_line(line, &search))
-                .sum()
-        });
-        Ok(count)
-    }
-
-    /// Searches for a word in a classic sequential fashion
-    fn search_sequential(&self, needle: String) -> PyResult<usize> {
-        let contents = fs::read_to_string(&self.path)?;
-
-        let result = contents.lines().map(|line| count_line(line, &needle)).sum();
-
-        Ok(result)
-    }
+/// Sums `count_line(line, &needle)` over the lines of `contents` with a plain sequential iterator
+#[pyfunction]
+fn search_sequential(contents: &str, needle: String) -> PyResult<usize> {
+    let result = contents.lines().map(|line| count_line(line, &needle)).sum();
+    Ok(result)
 }

 fn matches(word: &str, needle: &str) -> bool {
@@ -77,7 +56,8 @@ fn count_line(line: &str, needle: &str) -> usize {
 #[pymodule]
 fn word_count(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
    m.add_wrapped(wrap_pyfunction!(count_line))?;
-    m.add_class::<WordCounter>()?;
+    m.add_wrapped(wrap_pyfunction!(search))?; // free function replacing `WordCounter.search`
+    m.add_wrapped(wrap_pyfunction!(search_sequential))?; // free function replacing `WordCounter.search_sequential`

    Ok(())
 }
--- a/examples/word-count/tests/test_word_count.py
+++ b/examples/word-count/tests/test_word_count.py
@@ -8,8 +8,8 @@ current_dir = os.path.abspath(os.path.dirname(__file__))
 path = os.path.join(current_dir, "zen-of-python.txt")


-@pytest.fixture(scope="session", autouse=True)
-def textfile():
+@pytest.fixture(scope="session")
+def contents() -> str:
    text = """
 The Zen of Python, by Tim Peters

@@ -33,23 +33,19 @@ If the implementation is hard to explain, it's a bad idea.
 If the implementation is easy to explain, it may be a good idea.
 Namespaces are one honking great idea -- let's do more of those!
 """
-
-    with open(path, "w") as f:
-        f.write(text * 1000)
-    yield
-    os.remove(path)
+    return text * 1000


-def test_word_count_rust_parallel(benchmark):
-    count = benchmark(word_count.WordCounter(path).search, "is")
+def test_word_count_rust_parallel(benchmark, contents: str) -> None:
+    count = benchmark(word_count.search, contents, "is")  # times the rayon-based Rust search on the in-memory text
    assert count == 10000


-def test_word_count_rust_sequential(benchmark):
-    count = benchmark(word_count.WordCounter(path).search_sequential, "is")
+def test_word_count_rust_sequential(benchmark, contents: str) -> None:
+    count = benchmark(word_count.search_sequential, contents, "is")  # times the sequential Rust search on the same text
    assert count == 10000


-def test_word_count_python_sequential(benchmark):
-    count = benchmark(word_count.search_py, path, "is")
+def test_word_count_python_sequential(benchmark, contents: str) -> None:
+    count = benchmark(word_count.search_py, contents, "is")  # times the pure-Python `search_py` on the same text
    assert count == 10000
--- a/examples/word-count/word_count/__init__.py
+++ b/examples/word-count/word_count/__init__.py
@@ -1,14 +1,13 @@
-from .word_count import WordCounter, count_line
+from .word_count import count_line, search, search_sequential

-__all__ = ["WordCounter", "count_line", "search_py"]
+__all__ = ["count_line", "search_py", "search", "search_sequential"]


-def search_py(path, needle):
+def search_py(contents, needle):  # pure-Python baseline: counts space-separated words equal to `needle`
    total = 0
-    with open(path, "r") as f:
-        for line in f:
-            words = line.split(" ")
-            for word in words:
-                if word == needle:
-                    total += 1
+    for line in contents.splitlines():  # iterate real lines; `split()` would yield single whitespace-separated tokens
+        words = line.split(" ")  # split on spaces only, as the file-based version did
+        for word in words:
+            if word == needle:
+                total += 1
    return total