Make benchmarks more comparable

This commit is contained in:
Alexander Niederbühl 2020-06-05 14:33:15 +02:00
parent be1b7045af
commit eb73105625
3 changed files with 34 additions and 59 deletions

View File

@@ -4,45 +4,24 @@
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::wrap_pyfunction; use pyo3::wrap_pyfunction;
use rayon::prelude::*; use rayon::prelude::*;
use std::fs;
use std::path::PathBuf;
/// Represents a file that can be searched /// Searches for the word, parallelized by rayon
#[pyclass(module = "word_count")] #[pyfunction]
struct WordCounter { fn search(py: Python<'_>, contents: &str, search: String) -> PyResult<usize> {
path: PathBuf, let count = py.allow_threads(move || {
contents
.par_lines()
.map(|line| count_line(line, &search))
.sum()
});
Ok(count)
} }
#[pymethods] /// Searches for a word in a classic sequential fashion
impl WordCounter { #[pyfunction]
#[new] fn search_sequential(contents: &str, needle: String) -> PyResult<usize> {
fn new(path: String) -> Self { let result = contents.lines().map(|line| count_line(line, &needle)).sum();
WordCounter { Ok(result)
path: PathBuf::from(path),
}
}
/// Searches for the word, parallelized by rayon
fn search(&self, py: Python<'_>, search: String) -> PyResult<usize> {
let contents = fs::read_to_string(&self.path)?;
let count = py.allow_threads(move || {
contents
.par_lines()
.map(|line| count_line(line, &search))
.sum()
});
Ok(count)
}
/// Searches for a word in a classic sequential fashion
fn search_sequential(&self, needle: String) -> PyResult<usize> {
let contents = fs::read_to_string(&self.path)?;
let result = contents.lines().map(|line| count_line(line, &needle)).sum();
Ok(result)
}
} }
fn matches(word: &str, needle: &str) -> bool { fn matches(word: &str, needle: &str) -> bool {
@@ -77,7 +56,8 @@ fn count_line(line: &str, needle: &str) -> usize {
#[pymodule] #[pymodule]
fn word_count(_py: Python<'_>, m: &PyModule) -> PyResult<()> { fn word_count(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(count_line))?; m.add_wrapped(wrap_pyfunction!(count_line))?;
m.add_class::<WordCounter>()?; m.add_wrapped(wrap_pyfunction!(search))?;
m.add_wrapped(wrap_pyfunction!(search_sequential))?;
Ok(()) Ok(())
} }

View File

@@ -8,8 +8,8 @@ current_dir = os.path.abspath(os.path.dirname(__file__))
path = os.path.join(current_dir, "zen-of-python.txt") path = os.path.join(current_dir, "zen-of-python.txt")
@pytest.fixture(scope="session", autouse=True) @pytest.fixture(scope="session")
def textfile(): def contents() -> str:
text = """ text = """
The Zen of Python, by Tim Peters The Zen of Python, by Tim Peters
@@ -33,23 +33,19 @@ If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea. If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those! Namespaces are one honking great idea -- let's do more of those!
""" """
return text * 1000
with open(path, "w") as f:
f.write(text * 1000)
yield
os.remove(path)
def test_word_count_rust_parallel(benchmark): def test_word_count_rust_parallel(benchmark, contents):
count = benchmark(word_count.WordCounter(path).search, "is") count = benchmark(word_count.search, contents, "is")
assert count == 10000 assert count == 10000
def test_word_count_rust_sequential(benchmark): def test_word_count_rust_sequential(benchmark, contents):
count = benchmark(word_count.WordCounter(path).search_sequential, "is") count = benchmark(word_count.search_sequential, contents, "is")
assert count == 10000 assert count == 10000
def test_word_count_python_sequential(benchmark): def test_word_count_python_sequential(benchmark, contents):
count = benchmark(word_count.search_py, path, "is") count = benchmark(word_count.search_py, contents, "is")
assert count == 10000 assert count == 10000

View File

@@ -1,14 +1,13 @@
from .word_count import WordCounter, count_line from .word_count import count_line, search, search_sequential
__all__ = ["WordCounter", "count_line", "search_py"] __all__ = ["count_line", "search_py", "search", "search_sequential"]
def search_py(path, needle): def search_py(contents, needle):
total = 0 total = 0
with open(path, "r") as f: for line in contents.split():
for line in f: words = line.split(" ")
words = line.split(" ") for word in words:
for word in words: if word == needle:
if word == needle: total += 1
total += 1
return total return total