Make benchmarks more comparable
This commit is contained in:
parent
be1b7045af
commit
eb73105625
|
@@ -4,45 +4,24 @@
|
||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
use pyo3::wrap_pyfunction;
|
use pyo3::wrap_pyfunction;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::fs;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
/// Represents a file that can be searched
|
/// Searches for the word, parallelized by rayon
|
||||||
#[pyclass(module = "word_count")]
|
#[pyfunction]
|
||||||
struct WordCounter {
|
fn search(py: Python<'_>, contents: &str, search: String) -> PyResult<usize> {
|
||||||
path: PathBuf,
|
let count = py.allow_threads(move || {
|
||||||
|
contents
|
||||||
|
.par_lines()
|
||||||
|
.map(|line| count_line(line, &search))
|
||||||
|
.sum()
|
||||||
|
});
|
||||||
|
Ok(count)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pymethods]
|
/// Searches for a word in a classic sequential fashion
|
||||||
impl WordCounter {
|
#[pyfunction]
|
||||||
#[new]
|
fn search_sequential(contents: &str, needle: String) -> PyResult<usize> {
|
||||||
fn new(path: String) -> Self {
|
let result = contents.lines().map(|line| count_line(line, &needle)).sum();
|
||||||
WordCounter {
|
Ok(result)
|
||||||
path: PathBuf::from(path),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Searches for the word, parallelized by rayon
|
|
||||||
fn search(&self, py: Python<'_>, search: String) -> PyResult<usize> {
|
|
||||||
let contents = fs::read_to_string(&self.path)?;
|
|
||||||
|
|
||||||
let count = py.allow_threads(move || {
|
|
||||||
contents
|
|
||||||
.par_lines()
|
|
||||||
.map(|line| count_line(line, &search))
|
|
||||||
.sum()
|
|
||||||
});
|
|
||||||
Ok(count)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Searches for a word in a classic sequential fashion
|
|
||||||
fn search_sequential(&self, needle: String) -> PyResult<usize> {
|
|
||||||
let contents = fs::read_to_string(&self.path)?;
|
|
||||||
|
|
||||||
let result = contents.lines().map(|line| count_line(line, &needle)).sum();
|
|
||||||
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn matches(word: &str, needle: &str) -> bool {
|
fn matches(word: &str, needle: &str) -> bool {
|
||||||
|
@@ -77,7 +56,8 @@ fn count_line(line: &str, needle: &str) -> usize {
|
||||||
#[pymodule]
|
#[pymodule]
|
||||||
fn word_count(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
|
fn word_count(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
|
||||||
m.add_wrapped(wrap_pyfunction!(count_line))?;
|
m.add_wrapped(wrap_pyfunction!(count_line))?;
|
||||||
m.add_class::<WordCounter>()?;
|
m.add_wrapped(wrap_pyfunction!(search))?;
|
||||||
|
m.add_wrapped(wrap_pyfunction!(search_sequential))?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@@ -8,8 +8,8 @@ current_dir = os.path.abspath(os.path.dirname(__file__))
|
||||||
path = os.path.join(current_dir, "zen-of-python.txt")
|
path = os.path.join(current_dir, "zen-of-python.txt")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
|
@pytest.fixture(scope="session")
|
||||||
def textfile():
|
def contents() -> str:
|
||||||
text = """
|
text = """
|
||||||
The Zen of Python, by Tim Peters
|
The Zen of Python, by Tim Peters
|
||||||
|
|
||||||
|
@@ -33,23 +33,19 @@ If the implementation is hard to explain, it's a bad idea.
|
||||||
If the implementation is easy to explain, it may be a good idea.
|
If the implementation is easy to explain, it may be a good idea.
|
||||||
Namespaces are one honking great idea -- let's do more of those!
|
Namespaces are one honking great idea -- let's do more of those!
|
||||||
"""
|
"""
|
||||||
|
return text * 1000
|
||||||
with open(path, "w") as f:
|
|
||||||
f.write(text * 1000)
|
|
||||||
yield
|
|
||||||
os.remove(path)
|
|
||||||
|
|
||||||
|
|
||||||
def test_word_count_rust_parallel(benchmark):
|
def test_word_count_rust_parallel(benchmark, contents):
|
||||||
count = benchmark(word_count.WordCounter(path).search, "is")
|
count = benchmark(word_count.search, contents, "is")
|
||||||
assert count == 10000
|
assert count == 10000
|
||||||
|
|
||||||
|
|
||||||
def test_word_count_rust_sequential(benchmark):
|
def test_word_count_rust_sequential(benchmark, contents):
|
||||||
count = benchmark(word_count.WordCounter(path).search_sequential, "is")
|
count = benchmark(word_count.search_sequential, contents, "is")
|
||||||
assert count == 10000
|
assert count == 10000
|
||||||
|
|
||||||
|
|
||||||
def test_word_count_python_sequential(benchmark):
|
def test_word_count_python_sequential(benchmark, contents):
|
||||||
count = benchmark(word_count.search_py, path, "is")
|
count = benchmark(word_count.search_py, contents, "is")
|
||||||
assert count == 10000
|
assert count == 10000
|
||||||
|
|
|
@@ -1,14 +1,13 @@
|
||||||
from .word_count import WordCounter, count_line
|
from .word_count import count_line, search, search_sequential
|
||||||
|
|
||||||
__all__ = ["WordCounter", "count_line", "search_py"]
|
__all__ = ["count_line", "search_py", "search", "search_sequential"]
|
||||||
|
|
||||||
|
|
||||||
def search_py(path, needle):
|
def search_py(contents, needle):
|
||||||
total = 0
|
total = 0
|
||||||
with open(path, "r") as f:
|
for line in contents.split():
|
||||||
for line in f:
|
words = line.split(" ")
|
||||||
words = line.split(" ")
|
for word in words:
|
||||||
for word in words:
|
if word == needle:
|
||||||
if word == needle:
|
total += 1
|
||||||
total += 1
|
|
||||||
return total
|
return total
|
||||||
|
|
Loading…
Reference in New Issue