diff --git a/Makefile b/Makefile
index 4888bad1..888d3b2f 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,8 @@ build:
 
 test: build
 	cargo test $(CARGO_FLAGS)
+	pip install setuptools-rust pytest pytest-benchmark
+	cd examples/word-count && python setup.py install && pytest -v tests
 
 clippy:
 	if $$CLIPPY; then cargo clippy $(CARGO_FLAGS); fi
diff --git a/examples/word-count/setup.py b/examples/word-count/setup.py
index aacd6ffb..d2f3f6d7 100644
--- a/examples/word-count/setup.py
+++ b/examples/word-count/setup.py
@@ -29,7 +29,7 @@ class PyTest(TestCommand):
 
 setup_requires = ['setuptools-rust>=0.6.0']
 install_requires = []
-tests_require = install_requires + ['pytest']
+tests_require = install_requires + ['pytest', 'pytest-benchmark']
 
 setup(
     name='word-count',
diff --git a/examples/word-count/src/lib.rs b/examples/word-count/src/lib.rs
index 49a35e67..ff6d983c 100644
--- a/examples/word-count/src/lib.rs
+++ b/examples/word-count/src/lib.rs
@@ -7,13 +7,9 @@ extern crate rayon;
 use std::fs::File;
 use std::io::prelude::*;
 
-use rayon::iter::{ParallelIterator, IntoParallelIterator};
+use rayon::prelude::*;
 use pyo3::{py, PyResult, Python, PyModule, ToPyErr};
 
-fn lines(corpus: &str) -> Vec<&str> {
-    corpus.lines().collect()
-}
-
 fn matches(word: &str, search: &str) -> bool {
     let mut search = search.chars();
     for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) {
@@ -39,14 +35,14 @@ fn wc_line(line: &str, search: &str) -> i32 {
     total
 }
 
-// fn wc_sequential(lines: &[&str], search: &str) -> i32 {
-//     lines.into_iter()
-//         .map(|line| wc_line(line, search))
-//         .fold(0, |sum, line| sum + line)
-// }
+fn wc_sequential(lines: &str, search: &str) -> i32 {
+    lines.lines()
+        .map(|line| wc_line(line, search))
+        .fold(0, |sum, line| sum + line)
+}
 
-fn wc_parallel(lines: &[&str], search: &str) -> i32 {
-    lines.into_par_iter()
+fn wc_parallel(lines: &str, search: &str) -> i32 {
+    lines.par_lines()
         .map(|line| wc_line(line, search))
         .sum()
 }
@@ -60,9 +56,17 @@ fn init_mod(py: Python, m: &PyModule) -> PyResult<()> {
         let mut contents = String::new();
         file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;
 
-        let count = py.allow_threads(move || wc_parallel(&lines(&contents), &search));
+        let count = py.allow_threads(move || wc_parallel(&contents, &search));
         Ok(count)
     }
 
+    #[pyfn(m, "search_sequential")]
+    fn search_sequential(py: Python, path: String, search: String) -> PyResult<i32> {
+        let mut file = File::open(path).map_err(|e| e.to_pyerr(py))?;
+        let mut contents = String::new();
+        file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;
+        Ok(wc_sequential(&contents, &search))
+    }
+
     Ok(())
 }
diff --git a/examples/word-count/tests/test_word_count.py b/examples/word-count/tests/test_word_count.py
new file mode 100644
index 00000000..3ed33bb3
--- /dev/null
+++ b/examples/word-count/tests/test_word_count.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import os
+
+import pytest
+
+import word_count
+
+current_dir = os.path.abspath(os.path.dirname(__file__))
+path = os.path.join(current_dir, 'zen-of-python.txt')
+
+
+@pytest.fixture(scope='session', autouse=True)
+def textfile():
+    text = '''The Zen of Python, by Tim Peters
+
+Beautiful is better than ugly.
+Explicit is better than implicit.
+Simple is better than complex.
+Complex is better than complicated.
+Flat is better than nested.
+Sparse is better than dense.
+Readability counts.
+Special cases aren't special enough to break the rules.
+Although practicality beats purity.
+Errors should never pass silently.
+Unless explicitly silenced.
+In the face of ambiguity, refuse the temptation to guess.
+There should be one-- and preferably only one --obvious way to do it.
+Although that way may not be obvious at first unless you're Dutch.
+Now is better than never.
+Although never is often better than *right* now.
+If the implementation is hard to explain, it's a bad idea.
+If the implementation is easy to explain, it may be a good idea.
+Namespaces are one honking great idea -- let's do more of those!\n''' * 1000
+    with open(path, 'w') as f:
+        f.write(text)
+    yield
+    os.remove(path)
+
+
+def test_word_count_rust_parallel(benchmark):
+    count = benchmark(word_count.search, path, 'is')
+    assert count == 10000
+
+
+def test_word_count_rust_sequential(benchmark):
+    count = benchmark(word_count.search_sequential, path, 'is')
+    assert count == 10000
+
+
+def test_word_count_python_sequential(benchmark):
+    count = benchmark(word_count.search_py, path, 'is')
+    assert count == 10000
diff --git a/examples/word-count/word_count/__init__.py b/examples/word-count/word_count/__init__.py
index 74f80fbd..3b1ae9af 100644
--- a/examples/word-count/word_count/__init__.py
+++ b/examples/word-count/word_count/__init__.py
@@ -1,3 +1,17 @@
-from ._word_count import search
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
 
-__all__ = ['search']
+from ._word_count import search, search_sequential
+
+__all__ = ['search', 'search_sequential', 'search_py']
+
+
+def search_py(path, needle):
+    total = 0
+    with open(path, 'r') as f:
+        for line in f:
+            words = line.split(' ')
+            for word in words:
+                if word == needle:
+                    total += 1
+    return total
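
Note (not part of the patch): a minimal sketch of how the three entry points added above could be exercised by hand, assuming the extension has been built and installed (e.g. python setup.py install inside examples/word-count) and that some_file.txt is a hypothetical placeholder for any text file:

    import word_count

    path = 'some_file.txt'  # hypothetical path; the test suite generates its own zen-of-python.txt
    print(word_count.search(path, 'is'))             # Rust, parallel over par_lines()
    print(word_count.search_sequential(path, 'is'))  # Rust, single-threaded lines()
    print(word_count.search_py(path, 'is'))          # pure-Python baseline

Running pytest -v tests in examples/word-count (as the new Makefile step does) reports timings for all three functions via pytest-benchmark.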