Test and benchmark word-count example (#60)

* Test and benchmark word-count example

* Optimize rust word_count
This commit is contained in:
messense 2017-07-26 12:37:36 +08:00 committed by GitHub
parent 3ab5e4526c
commit 43a5d6f1b4
5 changed files with 91 additions and 16 deletions

View File

@@ -24,6 +24,8 @@ build:
test: build
cargo test $(CARGO_FLAGS)
pip install setuptools-rust pytest pytest-benchmark
cd examples/word-count && python setup.py install && pytest -v tests
clippy:
if $$CLIPPY; then cargo clippy $(CARGO_FLAGS); fi

View File

@@ -29,7 +29,7 @@ class PyTest(TestCommand):
setup_requires = ['setuptools-rust>=0.6.0']
install_requires = []
tests_require = install_requires + ['pytest']
tests_require = install_requires + ['pytest', 'pytest-benchmark']
setup(
name='word-count',

View File

@@ -7,13 +7,9 @@ extern crate rayon;
use std::fs::File;
use std::io::prelude::*;
use rayon::iter::{ParallelIterator, IntoParallelIterator};
use rayon::prelude::*;
use pyo3::{py, PyResult, Python, PyModule, ToPyErr};
/// Split `corpus` into a vector of line slices (no trailing newlines).
fn lines(corpus: &str) -> Vec<&str> {
    let mut collected = Vec::new();
    for line in corpus.lines() {
        collected.push(line);
    }
    collected
}
fn matches(word: &str, search: &str) -> bool {
let mut search = search.chars();
for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) {
@@ -39,14 +35,14 @@ fn wc_line(line: &str, search: &str) -> i32 {
total
}
// fn wc_sequential(lines: &[&str], search: &str) -> i32 {
// lines.into_iter()
// .map(|line| wc_line(line, search))
// .fold(0, |sum, line| sum + line)
// }
/// Count occurrences of `search` across the whole corpus, one line at a
/// time on the current thread (sequential baseline for the parallel version).
fn wc_sequential(lines: &str, search: &str) -> i32 {
    // `.sum()` is the idiomatic equivalent of `fold(0, |sum, line| sum + line)`.
    lines
        .lines()
        .map(|line| wc_line(line, search))
        .sum()
}
fn wc_parallel(lines: &[&str], search: &str) -> i32 {
lines.into_par_iter()
/// Count occurrences of `search` across the whole corpus in parallel:
/// rayon's `par_lines` splits the text by line across worker threads and
/// the per-line counts from `wc_line` are summed.
fn wc_parallel(lines: &str, search: &str) -> i32 {
    lines.par_lines()
        .map(|line| wc_line(line, search))
        .sum()
}
@@ -60,9 +56,17 @@ fn init_mod(py: Python, m: &PyModule) -> PyResult<()> {
let mut contents = String::new();
file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;
let count = py.allow_threads(move || wc_parallel(&lines(&contents), &search));
let count = py.allow_threads(move || wc_parallel(&contents, &search));
Ok(count)
}
/// Python-exposed sequential counter: reads the file at `path` into memory
/// and counts occurrences of `search` with `wc_sequential`.
/// I/O errors are converted into Python exceptions via `to_pyerr`.
// NOTE(review): unlike the parallel `search` above, this does not wrap the
// work in `py.allow_threads` — presumably intentional so the sequential
// benchmark measures the GIL-held path; confirm.
#[pyfn(m, "search_sequential")]
fn search_sequential(py: Python, path: String, search: String) -> PyResult<i32> {
    let mut file = File::open(path).map_err(|e| e.to_pyerr(py))?;
    let mut contents = String::new();
    file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;
    Ok(wc_sequential(&contents, &search))
}
Ok(())
}

View File

@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import os
import pytest
import word_count
current_dir = os.path.abspath(os.path.dirname(__file__))
path = os.path.join(current_dir, 'zen-of-python.txt')
@pytest.fixture(scope='session', autouse=True)
def textfile():
    """Write a large sample corpus to `path` for the whole test session,
    then delete it during teardown."""
    content = '''The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!\n''' * 1000
    with open(path, 'w') as f:
        f.write(content)
    yield
    os.remove(path)
def test_word_count_rust_parallel(benchmark):
    """Benchmark the parallel Rust implementation and verify its count."""
    assert benchmark(word_count.search, path, 'is') == 10000
def test_word_count_rust_sequential(benchmark):
    """Benchmark the sequential Rust implementation and verify its count."""
    assert benchmark(word_count.search_sequential, path, 'is') == 10000
def test_word_count_python_sequential(benchmark):
    """Benchmark the pure-Python implementation and verify its count."""
    assert benchmark(word_count.search_py, path, 'is') == 10000

View File

@@ -1,3 +1,17 @@
from ._word_count import search
# -*- coding: utf-8 -*-
from __future__ import absolute_import
__all__ = ['search']
from ._word_count import search, search_sequential
__all__ = ['search', 'search_sequential', 'search_py']
def search_py(path, needle):
    """Pure-Python baseline: count whole-word occurrences of ``needle``
    in the text file at ``path``.

    Words are whitespace-delimited; a word counts only if it equals
    ``needle`` exactly.
    """
    total = 0
    with open(path, 'r') as f:
        for line in f:
            # split() with no argument splits on any whitespace run and
            # discards the trailing newline. The previous split(' ') left
            # '\n' attached to the last word of each line, so a needle at
            # end-of-line was never counted.
            total += line.split().count(needle)
    return total