Test and benchmark word-count example (#60)

* Test and benchmark word-count example

* Optimize rust word_count
This commit is contained in:
messense 2017-07-26 12:37:36 +08:00 committed by GitHub
parent 3ab5e4526c
commit 43a5d6f1b4
5 changed files with 91 additions and 16 deletions

View File

@@ -24,6 +24,8 @@ build:
test: build
cargo test $(CARGO_FLAGS)
pip install setuptools-rust pytest pytest-benchmark
cd examples/word-count && python setup.py install && pytest -v tests
clippy:
if $$CLIPPY; then cargo clippy $(CARGO_FLAGS); fi

View File

@@ -29,7 +29,7 @@ class PyTest(TestCommand):
setup_requires = ['setuptools-rust>=0.6.0']
install_requires = []
tests_require = install_requires + ['pytest']
tests_require = install_requires + ['pytest', 'pytest-benchmark']
setup(
name='word-count',

View File

@@ -7,13 +7,9 @@ extern crate rayon;
use std::fs::File;
use std::io::prelude::*;
use rayon::iter::{ParallelIterator, IntoParallelIterator};
use rayon::prelude::*;
use pyo3::{py, PyResult, Python, PyModule, ToPyErr};
/// Split `corpus` into a vector of line slices (no trailing newlines).
fn lines(corpus: &str) -> Vec<&str> {
    let mut collected = Vec::new();
    for line in corpus.lines() {
        collected.push(line);
    }
    collected
}
fn matches(word: &str, search: &str) -> bool {
let mut search = search.chars();
for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) {
@@ -39,14 +35,14 @@ fn wc_line(line: &str, search: &str) -> i32 {
total
}
// fn wc_sequential(lines: &[&str], search: &str) -> i32 {
// lines.into_iter()
// .map(|line| wc_line(line, search))
// .fold(0, |sum, line| sum + line)
// }
/// Count occurrences of `search` across the whole corpus, one line at a
/// time on the current thread (sequential baseline for the parallel version).
fn wc_sequential(lines: &str, search: &str) -> i32 {
    // `.sum()` is the idiomatic equivalent of `fold(0, |sum, line| sum + line)`.
    lines
        .lines()
        .map(|line| wc_line(line, search))
        .sum()
}
fn wc_parallel(lines: &[&str], search: &str) -> i32 {
lines.into_par_iter()
/// Count occurrences of `search` across the whole corpus in parallel:
/// rayon's `par_lines` splits the text by line across worker threads and
/// the per-line counts from `wc_line` are summed.
fn wc_parallel(lines: &str, search: &str) -> i32 {
    lines.par_lines()
        .map(|line| wc_line(line, search))
        .sum()
}
@@ -60,9 +56,17 @@ fn init_mod(py: Python, m: &PyModule) -> PyResult<()> {
let mut contents = String::new();
file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;
let count = py.allow_threads(move || wc_parallel(&lines(&contents), &search));
let count = py.allow_threads(move || wc_parallel(&contents, &search));
Ok(count)
}
/// Python-exposed sequential counter: reads the file at `path` into memory
/// and counts occurrences of `search` with `wc_sequential`.
/// I/O errors are converted into Python exceptions via `to_pyerr`.
// NOTE(review): unlike the parallel `search` above, this does not wrap the
// work in `py.allow_threads` — presumably intentional so the sequential
// benchmark measures the GIL-held path; confirm.
#[pyfn(m, "search_sequential")]
fn search_sequential(py: Python, path: String, search: String) -> PyResult<i32> {
    let mut file = File::open(path).map_err(|e| e.to_pyerr(py))?;
    let mut contents = String::new();
    file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;
    Ok(wc_sequential(&contents, &search))
}
Ok(())
}

View File

@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import os
import pytest
import word_count
current_dir = os.path.abspath(os.path.dirname(__file__))
path = os.path.join(current_dir, 'zen-of-python.txt')
@pytest.fixture(scope='session', autouse=True)
def textfile():
    """Write a large sample corpus to `path` for the whole test session,
    then delete it during teardown."""
    content = '''The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!\n''' * 1000
    with open(path, 'w') as f:
        f.write(content)
    yield
    os.remove(path)
def test_word_count_rust_parallel(benchmark):
    """Benchmark the parallel Rust implementation and verify its count."""
    assert benchmark(word_count.search, path, 'is') == 10000
def test_word_count_rust_sequential(benchmark):
    """Benchmark the sequential Rust implementation and verify its count."""
    assert benchmark(word_count.search_sequential, path, 'is') == 10000
def test_word_count_python_sequential(benchmark):
    """Benchmark the pure-Python implementation and verify its count."""
    assert benchmark(word_count.search_py, path, 'is') == 10000

View File

@@ -1,3 +1,17 @@
from ._word_count import search
# -*- coding: utf-8 -*-
from __future__ import absolute_import
__all__ = ['search']
from ._word_count import search, search_sequential
__all__ = ['search', 'search_sequential', 'search_py']
def search_py(path, needle):
    """Pure-Python baseline: count whole-word occurrences of ``needle``
    in the text file at ``path``.

    Words are whitespace-delimited; a word counts only if it equals
    ``needle`` exactly.
    """
    total = 0
    with open(path, 'r') as f:
        for line in f:
            # split() with no argument splits on any whitespace run and
            # discards the trailing newline. The previous split(' ') left
            # '\n' attached to the last word of each line, so a needle at
            # end-of-line was never counted.
            total += line.split().count(needle)
    return total