From 54a87aacdf7db5793fb82af32833a066d0223512 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 23 Jul 2017 13:32:18 +0800 Subject: [PATCH] Add a word count example --- examples/word-count/.gitignore | 67 +++++++++++++++++++++ examples/word-count/Cargo.toml | 14 +++++ examples/word-count/README.md | 15 +++++ examples/word-count/setup.py | 54 +++++++++++++++++ examples/word-count/src/lib.rs | 68 ++++++++++++++++++++++ examples/word-count/word_count/__init__.py | 3 + 6 files changed, 221 insertions(+) create mode 100644 examples/word-count/.gitignore create mode 100644 examples/word-count/Cargo.toml create mode 100644 examples/word-count/README.md create mode 100644 examples/word-count/setup.py create mode 100644 examples/word-count/src/lib.rs create mode 100644 examples/word-count/word_count/__init__.py diff --git a/examples/word-count/.gitignore b/examples/word-count/.gitignore new file mode 100644 index 00000000..db8bc200 --- /dev/null +++ b/examples/word-count/.gitignore @@ -0,0 +1,67 @@ +target/ +**/*.rs.bk +Cargo.lock + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +bin/ +local/ +include/ +man/ +*.egg-info/ +.installed.cfg +*.egg +.ropeproject/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ diff --git a/examples/word-count/Cargo.toml b/examples/word-count/Cargo.toml new file mode 100644 index 00000000..43fbd3ff --- /dev/null +++ b/examples/word-count/Cargo.toml @@ -0,0 +1,14 @@ +[package] +authors = ["Messense Lv "] +name = "word-count" +version = "0.1.0" + +[dependencies] +rayon = "0.8" + +[dependencies.pyo3] +path = "../../" + +[lib] +name = "word_count" +crate-type = ["cdylib"] diff --git a/examples/word-count/README.md b/examples/word-count/README.md new file mode 100644 index 00000000..ecf5250d --- /dev/null +++ b/examples/word-count/README.md @@ -0,0 +1,15 @@ +# word-count + +## Build + +```bash +python setup.py install +``` + +## Usage + +```python +from word_count import search + +search('path/to/file', 'word') +``` diff --git a/examples/word-count/setup.py b/examples/word-count/setup.py new file mode 100644 index 00000000..aacd6ffb --- /dev/null +++ b/examples/word-count/setup.py @@ -0,0 +1,54 @@ +import sys + +from setuptools import setup +from setuptools.command.test import test as TestCommand + +try: + from setuptools_rust import RustExtension +except ImportError: + import subprocess + errno = subprocess.call([sys.executable, '-m', 'pip', 'install', 'setuptools-rust']) + if errno: + print("Please install setuptools-rust package") + raise SystemExit(errno) + else: + from setuptools_rust import RustExtension + + +class PyTest(TestCommand): + user_options = [] + + def run(self): + self.run_command("test_rust") + + import subprocess + import sys + errno = subprocess.call([sys.executable, '-m', 'pytest', 'tests']) + raise SystemExit(errno) + + +setup_requires = ['setuptools-rust>=0.6.0'] +install_requires = [] +tests_require = install_requires + ['pytest'] + +setup( + name='word-count', + version='0.1.0', + classifiers=[ + 'License :: OSI Approved :: MIT License', + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'Programming Language :: Python', + 'Programming Language :: Rust', + 'Operating System :: POSIX', + 'Operating System :: MacOS :: MacOS X', + ], + packages=['word_count'], + rust_extensions=[RustExtension('word_count._word_count', 'Cargo.toml')], + install_requires=install_requires, + tests_require=tests_require, + setup_requires=setup_requires, + include_package_data=True, + zip_safe=False, + cmdclass=dict(test=PyTest) +) diff --git a/examples/word-count/src/lib.rs b/examples/word-count/src/lib.rs new file mode 100644 index 00000000..e779acd6 --- /dev/null +++ b/examples/word-count/src/lib.rs @@ -0,0 +1,68 @@ +// Source adopted from +// https://github.com/tildeio/helix-website/blob/master/crates/word_count/src/lib.rs +#![feature(proc_macro, specialization, const_fn)] +extern crate pyo3; +extern crate rayon; + +use std::fs::File; +use std::io::prelude::*; + +use rayon::iter::{ParallelIterator, IntoParallelIterator}; +use pyo3::{py, PyResult, Python, PyModule, ToPyErr}; + +fn lines(corpus: &str) -> Vec<&str> { + corpus.lines().collect() +} + +fn matches(word: &str, search: &str) -> bool { + let mut search = search.chars(); + for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) { + match search.next() { + None => { return !ch.is_alphabetic(); } + Some(expect) => { + if ch.to_lowercase().next() != Some(expect) { + return false; + } + } + } + } + return search.next().is_none(); +} + +fn wc_line(line: &str, search: &str) -> i32 { + let mut total = 0; + for word in line.split(' ') { + if matches(word, search) { + total += 1; + } + } + total +} + +// fn wc_sequential(lines: &[&str], search: &str) -> i32 { +// lines.into_iter() +// .map(|line| wc_line(line, search)) +// .fold(0, |sum, line| sum + line) +// } + +fn wc_parallel(lines: &[&str], search: &str) -> i32 { + lines.into_par_iter() + .map(|line| wc_line(line, search)) + .sum() +} + +#[py::modinit(_word_count)] +fn init_mod(py: Python, m: &PyModule) -> PyResult<()> { + + #[pyfn(m, "search")] + fn search_py(py: Python, path: String, search: String) -> PyResult { + let mut file = File::open(path).map_err(|e| e.to_pyerr(py))?; + let mut contents = String::new(); + file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?; + + let count = py.allow_threads(move || wc_parallel(&lines(&contents), &search)); + Ok(count) + } + + Ok(()) +} diff --git a/examples/word-count/word_count/__init__.py b/examples/word-count/word_count/__init__.py new file mode 100644 index 00000000..74f80fbd --- /dev/null +++ b/examples/word-count/word_count/__init__.py @@ -0,0 +1,3 @@ +from ._word_count import search + +__all__ = ['search']