Add a word count example

This commit is contained in:
messense 2017-07-23 13:32:18 +08:00
parent 40d34ca59d
commit 54a87aacdf
No known key found for this signature in database
GPG Key ID: BB41A8A2C716CCA9
6 changed files with 221 additions and 0 deletions

67
examples/word-count/.gitignore vendored Normal file
View File

@ -0,0 +1,67 @@
target/
**/*.rs.bk
Cargo.lock
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
bin/
local/
include/
man/
*.egg-info/
.installed.cfg
*.egg
.ropeproject/
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/

View File

@ -0,0 +1,14 @@
[package]
authors = ["Messense Lv <messense@icloud.com>"]
name = "word-count"
version = "0.1.0"
[dependencies]
rayon = "0.8"
[dependencies.pyo3]
path = "../../"
[lib]
name = "word_count"
crate-type = ["cdylib"]

View File

@ -0,0 +1,15 @@
# word-count
## Build
```bash
python setup.py install
```
## Usage
```python
from word_count import search
search('path/to/file', 'word')
```

View File

@ -0,0 +1,54 @@
import sys
from setuptools import setup
from setuptools.command.test import test as TestCommand
# setuptools-rust supplies RustExtension. When it is not importable, try to
# install it with pip (using the running interpreter) and import it again.
try:
    from setuptools_rust import RustExtension
except ImportError:
    import subprocess

    errno = subprocess.call([sys.executable, '-m', 'pip', 'install', 'setuptools-rust'])
    if errno != 0:
        # pip failed: tell the user what is missing and exit with pip's status.
        print("Please install setuptools-rust package")
        raise SystemExit(errno)
    from setuptools_rust import RustExtension
class PyTest(TestCommand):
    """``test`` command: runs the ``test_rust`` command, then pytest on ``tests``."""

    user_options = []

    def run(self):
        """Run ``test_rust``, then exit with the status returned by pytest."""
        # Imported locally: at module level `subprocess` is only imported on
        # the ImportError fallback path, so it may not be available there.
        import subprocess
        import sys

        self.run_command("test_rust")
        pytest_cmd = [sys.executable, '-m', 'pytest', 'tests']
        raise SystemExit(subprocess.call(pytest_cmd))
# Dependency lists, kept as module-level names and handed to setup() below.
setup_requires = ['setuptools-rust>=0.6.0']
install_requires = []
tests_require = install_requires + ['pytest']

setup(
    name='word-count',
    version='0.1.0',
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Programming Language :: Python',
        'Programming Language :: Rust',
        'Operating System :: POSIX',
        'Operating System :: MacOS :: MacOS X',
    ],
    packages=['word_count'],
    # The extension is built from Cargo.toml and installed as
    # word_count._word_count.
    rust_extensions=[RustExtension('word_count._word_count', 'Cargo.toml')],
    setup_requires=setup_requires,
    install_requires=install_requires,
    tests_require=tests_require,
    include_package_data=True,
    zip_safe=False,
    # Route `setup.py test` through the PyTest command class.
    cmdclass={'test': PyTest},
)

View File

@ -0,0 +1,68 @@
// Source adapted from
// https://github.com/tildeio/helix-website/blob/master/crates/word_count/src/lib.rs
#![feature(proc_macro, specialization, const_fn)]
extern crate pyo3;
extern crate rayon;
use std::fs::File;
use std::io::prelude::*;
use rayon::iter::{ParallelIterator, IntoParallelIterator};
use pyo3::{py, PyResult, Python, PyModule, ToPyErr};
/// Splits `corpus` into its lines, borrowing each line from the input.
///
/// Line terminators are not included in the returned slices.
fn lines(corpus: &str) -> Vec<&str> {
    let mut rows = Vec::new();
    rows.extend(corpus.lines());
    rows
}
/// Returns `true` when `word` spells `search`, ignoring letter case.
///
/// Leading non-alphabetic characters of `word` (an opening quote, for
/// example) are skipped before comparing. Once every character of `search`
/// has been matched, the next character of `word` must be non-alphabetic
/// (trailing punctuation) for the word to count, so `"word,"` matches
/// `"word"` but `"words"` does not.
///
/// Both sides are case-folded, so a `search` term containing uppercase
/// letters matches as well.
fn matches(word: &str, search: &str) -> bool {
    let mut search = search.chars();
    for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) {
        match search.next() {
            // `search` is exhausted: accept only if the word ends here, i.e.
            // the remaining character is not a letter.
            None => return !ch.is_alphabetic(),
            Some(expect) => {
                // Compare the first character of each lowercase mapping so
                // that neither side's case influences the result.
                if ch.to_lowercase().next() != expect.to_lowercase().next() {
                    return false;
                }
            }
        }
    }
    // `word` ran out; it matches only if `search` ran out at the same time.
    search.next().is_none()
}
/// Counts how many space-separated tokens of `line` are accepted by
/// `matches` for the given `search` term.
fn wc_line(line: &str, search: &str) -> i32 {
    line.split(' ')
        .filter(|token| matches(token, search))
        .map(|_| 1i32)
        .sum()
}
// fn wc_sequential(lines: &[&str], search: &str) -> i32 {
// lines.into_iter()
// .map(|line| wc_line(line, search))
// .fold(0, |sum, line| sum + line)
// }
/// Counts the tokens matching `search` across all of `lines`.
///
/// Each line is counted with `wc_line` through a rayon parallel iterator and
/// the per-line counts are summed.
fn wc_parallel(lines: &[&str], search: &str) -> i32 {
    let per_line_counts = lines.into_par_iter().map(|text| wc_line(text, search));
    per_line_counts.sum()
}
// Initializer for the `_word_count` module (the name passed to `py::modinit`).
// It declares one function, `search_py`, and returns `Ok(())`.
#[py::modinit(_word_count)]
fn init_mod(py: Python, m: &PyModule) -> PyResult<()> {
// `pyfn(m, "search")` ties this function to module `m` under the name
// "search" (presumably the Python-visible name; see the pyo3 docs).
//
// Takes the path of a text file and the term to look for, and returns the
// count computed by `wc_parallel` over the file's lines.
#[pyfn(m, "search")]
fn search_py(py: Python, path: String, search: String) -> PyResult<i32> {
// Open the file and read it fully into `contents`; an I/O error from either
// step is converted with `to_pyerr` and returned early through `?`.
let mut file = File::open(path).map_err(|e| e.to_pyerr(py))?;
let mut contents = String::new();
file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;
// The counting runs inside `allow_threads`, which per the pyo3 documentation
// releases the GIL while the closure executes. The `move` closure takes
// ownership of `contents` and `search`.
let count = py.allow_threads(move || wc_parallel(&lines(&contents), &search));
Ok(count)
}
Ok(())
}

View File

@ -0,0 +1,3 @@
# Re-export `search` from the `_word_count` submodule so that it can be
# imported directly from this package.
from ._word_count import search
# Names exported by `from <package> import *`.
__all__ = ['search']