make word count example with class

This commit is contained in:
Nikolay Kim 2017-07-27 13:03:51 -07:00
parent ebcd4f5a33
commit dcaa3130d9
7 changed files with 311 additions and 0 deletions

67
examples/word-count-cls/.gitignore vendored Normal file
View File

@ -0,0 +1,67 @@
target/
**/*.rs.bk
Cargo.lock
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
bin/
local/
include/
man/
*.egg-info/
.installed.cfg
*.egg
.ropeproject/
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
docs/_build/
# PyBuilder
target/

View File

@ -0,0 +1,14 @@
[package]
authors = ["Messense Lv <messense@icloud.com>"]
name = "word-count"
version = "0.1.0"
[dependencies]
rayon = "0.8"
[dependencies.pyo3]
path = "../../"
[lib]
name = "word_count"
crate-type = ["cdylib"]

View File

@ -0,0 +1,15 @@
# word-count-cls
## Build
```bash
python setup.py install
```
## Usage
```python
from word_count_cls import Words
Words('path/to/file').search('word')
```

View File

@ -0,0 +1,54 @@
import sys
from setuptools import setup
from setuptools.command.test import test as TestCommand
try:
from setuptools_rust import RustExtension
except ImportError:
import subprocess
errno = subprocess.call([sys.executable, '-m', 'pip', 'install', 'setuptools-rust'])
if errno:
print("Please install setuptools-rust package")
raise SystemExit(errno)
else:
from setuptools_rust import RustExtension
class PyTest(TestCommand):
    """`test` command: runs the `test_rust` command, then pytest on `tests`.

    The process always terminates through `SystemExit`, carrying the return
    code of the pytest subprocess.
    """

    user_options = []

    def run(self):
        # Imported locally so the method does not rely on which module-level
        # import branch was taken.
        import subprocess
        import sys

        self.run_command("test_rust")
        exit_code = subprocess.call([sys.executable, '-m', 'pytest', 'tests'])
        raise SystemExit(exit_code)
# Requirement lists handed to setup() below; the test requirements extend the
# (currently empty) install requirements.
setup_requires = ['setuptools-rust>=0.6.1']
install_requires = []
tests_require = install_requires + ['pytest', 'pytest-benchmark']

setup(
    name='word-count-cls',
    version='0.1.0',
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Programming Language :: Python',
        'Programming Language :: Rust',
        'Operating System :: POSIX',
        'Operating System :: MacOS :: MacOS X',
    ],
    packages=['word_count_cls'],
    # Rust extension described by Cargo.toml, targeted at
    # word_count_cls._word_count.
    rust_extensions=[
        RustExtension('word_count_cls._word_count', 'Cargo.toml'),
    ],
    install_requires=install_requires,
    tests_require=tests_require,
    setup_requires=setup_requires,
    include_package_data=True,
    zip_safe=False,
    # Route `python setup.py test` through the PyTest command class.
    cmdclass={'test': PyTest},
)

View File

@ -0,0 +1,89 @@
// Source adapted from
// https://github.com/tildeio/helix-website/blob/master/crates/word_count/src/lib.rs
#![feature(proc_macro, specialization, const_fn)]
extern crate pyo3;
extern crate rayon;
use std::fs::File;
use std::io::prelude::*;
use rayon::prelude::*;
use pyo3::*;
// Class registered on the Python module by `init_mod`; its methods count how
// often a word occurs in the file located at `path`.
#[py::class]
struct Words {
// Path of the file that `search` and `search_sequential` open and read.
path: String,
// Token handed to the initializer closure by `py.init` in `__new__`.
token: PyToken,
}
#[py::methods]
impl Words {
    // Constructor: creates a `Words` instance bound to `path` via `py.init`
    // and returns it converted into a `PyObject`.
    #[new]
    fn __new__(_cls: &PyType, py: Python, path: String) -> PyResult<PyObject> {
        let instance = py.init(|tok| Words { path: path, token: tok })?;
        Ok(instance.into())
    }

    // Reads the whole file at `self.path` and counts the occurrences of
    // `search` with `wc_parallel`, executed through `py.allow_threads`.
    // Failures to open or read the file are propagated with `?`.
    fn search(&self, py: Python, search: String) -> PyResult<i32> {
        let mut text = String::new();
        let mut source = File::open(self.path.as_str())?;
        source.read_to_string(&mut text)?;
        Ok(py.allow_threads(move || wc_parallel(&text, &search)))
    }

    // Reads the whole file at `self.path` and counts the occurrences of
    // `search` with `wc_sequential`. Failures to open or read the file are
    // propagated with `?`.
    fn search_sequential(&self, search: String) -> PyResult<i32> {
        let mut text = String::new();
        let mut source = File::open(self.path.as_str())?;
        source.read_to_string(&mut text)?;
        let hits = wc_sequential(&text, &search);
        Ok(hits)
    }
}
/// Tells whether `word` is an occurrence of `search`.
///
/// Leading non-alphabetic characters of `word` are skipped. After that, the
/// first lowercase mapping of each character of `word` must equal the
/// corresponding character of `search`. Once `search` is used up, the word
/// matches only if its next character is not alphabetic (or there is none).
/// If `word` ends first, it matches only when `search` is used up as well.
fn matches(word: &str, search: &str) -> bool {
    let mut wanted = search.chars();
    let candidate = word.chars().skip_while(|c| !c.is_alphabetic());

    for actual in candidate {
        if let Some(expected) = wanted.next() {
            if actual.to_lowercase().next() != Some(expected) {
                return false;
            }
        } else {
            // `search` is exhausted: the very next character of `word`
            // decides the outcome.
            return !actual.is_alphabetic();
        }
    }

    wanted.next().is_none()
}
/// Counts the pieces of `line`, split on single spaces, for which
/// `matches(piece, search)` is true.
fn wc_line(line: &str, search: &str) -> i32 {
    line.split(' ')
        .fold(0, |hits, piece| if matches(piece, search) { hits + 1 } else { hits })
}
/// Adds up `wc_line` over every line of `lines`, one line after another.
fn wc_sequential(lines: &str, search: &str) -> i32 {
    let mut total = 0;
    for text in lines.lines() {
        total += wc_line(text, search);
    }
    total
}
/// Adds up `wc_line` over every line of `lines`, iterating the lines with
/// rayon's `par_lines`.
fn wc_parallel(lines: &str, search: &str) -> i32 {
    let per_line_hits = lines.par_lines().map(|text| wc_line(text, search));
    per_line_hits.sum()
}
// Initializer for the `_word_count` module (name given to `py::modinit`):
// adds the `Words` class to the module object `m`, propagating any error
// from `add_class`.
#[py::modinit(_word_count)]
fn init_mod(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<Words>()?;
Ok(())
}

View File

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import os
import pytest
import word_count_cls
current_dir = os.path.abspath(os.path.dirname(__file__))
path = os.path.join(current_dir, 'zen-of-python.txt')
@pytest.fixture(scope='session', autouse=True)
def textfile():
    """Session-wide autouse fixture providing the benchmark input file.

    Writes 1000 copies of the Zen of Python to `path` before the tests run
    and removes the file once the session is over.
    """
    zen = '''The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!\n'''
    with open(path, 'w') as handle:
        handle.write(zen * 1000)
    yield
    os.remove(path)
def test_word_count_rust_parallel(benchmark):
    """Benchmark `Words.search` and check it reports 10000 hits for 'is'."""
    counter = word_count_cls.Words(path)
    hits = benchmark(counter.search, 'is')
    assert hits == 10000
def test_word_count_rust_sequential(benchmark):
    """Benchmark `Words.search_sequential` and check it reports 10000 hits for 'is'."""
    counter = word_count_cls.Words(path)
    hits = benchmark(counter.search_sequential, 'is')
    assert hits == 10000
def test_word_count_python_sequential(benchmark):
    """Benchmark the pure-Python `search_py` and check it reports 10000 hits for 'is'."""
    python_search = word_count_cls.search_py
    hits = benchmark(python_search, path, 'is')
    assert hits == 10000

View File

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from ._word_count import Words
__all__ = ['Words', 'search_py']
def search_py(path, needle):
    """Count the tokens in the file at `path` that are equal to `needle`.

    Every line is split on single spaces and each resulting token is compared
    to `needle` with `==`, so case, attached punctuation and a trailing
    newline all prevent a match.

    :param path: path of the text file to read.
    :param needle: the exact token to look for.
    :return: total number of matching tokens over all lines.
    """
    with open(path, 'r') as handle:
        return sum(line.split(' ').count(needle) for line in handle)