diff --git a/examples/word-count-cls/.gitignore b/examples/word-count-cls/.gitignore new file mode 100644 index 00000000..db8bc200 --- /dev/null +++ b/examples/word-count-cls/.gitignore @@ -0,0 +1,67 @@ +target/ +**/*.rs.bk +Cargo.lock + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +bin/ +local/ +include/ +man/ +*.egg-info/ +.installed.cfg +*.egg +.ropeproject/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ diff --git a/examples/word-count-cls/Cargo.toml b/examples/word-count-cls/Cargo.toml new file mode 100644 index 00000000..43fbd3ff --- /dev/null +++ b/examples/word-count-cls/Cargo.toml @@ -0,0 +1,14 @@ +[package] +authors = ["Messense Lv "] +name = "word-count" +version = "0.1.0" + +[dependencies] +rayon = "0.8" + +[dependencies.pyo3] +path = "../../" + +[lib] +name = "word_count" +crate-type = ["cdylib"] diff --git a/examples/word-count-cls/README.md b/examples/word-count-cls/README.md new file mode 100644 index 00000000..0a38437f --- /dev/null +++ b/examples/word-count-cls/README.md @@ -0,0 +1,15 @@ +# word-count + +## Build + +```bash +python setup.py install +``` + +## Usage + +```python +from word_count_cls import Words + +Words('path/to/file').search('word') +``` diff --git a/examples/word-count-cls/setup.py b/examples/word-count-cls/setup.py new file mode 100644 index 00000000..306fe0cc --- /dev/null +++ 
b/examples/word-count-cls/setup.py @@ -0,0 +1,54 @@ +import sys + +from setuptools import setup +from setuptools.command.test import test as TestCommand + +try: + from setuptools_rust import RustExtension +except ImportError: + import subprocess + errno = subprocess.call([sys.executable, '-m', 'pip', 'install', 'setuptools-rust']) + if errno: + print("Please install setuptools-rust package") + raise SystemExit(errno) + else: + from setuptools_rust import RustExtension + + +class PyTest(TestCommand): + user_options = [] + + def run(self): + self.run_command("test_rust") + + import subprocess + import sys + errno = subprocess.call([sys.executable, '-m', 'pytest', 'tests']) + raise SystemExit(errno) + + +setup_requires = ['setuptools-rust>=0.6.1'] +install_requires = [] +tests_require = install_requires + ['pytest', 'pytest-benchmark'] + +setup( + name='word-count-cls', + version='0.1.0', + classifiers=[ + 'License :: OSI Approved :: MIT License', + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'Programming Language :: Python', + 'Programming Language :: Rust', + 'Operating System :: POSIX', + 'Operating System :: MacOS :: MacOS X', + ], + packages=['word_count_cls'], + rust_extensions=[RustExtension('word_count_cls._word_count', 'Cargo.toml')], + install_requires=install_requires, + tests_require=tests_require, + setup_requires=setup_requires, + include_package_data=True, + zip_safe=False, + cmdclass=dict(test=PyTest) +) diff --git a/examples/word-count-cls/src/lib.rs b/examples/word-count-cls/src/lib.rs new file mode 100644 index 00000000..5dbe44df --- /dev/null +++ b/examples/word-count-cls/src/lib.rs @@ -0,0 +1,89 @@ +// Source adopted from +// https://github.com/tildeio/helix-website/blob/master/crates/word_count/src/lib.rs +#![feature(proc_macro, specialization, const_fn)] +extern crate pyo3; +extern crate rayon; + +use std::fs::File; +use std::io::prelude::*; + +use rayon::prelude::*; +use pyo3::*; + +#[py::class] +struct Words { + 
path: String, + token: PyToken, +} + +#[py::methods] +impl Words { + + #[new] + fn __new__(_cls: &PyType, py: Python, path: String) -> PyResult<PyObject> { + Ok(py.init(|t| Words {path: path, token: t})?.into()) + } + + fn search(&self, py: Python, search: String) -> PyResult<i32> { + let mut file = File::open(self.path.as_str())?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + + let count = py.allow_threads(move || wc_parallel(&contents, &search)); + Ok(count) + } + + fn search_sequential(&self, search: String) -> PyResult<i32> { + let mut file = File::open(self.path.as_str())?; + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + Ok(wc_sequential(&contents, &search)) + } +} + + +fn matches(word: &str, search: &str) -> bool { + let mut search = search.chars(); + for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) { + match search.next() { + None => { return !ch.is_alphabetic(); } + Some(expect) => { + if ch.to_lowercase().next() != Some(expect) { + return false; + } + } + } + } + return search.next().is_none(); +} + +fn wc_line(line: &str, search: &str) -> i32 { + let mut total = 0; + for word in line.split(' ') { + if matches(word, search) { + total += 1; + } + } + total +} + +fn wc_sequential(lines: &str, search: &str) -> i32 { + lines.lines() + .map(|line| wc_line(line, search)) + .fold(0, |sum, line| sum + line) +} + +fn wc_parallel(lines: &str, search: &str) -> i32 { + lines.par_lines() + .map(|line| wc_line(line, search)) + .sum() +} + + + +#[py::modinit(_word_count)] +fn init_mod(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_class::<Words>()?; + + Ok(()) +} diff --git a/examples/word-count-cls/tests/test_word_count.py b/examples/word-count-cls/tests/test_word_count.py new file mode 100644 index 00000000..e582d2dd --- /dev/null +++ b/examples/word-count-cls/tests/test_word_count.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import os + +import pytest + +import word_count_cls 
+ +current_dir = os.path.abspath(os.path.dirname(__file__)) +path = os.path.join(current_dir, 'zen-of-python.txt') + + +@pytest.fixture(scope='session', autouse=True) +def textfile(): + text = '''The Zen of Python, by Tim Peters + +Beautiful is better than ugly. +Explicit is better than implicit. +Simple is better than complex. +Complex is better than complicated. +Flat is better than nested. +Sparse is better than dense. +Readability counts. +Special cases aren't special enough to break the rules. +Although practicality beats purity. +Errors should never pass silently. +Unless explicitly silenced. +In the face of ambiguity, refuse the temptation to guess. +There should be one-- and preferably only one --obvious way to do it. +Although that way may not be obvious at first unless you're Dutch. +Now is better than never. +Although never is often better than *right* now. +If the implementation is hard to explain, it's a bad idea. +If the implementation is easy to explain, it may be a good idea. 
+Namespaces are one honking great idea -- let's do more of those!\n''' * 1000 + with open(path, 'w') as f: + f.write(text) + yield + os.remove(path) + + +def test_word_count_rust_parallel(benchmark): + count = benchmark(word_count_cls.Words(path).search, 'is') + assert count == 10000 + + +def test_word_count_rust_sequential(benchmark): + count = benchmark(word_count_cls.Words(path).search_sequential, 'is') + assert count == 10000 + + +def test_word_count_python_sequential(benchmark): + count = benchmark(word_count_cls.search_py, path, 'is') + assert count == 10000 diff --git a/examples/word-count-cls/word_count_cls/__init__.py b/examples/word-count-cls/word_count_cls/__init__.py new file mode 100644 index 00000000..eab0efd4 --- /dev/null +++ b/examples/word-count-cls/word_count_cls/__init__.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from ._word_count import Words + +__all__ = ['Words', 'search_py'] + + +def search_py(path, needle): + total = 0 + with open(path, 'r') as f: + for line in f: + words = line.split(' ') + for word in words: + if word == needle: + total += 1 + return total