Add a word count example
This commit is contained in:
parent
40d34ca59d
commit
54a87aacdf
|
@ -0,0 +1,67 @@
|
|||
target/
|
||||
**/*.rs.bk
|
||||
Cargo.lock
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
bin/
|
||||
local/
|
||||
include/
|
||||
man/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.ropeproject/
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
pip-selfcheck.json
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
|
@ -0,0 +1,14 @@
|
|||
[package]
|
||||
authors = ["Messense Lv <messense@icloud.com>"]
|
||||
name = "word-count"
|
||||
version = "0.1.0"
|
||||
|
||||
[dependencies]
|
||||
rayon = "0.8"
|
||||
|
||||
[dependencies.pyo3]
|
||||
path = "../../"
|
||||
|
||||
[lib]
|
||||
name = "word_count"
|
||||
crate-type = ["cdylib"]
|
|
@ -0,0 +1,15 @@
|
|||
# word-count
|
||||
|
||||
## Build
|
||||
|
||||
```bash
|
||||
python setup.py install
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from word_count import search
|
||||
|
||||
search('path/to/file', 'word')
|
||||
```
|
|
@ -0,0 +1,54 @@
|
|||
import sys
|
||||
|
||||
from setuptools import setup
|
||||
from setuptools.command.test import test as TestCommand
|
||||
|
||||
# setuptools-rust is required to build the extension. If it is missing, try to
# install it with pip for the running interpreter and then import it again.
try:
    from setuptools_rust import RustExtension
except ImportError:
    import subprocess

    pip_install = [sys.executable, '-m', 'pip', 'install', 'setuptools-rust']
    errno = subprocess.call(pip_install)
    if errno:
        # pip exited with a non-zero status: give up and propagate that status.
        print("Please install setuptools-rust package")
        raise SystemExit(errno)
    from setuptools_rust import RustExtension
|
||||
|
||||
|
||||
class PyTest(TestCommand):
    """Test command that runs the ``test_rust`` command and then pytest on ``tests``."""

    user_options = []

    def run(self):
        self.run_command("test_rust")

        # Imported locally: `subprocess` is only imported at module level when
        # the setuptools_rust import fallback ran.
        import subprocess
        import sys

        pytest_argv = [sys.executable, '-m', 'pytest', 'tests']
        # Always exit with pytest's return code as the process status.
        raise SystemExit(subprocess.call(pytest_argv))
|
||||
|
||||
|
||||
# Requirement lists handed to setup() below.
setup_requires = ['setuptools-rust>=0.6.0']
install_requires = []
# Tests need everything the package needs, plus pytest.
tests_require = install_requires + ['pytest']

# Package metadata and build configuration for the word-count example.
setup(
    name='word-count',
    version='0.1.0',
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Programming Language :: Python',
        'Programming Language :: Rust',
        'Operating System :: POSIX',
        'Operating System :: MacOS :: MacOS X',
    ],
    packages=['word_count'],
    # Rust extension declared as `word_count._word_count`, described by Cargo.toml.
    rust_extensions=[RustExtension('word_count._word_count', 'Cargo.toml')],
    install_requires=install_requires,
    tests_require=tests_require,
    setup_requires=setup_requires,
    include_package_data=True,
    zip_safe=False,
    # Registers PyTest (defined above) as the `test` command.
    cmdclass=dict(test=PyTest)
)
|
|
@ -0,0 +1,68 @@
|
|||
// Source adapted from
|
||||
// https://github.com/tildeio/helix-website/blob/master/crates/word_count/src/lib.rs
|
||||
#![feature(proc_macro, specialization, const_fn)]
|
||||
extern crate pyo3;
|
||||
extern crate rayon;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
|
||||
use rayon::iter::{ParallelIterator, IntoParallelIterator};
|
||||
use pyo3::{py, PyResult, Python, PyModule, ToPyErr};
|
||||
|
||||
/// Splits `corpus` into its individual lines.
///
/// The returned slices borrow from `corpus`; line terminators are not
/// included in the entries.
fn lines(corpus: &str) -> Vec<&str> {
    let mut collected = Vec::new();
    collected.extend(corpus.lines());
    collected
}
|
||||
|
||||
/// Returns `true` when `word` is an occurrence of `search`, ignoring case.
///
/// Leading non-alphabetic characters of `word` (for example an opening quote)
/// are skipped. Once every character of `search` has been consumed, the word
/// is accepted if it ends there or continues with a non-alphabetic character,
/// so `"word,"` matches `"word"` while `"words"` does not.
///
/// Both sides are compared through the first character of their lowercase
/// mapping, so the result does not depend on the case used in `search`.
fn matches(word: &str, search: &str) -> bool {
    let mut search = search.chars();
    for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) {
        match search.next() {
            // `search` is exhausted: the word matches only if what follows is
            // not another letter (punctuation, digit, ...).
            None => return !ch.is_alphabetic(),
            Some(expect) => {
                // Fold BOTH characters; folding only `ch` made any search term
                // containing an uppercase letter impossible to match.
                if ch.to_lowercase().next() != expect.to_lowercase().next() {
                    return false;
                }
            }
        }
    }
    // The word ran out first: it matches only if `search` is exhausted too.
    search.next().is_none()
}
|
||||
|
||||
fn wc_line(line: &str, search: &str) -> i32 {
|
||||
let mut total = 0;
|
||||
for word in line.split(' ') {
|
||||
if matches(word, search) {
|
||||
total += 1;
|
||||
}
|
||||
}
|
||||
total
|
||||
}
|
||||
|
||||
// fn wc_sequential(lines: &[&str], search: &str) -> i32 {
|
||||
// lines.into_iter()
|
||||
// .map(|line| wc_line(line, search))
|
||||
// .fold(0, |sum, line| sum + line)
|
||||
// }
|
||||
|
||||
fn wc_parallel(lines: &[&str], search: &str) -> i32 {
|
||||
lines.into_par_iter()
|
||||
.map(|line| wc_line(line, search))
|
||||
.sum()
|
||||
}
|
||||
|
||||
// Module initializer registered through `py::modinit` under the name
// `_word_count`; it attaches the `search` function to the module `m`.
#[py::modinit(_word_count)]
fn init_mod(py: Python, m: &PyModule) -> PyResult<()> {

    // Exposed through `pyfn` as `search`: reads the whole file at `path` into
    // memory and returns the number of words matching `search`.
    #[pyfn(m, "search")]
    fn search_py(py: Python, path: String, search: String) -> PyResult<i32> {
        // I/O failures are converted with `to_pyerr` and returned as the `Err`
        // side of the `PyResult`.
        let mut file = File::open(path).map_err(|e| e.to_pyerr(py))?;
        let mut contents = String::new();
        file.read_to_string(&mut contents).map_err(|e| e.to_pyerr(py))?;

        // The closure takes ownership of `contents` and `search` (`move`).
        // NOTE(review): `allow_threads` presumably releases the GIL while the
        // parallel count runs — confirm against the pyo3 documentation.
        let count = py.allow_threads(move || wc_parallel(&lines(&contents), &search));
        Ok(count)
    }

    Ok(())
}
|
|
@ -0,0 +1,3 @@
|
|||
from ._word_count import search
|
||||
|
||||
__all__ = ['search']
|
Loading…
Reference in New Issue