make word count example with class
This commit is contained in:
parent
ebcd4f5a33
commit
dcaa3130d9
|
@ -0,0 +1,67 @@
|
|||
target/
|
||||
**/*.rs.bk
|
||||
Cargo.lock
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
bin/
|
||||
local/
|
||||
include/
|
||||
man/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.ropeproject/
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
pip-selfcheck.json
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
|
@ -0,0 +1,14 @@
|
|||
[package]
|
||||
authors = ["Messense Lv <messense@icloud.com>"]
|
||||
name = "word-count"
|
||||
version = "0.1.0"
|
||||
|
||||
[dependencies]
|
||||
rayon = "0.8"
|
||||
|
||||
[dependencies.pyo3]
|
||||
path = "../../"
|
||||
|
||||
[lib]
|
||||
name = "word_count"
|
||||
crate-type = ["cdylib"]
|
|
@ -0,0 +1,15 @@
|
|||
# word-count
|
||||
|
||||
## Build
|
||||
|
||||
```bash
|
||||
python setup.py install
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from word_count_cls import Words
|
||||
|
||||
Words('path/to/file').search('word')
|
||||
```
|
|
@ -0,0 +1,54 @@
|
|||
import sys
|
||||
|
||||
from setuptools import setup
|
||||
from setuptools.command.test import test as TestCommand
|
||||
|
||||
# RustExtension comes from setuptools-rust; when the import fails, try to
# install the package with pip (using the running interpreter) and import again.
try:
    from setuptools_rust import RustExtension
except ImportError:
    import subprocess

    errno = subprocess.call(
        [sys.executable, '-m', 'pip', 'install', 'setuptools-rust'])
    if errno:
        # pip exited with a non-zero status: report it and abort with that status.
        print("Please install setuptools-rust package")
        raise SystemExit(errno)
    from setuptools_rust import RustExtension
|
||||
|
||||
|
||||
class PyTest(TestCommand):
    """``test`` command: run the ``test_rust`` command, then pytest on ``tests``."""

    user_options = []

    def run(self):
        """Run both suites and exit with the status returned by pytest."""
        self.run_command("test_rust")

        import subprocess
        import sys

        # Launch pytest with the interpreter that is running setup.py.
        pytest_cmd = [sys.executable, '-m', 'pytest', 'tests']
        raise SystemExit(subprocess.call(pytest_cmd))
|
||||
|
||||
|
||||
# Requirement lists handed to setup() below.
setup_requires = ['setuptools-rust>=0.6.1']
install_requires = []
tests_require = install_requires + ['pytest', 'pytest-benchmark']

# Package metadata; the Rust crate described by Cargo.toml is built as the
# extension module `word_count_cls._word_count`.
setup(
    name='word-count-cls',
    version='0.1.0',
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Programming Language :: Python',
        'Programming Language :: Rust',
        'Operating System :: POSIX',
        'Operating System :: MacOS :: MacOS X',
    ],
    packages=['word_count_cls'],
    rust_extensions=[
        RustExtension('word_count_cls._word_count', 'Cargo.toml'),
    ],
    install_requires=install_requires,
    tests_require=tests_require,
    setup_requires=setup_requires,
    include_package_data=True,
    zip_safe=False,
    cmdclass={'test': PyTest},
)
|
|
@ -0,0 +1,89 @@
|
|||
// Source adopted from
|
||||
// https://github.com/tildeio/helix-website/blob/master/crates/word_count/src/lib.rs
|
||||
#![feature(proc_macro, specialization, const_fn)]
|
||||
extern crate pyo3;
|
||||
extern crate rayon;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
|
||||
use rayon::prelude::*;
|
||||
use pyo3::*;
|
||||
|
||||
#[py::class]
|
||||
struct Words {
|
||||
path: String,
|
||||
token: PyToken,
|
||||
}
|
||||
|
||||
#[py::methods]
|
||||
impl Words {
|
||||
|
||||
#[new]
|
||||
fn __new__(_cls: &PyType, py: Python, path: String) -> PyResult<PyObject> {
|
||||
Ok(py.init(|t| Words {path: path, token: t})?.into())
|
||||
}
|
||||
|
||||
fn search(&self, py: Python, search: String) -> PyResult<i32> {
|
||||
let mut file = File::open(self.path.as_str())?;
|
||||
let mut contents = String::new();
|
||||
file.read_to_string(&mut contents)?;
|
||||
|
||||
let count = py.allow_threads(move || wc_parallel(&contents, &search));
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn search_sequential(&self, search: String) -> PyResult<i32> {
|
||||
let mut file = File::open(self.path.as_str())?;
|
||||
let mut contents = String::new();
|
||||
file.read_to_string(&mut contents)?;
|
||||
Ok(wc_sequential(&contents, &search))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Returns `true` when `word` is an occurrence of `search`, ignoring case.
///
/// Leading non-alphabetic characters of `word` are skipped. The following
/// characters must equal those of `search` once both sides are lower-cased
/// (only the first character of each lower-case expansion is compared).
/// When `search` is used up, the next character of `word` decides the result:
/// a non-alphabetic one (e.g. the comma in `"is,"`) accepts the word, an
/// alphabetic one (e.g. `"island"`) rejects it. A `word` that ends before
/// `search` does never matches.
fn matches(word: &str, search: &str) -> bool {
    let mut search = search.chars();
    for ch in word.chars().skip_while(|ch| !ch.is_alphabetic()) {
        match search.next() {
            // Needle fully consumed: the word must not continue with a letter.
            None => return !ch.is_alphabetic(),
            Some(expect) => {
                // Lower-case the needle too; comparing against the raw needle
                // character made any needle with an upper-case letter unmatchable.
                if ch.to_lowercase().next() != expect.to_lowercase().next() {
                    return false;
                }
            }
        }
    }
    // Word exhausted: it matches only if the needle is exhausted as well.
    search.next().is_none()
}
|
||||
|
||||
fn wc_line(line: &str, search: &str) -> i32 {
|
||||
let mut total = 0;
|
||||
for word in line.split(' ') {
|
||||
if matches(word, search) {
|
||||
total += 1;
|
||||
}
|
||||
}
|
||||
total
|
||||
}
|
||||
|
||||
fn wc_sequential(lines: &str, search: &str) -> i32 {
|
||||
lines.lines()
|
||||
.map(|line| wc_line(line, search))
|
||||
.fold(0, |sum, line| sum + line)
|
||||
}
|
||||
|
||||
fn wc_parallel(lines: &str, search: &str) -> i32 {
|
||||
lines.par_lines()
|
||||
.map(|line| wc_line(line, search))
|
||||
.sum()
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[py::modinit(_word_count)]
|
||||
fn init_mod(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||
m.add_class::<Words>()?;
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
import word_count_cls
|
||||
|
||||
current_dir = os.path.abspath(os.path.dirname(__file__))
|
||||
path = os.path.join(current_dir, 'zen-of-python.txt')
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
def textfile():
    """Write 1000 copies of the Zen of Python to ``path`` for the session.

    The file is created before the first test and removed after the fixture
    resumes from ``yield``.
    """
    zen = '''The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!\n'''
    with open(path, 'w') as handle:
        handle.write(zen * 1000)
    yield
    os.remove(path)
|
||||
|
||||
|
||||
def test_word_count_rust_parallel(benchmark):
    """Benchmark ``Words.search`` and check it counts 10000 occurrences of 'is'."""
    words = word_count_cls.Words(path)
    count = benchmark(words.search, 'is')
    assert count == 10000
|
||||
|
||||
|
||||
def test_word_count_rust_sequential(benchmark):
    """Benchmark ``Words.search_sequential`` and check it counts 10000 'is'."""
    words = word_count_cls.Words(path)
    count = benchmark(words.search_sequential, 'is')
    assert count == 10000
|
||||
|
||||
|
||||
def test_word_count_python_sequential(benchmark):
    """Benchmark the pure-Python ``search_py`` and check it counts 10000 'is'."""
    counter = word_count_cls.search_py
    count = benchmark(counter, path, 'is')
    assert count == 10000
|
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
|
||||
from ._word_count import Words
|
||||
|
||||
__all__ = ['Words', 'search_py']
|
||||
|
||||
|
||||
def search_py(path, needle):
    """Count the words in the file at ``path`` that are exactly ``needle``.

    Each line is split on single spaces and every piece is compared to
    ``needle`` with ``==`` (case-sensitive, punctuation is not stripped).

    :param path: path of the text file to read.
    :param needle: the word to count.
    :returns: number of matching words in the whole file.
    :raises IOError/OSError: if the file cannot be opened.
    """
    total = 0
    with open(path, 'r') as f:
        for line in f:
            # Drop the line terminator before splitting; otherwise the last
            # word of every line keeps its newline and can never equal needle.
            words = line.rstrip('\n').split(' ')
            total += sum(1 for word in words if word == needle)
    return total
|
Loading…
Reference in New Issue