A high-performance, multi-functional word matcher
Project description
PyO3 bindings for the Rust implementation of Matcher.
Usage
Python usage is in the test.ipynb file.
Matcher
import msgspec
import numpy as np
from matcher_py import Matcher, SimpleMatcher # type: ignore
from extension_types import MatchTableType, SimpleMatchType, MatchTable
# Msgpack encoder used to serialize the match-table configuration that is
# handed to the Matcher constructor.
msgpack_encoder = msgspec.msgpack.Encoder()

# The word list and the exemption list are configured with the same
# combination of match-type flags.
fanjian_delete_normalize = (
    SimpleMatchType.MatchFanjian | SimpleMatchType.MatchDeleteNormalize
)

simple_table = MatchTable(
    table_id=1,
    match_table_type=MatchTableType.Simple,
    simple_match_type=fanjian_delete_normalize,
    word_list=["蔔", "你好"],
    exemption_simple_match_type=fanjian_delete_normalize,
    exemption_word_list=[],
)

# The configuration maps a name ("test") to a list of match tables.
match_table_config = {"test": [simple_table]}
matcher = Matcher(msgpack_encoder.encode(match_table_config))

# Single-text queries. These are bare expressions, so a notebook displays
# each result.
matcher.is_match(r"卜")
matcher.word_match(r"你,好")
matcher.word_match_as_string("你好")

# Batch query over a plain Python list of texts.
matcher.batch_word_match_as_string(["你好", "你好", "你真棒"])

# One long paragraph repeated 10000 times, stored in object-dtype arrays.
lorem_paragraph = "Laborum eiusmod anim aliqua non veniam laboris officia dolor. Adipisicing sit est irure Lorem duis adipisicing exercitation. Cillum excepteur non anim ipsum eiusmod deserunt veniam. Nulla veniam sunt sint ad velit occaecat in deserunt nulla nisi excepteur. Cillum veniam Lorem aute eu. Nisi voluptate laboris quis sint pariatur ullamco minim pariatur officia non anim nisi nulla ipsum ad. Veniam pariatur ut occaecat ut veniam velit aliquip commodo culpa elit eu eiusmod."
lorem_batch = [lorem_paragraph] * 10000
object_dtype = np.dtype("object")

# NumPy query using the default (non-inplace) call.
text_array = np.array(lorem_batch, dtype=object_dtype)
matcher.numpy_word_match_as_string(text_array)

# Same query on a freshly built array with inplace=True.
# NOTE(review): presumably the results are written back into text_array,
# which is why the array is displayed afterwards -- confirm against the
# matcher_py documentation.
text_array = np.array(lorem_batch, dtype=object_dtype)
matcher.numpy_word_match_as_string(text_array, inplace=True)
text_array
Simple Matcher
import msgspec
import numpy as np
from matcher_py import Matcher, SimpleMatcher # type: ignore
from extension_types import MatchTableType, SimpleMatchType, MatchTable
# Msgpack encoder used to serialize the word tables that are handed to the
# SimpleMatcher constructor.
msgpack_encoder = msgspec.msgpack.Encoder()

# Word tables keyed by match type; each table maps a word id to a word.
# Insertion order is kept exactly as written so the encoded payload is
# unchanged.
fanjian_delete_normalize_words = {
    1: "无,法,无,天",
    2: "xxx",
    3: "你好",
    6: r"It's /\/\y duty",
    4: "xxx,yyy",
}
fanjian_words = {
    4: "xxx,yyy",
}
plain_words = {
    5: "xxxxx,xxxxyyyyxxxxx",
}
simple_match_config = {
    SimpleMatchType.MatchFanjian | SimpleMatchType.MatchDeleteNormalize: fanjian_delete_normalize_words,
    SimpleMatchType.MatchFanjian: fanjian_words,
    SimpleMatchType.MatchNone: plain_words,
}
simple_matcher = SimpleMatcher(msgpack_encoder.encode(simple_match_config))

# Single-text queries. These are bare expressions, so a notebook displays
# each result.
simple_matcher.is_match("xxx")
simple_matcher.simple_process(r"It's /\/\y duty")

# Batch query over a plain Python list of texts.
simple_matcher.batch_simple_process([r"It's /\/\y duty", "你好", "xxxxxxx"])

# One long paragraph repeated 10000 times, stored in object-dtype arrays.
lorem_paragraph = "Laborum eiusmod anim aliqua non veniam laboris officia dolor. Adipisicing sit est irure Lorem duis adipisicing exercitation. Cillum excepteur non anim ipsum eiusmod deserunt veniam. Nulla veniam sunt sint ad velit occaecat in deserunt nulla nisi excepteur. Cillum veniam Lorem aute eu. Nisi voluptate laboris quis sint pariatur ullamco minim pariatur officia non anim nisi nulla ipsum ad. Veniam pariatur ut occaecat ut veniam velit aliquip commodo culpa elit eu eiusmod."
lorem_batch = [lorem_paragraph] * 10000
object_dtype = np.dtype("object")

# NumPy query using the default (non-inplace) call.
text_array = np.array(lorem_batch, dtype=object_dtype)
simple_matcher.numpy_simple_process(text_array)

# Same query on a freshly built array with inplace=True.
# NOTE(review): presumably the results are written back into text_array,
# which is why the array is displayed afterwards -- confirm against the
# matcher_py documentation.
text_array = np.array(lorem_batch, dtype=object_dtype)
simple_matcher.numpy_simple_process(text_array, inplace=True)
text_array
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
matcher_py-0.1.1.tar.gz
(406.5 kB
view details)
Built Distribution
File details
Details for the file matcher_py-0.1.1.tar.gz
.
File metadata
- Download URL: matcher_py-0.1.1.tar.gz
- Upload date:
- Size: 406.5 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: maturin/1.6.0
File hashes
Algorithm | Hash digest
---|---
SHA256 | 94b3d7719fb640f3eb3ab51cedb077f7758d6bd9a63b6dec52ce060a06dfb224
MD5 | 3d491bf7d8989be789c389a402abba8c
BLAKE2b-256 | 4c292195aef52183c5be470b28d81bf490b0729ff778ffc848749db4981a301d
File details
Details for the file matcher_py-0.1.1-cp310-abi3-macosx_11_0_arm64.whl
.
File metadata
- Download URL: matcher_py-0.1.1-cp310-abi3-macosx_11_0_arm64.whl
- Upload date:
- Size: 1.1 MB
- Tags: CPython 3.10+, macOS 11.0+ ARM64
- Uploaded using Trusted Publishing? No
- Uploaded via: maturin/1.6.0
File hashes
Algorithm | Hash digest
---|---
SHA256 | b1bc351318abf0706e77e25ab009aa8a71318ff0c1886571a7fcf8f3ae6cfbcb
MD5 | 362ddbae8a7f711789e4e356c71d2680
BLAKE2b-256 | 222ac8dd4d13edd4efe5228c30c985d02b58e2410dec28a20b4d94510e124a7a