Package for Bulgarian Natural Language Processing (NLP)
Project description
bgnlp: Model-first approach to Bulgarian NLP
pip install bgnlp
Package functionalities
Part-of-speech (PoS) tagging
from bgnlp import PosTagger, PosTaggerConfig
config = PosTaggerConfig()
pos = PosTagger(config=config)
print(pos("Това е библиотека за обработка на естествен език."))
[{
"word": "Това",
"tag": "PDOsn",
"bg_desc": "местоимение",
"en_desc": "pronoun"
}, {
"word": "е",
"tag": "VLINr3s",
"bg_desc": "глагол",
"en_desc": "verb"
}, {
"word": "библиотека",
"tag": "NCFsof",
"bg_desc": "съществително име",
"en_desc": "noun"
}, {
"word": "за",
"tag": "R",
"bg_desc": "предлог",
"en_desc": "preposition"
}, {
"word": "обработка",
"tag": "NCFsof",
"bg_desc": "съществително име",
"en_desc": "noun"
}, {
"word": "на",
"tag": "R",
"bg_desc": "предлог",
"en_desc": "preposition"
}, {
"word": "естествен",
"tag": "Asmo",
"bg_desc": "прилагателно име",
"en_desc": "adjective"
}, {
"word": "език",
"tag": "NCMsom",
"bg_desc": "съществително име",
"en_desc": "noun"
}, {
"word": ".",
"tag": "U",
"bg_desc": "препинателен знак",
"en_desc": "punctuation"
}]
Lemmatization
from bgnlp import LemmaTaggerConfig, LemmaTagger
lemma = LemmaTagger(config=LemmaTaggerConfig())
text = "Добре дошли!"
print(lemma(text))
[{'word': 'Добре', 'lemma': 'Добре'}, {'word': 'дошли', 'lemma': 'дойда'}, {'word': '!', 'lemma': '!'}]
# Generating a string of lemmas.
print(lemma(text, as_string=True))
Добре дойда!
Named Entity Recognition (NER) tagging
Currently, the available NER tags are:
- PER - Person
- ORG - Organization
- LOC - Location
from bgnlp import NerTagger, NerTaggerConfig
ner = NerTagger(config=NerTaggerConfig())
text = "Барух Спиноза е роден в Амстердам"
print(f"Input: {text}")
print("Result:", ner(text))
Input: Барух Спиноза е роден в Амстердам
Result: [{'word': 'Барух Спиноза', 'entity_group': 'PER'}, {'word': 'Амстердам', 'entity_group': 'LOC'}]
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
bgnlp-0.0.13.tar.gz
(48.4 kB
view hashes)
Built Distribution
bgnlp-0.0.13-py3-none-any.whl
(47.7 kB
view hashes)