No project description provided
Project description
Grimbert
Speaker attribution in novels. Based on the older bert-quote-attribution project.
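Documentation
The example below loads the pretrained model and tokenizer, builds a one-document dataset containing a single quote and two candidate speaker mentions, and runs speaker prediction on it: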
from grimbert.model import SpeakerAttributionModel
from grimbert.predict import predict_speaker
from grimbert.datas import (
SpeakerAttributionDataset,
SpeakerAttributionDocument,
SpeakerAttributionQuote,
SpeakerAttributionMention
)
from transformers import BertTokenizerFast
# Load the speaker-attribution model and its tokenizer from the same
# pretrained checkpoint so that both agree on the vocabulary.
model = SpeakerAttributionModel.from_pretrained(
    "compnet-renard/spanbert-base-cased-literary-speaker-attribution"
)
tokenizer = BertTokenizerFast.from_pretrained(
    "compnet-renard/spanbert-base-cased-literary-speaker-attribution"
)

# Pre-tokenized example text, split on single spaces:
#   index: 0  1     2   3         4  5  6     7     8   9    10
#   token: "  This  is  horrible  "  ,  John  said  to  Max  .
tokens = '" This is horrible " , John said to Max .'.split(" ")

# Every span below is used as a half-open [start, end) slice of ``tokens``.
# The quote runs from the opening quotation mark (index 0) through the
# closing one (index 4), so its exclusive end is 5; an end of 4 would drop
# the closing quotation mark from the quote tokens.
quote_start = 0
quote_end = 5

# Candidate speaker mentions: "John" is token 6 and "Max" is token 9.
john_mention_start = 6
john_mention_end = 7
max_mention_start = 9
max_mention_end = 10

# A dataset holding one document: its tokens, one quote, and two mentions.
# NOTE(review): the trailing string of the quote ("John") is presumably the
# reference speaker label, and the trailing string of each mention the name
# of the character it refers to -- confirm against the grimbert.datas classes.
dataset = SpeakerAttributionDataset(
    [
        SpeakerAttributionDocument(
            tokens,
            [
                SpeakerAttributionQuote(
                    tokens[quote_start:quote_end],
                    quote_start,
                    quote_end,
                    "John",
                ),
            ],
            [
                SpeakerAttributionMention(
                    tokens[john_mention_start:john_mention_end],
                    john_mention_start,
                    john_mention_end,
                    "John",
                ),
                SpeakerAttributionMention(
                    tokens[max_mention_start:max_mention_end],
                    max_mention_start,
                    max_mention_end,
                    "Max",
                ),
            ],
        ),
    ],
    quote_ctx_len=512,
    speaker_repr_nb=4,
    tokenizer=tokenizer,
)

# Run speaker prediction over the dataset with a batch size of 4.
preds = predict_speaker(dataset, model, tokenizer, batch_size=4)
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
grimbert-0.1.1.tar.gz
(23.4 kB
view hashes)
Built Distribution
grimbert-0.1.1-py3-none-any.whl
(25.2 kB
view hashes)