Model hub for transformers.
Project description
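Install from PyPI with ``pip install transformers-model`` (the distribution name matches the source archive listed under Download files below).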
Usage Sample
''''''''''''
.. code:: python

    import pandas as pd
    import torch
    from sklearn.model_selection import train_test_split
    from transformers import BertTokenizer

    from nlpx.tokenize.utils import get_df_text_labels
    from nlpx.dataset import TextDataset, text_collate
    from nlpx.model.wrapper import ClassifyModelWrapper
    from transformers_model import (
        AutoCNNTextClassifier,
        AutoCNNTokenClassifier,
        BertDataset,
        BertCollator,
        BertTokenizeCollator,
    )

    texts = ["text 1", "text 2", ...]  # list of raw text strings
    labels = [0, 0, 1, 2, 1, ...]      # one integer class id per text
    pretrained_path = "clue/albert_chinese_tiny"
    classes = ['class1', 'class2', 'class3', ...]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_texts, test_texts, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    ######################## AutoCNNTextClassifier classification ##########################
    # The datasets hold raw texts and labels; text_collate batches them for the model.
    train_set = TextDataset(train_texts, y_train)
    test_set = TextDataset(test_texts, y_test)
    model = AutoCNNTextClassifier(pretrained_path, len(classes), device)
    wrapper = ClassifyModelWrapper(model, classes, device)
    _ = wrapper.train(train_set, test_set, collate_fn=text_collate)

    ######################### AutoCNNTokenClassifier classification ##########################
    tokenizer = BertTokenizer.from_pretrained(pretrained_path)

    ###################################### BertCollator ######################################
    # Tokenize the whole corpus up front; BertCollator then only batches the
    # precomputed encodings.
    train_encodings = tokenizer.batch_encode_plus(
        train_texts,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    test_encodings = tokenizer.batch_encode_plus(
        test_texts,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    train_set = BertDataset(train_encodings, y_train)
    test_set = BertDataset(test_encodings, y_test)
    model = AutoCNNTokenClassifier(pretrained_path, len(classes), device)
    wrapper = ClassifyModelWrapper(model, classes, device)
    _ = wrapper.train(train_set, test_set, collate_fn=BertCollator())

    ################################ BertTokenizeCollator ################################
    # Same token classifier, but tokenization happens per batch inside the
    # collator, so the datasets hold raw texts again.
    train_set = TextDataset(train_texts, y_train)
    test_set = TextDataset(test_texts, y_test)
    model = AutoCNNTokenClassifier(pretrained_path, len(classes), device)
    wrapper = ClassifyModelWrapper(model, classes, device)
    _ = wrapper.train(train_set, test_set, collate_fn=BertTokenizeCollator(tokenizer, 256))
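
The three setups differ mainly in where tokenization happens: ``text_collate`` batches the raw texts used by ``AutoCNNTextClassifier``, ``BertCollator`` batches encodings precomputed with ``batch_encode_plus``, and ``BertTokenizeCollator`` tokenizes each batch on the fly, so the fully encoded corpus never has to sit in memory. As a rough illustration, the on-the-fly variant can be approximated with plain ``transformers`` and PyTorch; the tuple layout returned below is an assumption made for the sketch, not the package's documented contract.

.. code:: python

    import torch

    def tokenize_collate(batch, tokenizer, max_length=256):
        # batch is a list of (text, label) pairs, as a TextDataset-style
        # dataset would yield them.
        texts, labels = zip(*batch)
        encoding = tokenizer(
            list(texts),
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # Assumed output layout: model inputs plus a label tensor. The real
        # BertTokenizeCollator may return a different structure.
        return encoding["input_ids"], encoding["attention_mask"], torch.tensor(labels)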
Project details
Release history
Download files
Download the file for your platform.
Source Distribution
File details
Details for the file transformers-model-0.0.5.tar.gz.
File metadata
- Download URL: transformers-model-0.0.5.tar.gz
- Upload date:
- Size: 7.0 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/5.0.0 CPython/3.9.18
File hashes
| Algorithm   | Hash digest                                                      |
|-------------|------------------------------------------------------------------|
| SHA256      | e1e76b38f9c2b8c12960710116d74dc7f576d7ed82d7fd6d5543f807f71a298c |
| MD5         | 96beb70ff05508440bbbc6741417e244                                 |
| BLAKE2b-256 | f7b0f1ceb07416d20aa7e7cde11d8c1a738a818f3fe8f6cda42372f1f6707df6 |
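
To verify a downloaded archive against the published digest, a short ``hashlib`` check suffices (the filename is assumed to match the listing above):

.. code:: python

    import hashlib

    # Published SHA256 for transformers-model-0.0.5.tar.gz (see table above).
    EXPECTED = "e1e76b38f9c2b8c12960710116d74dc7f576d7ed82d7fd6d5543f807f71a298c"

    with open("transformers-model-0.0.5.tar.gz", "rb") as f:
        digest = hashlib.sha256(f.read()).hexdigest()

    print("OK" if digest == EXPECTED else "Mismatch: " + digest)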