A fresh nbdev project for kmodel.
Project description
kmodel
kmodel provides reusable machine learning and deep learning helpers for multi-output modeling workflows. It covers tabular model training, scoring, post-processing, prediction, and fastai-based deep learning utilities through runnable examples derived from the project notebooks.
Installation
pip install kmodel
Quick start
The examples below follow the notebooks under nbs/ in order. Each
function example lives in its own cell and starts with a short comment
derived from the function docstring.
01 ML
from kmodel.ml import get_splits, split_data, train_ml, train_ml_cv, post_process, post_process_oof, predict_ml
from pathlib import Path
import pandas as pd
from sklearn.linear_model import LinearRegression
import seaborn as sns
df = sns.load_dataset("penguins").dropna(
subset=["bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g", "species"]
).reset_index(drop=True)
feat_col = ["bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"]
target_df = pd.get_dummies(df["species"], prefix="species", dtype=float)
target_col = target_df.columns.tolist()
df[target_col] = target_df
df.shape
(342, 10)
# Split samples in a dataframe with stratified, grouped, or stratified-grouped K-fold logic.
splits = get_splits(df, stratified="species", nfold=3)
split0 = splits[0]
len(split0[0]), len(split0[1])
StratifiedKFold(n_splits=3, random_state=123, shuffle=True)
# species in train set: 3
# species in test set: 3
(228, 114)
# Given a split tuple, return X_train, y_train, X_test, and y_test.
X_train, y_train, X_test, y_test = split_data(df, feat_col, target_col, split0)
X_train.shape, y_train.shape, X_test.shape, y_test.shape
((228, 4), (228, 3), (114, 4), (114, 3))
# Fit and predict with a sklearn model, returning validation targets and predictions.
model = LinearRegression()
target, pred = train_ml(df, feat_col, target_col, split0, model)
pred.head()
| | species_Adelie | species_Chinstrap | species_Gentoo |
|---|---|---|---|
| 0 | 0.993427 | 0.137000 | -0.130427 |
| 3 | 1.064457 | 0.046586 | -0.111043 |
| 9 | 0.839056 | 0.118838 | 0.042105 |
| 11 | 0.669557 | 0.423417 | -0.092974 |
| 14 | 1.050863 | -0.073914 | 0.023052 |
# Run cross-validation through the given splits.
oof = train_ml_cv(df, feat_col, target_col, splits=splits, model=LinearRegression())
oof.head()
| | species_Adelie | species_Chinstrap | species_Gentoo | nfold |
|---|---|---|---|---|
| 0 | 0.993427 | 0.137000 | -0.130427 | 0 |
| 1 | 0.790344 | 0.103762 | 0.105894 | 1 |
| 2 | 0.673088 | 0.317647 | 0.009265 | 2 |
| 3 | 1.064457 | 0.046586 | -0.111043 | 0 |
| 4 | 1.122991 | 0.154406 | -0.277398 | 1 |
# Clip negatives and renormalize probability-like predictions.
post_process(pred.head())
| | species_Adelie | species_Chinstrap | species_Gentoo |
|---|---|---|---|
| 0 | 0.878807 | 1.211930e-01 | 8.846216e-09 |
| 3 | 0.958070 | 4.192990e-02 | 9.000554e-09 |
| 9 | 0.839056 | 1.188384e-01 | 4.210543e-02 |
| 11 | 0.612601 | 3.873988e-01 | 9.149350e-09 |
| 14 | 0.978535 | 9.311731e-09 | 2.146502e-02 |
# Post-process prediction columns in an out-of-fold dataframe.
oof = post_process_oof(oof, target_col)
oof[target_col].head()
| | species_Adelie | species_Chinstrap | species_Gentoo |
|---|---|---|---|
| 0 | 0.878807 | 0.121193 | 8.846216e-09 |
| 1 | 0.790344 | 0.103762 | 1.058942e-01 |
| 2 | 0.673088 | 0.317647 | 9.264531e-03 |
| 3 | 0.958070 | 0.041930 | 9.000554e-09 |
| 4 | 0.879124 | 0.120876 | 7.828416e-09 |
# Predict from a saved sklearn model.
model_path = Path("_tmp/penguins_ml.joblib")
model_path.parent.mkdir(parents=True, exist_ok=True)
_ = train_ml(df, feat_col, target_col, split0, LinearRegression(), save=model_path)
predict_ml(df.iloc[split0[1]], feat_col, target_col, model_pth=model_path).head()
| | species_Adelie | species_Chinstrap | species_Gentoo |
|---|---|---|---|
| 0 | 0.993427 | 0.137000 | -0.130427 |
| 3 | 1.064457 | 0.046586 | -0.111043 |
| 9 | 0.839056 | 0.118838 | 0.042105 |
| 11 | 0.669557 | 0.423417 | -0.092974 |
| 14 | 1.050863 | -0.073914 | 0.023052 |
02 DNN
from kmodel.dnn import seed_everything, GeneralDataset, MLP, lin_wn, CNN1D, PSSM_model, init_weights, CE, KLD, JSD, train_dl, predict_dl, train_dl_cv
import fastcore.all as fc
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from fastai.vision.all import *
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader
# Set up the objects used by the examples below.
seed_everything(123)
df = sns.load_dataset("penguins").dropna(
subset=["bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g", "species"]
).reset_index(drop=True)
feat_col = ["bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"]
target_df = pd.get_dummies(df["species"], prefix="species", dtype=float)
target_col = target_df.columns.tolist()
df[target_col] = target_df
n_feature = len(feat_col)
n_target = len(target_col)
n_aa = len(target_col)
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=123)
splits = list(skf.split(df.index, df["species"]))
split0 = splits[0]
ds = GeneralDataset(df, feat_col, target_col, A=n_aa)
xb, yb = next(iter(DataLoader(ds, batch_size=8, shuffle=True)))
logits = PSSM_model(n_feature, n_target, A=n_aa, model="MLP")(xb)
df.shape
(342, 10)
# Feed-forward model for tabular inputs.
mlp = MLP(n_feature, n_target)
mlp(xb).shape
torch.Size([8, 3])
# Weight-normalized linear block.
lin_wn(10, 3)
Sequential(
(0): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Dropout(p=0.1, inplace=False)
(2): ParametrizedLinear(
in_features=10, out_features=3, bias=True
(parametrizations): ModuleDict(
(weight): ParametrizationList(
(0): _WeightNorm()
)
)
)
(3): SiLU()
)
# Initialize convolution layers with Kaiming normal weights.
cnn = CNN1D(n_feature, n_target).apply(init_weights)
cnn(xb).shape
torch.Size([8, 3])
# Cross-entropy with soft labels.
CE(logits, yb)
tensor(1.0681, grad_fn=<MeanBackward0>)
# Average KL divergence across positions between target_probs and softmax(logits).
KLD(logits, yb)
tensor(1.0681, grad_fn=<MeanBackward0>)
# Average Jensen-Shannon divergence across positions between target_probs and softmax(logits).
JSD(logits, yb)
tensor(0.3023, grad_fn=<MeanBackward0>)
# Train a deep learning model with the fastai learner stack.
get_mlp = lambda: PSSM_model(n_feature, n_target, A=n_aa, model='MLP')
target, pred = train_dl(
df,
feat_col,
target_col,
split0,
model_func=get_mlp,
A=n_aa,
n_epoch=1,
bs=16,
lr=3e-3,
save='model',
)
pred.head()
lr in training is 0.003
<style>
progress { appearance: none; border: none; border-radius: 4px; width: 300px;
height: 20px; vertical-align: middle; background: #e0e0e0; }
progress::-webkit-progress-bar { background: #e0e0e0; border-radius: 4px; }
progress::-webkit-progress-value { background: #2196F3; border-radius: 4px; }
progress::-moz-progress-bar { background: #2196F3; border-radius: 4px; }
progress:not([value]) {
background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px); }
progress.progress-bar-interrupted::-webkit-progress-value { background: #F44336; }
progress.progress-bar-interrupted::-moz-progress-value { background: #F44336; }
progress.progress-bar-interrupted::-webkit-progress-bar { background: #F44336; }
progress.progress-bar-interrupted::-moz-progress-bar { background: #F44336; }
progress.progress-bar-interrupted { background: #F44336; }
table.fastprogress { border-collapse: collapse; margin: 1em 0; font-size: 0.9em; }
table.fastprogress th, table.fastprogress td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; }
table.fastprogress thead tr { background: #f8f9fa; font-weight: bold; }
table.fastprogress tbody tr:nth-of-type(even) { background: #f8f9fa; }
</style>
<div>
<table class="fastprogress">
<thead>
<tr>
<th>epoch</th>
<th>train_loss</th>
<th>valid_loss</th>
<th>KLD</th>
<th>JSD</th>
<th>time</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>0.740750</td>
<td>3.037915</td>
<td>3.037915</td>
<td>0.376177</td>
<td>00:00</td>
</tr>
</tbody>
</table>
</div>
| | species_Adelie | species_Chinstrap | species_Gentoo |
|---|---|---|---|
| 0 | 0.009204 | 0.009344 | 0.981452 |
| 3 | 0.063467 | 0.055438 | 0.881095 |
| 9 | 0.182922 | 0.156108 | 0.660971 |
| 11 | 0.294663 | 0.286624 | 0.418712 |
| 14 | 0.011959 | 0.011384 | 0.976657 |
# Predict a dataframe given a deep learning model saved by fastai.
test_pred = predict_dl(
df.iloc[split0[1]].copy(),
feat_col,
target_col,
model_func=get_mlp,
model_pth='model',
A=n_aa,
)
test_pred
| | species_Adelie | species_Chinstrap | species_Gentoo |
|---|---|---|---|
| 0 | 9.204363e-03 | 9.344031e-03 | 0.981452 |
| 3 | 6.346702e-02 | 5.543802e-02 | 0.881095 |
| 9 | 1.829216e-01 | 1.561077e-01 | 0.660971 |
| 11 | 2.946635e-01 | 2.866240e-01 | 0.418712 |
| 14 | 1.195943e-02 | 1.138383e-02 | 0.976657 |
| ... | ... | ... | ... |
| 328 | 5.574878e-09 | 1.360372e-08 | 1.000000 |
| 334 | 2.227252e-10 | 7.630787e-10 | 1.000000 |
| 335 | 5.731530e-07 | 1.071595e-06 | 0.999998 |
| 339 | 5.872652e-10 | 1.706496e-09 | 1.000000 |
| 340 | 5.236147e-08 | 1.102807e-07 | 1.000000 |
114 rows × 3 columns
# Cross-validation training loop for deep learning models.
oof = train_dl_cv(
df,
feat_col,
target_col,
splits=splits,
model_func=get_mlp,
A=n_aa,
n_epoch=1,
bs=16,
lr=3e-3,
)
oof.nfold.value_counts()
------fold0------
lr in training is 0.003
<div>
<table class="fastprogress">
<thead>
<tr>
<th>epoch</th>
<th>train_loss</th>
<th>valid_loss</th>
<th>KLD</th>
<th>JSD</th>
<th>time</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>0.713906</td>
<td>2.601878</td>
<td>2.601878</td>
<td>0.355676</td>
<td>00:00</td>
</tr>
</tbody>
</table>
</div>
------fold1------
lr in training is 0.003
<div>
<table class="fastprogress">
<thead>
<tr>
<th>epoch</th>
<th>train_loss</th>
<th>valid_loss</th>
<th>KLD</th>
<th>JSD</th>
<th>time</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>0.699992</td>
<td>2.666008</td>
<td>2.666008</td>
<td>0.354615</td>
<td>00:00</td>
</tr>
</tbody>
</table>
</div>
------fold2------
lr in training is 0.003
<div>
<table class="fastprogress">
<thead>
<tr>
<th>epoch</th>
<th>train_loss</th>
<th>valid_loss</th>
<th>KLD</th>
<th>JSD</th>
<th>time</th>
</tr>
</thead>
<tbody>
<tr>
<td>0</td>
<td>0.633100</td>
<td>2.994696</td>
<td>2.994696</td>
<td>0.354643</td>
<td>00:00</td>
</tr>
</tbody>
</table>
</div>
nfold
0 114
1 114
2 114
Name: count, dtype: int64
Project details
Release history | Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file kmodel-0.0.2.tar.gz.
File metadata
- Download URL: kmodel-0.0.2.tar.gz
- Upload date:
- Size: 18.6 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.12.12
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 0eca1c1f1ec36e5e2e97589aecef61fddc16bad70b1b889e3c7b90fa3d735152 |
| MD5 | 28cd687f19bc39a481b40cae7a6dc958 |
| BLAKE2b-256 | 60391f3c8a5b9d7c9796742b2684ac7a8b501bcfecc5a0268b7ea63b9b4644e0 |
File details
Details for the file kmodel-0.0.2-py3-none-any.whl.
File metadata
- Download URL: kmodel-0.0.2-py3-none-any.whl
- Upload date:
- Size: 16.5 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.12.12
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | d2ece9d73a4e60fa214a60a8e0217286d7c8e7b685ca30b600b30b405ffe447a |
| MD5 | 77dde94d9a70cc3a282721f3d706b026 |
| BLAKE2b-256 | 81894ae469ca5d41352a12e362fce87ed7d308ac3cb14b712f346838cfcb9c07 |