No project description provided
Project description
kplot
kplot is an nbdev package that bundles plotting helpers in five groups:
general plotting helpers, save utilities, and palette/color tools;
dimensionality-reduction, scatter, and correlation plots; categorical,
count, distribution, and composition plots; similarity, matrix, and
confusion plots (split from 08_all.ipynb); and ranking and AUCDF
helpers.
Installation
pip install -U python-kplot
Quick start
The examples below follow the notebooks under nbs/ in order. Each
function example lives in its own cell and starts with a short comment
derived from the function docstring.
01 utils
import seaborn as sns
from matplotlib import pyplot as plt
from kplot.utils import set_sns, save_svg, save_pdf, save_show, get_color_dict, get_plt_color, get_hue_big, add_stats
# Load the sample data used by the examples below.
df = sns.load_dataset('tips')
df.shape
(244, 7)
# set_sns: Set seaborn defaults for notebook display and saved figures.
set_sns(dpi=50)
# save_svg: Save the current matplotlib figure as SVG with editable text.
plt.figure()
plt.plot([0, 1], [0, 1])
# save_svg(Path('nbs') / '_tmp_utils.svg')
# save_pdf: Save the current matplotlib figure as PDF with TrueType fonts.
plt.figure()
plt.plot([0, 1], [1, 0])
# save_pdf(Path('nbs') / '_tmp_utils.pdf')
# save_show: Show the current figure or save it, then close open figures.
plt.figure()
plt.plot([0, 1], [0.5, 0.5])
# save_show(path=Path('nbs') / '_tmp_utils_show.png')
# get_color_dict: Assign colors to labels while tolerating duplicate category names.
get_color_dict(['A', 'B', 'C'], palette='Set2')
{'A': (0.4, 0.7607843137254902, 0.6470588235294118),
'B': (0.9882352941176471, 0.5529411764705883, 0.3843137254901961),
'C': (0.5529411764705883, 0.6274509803921569, 0.796078431372549)}
# get_plt_color: Return colors in plotting order for a dict, list, or named palette.
get_plt_color('Set2', ['a', 'b'])
# get_hue_big: Filter a hue column down to categories that meet a count threshold.
# get_hue_big(df, 'day', cnt_thr=40).tolist()
# add_stats: If `value` is a str, compare between groups (x=group, y=value); if `value` is a list/tuple, compare among values within each group (x=group, hue='variable').
fig, ax = plt.subplots(figsize=(5, 4))
sns.boxplot(data=df, x='sex', y='total_bill', ax=ax)
add_stats(ax, df, value='total_bill', group='sex')
02 scatter
import seaborn as sns
from kplot.scatter import reduce_feature, plot_2d, plot_cluster, plot_rel
# Load the sample data used by the examples below.
df = sns.load_dataset('penguins').dropna().reset_index(drop=True)
df2 = df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
print(df.shape)
print(df2.shape)
(333, 7)
(333, 4)
# reduce_feature: Reduce a feature matrix to a lower-dimensional embedding dataframe.
reduce_feature(df2, method='pca', n=2)
| | PCA1 | PCA2 |
|---|---|---|
| 0 | -457.325073 | -13.351587 |
| 1 | -407.252205 | -9.179113 |
| 2 | -957.044676 | 8.160444 |
| 3 | -757.115802 | 1.867653 |
| 4 | -557.177302 | -3.389158 |
| ... | ... | ... |
| 328 | 718.068699 | 2.338199 |
| 329 | 643.090909 | 4.280699 |
| 330 | 1543.098355 | -2.232010 |
| 331 | 992.994900 | -4.605154 |
| 332 | 1193.002584 | -5.417312 |
333 rows × 2 columns
# plot_2d: Plot the first two columns of an embedding dataframe.
df2 = reduce_feature(df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']], method='pca', n=2)
df2['species'] = df['species'].values
plot_2d(df2, hue='species', legend=True)
# plot_cluster: Reduce features and immediately plot the first two embedding dimensions.
plot_cluster(df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'species']], method='pca', hue='species', legend=True)
# plot_rel: Plot a pairwise relationship with an optional correlation annotation.
df2 = df[['bill_length_mm', 'flipper_length_mm', 'species']].head(12).copy()
df2.index = [f'pt{i}' for i in range(len(df2))]
plot_rel(df2, x='bill_length_mm', y='flipper_length_mm', hue='species', index_list=['pt0', 'pt11'])
03 bar
import seaborn as sns
from kplot.bar import plot_hist, plot_count, plot_bar, plot_group_bar, plot_stacked, plot_violin, plot_box, plot_pie, plot_cnt, calculate_pct, plot_composition
# Load the sample data used by the examples below.
df = sns.load_dataset('tips').dropna()
df.shape
(244, 7)
# plot_hist: Plot a histogram with a KDE overlay and polygon bins.
plot_hist(df, 'total_bill')
# plot_count: Plot horizontal counts from a value-count series.
plot_count(df['day'].value_counts())
# plot_bar: Plot a bar chart from an unstacked dataframe.
plot_bar(df, value='total_bill', group='day')
# plot_group_bar: Plot grouped bars after melting multiple value columns.
plot_group_bar(df, value_cols=['total_bill', 'tip'], group='day')
# plot_stacked: Plot stacked counts for a categorical column.
plot_stacked(df, group='day', hue='sex')
# plot_violin: Plot violin plots with optional strip dots.
df2 = df[['time', 'total_bill']].rename(columns={'time': 'variable', 'total_bill': 'value'})
plot_violin(df2)
# plot_box: Plot a box plot ordered by the group median.
plot_box(df, value='total_bill', group='day')
# plot_pie: Plot a pie chart from a value-count series.
plot_pie(df['day'].value_counts())
# plot_cnt: Plot vertical counts with labels above the bars.
plot_cnt(df['day'].value_counts())
# calculate_pct: Calculate within-bin percentages for a stacked composition chart.
df2 = sns.load_dataset('titanic').dropna(subset=['class', 'sex']).reset_index(drop=True)
calculate_pct(df2, 'class', 'sex')
| sex | female | male |
|---|---|---|
| class | | |
| First | 43.518519 | 56.481481 |
| Second | 41.304348 | 58.695652 |
| Third | 29.327902 | 70.672098 |
# plot_composition: Plot stacked percentages for a bin-by-category composition.
plot_composition(df2, 'class', 'sex')
04 heatmap
import seaborn as sns
from kplot.heatmap import get_similarity, plot_corr, plot_confusion_matrix
# Load the sample data used by the examples below.
df = sns.load_dataset('titanic').dropna(subset=['age', 'fare', 'class', 'sex', 'survived']).reset_index(drop=True)
df2 = df[['age', 'fare', 'sibsp', 'parch']].head(8).copy()
df2.index = [f'row_{i}' for i in range(len(df2))]
print(df.shape)
print(df2.shape)
(714, 15)
(8, 4)
# get_similarity: Calculate both distance and similarity matrices for a dataframe.
get_similarity(df2)[0]
| | row_0 | row_1 | row_2 | row_3 | row_4 | row_5 | row_6 | row_7 |
|---|---|---|---|---|---|---|---|---|
| row_0 | 0.000000 | 66.001996 | 4.177993 | 47.657345 | 13.062925 | 54.911521 | 24.415786 | 6.714166 |
| row_1 | 66.001996 | 0.000000 | 64.492435 | 18.429118 | 63.312323 | 25.182682 | 61.821302 | 61.188418 |
| row_2 | 4.177993 | 64.492435 | 0.000000 | 46.073643 | 9.000868 | 52.100901 | 27.548548 | 3.910651 |
| row_3 | 47.657345 | 18.429118 | 46.073643 | 0.000000 | 45.061097 | 19.066500 | 46.039121 | 42.780883 |
| row_4 | 13.062925 | 63.312323 | 9.000868 | 45.061097 | 0.000000 | 47.754949 | 35.618122 | 8.803791 |
| row_5 | 54.911521 | 25.182682 | 52.100901 | 19.066500 | 47.754949 | 0.000000 | 60.513388 | 48.906725 |
| row_6 | 24.415786 | 61.821302 | 27.548548 | 46.039121 | 35.618122 | 60.513388 | 0.000000 | 27.089433 |
| row_7 | 6.714166 | 61.188418 | 3.910651 | 42.780883 | 8.803791 | 48.906725 | 27.089433 | 0.000000 |
# plot_corr: Plot a square matrix with an optional triangular mask.
plot_corr(df[['age', 'fare', 'sibsp', 'parch']].corr(numeric_only=True))
# plot_confusion_matrix: Plot a confusion matrix from target and prediction arrays.
plot_confusion_matrix(df['survived'], df['adult_male'], class_names=['False', 'True'], normalize=True)
Normalized confusion matrix
05 metrics
import seaborn as sns
from kplot.metrics import plot_rank, get_AUCDF
# Load the sample data used by the examples below.
df = sns.load_dataset('tips').dropna().sort_values('total_bill').reset_index(drop=True)
df.shape
(244, 7)
# plot_rank: Plot a ranked scatter and annotate the highest and lowest entries.
plot_rank(df, x='day', y='total_bill', n_hi=1, n_lo=1)
# get_AUCDF: Compute the normalized area under an empirical CDF over rank values.
get_AUCDF(df, 'total_bill', plot=False)
0.6519265042202643
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file python_kplot-0.0.1.tar.gz.
File metadata
- Download URL: python_kplot-0.0.1.tar.gz
- Upload date:
- Size: 26.1 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.12.12
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 32346e9c70132097867c9e09614f6675e9bfcd9f6335157bd8f2f02b57f4bcb3 |
| MD5 | 69338d499f6630c8beab14c6d075610b |
| BLAKE2b-256 | c89994dc3195bd156ee3a0ca64054cf6f194bed60b7dacb1e31fab21dc872b60 |
File details
Details for the file python_kplot-0.0.1-py3-none-any.whl.
File metadata
- Download URL: python_kplot-0.0.1-py3-none-any.whl
- Upload date:
- Size: 28.0 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.12.12
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | c43bb111a1f4540796619180a58e9486ac0dbd27675b8ff728c14d7784d7bf2b |
| MD5 | d5cec4eb5e09f686093a00ee3bc40270 |
| BLAKE2b-256 | 162f97a4b19f69c5a82ac77b81ded17833c178acf02a0308f2a290123b52697d |