No project description provided
Project description
kplot
kplot is a Python package that provides plotting helpers for use with seaborn and matplotlib. The sections below introduce the package and show quick-start examples from each exported module.
Installation
pip install -U python-kplot
Quick start
The examples below follow the notebooks under nbs/ in order. Each
function example lives in its own cell and starts with a short comment
derived from the function docstring.
01 utils
from kplot.utils import set_sns, save_svg, save_pdf, save_show, get_color_dict, get_plt_color, get_hue_big, add_stats
import seaborn as sns
from matplotlib import pyplot as plt
# Set up the objects used by the examples below.
df = sns.load_dataset('tips')
df.shape
(244, 7)
# Set seaborn defaults for notebook display and saved figures.
set_sns(dpi=50)
# Save the current matplotlib figure as SVG with editable text.
plt.figure()
plt.plot([0, 1], [0, 1])
# save_svg(Path('nbs') / '_tmp_utils.svg')
# Save the current matplotlib figure as PDF with TrueType fonts.
plt.figure()
plt.plot([0, 1], [1, 0])
# save_pdf(Path('nbs') / '_tmp_utils.pdf')
# Show the current figure or save it, then close open figures.
plt.figure()
plt.plot([0, 1], [0.5, 0.5])
# save_show(path=Path('nbs') / '_tmp_utils_show.png')
# Assign colors to labels while tolerating duplicate category names.
get_color_dict(['A', 'B', 'C'], palette='Set2')
{'A': (0.4, 0.7607843137254902, 0.6470588235294118),
'B': (0.9882352941176471, 0.5529411764705883, 0.3843137254901961),
'C': (0.5529411764705883, 0.6274509803921569, 0.796078431372549)}
# Return colors in plotting order for a dict, list, or named palette.
get_plt_color('Set2', ['a', 'b'])
# Filter a hue column down to categories that meet a count threshold.
# get_hue_big(df, 'day', cnt_thr=40).tolist()
# If `value` is a str: compare between groups (x=group, y=value). If `value` is a list/tuple: compare among values within each group (x=group, hue='variable').
fig, ax = plt.subplots(figsize=(5, 4))
sns.boxplot(data=df, x='sex', y='total_bill', ax=ax)
add_stats(ax, df, value='total_bill', group='sex')
02 scatter
from kplot.scatter import reduce_feature, plot_2d, plot_cluster, plot_rel
import seaborn as sns
# Set up the objects used by the examples below.
df = sns.load_dataset('penguins').dropna().reset_index(drop=True)
df2 = df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
print(df.shape)
print(df2.shape)
(333, 7)
(333, 4)
# Reduce a feature matrix to a lower-dimensional embedding dataframe.
reduce_feature(df2, method='pca', n=2)
| | PCA1 | PCA2 |
|---|---|---|
| 0 | -457.325073 | -13.351587 |
| 1 | -407.252205 | -9.179113 |
| 2 | -957.044676 | 8.160444 |
| 3 | -757.115802 | 1.867653 |
| 4 | -557.177302 | -3.389158 |
| ... | ... | ... |
| 328 | 718.068699 | 2.338199 |
| 329 | 643.090909 | 4.280699 |
| 330 | 1543.098355 | -2.232010 |
| 331 | 992.994900 | -4.605154 |
| 332 | 1193.002584 | -5.417312 |
333 rows × 2 columns
# Plot the first two columns of an embedding dataframe.
df2 = reduce_feature(df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']], method='pca', n=2)
df2['species'] = df['species'].values
plot_2d(df2, hue='species', legend=True)
# Reduce features and immediately plot the first two embedding dimensions.
plot_cluster(df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'species']], method='pca', hue='species', legend=True)
# Plot a pairwise relationship with an optional correlation annotation.
df2 = df[['bill_length_mm', 'flipper_length_mm', 'species']].head(12).copy()
df2.index = [f'pt{i}' for i in range(len(df2))]
plot_rel(df2, x='bill_length_mm', y='flipper_length_mm', hue='species', index_list=['pt0', 'pt11'])
03 bar
from kplot.bar import plot_hist, plot_count, plot_bar, plot_group_bar, plot_stacked, plot_violin, plot_box, plot_pie, plot_cnt, calculate_pct, plot_composition
import seaborn as sns
# Set up the objects used by the examples below.
df = sns.load_dataset('tips').dropna()
df.shape
(244, 7)
# Plot a histogram with a KDE overlay and polygon bins.
plot_hist(df, 'total_bill')
# Plot horizontal counts from a value-count series.
plot_count(df['day'].value_counts())
# Plot a bar chart from an unstacked dataframe.
plot_bar(df, value='total_bill', group='day')
# Plot grouped bars after melting multiple value columns.
plot_group_bar(df, value_cols=['total_bill', 'tip'], group='day')
# Plot stacked counts for a categorical column.
plot_stacked(df, group='day', hue='sex')
# Plot violin plots with optional strip dots.
df2 = df[['time', 'total_bill']].rename(columns={'time': 'variable', 'total_bill': 'value'})
plot_violin(df2)
# Plot a box plot ordered by the group median.
plot_box(df, value='total_bill', group='day')
# Plot a pie chart from a value-count series.
plot_pie(df['day'].value_counts())
# Plot vertical counts with labels above the bars.
plot_cnt(df['day'].value_counts())
# Calculate within-bin percentages for a stacked composition chart.
df2 = sns.load_dataset('titanic').dropna(subset=['class', 'sex']).reset_index(drop=True)
calculate_pct(df2, 'class', 'sex')
| sex | female | male |
|---|---|---|
| class | | |
| First | 43.518519 | 56.481481 |
| Second | 41.304348 | 58.695652 |
| Third | 29.327902 | 70.672098 |
# Plot stacked percentages for a bin-by-category composition.
plot_composition(df2, 'class', 'sex')
04 heatmap
from kplot.heatmap import get_similarity, plot_corr, plot_confusion_matrix
import seaborn as sns
# Set up the objects used by the examples below.
df = sns.load_dataset('titanic').dropna(subset=['age', 'fare', 'class', 'sex', 'survived']).reset_index(drop=True)
df2 = df[['age', 'fare', 'sibsp', 'parch']].head(8).copy()
df2.index = [f'row_{i}' for i in range(len(df2))]
print(df.shape)
print(df2.shape)
(714, 15)
(8, 4)
# Calculate both distance and similarity matrices for a dataframe.
get_similarity(df2)[0]
| | row_0 | row_1 | row_2 | row_3 | row_4 | row_5 | row_6 | row_7 |
|---|---|---|---|---|---|---|---|---|
| row_0 | 0.000000 | 66.001996 | 4.177993 | 47.657345 | 13.062925 | 54.911521 | 24.415786 | 6.714166 |
| row_1 | 66.001996 | 0.000000 | 64.492435 | 18.429118 | 63.312323 | 25.182682 | 61.821302 | 61.188418 |
| row_2 | 4.177993 | 64.492435 | 0.000000 | 46.073643 | 9.000868 | 52.100901 | 27.548548 | 3.910651 |
| row_3 | 47.657345 | 18.429118 | 46.073643 | 0.000000 | 45.061097 | 19.066500 | 46.039121 | 42.780883 |
| row_4 | 13.062925 | 63.312323 | 9.000868 | 45.061097 | 0.000000 | 47.754949 | 35.618122 | 8.803791 |
| row_5 | 54.911521 | 25.182682 | 52.100901 | 19.066500 | 47.754949 | 0.000000 | 60.513388 | 48.906725 |
| row_6 | 24.415786 | 61.821302 | 27.548548 | 46.039121 | 35.618122 | 60.513388 | 0.000000 | 27.089433 |
| row_7 | 6.714166 | 61.188418 | 3.910651 | 42.780883 | 8.803791 | 48.906725 | 27.089433 | 0.000000 |
# Plot a square matrix with an optional triangular mask.
plot_corr(df[['age', 'fare', 'sibsp', 'parch']].corr(numeric_only=True))
# Plot a confusion matrix from target and prediction arrays.
plot_confusion_matrix(df['survived'], df['adult_male'], class_names=['False', 'True'], normalize=True)
Normalized confusion matrix
05 metrics
from kplot.metrics import plot_rank, get_AUCDF
import seaborn as sns
# Set up the objects used by the examples below.
df = sns.load_dataset('tips').dropna().sort_values('total_bill').reset_index(drop=True)
df.shape
(244, 7)
# Plot a ranked scatter and annotate the highest and lowest entries.
plot_rank(df, x='day', y='total_bill', n_hi=1, n_lo=1)
# Compute the normalized area under an empirical CDF over rank values.
get_AUCDF(df, 'total_bill', plot=False)
0.6519265042202643
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file python_kplot-0.0.2.tar.gz.
File metadata
- Download URL: python_kplot-0.0.2.tar.gz
- Upload date:
- Size: 25.8 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.12.12
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | caceee29b03699eb54cfa2d4e9d1db9fe7a9cb548bd01d42b477eeb349249a7d |
| MD5 | 39d8e2c182b7400d6f73704abf1c0321 |
| BLAKE2b-256 | 3d70d8f676f45f9cc51ee2fbf840456cea6e3fedbcf925f6b1b4e36d4fe068fd |
File details
Details for the file python_kplot-0.0.2-py3-none-any.whl.
File metadata
- Download URL: python_kplot-0.0.2-py3-none-any.whl
- Upload date:
- Size: 27.8 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.12.12
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 327b1d7e3ddd2150ab4c613175a1327178fd2f67e635b9794bf59e4a8cee9d6e |
| MD5 | 4f20f0429ed1730571ccbdc352907cac |
| BLAKE2b-256 | 4f57889d4c73c907acc4ba26e602c9296d9bcd1e18902b8ddb5a958906cc71cb |