Skip to main content

No project description provided

Project description

kplot

kplot is a Python package of plotting helpers. The sections below introduce the package and show quick-start examples for each exported module (utils, scatter, bar, heatmap, and metrics).

Installation

pip install -U python-kplot

Quick start

The examples below follow the notebooks under nbs/ in order. Each function example lives in its own cell and starts with a short comment derived from the function docstring.

01 utils

from kplot.utils import set_sns, save_svg, save_pdf, save_show, get_color_dict, get_plt_color, get_hue_big, add_stats
import seaborn as sns
from matplotlib import pyplot as plt

# Set up the objects used by the examples below.
df = sns.load_dataset('tips')
df.shape
(244, 7)
# Set seaborn defaults for notebook display and saved figures.
set_sns(dpi=50)
# Save the current matplotlib figure as SVG with editable text.
plt.figure()
plt.plot([0, 1], [0, 1])
# save_svg(Path('nbs') / '_tmp_utils.svg')

# Save the current matplotlib figure as PDF with TrueType fonts.
plt.figure()
plt.plot([0, 1], [1, 0])
# save_pdf(Path('nbs') / '_tmp_utils.pdf')

# Show the current figure or save it, then close open figures.
plt.figure()
plt.plot([0, 1], [0.5, 0.5])
# save_show(path=Path('nbs') / '_tmp_utils_show.png')

# Assign colors to labels while tolerating duplicate category names.
get_color_dict(['A', 'B', 'C'], palette='Set2')
{'A': (0.4, 0.7607843137254902, 0.6470588235294118),
 'B': (0.9882352941176471, 0.5529411764705883, 0.3843137254901961),
 'C': (0.5529411764705883, 0.6274509803921569, 0.796078431372549)}
# Return colors in plotting order for a dict, list, or named palette.
get_plt_color('Set2', ['a', 'b'])

# Filter a hue column down to categories that meet a count threshold.
# get_hue_big(df, 'day', cnt_thr=40).tolist()
# If `value` is a str, compare between groups (x=group, y=value); if `value` is a list/tuple, compare among values within each group (x=group, hue='variable').
fig, ax = plt.subplots(figsize=(5, 4))
sns.boxplot(data=df, x='sex', y='total_bill', ax=ax)
add_stats(ax, df, value='total_bill', group='sex')

02 scatter

from kplot.scatter import reduce_feature, plot_2d, plot_cluster, plot_rel
import seaborn as sns

# Set up the objects used by the examples below.
df = sns.load_dataset('penguins').dropna().reset_index(drop=True)
df2 = df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
print(df.shape)
print(df2.shape)
(333, 7)
(333, 4)
# Reduce a feature matrix to a lower-dimensional embedding dataframe.
reduce_feature(df2, method='pca', n=2)
PCA1 PCA2
0 -457.325073 -13.351587
1 -407.252205 -9.179113
2 -957.044676 8.160444
3 -757.115802 1.867653
4 -557.177302 -3.389158
... ... ...
328 718.068699 2.338199
329 643.090909 4.280699
330 1543.098355 -2.232010
331 992.994900 -4.605154
332 1193.002584 -5.417312

333 rows × 2 columns

# Plot the first two columns of an embedding dataframe.
df2 = reduce_feature(df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']], method='pca', n=2)
df2['species'] = df['species'].values
plot_2d(df2, hue='species', legend=True)

# Reduce features and immediately plot the first two embedding dimensions.
plot_cluster(df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'species']], method='pca', hue='species', legend=True)

# Plot a pairwise relationship with an optional correlation annotation.
df2 = df[['bill_length_mm', 'flipper_length_mm', 'species']].head(12).copy()
df2.index = [f'pt{i}' for i in range(len(df2))]
plot_rel(df2, x='bill_length_mm', y='flipper_length_mm', hue='species', index_list=['pt0', 'pt11'])

03 bar

from kplot.bar import plot_hist, plot_count, plot_bar, plot_group_bar, plot_stacked, plot_violin, plot_box, plot_pie, plot_cnt, calculate_pct, plot_composition
import seaborn as sns

# Set up the objects used by the examples below.
df = sns.load_dataset('tips').dropna()
df.shape
(244, 7)
# Plot a histogram with a KDE overlay and polygon bins.
plot_hist(df, 'total_bill')

# Plot horizontal counts from a value-count series.
plot_count(df['day'].value_counts())

# Plot a bar chart from an unstacked dataframe.
plot_bar(df, value='total_bill', group='day')

# Plot grouped bars after melting multiple value columns.
plot_group_bar(df, value_cols=['total_bill', 'tip'], group='day')

# Plot stacked counts for a categorical column.
plot_stacked(df, group='day', hue='sex')

# Plot violin plots with optional strip dots.
df2 = df[['time', 'total_bill']].rename(columns={'time': 'variable', 'total_bill': 'value'})
plot_violin(df2)

# Plot a box plot ordered by the group median.
plot_box(df, value='total_bill', group='day')

# Plot a pie chart from a value-count series.
plot_pie(df['day'].value_counts())

# Plot vertical counts with labels above the bars.
plot_cnt(df['day'].value_counts())

# Calculate within-bin percentages for a stacked composition chart.
df2 = sns.load_dataset('titanic').dropna(subset=['class', 'sex']).reset_index(drop=True)
calculate_pct(df2, 'class', 'sex')
sex female male
class
First 43.518519 56.481481
Second 41.304348 58.695652
Third 29.327902 70.672098
# Plot stacked percentages for a bin-by-category composition.
plot_composition(df2, 'class', 'sex')

04 heatmap

from kplot.heatmap import get_similarity, plot_corr, plot_confusion_matrix
import seaborn as sns

# Set up the objects used by the examples below.
df = sns.load_dataset('titanic').dropna(subset=['age', 'fare', 'class', 'sex', 'survived']).reset_index(drop=True)
df2 = df[['age', 'fare', 'sibsp', 'parch']].head(8).copy()
df2.index = [f'row_{i}' for i in range(len(df2))]
print(df.shape)
print(df2.shape)
(714, 15)
(8, 4)
# Calculate both distance and similarity matrices for a dataframe.
get_similarity(df2)[0]
row_0 row_1 row_2 row_3 row_4 row_5 row_6 row_7
row_0 0.000000 66.001996 4.177993 47.657345 13.062925 54.911521 24.415786 6.714166
row_1 66.001996 0.000000 64.492435 18.429118 63.312323 25.182682 61.821302 61.188418
row_2 4.177993 64.492435 0.000000 46.073643 9.000868 52.100901 27.548548 3.910651
row_3 47.657345 18.429118 46.073643 0.000000 45.061097 19.066500 46.039121 42.780883
row_4 13.062925 63.312323 9.000868 45.061097 0.000000 47.754949 35.618122 8.803791
row_5 54.911521 25.182682 52.100901 19.066500 47.754949 0.000000 60.513388 48.906725
row_6 24.415786 61.821302 27.548548 46.039121 35.618122 60.513388 0.000000 27.089433
row_7 6.714166 61.188418 3.910651 42.780883 8.803791 48.906725 27.089433 0.000000
# Plot a square matrix with an optional triangular mask.
plot_corr(df[['age', 'fare', 'sibsp', 'parch']].corr(numeric_only=True))

# Plot a confusion matrix from target and prediction arrays.
plot_confusion_matrix(df['survived'], df['adult_male'], class_names=['False', 'True'], normalize=True)
Normalized confusion matrix

05 metrics

from kplot.metrics import plot_rank, get_AUCDF
import seaborn as sns

# Set up the objects used by the examples below.
df = sns.load_dataset('tips').dropna().sort_values('total_bill').reset_index(drop=True)
df.shape
(244, 7)
# Plot a ranked scatter and annotate the highest and lowest entries.
plot_rank(df, x='day', y='total_bill', n_hi=1, n_lo=1)

# Compute the normalized area under an empirical CDF over rank values.
get_AUCDF(df, 'total_bill', plot=False)
0.6519265042202643

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

python_kplot-0.0.2.tar.gz (25.8 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

python_kplot-0.0.2-py3-none-any.whl (27.8 kB view details)

Uploaded Python 3

File details

Details for the file python_kplot-0.0.2.tar.gz.

File metadata

  • Download URL: python_kplot-0.0.2.tar.gz
  • Upload date:
  • Size: 25.8 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.2.0 CPython/3.12.12

File hashes

Hashes for python_kplot-0.0.2.tar.gz
Algorithm Hash digest
SHA256 caceee29b03699eb54cfa2d4e9d1db9fe7a9cb548bd01d42b477eeb349249a7d
MD5 39d8e2c182b7400d6f73704abf1c0321
BLAKE2b-256 3d70d8f676f45f9cc51ee2fbf840456cea6e3fedbcf925f6b1b4e36d4fe068fd

See more details on using hashes here.

File details

Details for the file python_kplot-0.0.2-py3-none-any.whl.

File metadata

  • Download URL: python_kplot-0.0.2-py3-none-any.whl
  • Upload date:
  • Size: 27.8 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.2.0 CPython/3.12.12

File hashes

Hashes for python_kplot-0.0.2-py3-none-any.whl
Algorithm Hash digest
SHA256 327b1d7e3ddd2150ab4c613175a1327178fd2f67e635b9794bf59e4a8cee9d6e
MD5 4f20f0429ed1730571ccbdc352907cac
BLAKE2b-256 4f57889d4c73c907acc4ba26e602c9296d9bcd1e18902b8ddb5a958906cc71cb

See more details on using hashes here.

Supported by

AWS (Cloud computing and Security Sponsor) · Datadog (Monitoring) · Depot (Continuous Integration) · Fastly (CDN) · Google (Download Analytics) · Pingdom (Monitoring) · Sentry (Error logging) · StatusPage (Status page)