feature selection library
Project description
Selective: Feature Selection Library
Selective is a white-box feature selection library that supports unsupervised and supervised selection methods for classification and regression tasks.
The library provides:
-
Simple to complex selection methods: Variance, Correlation, Statistical, Linear, Tree-based, or Custom
-
Interoperable with data frames as the input
-
Automated task detection. No need to know what feature selection method works with what machine learning task
-
Benchmarking multiple selectors using cross-validation
-
Inspection of results and feature importance
Selective is developed by the Artificial Intelligence Center of Excellence at Fidelity Investments.
Quick Start
# Import Selective and SelectionMethod
from sklearn.datasets import load_boston
from feature.utils import get_data_label
from feature.selector import Selective, SelectionMethod
# Data
data, label = get_data_label(load_boston())
# Feature selectors from simple to more complex
selector = Selective(SelectionMethod.Variance(threshold=0.0))
selector = Selective(SelectionMethod.Correlation(threshold=0.5, method="pearson"))
selector = Selective(SelectionMethod.Statistical(num_features=3, method="anova"))
selector = Selective(SelectionMethod.Linear(num_features=3, regularization="none"))
selector = Selective(SelectionMethod.TreeBased(num_features=3))
# Feature reduction
subset = selector.fit_transform(data, label)
print("Reduction:", list(subset.columns))
print("Scores:", list(selector.get_absolute_scores()))
Available Methods
Benchmarking
# Imports
from sklearn.datasets import load_boston
from feature.utils import get_data_label
from xgboost import XGBClassifier, XGBRegressor
from feature.selector import SelectionMethod, benchmark, calculate_statistics
# Data
data, label = get_data_label(load_boston())
# Selectors
corr_threshold = 0.5
num_features = 3
tree_params = {"n_estimators": 50, "max_depth": 5, "random_state": 111, "n_jobs": 4}
selectors = {
# Correlation methods
"corr_pearson": SelectionMethod.Correlation(corr_threshold, method="pearson"),
"corr_kendall": SelectionMethod.Correlation(corr_threshold, method="kendall"),
"corr_spearman": SelectionMethod.Correlation(corr_threshold, method="spearman"),
# Statistical methods
"stat_anova": SelectionMethod.Statistical(num_features, method="anova"),
"stat_chi_square": SelectionMethod.Statistical(num_features, method="chi_square"),
"stat_mutual_info": SelectionMethod.Statistical(num_features, method="mutual_info"),
# Linear methods
"linear": SelectionMethod.Linear(num_features, regularization="none"),
"lasso": SelectionMethod.Linear(num_features, regularization="lasso", alpha=1000),
"ridge": SelectionMethod.Linear(num_features, regularization="ridge", alpha=1000),
# Non-linear tree-based methods
"random_forest": SelectionMethod.TreeBased(num_features),
"xgboost_classif": SelectionMethod.TreeBased(num_features, estimator=XGBClassifier(**tree_params)),
"xgboost_regress": SelectionMethod.TreeBased(num_features, estimator=XGBRegressor(**tree_params))
}
# Benchmark
score_df, selected_df, runtime_df = benchmark(selectors, data, label, cv=5)
print(score_df, "\n\n", selected_df, "\n\n", runtime_df)
# Get benchmark statistics by feature
stats_df = calculate_statistics(score_df, selected_df)
print(stats_df)
Visualization
import pandas as pd
from sklearn.datasets import load_boston
from feature.utils import get_data_label
from feature.selector import SelectionMethod, Selective, plot_importance
# Data
data, label = get_data_label(load_boston())
# Feature Selector
selector = Selective(SelectionMethod.Linear(num_features=10, regularization="none"))
subset = selector.fit_transform(data, label)
# Plot Feature Importance
df = pd.DataFrame(selector.get_absolute_scores(), index=data.columns)
plot_importance(df)
Installation
Selective is available to install as pip install selective
.
Source
Alternatively, you can build a wheel package on your platform from scratch using the source code:
git clone https://github.com/fidelity/selective.git
cd selective
pip install setuptools wheel # if wheel is not installed
python setup.py sdist bdist_wheel
pip install dist/selective-X.X.X-py3-none-any.whl
Support
Please submit bug reports and feature requests as Issues.
License
Selective is licensed under the GNU GPL 3.0.
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Hashes for selective-1.0.1-py3-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | f10ed7ba1f91b3d2c8cfd14df717c3fd9bdf8d69152263bdf31637af011f6ef8 |
|
MD5 | 352614ea031771efb0cda52017df3722 |
|
BLAKE2b-256 | a0d3d11b3e79bc20b0a50cb4c68820df19fe8726817615ae267494b2c16cbcdc |