Skip to main content

A high-performance gradient boosting implementation using Cython

Project description

CyBooster: A Gradient Boosting Library

PyPI - License Downloads Documentation

CyBooster is a high-performance, generic gradient boosting library (any base learner can be used) designed for classification and regression tasks. It is built with Cython (that is, C) for speed and efficiency. This version will also be more GPU-friendly, thanks to JAX, making it suitable for large datasets.

Each base learner is augmented with a randomized neural network (a generalization of https://www.researchgate.net/publication/346059361_LSBoost_gradient_boosted_penalized_nonlinear_least_squares to any base learner), which allows the model to learn complex patterns in the data. The library supports both classification and regression tasks, making it versatile for various machine learning applications.

CyBooster was born from mlsauce, which can be difficult to install on some systems.

Installation

To install CyBooster, you can use pip or uv (faster):

pip install cybooster

or

uv pip install cybooster

From GitHub:

pip install git+https://github.com/Techtonique/cybooster.git

Usage

1 - Model-agnostic boosting

from cybooster import BoosterClassifier, BoosterRegressor
from sklearn.datasets import load_iris, load_diabetes, load_breast_cancer, load_digits, load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, root_mean_squared_error
from sklearn.linear_model import LinearRegression
from time import time 


# Regression example: boost a plain linear model on the diabetes dataset.
X, y = load_diabetes(return_X_y=True)
# Hold out 20% of the rows for evaluation; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
regressor = BoosterRegressor(
    obj=LinearRegression(),
    n_estimators=100,
    learning_rate=0.1,
    n_hidden_features=10,
    verbose=1,
    seed=42,
)
# The reported time covers both fitting and predicting.
start = time()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
elapsed = time() - start
print(f"Elapsed: {elapsed} s")
rmse = root_mean_squared_error(y_test, y_pred)
print(f"RMSE for regression: {rmse:.4f}")

# Classification examples: the same boosted linear model is fitted on four
# datasets in turn (iris, wine, breast cancer, digits). Only the loader
# differs between runs, so the shared steps live in a single loop.
for load_dataset in (load_iris, load_wine, load_breast_cancer, load_digits):
    X, y = load_dataset(return_X_y=True)
    # Hold out 20% of the rows for evaluation; fixed seed for a repeatable split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    classifier = BoosterClassifier(obj=LinearRegression(), n_estimators=100, learning_rate=0.1,
                                   n_hidden_features=10, verbose=1, seed=42)
    # The reported time covers fitting (including a retry, if any) and predicting.
    start = time()
    try:
        classifier.fit(X_train, y_train)
    except Exception:  # this is for Windows users
        # Retry once with the labels cast to int32; a second failure propagates.
        y_train = y_train.astype('int32')
        classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    print(f"Elapsed: {time() - start} s")
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for classification: {accuracy:.4f}")

2 - Model-agnostic NGBoostRegressor

import numpy as np
from cybooster import NGBoostRegressor, SkNGBoostRegressor
from sklearn.datasets import load_diabetes, fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, root_mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import ExtraTreeRegressor
from time import time 


# Fetch the "boston" dataset (version 1) from OpenML as pandas objects.
X, y = fetch_openml("boston", version=1, as_frame=True, return_X_y=True)
cols = list(X.columns)
print("columns", cols)
# Split the underlying arrays, holding out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X.values, y.values, test_size=0.2, random_state=42
)
# Coerce every split to a float64 ndarray before handing it to the boosters.
X_train, y_train, X_test, y_test = (
    np.asarray(split, dtype=np.float64)
    for split in (X_train, y_train, X_test, y_test)
)

# Fit and report every (base learner, booster wrapper) combination with one
# shared stanza. `None` means "construct the wrapper with no arguments", i.e.
# use whatever base learner the wrapper defaults to.
for make_base in (None, LinearRegression, Ridge, ExtraTreeRegressor):
    for booster_cls in (NGBoostRegressor, SkNGBoostRegressor):
        # Build a fresh base learner per run so no estimator instance is shared.
        regressor = booster_cls() if make_base is None else booster_cls(make_base())
        # The reported time covers both fitting and the first predict call.
        start = time()
        regressor.fit(X_train, y_train)
        y_pred = regressor.predict(X_test)
        print(f"Elapsed: {time() - start} s")
        rmse = root_mean_squared_error(y_test, y_pred)
        print(f"RMSE for regression: {rmse:.4f}")
        # Second predict call, this time requesting the return_std output.
        print("return_std:", regressor.predict(X_test, return_std=True))

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

cybooster-0.7.1.tar.gz (25.0 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

cybooster-0.7.1-cp314-cp314-manylinux2014_x86_64.whl (2.5 MB view details)

Uploaded CPython 3.14

File details

Details for the file cybooster-0.7.1.tar.gz.

File metadata

  • Download URL: cybooster-0.7.1.tar.gz
  • Upload date:
  • Size: 25.0 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.1.0 CPython/3.13.7

File hashes

Hashes for cybooster-0.7.1.tar.gz
Algorithm Hash digest
SHA256 5e50c858926da65308c37e5eb7a035b70d7633d6d3b5cf6cfa565c0455c42162
MD5 efc416764e4f2151e9ee8f7f925b0590
BLAKE2b-256 25e80bec2d04c2ce0594eacc19f118d39b1a6c3d2960f72d99ce7317346839ee

See more details on using hashes here.

File details

Details for the file cybooster-0.7.1-cp314-cp314-manylinux2014_x86_64.whl.

File metadata

File hashes

Hashes for cybooster-0.7.1-cp314-cp314-manylinux2014_x86_64.whl
Algorithm Hash digest
SHA256 0a56086b090f7fe39f6c5b9381a9b906970cdbb2cf2cf6704e8a2d129d1c7803
MD5 aca9f6ec06e84ad42221d9c8e7709ac2
BLAKE2b-256 49623d49bce31105dd33f8f69119e64e8abe59e5cafe0c239d8f9b64d7f2d080

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page