Skip to main content

A simple library for converting a trained XGBoost model into an equivalent SQL query.

Project description

xgb2sql

xgb2sql converts the output of a trained XGBoost model into a SQL query, so the model can be scored directly in a database.

This file will become your README and also the index of your documentation.

Install

pip install xgb2sql

How to use

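So easy even I could do it! Train an XGBoost model, then pass its booster and the name of the table to score to `core.xgb2sql`; the function returns the SQL query as a string.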

# Imports: the converter itself, XGBoost, and scikit-learn helpers for the demo data.
from xgb2sql import core
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast cancer dataset as a feature matrix X and label vector y.
X, y = load_breast_cancer(return_X_y=True)

# Hold out a test split; random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a small classifier: 5 estimators, so the generated SQL has 5 CASE columns.
woo = xgb.XGBClassifier(n_estimators=5)
woo.fit(X_train, y_train)
# Visualize the fitted model with graphviz (rendered as an image in a notebook).
xgb.to_graphviz(woo)

(The graphviz SVG rendering of the model is not shown here.)

# Convert the fitted booster to SQL; the second argument ('breast_cancer')
# appears as the table name in the FROM clause of the generated query.
tree = core.xgb2sql(woo.get_booster(), 'breast_cancer')
# Print the generated query text (shown below).
print(tree)
WITH booster_output AS (
	SELECT
		CASE
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND ((f20 < 16.8250008) OR (f20 IS NULL))
			AND ((f10 < 0.591250002) OR (f10 IS NULL))
		THEN 0.191869915
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND ((f20 < 16.8250008) OR (f20 IS NULL))
			AND (f10 >= 0.591250002)
		THEN 0
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND (f20 >= 16.8250008)
			AND ((f1 < 18.9599991) OR (f1 IS NULL))
		THEN 0.120000005
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND (f20 >= 16.8250008)
			AND (f1 >= 18.9599991)
		THEN -0.13333334
			WHEN (f7 >= 0.0489199981)
			AND ((f23 < 785.799988) OR (f23 IS NULL))
			AND ((f21 < 23.7399998) OR (f21 IS NULL))
		THEN 0.155555561
			WHEN (f7 >= 0.0489199981)
			AND ((f23 < 785.799988) OR (f23 IS NULL))
			AND (f21 >= 23.7399998)
		THEN -0.100000001
			WHEN (f7 >= 0.0489199981)
			AND (f23 >= 785.799988)
			AND ((f1 < 14.3000002) OR (f1 IS NULL))
		THEN 0
			WHEN (f7 >= 0.0489199981)
			AND (f23 >= 785.799988)
			AND (f1 >= 14.3000002)
		THEN -0.191176474
		END AS column_0, 
		CASE
			WHEN ((f7 < 0.0500999987) OR (f7 IS NULL))
			AND ((f20 < 16.8250008) OR (f20 IS NULL))
			AND ((f13 < 38.6049995) OR (f13 IS NULL))
		THEN 0.17467472
			WHEN ((f7 < 0.0500999987) OR (f7 IS NULL))
			AND ((f20 < 16.8250008) OR (f20 IS NULL))
			AND (f13 >= 38.6049995)
		THEN 0.0302315652
			WHEN ((f7 < 0.0500999987) OR (f7 IS NULL))
			AND (f20 >= 16.8250008)
			AND ((f1 < 18.9599991) OR (f1 IS NULL))
		THEN 0.113052242
			WHEN ((f7 < 0.0500999987) OR (f7 IS NULL))
			AND (f20 >= 16.8250008)
			AND (f1 >= 18.9599991)
		THEN -0.124826349
			WHEN (f7 >= 0.0500999987)
			AND ((f22 < 103.25) OR (f22 IS NULL))
			AND ((f21 < 25.9249992) OR (f21 IS NULL))
		THEN 0.140555695
			WHEN (f7 >= 0.0500999987)
			AND ((f22 < 103.25) OR (f22 IS NULL))
			AND (f21 >= 25.9249992)
		THEN -0.0846852511
			WHEN (f7 >= 0.0500999987)
			AND (f22 >= 103.25)
			AND ((f21 < 20.3549995) OR (f21 IS NULL))
		THEN -0.01987583
			WHEN (f7 >= 0.0500999987)
			AND (f22 >= 103.25)
			AND (f21 >= 20.3549995)
		THEN -0.174933031
		END AS column_1, 
		CASE
			WHEN ((f27 < 0.142349988) OR (f27 IS NULL))
			AND ((f20 < 17.6149998) OR (f20 IS NULL))
			AND ((f13 < 35.2600021) OR (f13 IS NULL))
		THEN 0.159918889
			WHEN ((f27 < 0.142349988) OR (f27 IS NULL))
			AND ((f20 < 17.6149998) OR (f20 IS NULL))
			AND (f13 >= 35.2600021)
		THEN 0.0472318567
			WHEN ((f27 < 0.142349988) OR (f27 IS NULL))
			AND (f20 >= 17.6149998)
			AND ((f29 < 0.0649200007) OR (f29 IS NULL))
		THEN -0.0155247366
			WHEN ((f27 < 0.142349988) OR (f27 IS NULL))
			AND (f20 >= 17.6149998)
			AND (f29 >= 0.0649200007)
		THEN -0.119407289
			WHEN (f27 >= 0.142349988)
			AND ((f23 < 729.549988) OR (f23 IS NULL))
			AND ((f4 < 0.1083) OR (f4 IS NULL))
		THEN 0.120342232
			WHEN (f27 >= 0.142349988)
			AND ((f23 < 729.549988) OR (f23 IS NULL))
			AND (f4 >= 0.1083)
		THEN -0.108723581
			WHEN (f27 >= 0.142349988)
			AND (f23 >= 729.549988)
			AND ((f10 < 0.241250008) OR (f10 IS NULL))
		THEN -0.0287595335
			WHEN (f27 >= 0.142349988)
			AND (f23 >= 729.549988)
			AND (f10 >= 0.241250008)
		THEN -0.163232192
		END AS column_2, 
		CASE
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND ((f20 < 16.8250008) OR (f20 IS NULL))
			AND ((f10 < 0.528550029) OR (f10 IS NULL))
		THEN 0.151598975
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND ((f20 < 16.8250008) OR (f20 IS NULL))
			AND (f10 >= 0.528550029)
		THEN 0.0131686451
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND (f20 >= 16.8250008)
			AND ((f1 < 18.9599991) OR (f1 IS NULL))
		THEN 0.101920418
			WHEN ((f7 < 0.0489199981) OR (f7 IS NULL))
			AND (f20 >= 16.8250008)
			AND (f1 >= 18.9599991)
		THEN -0.113945559
			WHEN (f7 >= 0.0489199981)
			AND ((f23 < 785.799988) OR (f23 IS NULL))
			AND ((f21 < 23.7399998) OR (f21 IS NULL))
		THEN 0.131930456
			WHEN (f7 >= 0.0489199981)
			AND ((f23 < 785.799988) OR (f23 IS NULL))
			AND (f21 >= 23.7399998)
		THEN -0.0824727714
			WHEN (f7 >= 0.0489199981)
			AND (f23 >= 785.799988)
			AND ((f12 < 2.02349997) OR (f12 IS NULL))
		THEN -0.0275684185
			WHEN (f7 >= 0.0489199981)
			AND (f23 >= 785.799988)
			AND (f12 >= 2.02349997)
		THEN -0.155280709
		END AS column_3, 
		CASE
			WHEN ((f27 < 0.145449996) OR (f27 IS NULL))
			AND ((f22 < 107.599998) OR (f22 IS NULL))
			AND ((f13 < 46.7900009) OR (f13 IS NULL))
		THEN 0.142997682
			WHEN ((f27 < 0.145449996) OR (f27 IS NULL))
			AND ((f22 < 107.599998) OR (f22 IS NULL))
			AND (f13 >= 46.7900009)
		THEN 0.00895034242
			WHEN ((f27 < 0.145449996) OR (f27 IS NULL))
			AND (f22 >= 107.599998)
			AND ((f21 < 20.0849991) OR (f21 IS NULL))
		THEN 0.12236432
			WHEN ((f27 < 0.145449996) OR (f27 IS NULL))
			AND (f22 >= 107.599998)
			AND (f21 >= 20.0849991)
		THEN -0.0948726162
			WHEN (f27 >= 0.145449996)
			AND ((f23 < 710.200012) OR (f23 IS NULL))
			AND ((f21 < 25.0550003) OR (f21 IS NULL))
		THEN 0.0869635344
			WHEN (f27 >= 0.145449996)
			AND ((f23 < 710.200012) OR (f23 IS NULL))
			AND (f21 >= 25.0550003)
		THEN -0.0576682575
			WHEN (f27 >= 0.145449996)
			AND (f23 >= 710.200012)
			AND ((f6 < 0.0892650038) OR (f6 IS NULL))
		THEN -0.0451009385
			WHEN (f27 >= 0.145449996)
			AND (f23 >= 710.200012)
			AND (f6 >= 0.0892650038)
		THEN -0.147640571
		END AS column_4
	FROM breast_cancer
	WHERE source = 'test'
)

SELECT
    1 / ( 1 + EXP ( - (
    column_0
	+ column_1
	+ column_2
	+ column_3
	+ column_4 ) ) ) AS score
FROM booster_output
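Tada! Each `CASE` expression in the `booster_output` CTE encodes one tree (`column_0` through `column_4`), and the final `SELECT` sums those outputs and applies the logistic function, `1 / (1 + EXP(-x))`, to produce the `score`.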

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

xgb2sql-0.18.tar.gz (13.5 kB view details)

Uploaded Source

Built Distribution

xgb2sql-0.18-py3-none-any.whl (9.6 kB view details)

Uploaded Python 3

File details

Details for the file xgb2sql-0.18.tar.gz.

File metadata

  • Download URL: xgb2sql-0.18.tar.gz
  • Upload date:
  • Size: 13.5 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/5.1.1 CPython/3.11.4

File hashes

Hashes for xgb2sql-0.18.tar.gz
Algorithm Hash digest
SHA256 027c3c1dd4671233d29622ffc9436d0c853e7a029d85d7527b97a3a2c045e153
MD5 2568d757250ed8b30b8e74e286329be4
BLAKE2b-256 22ab51699c6414465f2c32c13ef7c34d79f12a059eb8421d0860aa36284b2694

See more details on using hashes here.

File details

Details for the file xgb2sql-0.18-py3-none-any.whl.

File metadata

  • Download URL: xgb2sql-0.18-py3-none-any.whl
  • Upload date:
  • Size: 9.6 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/5.1.1 CPython/3.11.4

File hashes

Hashes for xgb2sql-0.18-py3-none-any.whl
Algorithm Hash digest
SHA256 60b242201066285c14603bf550308b55fe8398c655fe5b6acf05c72aaaf335e2
MD5 7900bf802c682dd876ae72a6270b0399
BLAKE2b-256 f29af810526ec582b6cfc265af2773b7a0d3ab043f259f1654071db6ff59ed60

See more details on using hashes here.

Supported by

AWS (Cloud computing and Security Sponsor) · Datadog (Monitoring) · Fastly (CDN) · Google (Download Analytics) · Microsoft (PSF Sponsor) · Pingdom (Monitoring) · Sentry (Error logging) · StatusPage (Status page)