A library that simplifies common basic ML tasks.
wuml
Chieh's quick ML library
Pip Installation
pip install wuml
Decimate Data with Missing Entries and Impute the Rest
#!/usr/bin/env python
import wuml
'''
This code loads data with entries missing at random as a wData type.
It automatically removes features and samples that are missing too many entries,
performs imputation on the decimated data,
and lastly saves and exports the result to a CSV file.
'''
data = wuml.wData('../data/chem.exposures.csv', row_id_with_label=0)
dataDecimated = wuml.decimate_data_with_missing_entries(data, column_threshold=0.95, row_threshold=0.9, newDataFramePath='')
# column_threshold=0.95 keeps only features that are at least 95% non-missing
X = wuml.impute(dataDecimated)	# perform MICE imputation
X.to_csv('../data/Chem_decimated_imputed.csv')
Code Output
Notice that all the missing entries have been filled.
(Pdb) X.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1122 entries, 0 to 1176
Data columns (total 24 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 1122 non-null float64
1 id 1122 non-null float64
2 MBP 1122 non-null float64
3 MBzP 1122 non-null float64
4 MCNP 1122 non-null float64
5 MCOP 1122 non-null float64
6 MCPP 1122 non-null float64
7 MECPP 1122 non-null float64
8 MEHHP 1122 non-null float64
9 MEHP 1122 non-null float64
10 MEOHP 1122 non-null float64
11 MEP 1122 non-null float64
12 MiBP 1122 non-null float64
13 preBMI 1122 non-null float64
14 preBMI_cat 1122 non-null float64
15 ppsex 1122 non-null float64
16 ISAGE 1122 non-null float64
17 mage_cat 1122 non-null float64
18 edu_cat 1122 non-null float64
19 currjob_cat 1122 non-null float64
20 marital_cat 1122 non-null float64
21 smk_cat 1122 non-null float64
22 alc_cat 1122 non-null float64
23 finalga_best 1122 non-null float64
dtypes: float64(24)
memory usage: 219.1 KB
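For intuition, the decimation and imputation steps above can be approximated with plain pandas and scikit-learn. The sketch below is a hypothetical equivalent, assuming all columns are numeric; scikit-learn's IterativeImputer is a MICE-style imputer and is not necessarily what wuml uses internally.
#!/usr/bin/env python
# Hypothetical sketch of threshold-based decimation + MICE-style imputation
# using pandas and scikit-learn (assumes an all-numeric dataset).
import pandas as pd
from sklearn.experimental import enable_iterative_imputer	# noqa: F401
from sklearn.impute import IterativeImputer

df = pd.read_csv('../data/chem.exposures.csv', index_col=0)

# Keep features that are at least 95% non-missing (column_threshold=0.95)
df = df.loc[:, df.notna().mean() >= 0.95]
# Keep samples that are at least 90% non-missing (row_threshold=0.9)
df = df.loc[df.notna().mean(axis=1) >= 0.9, :]

# Fill the remaining gaps with chained-equations (MICE-style) imputation
imputed = pd.DataFrame(IterativeImputer().fit_transform(df), columns=df.columns, index=df.index)
imputed.to_csv('../data/Chem_decimated_imputed.csv')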
Example of basic regression with a neural network (PyTorch)
#!/usr/bin/env python
# The idea of training a neural network boils down to 3 steps:
#   1. Define a network structure
#      Example: a 3-layer network with hidden layers of width 100
#      networkStructure=[(100,'relu'),(100,'relu'),(1,'none')]
#   2. Define a cost function
#   3. Call train()
import wuml
import numpy as np
import torch
import wplotlib
data = wuml.wData(xpath='examples/data/regress.csv', ypath='examples/data/regress_label.csv', batch_size=20)
def costFunction(x, y, ŷ, ind):
	ŷ = torch.squeeze(ŷ)
	return torch.sum((y - ŷ) ** 2)
bNet = wuml.basicNetwork(costFunction, data, networkStructure=[(100,'relu'),(100,'relu'),(1,'none')], max_epoch=500, learning_rate=0.001)
bNet.train()
# Test out on test data
newX = np.expand_dims(np.arange(0,5,0.1),1)
Ŷ = bNet(newX, output_type='ndarray')	# accepts a NumPy array or Tensor as input; output_type='ndarray' returns a NumPy array
# plot the results out
splot = wplotlib.scatter()
splot.add_plot(data.X, data.Y, marker='o')
lp = wplotlib.lines()
lp.add_plot(newX, Ŷ)
splot.show(title='Basic Network Regression', xlabel='x-axis', ylabel='y-axis')
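The (width, activation) tuples in networkStructure describe a stack of fully connected layers. The sketch below shows how such a list could be mapped onto plain PyTorch modules; it is only an illustration of the convention, and the helper build_network is hypothetical, not part of wuml.
import torch.nn as nn

def build_network(input_dim, structure):
	# structure: list of (width, activation) tuples, e.g. [(100,'relu'),(100,'relu'),(1,'none')]
	layers, in_dim = [], input_dim
	for width, act in structure:
		layers.append(nn.Linear(in_dim, width))
		if act == 'relu':
			layers.append(nn.ReLU())
		elif act == 'softmax':
			layers.append(nn.Softmax(dim=1))
		# 'none' adds no activation after the linear layer
		in_dim = width
	return nn.Sequential(*layers)

print(build_network(1, [(100,'relu'), (100,'relu'), (1,'none')]))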
Example of basic classification
#!/usr/bin/env python
import wuml
import numpy as np
import torch
import torch.nn as nn
import wplotlib
# The idea of training a neural network boils down to 3 steps:
#   1. Define a network structure
#      Example: a 3-layer network with hidden layers of width 100 and 3 output logits
#      networkStructure=[(100,'relu'),(100,'relu'),(3,'none')]
#   2. Define a cost function
#   3. Call train()
data = wuml.wData(xpath='examples/data/wine.csv', ypath='examples/data/wine_label.csv', batch_size=20)
def costFunction(x, y, ŷ, ind):
	lossFun = nn.CrossEntropyLoss()
	loss = lossFun(ŷ, y)	# CrossEntropyLoss expects y with shape (batch,) and ŷ with shape (batch, 3)
	return loss
# For classification, PyTorch needs integer class labels, so define Y_dataType=torch.int64
bNet = wuml.basicNetwork(costFunction, data, networkStructure=[(100,'relu'),(100,'relu'),(3,'none')],
Y_dataType=torch.int64, max_epoch=3000, learning_rate=0.001)
bNet.train()
netOutput = bNet(data.X)
# Output Accuracy
_, Ŷ = torch.max(netOutput, 1)
Acc = wuml.accuracy(data.Y, Ŷ)
#Acc= accuracy_score(data.Y, Ŷ.cpu().numpy())
print('Accuracy: %.3f'%Acc)
Code Output
Network Info:
Learning rate: 0.001
Max number of epochs: 3000
Cuda Available: True
Network Structure
Linear(in_features=13, out_features=100, bias=True) , relu
Linear(in_features=100, out_features=100, bias=True) , relu
Linear(in_features=100, out_features=3, bias=True) , none
epoch: 3000, Avg Loss: 0.0442, Learning Rate: 0.00001563
Accuracy: 0.989
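The Y_dataType=torch.int64 requirement comes from nn.CrossEntropyLoss: it takes raw logits of shape (batch, num_classes) and integer class labels of shape (batch,), applying the softmax internally, which is also why the output layer uses 'none'. A minimal standalone demonstration:
import torch
import torch.nn as nn

lossFun = nn.CrossEntropyLoss()
logits = torch.randn(20, 3)	# ŷ: raw network outputs, one row of 3 logits per sample
labels = torch.randint(0, 3, (20,), dtype=torch.int64)	# y: integer class ids, shape (20,)
print(lossFun(logits, labels))	# softmax + negative log-likelihood happen inside the loss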
Distribution Estimation from Samples Via KDE
#!/usr/bin/env python
import wuml
import numpy as np
import scipy.stats
from wplotlib import histograms
from wplotlib import lines
data = wuml.wData(X_npArray=np.random.randn(1000))
Pₓ = wuml.KDE(data)
X = np.arange(-3,3,0.05)
realProb = scipy.stats.norm(0, 1).pdf(X)
estimatedProb = Pₓ(X)
newX = Pₓ.generate_samples(300)
textstr = 'Blue: True Density\nRed: KDE estimated density\nGreen: Histogram sampled from KDE'
lp = lines()
H = histograms()
lp.add_plot(X,realProb, color='blue', marker=',')
lp.add_plot(X,estimatedProb, color='red', marker=',')
lp.add_text(X,estimatedProb, textstr, β=0.8)
H.histogram(newX, num_bins=10, title='Using KDE to Estimate Distributions', facecolor='green', α=0.5, showImg=False, normalize=True)
H.show()
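For comparison, the same estimate-then-sample workflow can be reproduced with scipy alone; the sketch below uses scipy.stats.gaussian_kde and mirrors the example above without the plotting. It is an assumed equivalent, not wuml's internal implementation.
import numpy as np
import scipy.stats

samples = np.random.randn(1000)
kde = scipy.stats.gaussian_kde(samples)	# fit a Gaussian KDE to the samples

X = np.arange(-3, 3, 0.05)
estimatedProb = kde(X)			# density evaluated on a grid
newX = kde.resample(300).ravel()	# draw 300 new samples from the fitted density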
Determine Sample Weights Based on Rarity
#!/usr/bin/env python
import wuml
import numpy as np
import scipy.stats
from wplotlib import histograms
from wplotlib import lines
'''
Assigns each sample a weight based on its likelihood, estimated with KDE.
Let X1 be the most likely sample, i.e. p(X1) >= p(Xi) for all i.
Each sample Xi then receives the weight p(X1)/p(Xi):
if X1 is twice as likely as X2, X1 gets a weight of 1 and X2 a weight of 2.
These weights can then be used to balance sample importance for regression.
'''
data = wuml.wData('examples/data/Chem_decimated_imputed.csv', row_id_with_label=0)
data.delete_column('id') # the id should not be part of the likelihood
sample_weights = wuml.get_likelihood_weight(data)
sample_weights.to_csv('examples/data/Chem_sample_weights.csv', include_column_names=False)
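To make the weighting rule concrete, the sketch below computes p(X1)/p(Xi)-style weights directly with scipy's gaussian_kde, assuming an all-numeric data matrix; the exact estimator wuml uses may differ.
import numpy as np
import scipy.stats

X = np.random.randn(500, 3)		# illustrative data matrix (samples x features)
kde = scipy.stats.gaussian_kde(X.T)	# gaussian_kde expects a (features x samples) array
p = kde(X.T)				# likelihood of each sample
weights = p.max() / p			# the most likely sample gets weight 1; rarer samples get larger weights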