import numpy as np
Project description
'''import numpy as np import matplotlib.pyplot as plt from sklearn.datasets import make_blobs from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC from sklearn.ensemble import RandomForestClassifier from sklearn.cluster import KMeans from sklearn.linear_model import LinearRegression from sklearn.metrics import accuracy_score, mean_squared_error
#----------------------------------------------------------------------------------------------------------#
# Generate a 2-D synthetic dataset of four Gaussian blobs and show it.
X, y = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=42)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis')
plt.title('Synthetic Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
#----------------------------------------------------------------------------------------------------------#
# Pre-processing: standardize both features to zero mean / unit variance.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
#----------------------------------------------------------------------------------------------------------#
# Train-test split (80/20), reproducible via the shared random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42)
#----------------------------------------------------------------------------------------------------------#
# K-nearest-neighbors classifier (k = 5): fit, predict, score.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_pred)
#----------------------------------------------------------------------------------------------------------#
# Decision tree classifier. random_state is pinned so tie-breaking among
# equally good splits is reproducible, consistent with the random_state=42
# convention used everywhere else in this script.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)
#----------------------------------------------------------------------------------------------------------#
# Support-vector classifier with a linear kernel: fit, predict, score.
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
svm_pred = svm.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_pred)
#----------------------------------------------------------------------------------------------------------#
# Random forest with 100 trees. random_state is pinned so bootstrap
# sampling and feature sub-sampling are reproducible, consistent with the
# random_state=42 convention used everywhere else in this script.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)
#----------------------------------------------------------------------------------------------------------#
# K-means clustering on the full scaled dataset (k matches the 4 blob
# centers). n_init is set explicitly because its default changed to
# 'auto' in scikit-learn 1.4 (older versions warn); random_state is
# pinned so centroid initialization is reproducible.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(X_scaled)
cluster_centers = kmeans.cluster_centers_
#----------------------------------------------------------------------------------------------------------#
# Linear regression fitted directly to the integer class labels. This is a
# demo only — regressing on categorical labels is not generally meaningful.
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)
# Compute RMSE as sqrt(MSE): the squared=False keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6.
lr_rmse = np.sqrt(mean_squared_error(y_test, lr_pred))

# Compare the models in one bar chart. NOTE: the last bar is an RMSE
# (lower is better), not an accuracy, so it is labeled explicitly rather
# than being passed off as an accuracy score.
classifiers = ['KNN', 'Decision Tree', 'SVM', 'Random Forest',
               'Linear Regression (RMSE)']
accuracies = [knn_accuracy, dt_accuracy, svm_accuracy, rf_accuracy, lr_rmse]
plt.bar(classifiers, accuracies)
plt.xlabel('Models')
plt.ylabel('Score')
plt.title('Accuracy of Classifiers (RMSE for Linear Regression)')
plt.show()
#----------------------------------------------------------------------------------------------------------#
# Decision-tree visualization.
from sklearn.tree import plot_tree

# Visualize the stand-alone decision tree.
plt.figure(figsize=(12, 8))
plot_tree(dt, filled=True, feature_names=['Feature 1', 'Feature 2'],
          class_names=['Class 0', 'Class 1', 'Class 2', 'Class 3'])
plt.title('Decision Tree Visualization')
plt.show()

# Visualize one tree from the random forest (change the index in
# rf.estimators_ to inspect a different tree).
plt.figure(figsize=(12, 8))
plot_tree(rf.estimators_[0], filled=True,
          feature_names=['Feature 1', 'Feature 2'],
          class_names=['Class 0', 'Class 1', 'Class 2', 'Class 3'])
plt.title('Decision Tree from Random Forest')
plt.show()
#----------------------------------------------------------------------------------------------------------#
# Visualize SVM decision boundaries: predict over a dense mesh covering
# the (padded) range of the scaled features, then contour the classes.
plt.figure(figsize=(12, 8))
h = 0.02  # mesh step size
x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

Z = svm.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.viridis, alpha=0.8)

# Overlay the dataset on the decision regions.
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap='viridis')
plt.title('SVM Decision Boundaries')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
#----------------------------------------------------------------------------------------------------------#
# Visualize KNN decision boundaries using the same mesh technique as the
# SVM plot above.
plt.figure(figsize=(12, 8))
h = 0.02  # mesh step size
x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.viridis, alpha=0.8)

# Overlay the dataset on the decision regions.
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap='viridis')
plt.title('KNN Decision Boundaries')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
#----------------------------------------------------------------------------------------------------------#
# Visualize K-means: the dataset colored by its TRUE blob labels (not the
# cluster assignments), with the fitted centroids marked on top.
plt.figure(figsize=(12, 8))
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap='viridis', alpha=0.5)
plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1],
            c='red', marker='x', s=100, label='Cluster Centroids')
plt.title('K-means Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()
# Visualize the linear regression against feature 1 only. NOTE(review):
# the model was fitted on BOTH features, so this is a 1-D projection of a
# 2-D fit, not the fitted line itself.
plt.figure(figsize=(12, 8))
plt.scatter(X_train[:, 0], y_train, color='blue', label='Training Data')
plt.scatter(X_test[:, 0], y_test, color='green', label='Test Data')
# Sort by feature 1 before plotting: X_test is unsorted, so plotting it
# directly would draw a zigzag instead of a clean prediction curve.
order = np.argsort(X_test[:, 0])
plt.plot(X_test[order, 0], lr.predict(X_test)[order],
         color='red', linewidth=2, label='Linear Regression')
plt.title('Linear Regression')
plt.xlabel('Feature 1')
plt.ylabel('Target Variable')
plt.legend()
plt.show()
#----------------------------------------------------------------------------------------------------------#
# K-fold cross-validation of a linear SVM on the training split.
# (matplotlib and numpy are already imported at the top of the script, so
# only the model-selection helpers are imported here.)
from sklearn.model_selection import KFold, cross_val_score

model = SVC(kernel='linear')
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X_train, y_train, cv=kfold)

# Plot the per-fold accuracy.
plt.figure(figsize=(8, 6))
plt.plot(np.arange(1, 6), scores, marker='o', linestyle='-')
plt.xlabel('Fold')
plt.ylabel('Accuracy')
plt.title('K-Fold Cross-Validation Scores')
plt.grid(True)
plt.show()
#----------------------------------------------------------------------------------------------------------#
Visualize SVM decision boundaries with polynomial kernel
svm_poly = SVC(kernel='poly', degree=3) # Polynomial kernel with degree 3 svm_poly.fit(X_train, y_train)
plt.figure(figsize=(12, 8)) h = .02 # step size in the mesh x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1 y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = svm_poly.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) plt.contourf(xx, yy, Z, cmap=plt.cm.viridis, alpha=0.8)
Plot the dataset
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap='viridis') plt.title('SVM Decision Boundaries with Polynomial Kernel') plt.xlabel('Feature 1') plt.ylabel('Feature 2') plt.show() '''
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Hashes for shimpiproductions-3.1-0.1.tar.gz
Algorithm | Hash digest
--- | ---
SHA256 | 15df96d40cda63f5f26d361739896b9bf4fd6016cf61d47a92f05cf2f28aad7a
MD5 | 6fc2f671d2c64b531254faa3a8ff010b
BLAKE2b-256 | eb60e8fdbedb1fa3573fe458a2aec54c0a035cc33f20c000d86eeb88c0a5e56b
Hashes for shimpiproductions_3.1-0.1-py3-none-any.whl
Algorithm | Hash digest
--- | ---
SHA256 | 0fb7357eb046cbb93a70c8f16d3b83f760905486156e8e86b3913a1016c56db8
MD5 | c01685bbf01f3bb5031abdfd5cbd0795
BLAKE2b-256 | 8acb0e96dc2465352acd981a49591ef9e7870fa4ba11ac37fc363bee4ad58301