

Project description

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from apyori import apriori

df = pd.read_csv("housing.csv")
print(df)
print(df.head())
print(df.tail())
print(df.max())
print(df.min())

median_house_value_column = df['median_house_value']

print(median_house_value_column.var())
print(median_house_value_column.median())
print(median_house_value_column.mode())
print(median_house_value_column.mean())
print(median_house_value_column.std())
print(median_house_value_column.count())
print(median_house_value_column.describe())
print(median_house_value_column.dtypes)
df = df.dropna()
print(df)

x_array = np.array(df['median_house_value']).reshape(-1, 1)
# Scale the column to unit norm down its axis (axis=0). With the default
# axis=1, every single-element row would simply be rescaled to 1.0.
normalized_arr = preprocessing.normalize(x_array, axis=0)
print(normalized_arr)

numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
categorical_columns = list(set(df.columns) - set(numeric_columns))
df_encoded = pd.get_dummies(df, columns=categorical_columns)
scaler = MinMaxScaler(feature_range=(0, 2))
df_encoded[numeric_columns] = scaler.fit_transform(df_encoded[numeric_columns])
print(df_encoded.head())

pca = PCA()
pca.fit(df_encoded)
print(pca.explained_variance_ratio_)
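The explained-variance ratios are easier to act on cumulatively. As a minimal sketch (my addition, reusing the PCA fitted above), this keeps just enough components to cover 95% of the variance:

cumulative = np.cumsum(pca.explained_variance_ratio_)
n_components = int(np.argmax(cumulative >= 0.95)) + 1  # first index reaching 95%
print(n_components, "components explain at least 95% of the variance")
df_reduced = PCA(n_components=n_components).fit_transform(df_encoded)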

df1 = pd.read_csv("student.csv", header=0)
df2 = pd.read_csv("mark.csv", header=0)
df_stu = pd.merge(df1, df2, on='Student_id')

print(df_stu.head())
print(df_stu.shape)

store_data = pd.read_csv("store_data.csv", header=None)
print(store_data.head())
records = []

# Build one transaction per row, using the frame's actual shape rather
# than hard-coded row/column counts.
for i in range(len(store_data)):
    records.append([str(store_data.values[i, j]) for j in range(store_data.shape[1])])

association_rules = apriori(records, min_support=0.0045, min_confidence=0.2, min_lift=3, min_length=2)
association_results = list(association_rules)
print(association_results)

print("There are {} Relation derived.".format(len(association_results))) for i in range(0, len(association_results)): print(association_results[i][0])

for item in association_results:
    # The first element of each result holds the full item set
    # (the base item together with the added item).
    pair = item[0]
    items = [x for x in pair]
    print("Rule: " + items[0] + " -> " + items[1])

    # The second element is the rule's support.
    print("Support: " + str(item[1]))

    # The third element holds the ordered statistics; confidence and lift
    # are the third and fourth fields of its first entry.
    print("Confidence: " + str(item[2][0][2]))
    print("Lift: " + str(item[2][0][3]))
    print("=====================================")

X = np.array([[5, 3], [10, 15], [15, 12], [24, 10], [30, 45],
              [85, 70], [71, 80], [60, 78], [55, 52], [80, 91]])
kmeans = KMeans(n_clusters=2)  # two clusters, labelled 0 and 1
kmeans.fit(X)

print(kmeans.cluster_centers_)
plt.scatter(X[:, 0], X[:, 1], label='True Position')
plt.show()
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='rainbow')
plt.show()
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], color='black')
plt.show()

df_kmeans = pd.get_dummies(df, drop_first=True)  # drop_first removes the redundant dummy column
print(df_kmeans.head())

scaler = StandardScaler()
# Standardized copy of the dummy-encoded frame (the clustering below
# runs on the unscaled feature frame X).
scaled_df_kmeans = scaler.fit_transform(df_kmeans)

Y = df['housing_median_age']
# Drop the target column as well, so it cannot leak into the features
# used for clustering and classification below.
X = df.drop(columns=['ocean_proximity', 'housing_median_age'])

Applying KMeans Clustering to the housing data

kmeans_model = KMeans(n_clusters=3)
clusters = kmeans_model.fit_predict(X)
X["Cluster"] = clusters

Displaying the clusters

print(X["Cluster"].value_counts())

Additional exploratory analysis for determining the number of clusters

ssd = []
features = X.drop(columns=["Cluster"])  # refit on the raw features, not on the stored cluster labels
for k in range(2, 9):
    kmeans_model = KMeans(n_clusters=k)
    kmeans_model.fit(features)
    ssd.append(kmeans_model.inertia_)

Plotting SSD for different K values

plt.figure(figsize=(6, 4), dpi=100)
plt.plot(range(2, 9), ssd, color="green", marker="o")
plt.xlabel("Number of clusters (K)")
plt.ylabel("SSD for K")
plt.show()
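The elbow plot can be ambiguous, so a silhouette score is a useful second opinion. A minimal sketch (my addition, reusing the features frame from the loop above; silhouette_score comes from sklearn.metrics):

from sklearn.metrics import silhouette_score

for k in range(2, 9):
    labels = KMeans(n_clusters=k).fit_predict(features)
    print(k, silhouette_score(features, labels))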

Splitting the data for Gaussian Naive Bayes classification

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

Gaussian Naive Bayes classifier

naive = GaussianNB()
naive_model = naive.fit(x_train, y_train)

Predictions and evaluation

y_pred = naive_model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print('The accuracy of the model is:', accuracy)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
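Since housing_median_age takes dozens of distinct integer values, treating each value as its own class makes high accuracy unlikely. A minimal sketch (my addition; the bin edges and labels are arbitrary) that bins the target into broader age ranges with pd.cut before classifying:

y_binned = pd.cut(Y, bins=[0, 15, 30, 45, np.inf], labels=["new", "mid", "old", "oldest"])
x_tr, x_te, y_tr, y_te = train_test_split(X, y_binned, test_size=0.25, random_state=42)
binned_model = GaussianNB().fit(x_tr, y_tr)
print(accuracy_score(y_te, binned_model.predict(x_te)))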

Project details


Release history

This version

0.2

Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

shimpiproductions-0.2.tar.gz (5.0 kB)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

shimpiproductions-0.2-py3-none-any.whl (4.2 kB)

Uploaded Python 3

File details

Details for the file shimpiproductions-0.2.tar.gz.

File metadata

  • Download URL: shimpiproductions-0.2.tar.gz
  • Upload date:
  • Size: 5.0 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/4.0.2 CPython/3.12.0

File hashes

Hashes for shimpiproductions-0.2.tar.gz
Algorithm Hash digest
SHA256 1b7c2ea6a54d78252894bf6cef9c82f89cc0b4d75caea164252b780a3b388728
MD5 7c052a77202fbb8a5b9987ee97ce13ad
BLAKE2b-256 9e33718744f54ab2e8a740124e34848e2f0a1595ec0e63a108200c86e0b39b54

See more details on using hashes here.

File details

Details for the file shimpiproductions-0.2-py3-none-any.whl.

File metadata

File hashes

Hashes for shimpiproductions-0.2-py3-none-any.whl
Algorithm Hash digest
SHA256 59ca6fca1abc79d65f8de3eadab7399d183ca9b53f629153440e378ec48accd0
MD5 6c591aaa32e837b494b84ac563b001c5
BLAKE2b-256 68a875b9cf461ea80896c660b91db7afa8c99caa1c643501e6fda4e100a9053a

See more details on using hashes here.
