0% found this document useful (0 votes)
8 views3 pages

MLP Kmeans

The document outlines a data analysis process using the Iris dataset and IMDB data, employing KMeans and Gaussian Mixture Models for clustering, and Random Forest, SVM, and Neural Networks for classification. It includes data preprocessing steps, accuracy evaluations, and visualizations of classification results. The performance metrics such as confusion matrices and accuracy scores for each model are presented.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
8 views3 pages

MLP Kmeans

The document outlines a data analysis process using the Iris dataset and IMDB data, employing KMeans and Gaussian Mixture Models for clustering, and Random Forest, SVM, and Neural Networks for classification. It includes data preprocessing steps, accuracy evaluations, and visualizations of classification results. The performance metrics such as confusion matrices and accuracy scores for each model are presented.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 3

# Cluster the Iris measurements with K-Means and with a Gaussian mixture (EM),
# plot both results next to the true species, and print accuracy metrics.
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import sklearn.metrics as sm
import pandas as pd
import numpy as np


def _align_clusters(y_true, cluster_ids):
    """Return *cluster_ids* renumbered so they agree best with *y_true*.

    Cluster numbers produced by an unsupervised model are arbitrary, so they
    must be matched to the class labels before accuracy or a confusion matrix
    means anything. The matching is the one-to-one assignment that maximizes
    the number of samples whose cluster and class coincide.

    Both inputs are expected to hold integer ids starting at 0.
    """
    from scipy.optimize import linear_sum_assignment

    n_labels = int(max(np.max(y_true), np.max(cluster_ids))) + 1
    # Rows are true classes, columns are cluster ids.
    counts = sm.confusion_matrix(y_true, cluster_ids, labels=np.arange(n_labels))
    class_idx, cluster_idx = linear_sum_assignment(counts, maximize=True)
    relabel = np.empty(n_labels, dtype=int)
    relabel[cluster_idx] = class_idx
    return relabel[np.asarray(cluster_ids)]


def _scatter_panel(position, labels, title):
    """Draw petal length vs. petal width in panel *position* of a 1x3 grid.

    Points are colored by *labels* through the module-level ``colormap``.
    """
    plt.subplot(1, 3, position)
    plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[labels], s=40)
    plt.title(title)
    plt.xlabel('Petal Length')
    plt.ylabel('Petal Width')


iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# Seeded so the reported numbers are the same on every run.
model = KMeans(n_clusters=3, n_init=10, random_state=42)
model.fit(X)
kmeans_labels = _align_clusters(y.Targets, model.labels_)

# One row, three panels: truth, K-Means, GMM.
plt.figure(figsize=(21, 7))
colormap = np.array(['red', 'lime', 'black'])
_scatter_panel(1, y.Targets, 'Real Classification')
_scatter_panel(2, kmeans_labels, 'K Mean Classification')

print('The accuracy score of K-Mean: ', sm.accuracy_score(y.Targets, kmeans_labels))
print('The Confusion matrix of K-Mean: ', sm.confusion_matrix(y.Targets, kmeans_labels))

# The mixture model is fitted on standardized features.
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)

gmm = GaussianMixture(n_components=3, random_state=42)
gmm.fit(xs)
y_gmm = gmm.predict(xs)
gmm_labels = _align_clusters(y.Targets, y_gmm)

_scatter_panel(3, gmm_labels, 'GMM Classification')

print('The accuracy score of EM: ', sm.accuracy_score(y.Targets, gmm_labels))
print('The Confusion matrix of EM: ', sm.confusion_matrix(y.Targets, gmm_labels))
print('Accuracy Metrics of EM: ', sm.classification_report(y.Targets, gmm_labels))

plt.tight_layout()
plt.show()

Exp-Kmeans Em program

import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Load the movie table from the CSV file.
data = pd.read_csv("IMDBNew.csv")
print(data)

# Drop every row that has at least one missing value.
# dropna(inplace=True) returns None, so report the remaining shape instead of
# printing the call's return value.
data.dropna(axis=0, inplace=True)
print(data.shape)
print(data.isnull().sum())

# Alternative to dropping rows (disabled): impute missing revenue with the mean.
#
# Revenue_mean=data["Revenue (Millions)"].mean()
# print(Revenue_mean)
# data["Revenue (Millions)"].fillna(Revenue_mean,inplace=True)
# print(data.isnull().sum())

# Turn Metascore into a binary target: (10, 50] -> 0 and (50, 100] -> 1.
bins = (10, 50, 100)
group_name = [0, 1]
data['Metascore'] = pd.cut(data['Metascore'], bins=bins, labels=group_name)
# Scores outside the bins come back from pd.cut as NaN; remove those rows so
# they do not reach the encoder as missing values.
data.dropna(subset=['Metascore'], inplace=True)
print(data['Metascore'].unique())

movie_quality = LabelEncoder()
data['Metascore'] = movie_quality.fit_transform(data['Metascore'])

print(data['Metascore'])

print(data['Metascore'].value_counts())

# Name the column explicitly: the first positional argument of countplot is
# `data`, not `x`, in current seaborn releases.
sns.countplot(x='Metascore', data=data)

"""
Dividing the dataset
"""

# Features are every numeric column except the target. Non-numeric columns
# cannot be fed to the estimators fitted below, so they are left out here.
# NOTE(review): this is a no-op if IMDBNew.csv is already all-numeric; confirm
# that any text columns it contains are not meant to be encoded as features.
X = data.drop('Metascore', axis=1).select_dtypes(include='number')
y = data['Metascore']

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

"""
Random Forest Classifier
"""

def _fit_and_report(title, estimator):
    """Fit *estimator* on the training split and report its test performance.

    Prints *title*, then the confusion matrix and the accuracy score computed
    on the test split.

    Returns:
        A ``(predictions, accuracy)`` tuple for the test split.
    """
    print(title)
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)
    print(confusion_matrix(y_test, predictions))
    score = accuracy_score(y_test, predictions)
    print(score)
    return predictions, score


# Random Forest Classifier.
# Seeded like the train/test split so the reported numbers are repeatable.
rfc = RandomForestClassifier(n_estimators=200, random_state=42)
pred_rfc, accuracy = _fit_and_report(
    "Confusion Matrix and Accuracy for Random Forest", rfc
)

# Support Vector Machine
clf = svm.SVC()
pred_clf, accuracy = _fit_and_report(
    "Confusion Matrix and Accuracy for SVM", clf
)

# Neural Network Classifier: three hidden layers of 11 units each.
nlpc = MLPClassifier(hidden_layer_sizes=(11, 11, 11), max_iter=500, random_state=42)
pred_nlpc, accuracy = _fit_and_report(
    "Confusion Matrix and Accuracy for Neural Network", nlpc
)

Exp-ANN,SVM,RFC

You might also like