0% found this document useful (0 votes)
55 views3 pages

Assignment 3 Solution

Uploaded by

anandadeepbala
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
55 views3 pages

Assignment 3 Solution

Uploaded by

anandadeepbala
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 3

ASSIGNMENT NUMBER 3 SOLUTION

#Import libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # for data visualization
import seaborn as sns # for statistical data visualization
%matplotlib inline

# Input data files are available in the "../input/" directory.


# For example, running this (by clicking run or pressing Shift+Enter)
will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
for filename in filenames:
print(os.path.join(dirname, filename))
import warnings
warnings.filterwarnings('ignore')
# Load the Live dataset into a DataFrame.
df = pd.read_csv('/content/Live.csv')

# The trailing Column1..Column4 fields are redundant in this dataset —
# remove them before any analysis.
df.drop(columns=['Column1', 'Column2', 'Column3', 'Column4'], inplace=True)

# Structural summary (dtypes, non-null counts) and descriptive statistics.
df.info()
df.describe()

# Inspect the distinct values of the categorical `status_type` variable.
df['status_type'].unique()

# Identifier and timestamp columns carry no clustering signal — drop them.
df.drop(columns=['status_id', 'status_published'], inplace=True)
df.head()
# Declaration of feature vector and target variable.
# Use an explicit copy so that encoding the column below does not
# silently mutate the original `df` through an alias.
X = df.copy()
y = df['status_type']

# Conversion of the categorical `status_type` variable into integer codes.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
X['status_type'] = le.fit_transform(X['status_type'])
y = le.transform(y)
# NOTE(review): X still contains the encoded target column itself as a
# feature, which leaks label information into the clustering — confirm
# this matches the assignment's intent.
# Fit a K-Means model with two clusters on the feature matrix.
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
labels = kmeans.labels_

# Count how many cluster assignments coincide with the encoded labels.
correct_labels = sum(y == labels)
print("Result: %d out of %d samples were correctly labeled." %
      (correct_labels, y.size))
print('Accuracy score: {0:0.2f}'.format(correct_labels / float(y.size)))
# Repeat the clustering with three clusters and evaluate agreement.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(X)

# Count the cluster assignments that match the encoded labels.
labels = kmeans.labels_
correct_labels = (y == labels).sum()
print("Result: %d out of %d samples were correctly labeled." %
      (correct_labels, y.size))
print('Accuracy score: {0:0.2f}'.format(correct_labels / float(y.size)))
# Use the elbow method to find the optimal number of clusters:
# fit K-Means for k = 1..10 and record the within-cluster sum of
# squares (inertia) for each k.
from sklearn.cluster import KMeans

cs = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300,
                    n_init=10, random_state=0)
    kmeans.fit(X)
    cs.append(kmeans.inertia_)

plt.plot(range(1, 11), cs)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('CS')
plt.show()

# By the above plot, we can see that there is a kink at k=2. Hence k=2
# can be considered a good number of clusters for this data.

DATASET LINK: https://www.semanticscholar.org/paper/Dataset-on-usage-and-engagement-patterns-for-Live-Dehouche/c0ec91003f8bdce99a56fa60dc2d20268cc808b8

You might also like