0% found this document useful (0 votes)
124 views3 pages

Código K-Means en Spyder

This document imports several Python libraries for data analysis and visualization. It loads and analyzes a CSV dataset, performs k-means clustering on numeric columns to group the data into clusters, and visualizes the clustered data and cluster centroids in various plots. It also analyzes properties of the clusters like diversity and determines the closest data point to a new data sample.

Uploaded by

Manuel Calva Z
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
124 views3 pages

Código K-Means en Spyder

This document imports several Python libraries for data analysis and visualization. It loads and analyzes a CSV dataset, performs k-means clustering on numeric columns to group the data into clusters, and visualizes the clustered data and cluster centroids in various plots. It also analyzes properties of the clusters like diversity and determines the closest data point to a new data sample.

Uploaded by

Manuel Calva Z
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from mpl_toolkits.mplot3d import Axes3D
# Global plotting defaults: a 16x9 canvas and the ggplot theme for all figures.
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')

# Load the dataset. The path has to be a single-line raw string; a literal
# that wraps onto the next line is a syntax error.
# NOTE(review): the file name is assumed to be "BASE PERSONAL.csv" (the
# original literal was wrapped at that space) -- confirm against the disk.
Dataframe = pd.read_csv(
    r"C:\xampp\htdocs\sistemasugerencias_itsz\BASE PERSONAL.csv"
)

# Quick inspection: the data itself, summary statistics, and the number of
# rows per value of the 'correo' column. Bare `Dataframe.head()` /
# `Dataframe.describe()` expressions print nothing when run as a script, so
# only the explicit print calls are kept.
print(Dataframe)
print(Dataframe.describe())
print(Dataframe.groupby('correo').size())

# Histograms of every column except 'correo'. `columns=` is used because
# passing the axis positionally (`drop([...], 1)`) was removed in pandas 2.0.
Dataframe.drop(columns=['correo']).hist()
plt.show()
# Pairwise scatter plots of the three selected columns, coloured by 'correo'.
# Rows with missing values are dropped for this plot only.
sb.pairplot(
    Dataframe.dropna(),
    hue='correo',
    height=3,
    vars=["nombre", "ap_paterno", "ap_materno"],
    kind='scatter',
)

# Feature matrix (one row per record, three columns) and the class vector.
X = np.array(Dataframe[["nombre", "ap_paterno", "ap_materno"]])
y = np.array(Dataframe['correo'])
print(X.shape)

# 3D scatter of the three features, one colour per class value.
# `fig.add_subplot(projection='3d')` is used instead of `Axes3D(fig)`:
# recent Matplotlib releases no longer attach an Axes3D built that way to the
# figure, which leaves the plot empty.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
colores = ['blue', 'red', 'green', 'blue', 'cyan',
           'yellow', 'orange', 'black', 'pink', 'brown',
           'purple']
# NOTE(review): this indexes `colores` with the raw 'correo' values, so it
# assumes they are integers in 0..10 -- confirm against the dataset.
asignar = [colores[row] for row in y]
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=asignar, s=70)
# Render now so later pyplot calls start on a fresh figure instead of drawing
# onto this 3D axes.
plt.show()

# Fit one KMeans model per candidate cluster count (1..19) and record the
# score each fitted model reports on the training data.
Nc = range(1, 20)
kmeans = [KMeans(n_clusters=n_grupos) for n_grupos in Nc]
score = [modelo.fit(X).score(X) for modelo in kmeans]

# Score as a function of the number of clusters.
plt.plot(Nc, score)
plt.xlabel('Numero de clusters')
plt.ylabel('Núcleo')
plt.title('Gráfica de dispersión')
plt.show()

# Final model with five clusters; `kmeans` is rebound from the list of
# candidates to this single fitted estimator.
kmeans = KMeans(n_clusters=5)
kmeans.fit(X)
centroids = kmeans.cluster_centers_
print(centroids)

# Cluster index assigned to every row of X, and the fitted centroids.
labels = kmeans.predict(X)
C = kmeans.cluster_centers_

# One colour per cluster; `asignar` holds the colour of each data point.
colores = ['red', 'green', 'blue', 'purple', 'gray']
asignar = [colores[row] for row in labels]

# 3D scatter of the data coloured by cluster, with the centroids drawn as
# large stars. A new figure is created because the earlier one has already
# been shown, and `add_subplot(projection='3d')` replaces `Axes3D(fig)`,
# which recent Matplotlib releases no longer attach to the figure.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=asignar, s=60)
ax.scatter(C[:, 0], C[:, 1], C[:, 2], marker='*', c=colores, s=1000)
# Render now so the 2D scatter plots that follow do not land on this 3D axes.
plt.show()

# 2D projections of the clustered data: one figure per pair of feature
# columns, with the data points coloured by cluster and the centroids drawn
# as large stars.
#
# The points are taken from X, the matrix the model was fitted on, so they
# live in the same space as the centroids in C. The previous version read the
# points from the 'op'/'ex'/'ag' columns, which are not the columns used to
# build X, while still overlaying centroids computed from X.
# NOTE(review): if 'op'/'ex'/'ag' were the intended features, X itself should
# be built from them instead -- confirm with the dataset owner.
for i, j in ((0, 1), (0, 2), (1, 2)):
    f1 = X[:, i]
    f2 = X[:, j]
    plt.scatter(f1, f2, c=asignar, s=70)
    plt.scatter(C[:, i], C[:, j], marker='*', c=colores, s=1000)
    plt.show()

# Working table: user, category and the cluster label assigned to each row.
copy = pd.DataFrame({
    'usuario': Dataframe['usuario'].values,
    'categoria': Dataframe['categoria'].values,
    'label': labels,
})

# Number of rows per cluster, listed next to the colour used for that cluster.
# The counts are aligned to the colour rows by index (cluster label).
cantidadGrupo = pd.DataFrame({'color': colores})
cantidadGrupo['cantidad'] = copy.groupby('label').size()
print(cantidadGrupo)

# Rows that fell into cluster 0.
group_referrer_index = copy['label'] == 0
group_referrals = copy.loc[group_referrer_index]

# Category breakdown inside cluster 0 for category ids 0..9; the counts are
# aligned by index, so a category with no rows in the cluster shows as NaN.
diversidadGrupo = pd.DataFrame({'categoria': list(range(10))})
diversidadGrupo['cantidad'] = group_referrals.groupby('categoria').size()
print(diversidadGrupo)

# For each centroid, the index of the row of X nearest to it (the matching
# distances are returned as well but are not needed here).
closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, X)
print(closest)

# Print the user that corresponds to each of those representative rows.
users = Dataframe['usuario'].values
for row in closest:
    print(users[row])

# Assign a new, unseen sample (one row, three features) to a cluster.
X_new = np.array([[50, 57.74, 15.66]])
new_labels = kmeans.predict(X_new)
print(new_labels)

You might also like