# Código K-Means en Spyder
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
# Global plot defaults: large canvas and the ggplot theme.
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')

# Load the dataset. The path literal used to be split across two physical
# lines, which is a syntax error; it is rejoined here.
# NOTE(review): assumes the file is named "BASE PERSONAL.csv", with a single
# space where the line break was -- confirm against the file on disk.
Dataframe = pd.read_csv(
    r"C:\xampp\htdocs\sistemasugerencias_itsz\BASE PERSONAL.csv"
)

# Print the table and its summary statistics. The bare `Dataframe.head()` and
# `Dataframe.describe()` expressions had no effect when run as a script and
# were dropped.
print(Dataframe)
print(Dataframe.describe())

# Number of rows for each distinct value of 'correo'.
print(Dataframe.groupby('correo').size())

# Histograms of the columns other than 'correo'. `columns=` replaces the
# positional axis argument (`drop([...], 1)`), which pandas 2.0 rejects.
Dataframe.drop(columns=['correo']).hist()
plt.show()
# Grid of pairwise scatter plots over 'nombre', 'ap_paterno' and 'ap_materno',
# coloured by 'correo'. Rows with missing values are discarded first.
sb.pairplot(
    data=Dataframe.dropna(),
    vars=["nombre", "ap_paterno", "ap_materno"],
    hue='correo',
    kind='scatter',
    height=3,
)
# Feature matrix (three selected columns) and the label vector.
X = np.array(Dataframe[["nombre", "ap_paterno", "ap_materno"]])
y = np.array(Dataframe['correo'])
print(X.shape)

# 3-D scatter of the three feature columns, one colour per label.
# `add_subplot(..., projection='3d')` is used instead of `Axes3D(fig)`:
# recent matplotlib releases no longer attach a directly-constructed Axes3D
# to the figure, which leaves the figure blank.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
colores = [
    'blue', 'red', 'green', 'blue', 'cyan',
    'yellow', 'orange', 'black', 'pink', 'brown',
    'purple',
]
# NOTE(review): every value in y is used directly as an index into `colores`,
# so 'correo' must hold integers in 0..10 -- confirm against the CSV.
asignar = [colores[row] for row in y]
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=asignar, s=70)
# Show the figure now; otherwise it stays the current figure and the pyplot
# calls that follow would draw on these 3-D axes.
plt.show()
# Elbow analysis: fit one K-Means model per cluster count in 1..19 and record
# its score on X (scikit-learn documents `score` as the opposite of the
# K-means objective).
# NOTE(review): KMeans needs at least as many samples as clusters, so X must
# contain 19 or more rows -- confirm for this dataset.
Nc = range(1, 20)
kmeans = [KMeans(n_clusters=k) for k in Nc]
score = [model.fit(X).score(X) for model in kmeans]

# Open a dedicated figure: without it, `plt.plot` draws on whichever figure
# is current, which can be the 3-D scatter created earlier in the script.
plt.figure()
plt.plot(Nc, score)
plt.xlabel('Numero de clusters')
plt.ylabel('Núcleo')
plt.title('Gráfica de dispersión')
plt.show()
# Final model: five clusters fitted on X.
kmeans = KMeans(n_clusters=5).fit(X)
centroids = kmeans.cluster_centers_
print(centroids)

# Cluster index of every row, and one colour per cluster.
labels = kmeans.predict(X)
C = centroids  # same array as kmeans.cluster_centers_; both names are kept
colores = ['red', 'green', 'blue', 'purple', 'gray']
asignar = [colores[row] for row in labels]

# Draw on a new figure: the earlier `fig` has already been passed to
# `plt.show()`, and `Axes3D(fig)` no longer attaches itself to the figure on
# recent matplotlib releases.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=asignar, s=60)
# Centroids are drawn as large stars in their cluster's colour.
ax.scatter(C[:, 0], C[:, 1], C[:, 2], marker='*', c=colores, s=1000)
# Without an explicit show() this figure never appears when run as a script.
plt.show()
# Two columns extracted as 1-D arrays. The script used to assign f1/f2 three
# times in a row ('op'/'ex', then 'op'/'ag', then 'ex'/'ag') with nothing in
# between, so only the last pair was ever observable; the dead stores are
# removed.
# NOTE(review): 'ex' and 'ag' are not among the columns used to build X
# ('nombre', 'ap_paterno', 'ap_materno') -- confirm they exist in the CSV.
f1 = Dataframe['ex'].values
f2 = Dataframe['ag'].values
# Per-row table pairing 'usuario' and 'categoria' with the cluster label.
# NOTE(review): requires 'usuario' and 'categoria' columns in the CSV --
# confirm they exist.
copy = pd.DataFrame()
copy['usuario'] = Dataframe['usuario'].values
copy['categoria'] = Dataframe['categoria'].values
copy['label'] = labels

# Rows per cluster, listed next to that cluster's plot colour. The Series
# from groupby().size() is aligned on the index, so row i receives the count
# for label i (NaN if that label is absent).
cantidadGrupo = pd.DataFrame()
cantidadGrupo['color'] = colores
cantidadGrupo['cantidad'] = copy.groupby('label').size()
print(cantidadGrupo)

# `group_referrals` was used without ever being defined (NameError). It is
# defined here as the rows assigned to one cluster.
# NOTE(review): cluster 0 is an assumption -- pick the cluster to inspect.
group_referrals = copy[copy['label'] == 0]

# Rows per 'categoria' value (0..9) inside the selected cluster, again
# aligned on the index.
diversidadGrupo = pd.DataFrame()
diversidadGrupo['categoria'] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
diversidadGrupo['cantidad'] = group_referrals.groupby('categoria').size()
print(diversidadGrupo)
# For each cluster centre, the index of the nearest row of X (the matching
# distances are discarded).
closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, X)
print(closest)

# Print the 'usuario' value of each of those rows. The loop body was not
# indented, which is a syntax error.
users = Dataframe['usuario'].values
for row in closest:
    print(users[row])
# Classify one previously unseen sample: a single row of three feature values
# is handed to the fitted model and the predicted cluster index is printed.
X_new = np.array([50, 57.74, 15.66]).reshape(1, -1)
new_labels = kmeans.predict(X_new)
print(new_labels)