# 1 - PCA Python Code
# Principal component analysis over selected columns of `consolidado`.
#
# Fits a PCA with every component retained, reports the explained-variance
# ratios, and prints the feature loadings of the first six components.

# Columns of `consolidado` used as the PCA input features.
variables = [
    '_0101_MUEBLES', '_0111_COLCHONES', '_0511_BOLSAS', '_0625_ELECTRICOS',
    '_0628_COMPUTACION',
    '_0629_TV_Y_VIDEO', '_0632_CELULARES', '_0634_LINEA_BLANCA', '_0701_FRAGANCIAS',
    'JOVENES', 'FAMILIAS_C_BEBES_NINOS',
    'ADULTOS_S_HIJOS', 'FAMILIAS_C_ADOLESCENTES_NINOS',
    'ADULTOS_MAYORES', 'TICKET_PROMEDIO', 'LC', 'CLIMA', 'UTILIDAD_ACUM_PCTJ',
    'ROTACION_ACUM_PCTJ',
    'VENTA_PROM', 'INVENTARIO_PROM', 'MT2', 'IDH', 'PEA', 'COMPETENCIA',
]

# n_components=None keeps all components instead of truncating.
# NOTE(review): PCA is sensitive to feature scale; confirm these columns were
# standardized upstream, otherwise large-magnitude columns dominate the PCs.
pca = PCA(n_components=None)
pca.fit(consolidado[variables])

explained_variance_ratio = pca.explained_variance_ratio_
print("Explained Variance Ratios (by Principal Components):")
print(explained_variance_ratio)

# One label per fitted component: PC1, PC2, ...
pc_labels = [f'PC{i + 1}' for i in range(len(explained_variance_ratio))]

# Per-component and cumulative explained variance, kept for later inspection.
explained_variance_df = pd.DataFrame({
    'Principal Component': pc_labels,
    'Explained Variance Ratio': explained_variance_ratio,
    'Cumulative Variance Ratio': explained_variance_ratio.cumsum(),
})

components = pca.components_
print(
    "Principal Components (each row corresponds to a PC, each column to an "
    "original feature):"
)
print(components)

# Label the loading matrix so a component can be looked up by name ('PC1', ...)
# and each loading is tied to its source feature. The original script read
# `components_df` without ever creating it.
components_df = pd.DataFrame(components, index=pc_labels, columns=variables)

# Loadings of the first six components, each sorted from the largest to the
# smallest signed value. Because the sort is on the signed loading, features
# with strongly negative loadings appear at the bottom of each listing.
# Raises KeyError if fewer than six components were fitted.
leading_importances = []
for pc_number in range(1, 7):
    pc_label = f'PC{pc_number}'
    sorted_loadings = components_df.loc[pc_label].sort_values(ascending=False)
    print(f"\nFeatures sorted by importance in {pc_label}:")
    print(sorted_loadings)
    leading_importances.append(sorted_loadings)

# Keep the individual module-level names in case later code refers to them.
(
    pc1_importance,
    pc2_importance,
    pc3_importance,
    pc4_importance,
    pc5_importance,
    pc6_importance,
) = leading_importances