Final Code

The document outlines a data analysis process using RFM (Recency, Frequency, Monetary) metrics to evaluate customer behavior from a dataset. It includes steps for calculating RFM scores, identifying and removing outliers, scaling the data, and determining the optimal number of clusters using the Elbow Method and Silhouette Score for KMeans clustering. The final output includes visualizations and metrics for optimal clustering.


# -*- coding: utf-8 -*-

"""New_3.ipynb

Automatically generated by Colab.

Original file is located at


https://colab.research.google.com/drive/1ZlO5nGZT8XhUlvk6fn91RKwTTdAtv004
"""

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import datetime as dt

# Load the dataset


df = pd.read_csv("/content/drive/MyDrive/kaggle.csv")

# Calculate RFM metrics

# Recency is already provided as a column in the dataset
# Frequency: total number of purchases across all channels
df['Frequency'] = (df['NumDealsPurchases'] + df['NumCatalogPurchases']
                   + df['NumStorePurchases'] + df['NumWebPurchases'])
# Monetary: total amount spent across all product categories
df['Monetary'] = (df['MntFishProducts'] + df['MntMeatProducts'] + df['MntFruits']
                  + df['MntSweetProducts'] + df['MntWines'] + df['MntGoldProds'])

rfm = df[['Recency', 'Frequency', 'Monetary']].copy()  # copy to avoid SettingWithCopyWarning on later assignments


rfm.head()

rfm.loc[:, "R_Score"] = pd.qcut(rfm['Recency'], 5, labels=[5, 4, 3, 2, 1])


rfm.loc[:, "F_Score"] = pd.qcut(rfm['Frequency'], 5, labels=[1, 2, 3, 4, 5])
rfm.loc[:, "M_Score"] = pd.qcut(rfm['Monetary'], 5, labels=[1, 2, 3, 4, 5])
rfm.loc[:, "RFM_SCORE"] = (rfm['R_Score'].astype(str) + rfm['F_Score'].astype(str)
+ rfm['M_Score'].astype(str))
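
# Example of the concatenation above: a customer in the most recent Recency
# quintile (R=5), the fourth Frequency quintile (F=4) and the third Monetary
# quintile (M=3) ends up with the string RFM_SCORE "543".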

rfm.head()

from scipy.stats import zscore

# Convert the string RFM_SCORE into a numeric RFM_score column before applying zscore


rfm['RFM_score'] = pd.to_numeric(rfm['RFM_SCORE'])

# Step 1: Calculate z-scores for the numeric RFM_score column


z_scores = rfm[['RFM_score']].apply(zscore)

# Step 2: Identify outliers (z-score > 3 or < -3)


outliers = (z_scores.abs() > 3).any(axis=1)
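
# For reference, zscore standardizes each value as (x - mean) / std, so the
# filter above flags rows whose RFM_score lies more than three standard
# deviations from the mean.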

print(type(outliers))
outliers.head(10)
rfm_outliers = df[outliers].reset_index(drop=True)
rfm_outliers.head(10)
#print(rfm_outliers)

# Step 3: Remove outliers from the original DataFrame (df)


rfm_cleaned = rfm[~outliers].reset_index(drop=True)  # keep only the rows that are not outliers

rfm_df = rfm_cleaned[['RFM_score']]
print(rfm_df)

# Save the cleaned data to a CSV file


#rfm_df.to_csv("RFM_Cleaned.csv", index=False)

# Initialize the MinMaxScaler (already imported above)


scaler = MinMaxScaler()

# Select only the desired columns for scaling


rfm_scaled = rfm_df[['RFM_score']]

# Apply fit_transform on the selected columns


rfm_scaled_t = scaler.fit_transform(rfm_scaled)

# Create the DataFrame with the correct column names


rfm_scaled_df = pd.DataFrame(rfm_scaled_t, columns=['RFM_score'])
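
# MinMaxScaler rescales each column to [0, 1] via (x - min) / (max - min),
# so the smallest RFM_score maps to 0 and the largest to 1.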

print("Scaled Dataset Using MinMaxScaler")


#print(rfm_scaled_df.head())
print(rfm_scaled_df)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans # Import KMeans
from sklearn.metrics import silhouette_score # Import silhouette_score

# Step 4: Elbow Method


wcss = [] # Within-Cluster Sum of Squares
k_range = range(2, 11) # Define k_range here
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(rfm_scaled_df)
    wcss.append(kmeans.inertia_)

# Finding the optimal k using the Elbow Method (point of maximum curvature)
diff_wcss = np.diff(wcss)
diff_wcss_ratio = diff_wcss[:-1] / diff_wcss[1:]
optimal_k = k_range[np.argmax(diff_wcss_ratio) + 1] # Use the defined k_range
print(f'Optimal number of clusters (k) based on Elbow Method: {optimal_k}')
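
# A toy check of the heuristic above on made-up WCSS values (illustrative
# numbers only, not from this dataset): the successive drops are
# [-60, -15, -5, -2, -1], the ratios of one drop to the next are
# [4.0, 3.0, 2.5, 2.0], and the largest ratio marks k = 3 as the sharpest bend.
toy_wcss = [100, 40, 25, 20, 18, 17]                # hypothetical WCSS for k = 2..7
toy_ratio = np.diff(toy_wcss)[:-1] / np.diff(toy_wcss)[1:]
print(list(range(2, 8))[np.argmax(toy_ratio) + 1])  # prints 3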

# Plot the Elbow Curve


plt.figure(figsize=(8, 5))
plt.plot(k_range, wcss, marker='o') # Use k_range here as well
plt.title('Elbow Method')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('WCSS (Within-Cluster Sum of Squares)')
plt.show()

# Step 5: Silhouette Coefficient


silhouette_scores = []
for k in range(2, 11):  # Silhouette is not defined for k=1
    kmeans = KMeans(n_clusters=k, random_state=42)
    # Use the numeric data for clustering
    labels = kmeans.fit_predict(rfm_scaled_df)
    silhouette_scores.append(silhouette_score(rfm_scaled_df, labels))

# Print the optimal k based on the highest silhouette score


# Convert range to a list to make it subscriptable
k_values = list(range(2, 11))
optimal_k = k_values[np.argmax(silhouette_scores)]
print(f'Optimal number of clusters (k) based on Silhouette Score: {optimal_k}')

# Display the scores


print("Silhouette Scores for Different K Values:")
for k, score in zip(k_values, silhouette_scores):
    print(f"K = {k}: Silhouette Score = {score:.2f}")
