0% found this document useful (0 votes)
7 views

code

Uploaded by

Houssam Alrifaii
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
7 views

code

Uploaded by

Houssam Alrifaii
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 1

import pandas as pd

# Load the raw loyalty data.
df = pd.read_csv('loyalty.csv')

# 1. Identify and replace missing values
#
# Every fill is assigned back to its column. Calling
# `df[col].fillna(..., inplace=True)` works on an intermediate Series:
# recent pandas 2.x releases emit a FutureWarning for it, and with
# Copy-on-Write enabled the DataFrame itself is left unchanged, so the
# explicit assignment is what actually updates `df`.

# Numeric fields: a missing value is replaced with 0.
df['spend'] = df['spend'].fillna(0)
df['first_month'] = df['first_month'].fillna(0)
df['items_in_first_month'] = df['items_in_first_month'].fillna(0)

# Categorical / text fields: each one gets its own default label.
df['region'] = df['region'].fillna('Unknown')
df['loyalty_years'] = df['loyalty_years'].fillna('0-1')
df['joining_month'] = df['joining_month'].fillna('Unknown')
df['promotion'] = df['promotion'].fillna('No')

# 2. Convert values between data types

# 'spend' and 'first_month' are parsed as numbers in case they were read as
# strings. errors='coerce' turns anything unparseable into NaN, which is
# then replaced with 0 before rounding to 2 decimals.
df['spend'] = pd.to_numeric(df['spend'], errors='coerce').fillna(0).round(2)
df['first_month'] = (
    pd.to_numeric(df['first_month'], errors='coerce').fillna(0).round(2)
)

# 'items_in_first_month' is stored as an integer: fill any remaining gaps
# with 0 first, because NaN cannot be cast to int.
df['items_in_first_month'] = df['items_in_first_month'].fillna(0).astype(int)

# 3. Clean categorical and text data by manipulating strings

# Standardize the string format of the free-text columns: strip surrounding
# whitespace, then title-case 'region' and capitalize 'joining_month' so
# that differently-cased spellings collapse to one value.
df['region'] = df['region'].str.strip().str.title()
df['joining_month'] = df['joining_month'].str.strip().str.capitalize()

# Store all categorical features as dtype 'category' for efficient memory
# usage. This runs after the string clean-up so the categories are built
# from the standardized values.
df['region'] = df['region'].astype('category')
df['loyalty_years'] = df['loyalty_years'].astype('category')
df['joining_month'] = df['joining_month'].astype('category')
df['promotion'] = df['promotion'].astype('category')

# Preview the first five rows of the cleaned DataFrame.
print(df.head(n=5))

# Write the cleaned data to a new CSV file, leaving out the index column.
df.to_csv(path_or_buf='cleaned_loyalty.csv', index=False)

You might also like