0% found this document useful (0 votes)
7 views

code

Uploaded by

Houssam Alrifaii
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
7 views

code

Uploaded by

Houssam Alrifaii
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/1

import pandas as pd

# Clean the data in 'loyalty.csv': fill missing values, coerce the numeric
# columns, normalise the text columns, then write the result to
# 'cleaned_loyalty.csv'.

# Load the data
df = pd.read_csv('loyalty.csv')

# 1. Identify and replace missing values
#
# Each filled column is assigned back to the frame. The chained form
# `df[col].fillna(value, inplace=True)` acts on a temporary Series: pandas 2.x
# warns about it, and with copy-on-write enabled it leaves `df` unchanged.
MISSING_VALUE_DEFAULTS = {
    'spend': 0,
    'first_month': 0,
    'items_in_first_month': 0,
    'region': 'Unknown',
    'loyalty_years': '0-1',
    'joining_month': 'Unknown',
    'promotion': 'No',
}
for column, default in MISSING_VALUE_DEFAULTS.items():
    df[column] = df[column].fillna(default)

# 2. Convert values between data types
#
# errors='coerce' turns entries that cannot be parsed as numbers into NaN,
# which are then replaced with 0 before rounding or casting.
df['spend'] = pd.to_numeric(df['spend'], errors='coerce').fillna(0).round(2)
df['first_month'] = (
    pd.to_numeric(df['first_month'], errors='coerce').fillna(0).round(2)
)
# Coerce before the integer cast so a single malformed entry becomes 0
# instead of aborting the whole run; astype(int) truncates any fraction.
df['items_in_first_month'] = (
    pd.to_numeric(df['items_in_first_month'], errors='coerce')
    .fillna(0)
    .astype(int)
)

# 3. Clean categorical and text data by manipulating strings
#
# Strip surrounding whitespace and normalise the casing so that variants of
# the same label compare equal.
df['region'] = df['region'].str.strip().str.title()
df['joining_month'] = df['joining_month'].str.strip().str.capitalize()

# Store the label columns with the 'category' dtype.
CATEGORICAL_COLUMNS = ('region', 'loyalty_years', 'joining_month', 'promotion')
for column in CATEGORICAL_COLUMNS:
    df[column] = df[column].astype('category')

# Display the first rows of the cleaned DataFrame
print(df.head())

# Save the cleaned data to a new CSV file, without the index column
df.to_csv('cleaned_loyalty.csv', index=False)

You might also like