# Data Cleaning and Exploratory Analysis on a Public Dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Data cleaning
# 1. Inspecting the data (no inspection code appears in this section)
# 2. Handling missing values
# Fill missing 'Embarked' entries with the column's most frequent value.
# The filled column is assigned back instead of calling
# fillna(inplace=True) on the selected column: that chained in-place call
# acts on an intermediate object, emits a FutureWarning in pandas 2.x, and
# leaves the DataFrame unchanged under Copy-on-Write.
embarked_mode = titanic_data['Embarked'].mode()[0]
titanic_data['Embarked'] = titanic_data['Embarked'].fillna(embarked_mode)
# Drop the rows whose 'Fare' value is missing.
titanic_data.dropna(subset=['Fare'], inplace=True)
# 3. Correcting data types
# Convert 'Survived' and 'Pclass' to the categorical dtype.
for column_name in ('Survived', 'Pclass'):
    titanic_data[column_name] = titanic_data[column_name].astype('category')
# Count plot of passengers per 'Pclass', drawn on the first cell of a
# 1-row, 2-column subplot grid.
class_count_axes = plt.subplot(1, 2, 1)
sns.countplot(x='Pclass', data=titanic_data, ax=class_count_axes)
plt.show()
# c. Survival rate by passenger class
# Cast 'Survived' and 'Pclass' to int before plotting 'Survived' against
# 'Pclass'.
for column_name in ('Survived', 'Pclass'):
    titanic_data[column_name] = titanic_data[column_name].astype(int)
plt.figure(figsize=(8, 6))
survival_axes = sns.barplot(x='Pclass', y='Survived', data=titanic_data)
survival_axes.set_title('Survival Rate by Passenger Class')
plt.show()
# f. Correlation matrix
# Correlate numeric columns only. Since pandas 2.0, DataFrame.corr() no
# longer drops non-numeric columns by default and raises ValueError when
# it meets one (e.g. a string column such as 'Embarked'), so the
# restriction is requested explicitly with numeric_only=True.
plt.figure(figsize=(8, 4))
correlation_matrix = titanic_data.corr(numeric_only=True)
# Annotate each cell with its coefficient, formatted to two decimals.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
plt.show()