Assignment
Assignment
The Cabin column has 687 null values out of 891 total rows,
so we should drop it.
Likewise, the Name column has no relation to the Survived
column: a passenger's name has no effect on whether or not an
accident occurs.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Most frequent value of the "Embarked" column; computed once and reused
# for both the printout and the fill below.
embarked_mode = titanic_data['Embarked'].mode()[0]
print(embarked_mode)

# Replace the missing values in the "Embarked" column with the mode value.
# The result is assigned back rather than calling fillna(..., inplace=True)
# on the column selection: that chained in-place form triggers a
# FutureWarning on pandas 2.x and, with Copy-on-Write enabled, modifies only
# a temporary object and leaves titanic_data unchanged.
titanic_data['Embarked'] = titanic_data['Embarked'].fillna(embarked_mode)
Data Analysis
Data Visualization
# Apply seaborn's default plot theme.
sns.set()

# Frequency of each category in the two text columns. As bare expressions
# these only show output when evaluated interactively.
# NOTE(review): the "Sex" counts are computed twice in a row; the repeat
# looks redundant - confirm before removing.
titanic_data['Sex'].value_counts()
titanic_data['Sex'].value_counts()
titanic_data['Embarked'].value_counts()

# Convert the categorical labels to integer codes, updating the frame
# in place.
titanic_data.replace(
    {
        'Sex': {'male': 0, 'female': 1},
        'Embarked': {'S': 0, 'C': 1, 'Q': 2},
    },
    inplace=True,
)

# Preview the first rows after encoding.
titanic_data.head()
# Features: every column except PassengerId, Name, Ticket and Survived.
X = titanic_data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Survived'])

# Target: the Survived column.
Y = titanic_data['Survived']

# Show both so the selection can be checked by eye.
print(X)
print(Y)
Model Training
Logistic Regression
# Split the features and target into training and test portions; the
# training and evaluation steps use X_train/Y_train and X_test/Y_test.
# NOTE(review): test_size and random_state are assumed values - confirm
# them against the intended experiment setup.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2
)

# Train the logistic regression model on the training data.
model = LogisticRegression()
model.fit(X_train, Y_train)
Model Evaluation
Accuracy Score
# Accuracy on the training data: predict with the fitted model, then compare
# the predictions with the true labels.
X_train_prediction = model.predict(X_train)
print(X_train_prediction)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy score of training data : ', training_data_accuracy)

# Accuracy on the test data, computed the same way.
# NOTE(review): relies on X_test / Y_test coming from the same split that
# produced X_train / Y_train - confirm.
X_test_prediction = model.predict(X_test)
print(X_test_prediction)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of test data : ', test_data_accuracy)