Binarization
Binarization
import numpy as np
import pandas as pd
In [29]:
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.preprocessing import Binarizer
from sklearn.tree import DecisionTreeClassifier
In [30]:
# Read train.csv and keep only the five columns used in the cells below.
selected_columns = ['Age','Fare','SibSp','Parch','Survived']
df = pd.read_csv('train.csv')[selected_columns]
In [31]:
# Remove, in place, every row that has a missing value in any column.
df.dropna(axis=0, how='any', inplace=True)
In [32]:
# Display the first five rows of df.
df.head()
Out[32]: Age Fare SibSp Parch Survived
0 22.0 7.2500 1 0 0
1 38.0 71.2833 1 0 1
2 26.0 7.9250 0 0 1
3 35.0 53.1000 1 0 1
4 35.0 8.0500 0 0 0
In [33]:
# Add the SibSp and Parch columns element-wise into a new 'family' column.
df['family'] = df['SibSp'].add(df['Parch'])
In [34]:
# Display the first five rows of df, now including the 'family' column.
df.head()
Out[34]: Age Fare SibSp Parch Survived family
0 22.0 7.2500 1 0 0 1
1 38.0 71.2833 1 0 1 1
2 26.0 7.9250 0 0 1 0
3 35.0 53.1000 1 0 1 1
4 35.0 8.0500 0 0 0 0
In [35]:
# SibSp and Parch have been summed into 'family'; remove both columns in place.
df.drop(['SibSp','Parch'], axis=1, inplace=True)
In [36]:
# Display the first five rows of df after SibSp and Parch were dropped.
df.head()
Out[36]: Age Fare Survived family
0 22.0 7.2500 0 1
1 38.0 71.2833 1 1
2 26.0 7.9250 1 0
3 35.0 53.1000 1 1
4 35.0 8.0500 0 0
In [37]:
# The target is the 'Survived' column; every other column of df is a feature.
y = df['Survived']
X = df.drop('Survived', axis=1)
In [38]:
# Hold out 20% of the rows as a test set.
# NOTE(review): this line was cut off at "random_stat" in the exported notebook,
# which left it syntactically invalid. The seed value 42 is an assumption --
# confirm against the original notebook before relying on the printed scores.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
In [39]:
# Display the first five rows of the training features.
X_train.head()
Out[39]: Age Fare family
73 26.0 14.4542 1
In [40]:
# Baseline without binarization: train a decision tree on the raw features.
clf = DecisionTreeClassifier().fit(X_train, y_train)
y_pred = clf.predict(X_test)
# Accuracy of the baseline predictions on the held-out test rows.
accuracy_score(y_test, y_pred)
Out[40]: 0.6293706293706294
In [41]:
# Mean accuracy of a fresh decision tree over 10-fold cross-validation on the
# full, un-binarized X and y. The exported line was missing the closing
# parenthesis of np.mean(...), which made it a syntax error.
np.mean(cross_val_score(DecisionTreeClassifier(),X,y,cv=10,scoring='accuracy'))
Out[41]: 0.6429381846635367
In [20]:
# Applying Binarization
In [43]:
# NOTE(review): the cell that defined `trf` is missing from this export, so the
# name was undefined at this point. The definition below is a reconstruction:
# binarize 'family' (Binarizer's default threshold of 0.0 maps values > 0 to 1
# and everything else to 0) and pass the remaining columns through unchanged.
# That produces the column order ['family', 'Age', 'Fare'] that the following
# cell uses to label the transformed array. Confirm the exact Binarizer
# arguments against the original notebook.
trf = ColumnTransformer(
    [('bin', Binarizer(), ['family'])],
    remainder='passthrough',
)

# Fit on the training split only, then apply the fitted transformer to the
# test split.
X_train_trf = trf.fit_transform(X_train)
X_test_trf = trf.transform(X_test)
In [44]:
# Show the transformed training array as a DataFrame with labelled columns.
pd.DataFrame(data=X_train_trf, columns=['family','Age','Fare'])
In [45]:
# Train a new decision tree on the transformed (binarized) training features.
clf = DecisionTreeClassifier().fit(X_train_trf, y_train)
y_pred2 = clf.predict(X_test_trf)
# Accuracy on the held-out test rows, for comparison with the baseline score.
accuracy_score(y_test, y_pred2)
Out[45]: 0.6363636363636364
In [46]:
# Transform the full feature matrix, then report the mean accuracy of a fresh
# decision tree over 10-fold cross-validation on the transformed data.
# The exported line was cut off at "scoring='accura"; it is completed here with
# the same scoring argument and closing parentheses as the cross-validation
# call on the un-binarized data.
X_trf = trf.fit_transform(X)
np.mean(cross_val_score(DecisionTreeClassifier(),X_trf,y,cv=10,scoring='accuracy'))
Out[46]: 0.6304186228482003
In [ ]: