ML File 211173
ML File 211173
CODE:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
# Preview the first rows without the index column.
print(titanic_df.head().to_string(index=False))
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the report.
titanic_df.info()
#PCA
CODE:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Generate a subtree
def generate_sub_tree(feature_name, train_data, label, class_list):
    """Build one level of the tree by branching on ``feature_name``.

    Every distinct value of the feature becomes a branch.  A branch whose rows
    all carry the same class becomes a leaf labelled with that class, and its
    rows are removed from the returned data.  Any other branch is marked with
    the placeholder ``"?"``.

    Args:
        feature_name: Column of ``train_data`` to branch on.
        train_data: DataFrame holding the feature column and the label column.
        label: Name of the label column.
        class_list: Iterable of class values to test each branch against.

    Returns:
        A ``(tree, train_data)`` tuple.  ``tree`` maps each feature value to a
        class or to ``"?"``; ``train_data`` is the input without the rows of
        the branches that became leaves.
    """
    feature_value_count_dict = train_data[feature_name].value_counts(sort=False)
    tree = {}
    for feature_value, count in feature_value_count_dict.items():
        feature_value_data = train_data[train_data[feature_name] == feature_value]
        branch_labels = feature_value_data[label]
        assigned_to_node = False
        for c in class_list:
            class_count = (branch_labels == c).sum()
            # The branch is pure when a single class accounts for every row.
            if class_count == count:
                tree[feature_value] = c
                # Rows of a finished leaf need no further splitting.
                train_data = train_data[train_data[feature_name] != feature_value]
                assigned_to_node = True
        if not assigned_to_node:
            # Mixed classes: leave a placeholder for the caller to resolve.
            tree[feature_value] = "?"
    return tree, train_data
# ID3 Algorithm
def id3(train_data_m, label):
    """Build a decision tree for ``train_data_m`` and return it as a dict.

    Args:
        train_data_m: Training DataFrame, including the label column.
        label: Name of the label column.

    Returns:
        The dict passed to ``make_tree``.  ``make_tree`` is defined elsewhere;
        its return value is ignored here, so it is expected to fill the dict
        in place.
    """
    # Work on a copy so the caller's frame is never modified.
    train_data = train_data_m.copy()
    tree = {}
    class_list = train_data[label].unique()
    make_tree(tree, None, train_data, label, class_list)
    return tree
# Read the exported DOT description of the tree; the file is closed as soon as
# the text has been loaded.
with open("titanic_tree.dot") as f:
    dot_graph = f.read()
# Wrap the DOT text in a graphviz Source object.
graphviz.Source(dot_graph)
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed,
    y,
    test_size=0.2,
    random_state=42,
)
Accuracy: 0.7988826815642458
EXPERIMENT-3
AIM:
CODE:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import graphviz
# Data preprocessing
# Drop identifier-like columns that are not used as features.
titanic_df.drop(['Name', 'Ticket', 'Cabin', 'PassengerId'], axis=1,
                inplace=True)
# Assign the filled columns back instead of calling fillna(inplace=True) on
# titanic_df[col]: the chained in-place form is deprecated in pandas and does
# not update the frame when Copy-on-Write is enabled.
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].median())
titanic_df['Embarked'] = titanic_df['Embarked'].fillna(
    titanic_df['Embarked'].mode()[0])
# Encode sex numerically and one-hot encode the port of embarkation.
titanic_df['Sex'] = titanic_df['Sex'].map({'male': 0, 'female': 1})
titanic_df = pd.get_dummies(titanic_df, columns=['Embarked'])
class DecisionTree:
# Tree model; fit() stores the structure returned by _grow_tree in self.tree.
# NOTE(review): this excerpt has lost its indentation, and only __init__ and
# fit are complete. The lines after fit are fragments whose `def` headers,
# enclosing loops and variable assignments are not shown.
def __init__(self, max_depth=None):
# max_depth is compared against the current depth in the stopping check below;
# the default None never equals an integer depth.
self.max_depth = max_depth
def fit(self, X, y):
# Grow the tree from the full data set and keep it on the instance.
self.tree = self._grow_tree(X, y)
# NOTE(review): the lines below read `depth`, `num_labels`, `num_samples`,
# `best_feature` and `best_threshold`, none of which is assigned in this
# excerpt. They presumably belong to `_grow_tree(self, X, y, depth)`, whose
# header and split-selection code are missing - TODO confirm against the
# original script.
# Stopping criteria
if depth == self.max_depth or num_labels == 1 or num_samples <
2:
# Leaf node: predict the most frequent label (np.bincount assumes y holds
# non-negative integers - TODO confirm).
return {'prediction': np.argmax(np.bincount(y))}
# Split data
# Rows whose chosen feature is <= the threshold go left, the others go right.
left_indices = np.where(X[:, best_feature] <=
best_threshold)[0]
right_indices = np.where(X[:, best_feature] >
best_threshold)[0]
# Create sub-trees
left_tree = self._grow_tree(X[left_indices], y[left_indices],
depth + 1)
right_tree = self._grow_tree(X[right_indices],
y[right_indices], depth + 1)
# NOTE(review): this `continue` has no enclosing loop in the excerpt; it
# presumably comes from a threshold-search loop that skips splits leaving one
# side empty - TODO confirm.
if len(left_indices) == 0 or len(right_indices) == 0:
continue
class RandomForest:
    """Ensemble of ``DecisionTree`` models, each fitted on a bootstrap sample.

    NOTE(review): only ``__init__`` and ``fit`` are present in this excerpt;
    any prediction method is not shown here.
    """

    def __init__(self, n_estimators=100, max_depth=None):
        """Store the ensemble settings.

        Args:
            n_estimators: Number of trees trained by ``fit``.
            max_depth: Passed unchanged to every ``DecisionTree``.
        """
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.trees = []  # fitted trees, in training order

    def fit(self, X, y):
        """Train ``n_estimators`` trees on bootstrap resamples of ``(X, y)``.

        Args:
            X: Feature matrix, one row per sample.
            y: Labels aligned with the rows of ``X``.
        """
        # Index by position: converting first keeps pandas inputs from
        # treating the bootstrap indices as column or index labels.
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = len(X)
        # Start from an empty ensemble so calling fit() again does not stack
        # new trees on top of ones trained on earlier data.
        self.trees = []
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth)
            # Bootstrap: draw n_samples rows with replacement.
            indices = np.random.choice(n_samples, n_samples, replace=True)
            tree.fit(X[indices], y[indices])
            self.trees.append(tree)
# Make predictions
predictions = rf.predict(X_test)
graph = graphviz.Source(dot_graph)
# Save tree as PNG
graph.render("titanic_decision_tree", format='png', cleanup=True)
OUTPUT:
Accuracy: 0.5921787709497207
titanic_decision_tree.png