CO3
CO3
1. Program to implement decision trees using any standard dataset available in the public domain and find the
accuracy of the algorithm. (Implement the pruning technique to avoid overfitting and re-evaluate the decision
tree's performance after pruning. Compare the decision tree model's performance with other classification
algorithms, such as k-Nearest Neighbors (k-NN) or Naive Bayes. Use either the ID3, C4.5, or CART (Gini impurity)
algorithm.)
# Decision tree (CART, Gini impurity) on the Iris dataset: train an unpruned
# tree, train a pruned tree, and compare both against k-NN and Gaussian
# Naive Bayes using test-set accuracy.
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree

data = load_iris()
X, y = data.data, data.target

# Hold out a test set so accuracy is measured on unseen samples.
# NOTE(review): split size and seed are assumptions; the original split call
# was not present in the script.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Unpruned decision tree
tree_clf = DecisionTreeClassifier(criterion="gini", random_state=42)
tree_clf.fit(X_train, y_train)
y_pred = tree_clf.predict(X_test)
print(f"Decision Tree accuracy (unpruned): {accuracy_score(y_test, y_pred):.4f}")

plt.figure(figsize=(15, 8))
plot_tree(
    tree_clf,
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    filled=True,
)
plt.title("Decision Tree (unpruned)")
plt.show()

# Pruned decision tree (minimal cost-complexity pruning)
# NOTE(review): ccp_alpha=0.02 is an assumed pruning strength; tune it, e.g.
# via tree_clf.cost_complexity_pruning_path(X_train, y_train).
pruned_tree_clf = DecisionTreeClassifier(
    criterion="gini", ccp_alpha=0.02, random_state=42
)
pruned_tree_clf.fit(X_train, y_train)
y_pruned_pred = pruned_tree_clf.predict(X_test)
print(f"Decision Tree accuracy (pruned): {accuracy_score(y_test, y_pruned_pred):.4f}")

plt.figure(figsize=(15, 8))
plot_tree(
    pruned_tree_clf,
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    filled=True,
)
plt.title("Decision Tree (pruned)")
plt.show()

# k-Nearest Neighbors
knn_clf = KNeighborsClassifier(n_neighbors=5)
knn_clf.fit(X_train, y_train)
y_knn_pred = knn_clf.predict(X_test)
print(f"k-NN accuracy: {accuracy_score(y_test, y_knn_pred):.4f}")

# Naive Bayes
nb_clf = GaussianNB()
nb_clf.fit(X_train, y_train)
y_nb_pred = nb_clf.predict(X_test)
print(f"Naive Bayes accuracy: {accuracy_score(y_test, y_nb_pred):.4f}")
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_iris
import seaborn as sns
import matplotlib.pyplot as plt
# Linear regression on the Iris dataset: inspect feature correlations, then
# predict sepal length from (a) petal length alone and (b) all other features.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Replace the "... (cm)" feature names with short identifiers used below.
df.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

correlation_matrix = df.corr()
print("Correlation Matrix:\n", correlation_matrix)
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title("Correlation Matrix for Iris Dataset Features")
plt.show()

# Target is sepal_length; the remaining three columns are the predictors.
X = df.drop(columns="sepal_length")
y = df["sepal_length"]

# The split must come after X/y are defined; without it X_train/X_test below
# do not exist.
# NOTE(review): test_size and random_state are assumptions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Simple linear regression: a single predictor (petal_length).
X_simple = X_train[["petal_length"]]
X_simple_test = X_test[["petal_length"]]
lr_simple = LinearRegression()
lr_simple.fit(X_simple, y_train)
y_pred_simple = lr_simple.predict(X_simple_test)

# Multiple linear regression: all three predictors.
lr_multiple = LinearRegression()
lr_multiple.fit(X_train, y_train)
y_pred_multiple = lr_multiple.predict(X_test)

# Report test-set error and goodness of fit for both models.
print("Simple Linear Regression (petal_length):")
print(f"  MSE: {mean_squared_error(y_test, y_pred_simple):.4f}")
print(f"  R2:  {r2_score(y_test, y_pred_simple):.4f}")
print("Multiple Linear Regression (all features):")
print(f"  MSE: {mean_squared_error(y_test, y_pred_multiple):.4f}")
print(f"  R2:  {r2_score(y_test, y_pred_multiple):.4f}")
Output
Correlation Matrix:
              sepal_length  sepal_width  petal_length  petal_width
sepal_length      1.000000    -0.117570      0.871754     0.817941
sepal_width      -0.117570     1.000000     -0.428440    -0.366126
petal_length      0.871754    -0.428440      1.000000     0.962865
petal_width       0.817941    -0.366126      0.962865     1.000000
# Compare plain, Ridge and Lasso linear regression on the Iris dataset,
# predicting sepal length from the remaining features.
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# The raw feature names are "sepal length (cm)" etc.; rename them so the
# "sepal_length" / "petal_length" lookups below do not raise KeyError.
df.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

X = df.drop(columns="sepal_length")
y = df["sepal_length"]

# NOTE(review): test_size and random_state are assumptions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Single-feature model uses the unscaled petal_length column.
X_single_feature = X_train[["petal_length"]]
X_single_test = X_test[["petal_length"]]
lr_single = LinearRegression()
lr_single.fit(X_single_feature, y_train)
y_pred_single = lr_single.predict(X_single_test)

# Multi-feature models are trained on the standardized features.
lr_multi = LinearRegression()
lr_multi.fit(X_train_scaled, y_train)
y_pred_multi = lr_multi.predict(X_test_scaled)

ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)
y_pred_ridge = ridge.predict(X_test_scaled)

lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)
y_pred_lasso = lasso.predict(X_test_scaled)

# Report test-set error and goodness of fit for every model.
predictions = {
    "Linear Regression (petal_length only)": y_pred_single,
    "Linear Regression (all features)": y_pred_multi,
    "Ridge Regression (alpha=1.0)": y_pred_ridge,
    "Lasso Regression (alpha=0.1)": y_pred_lasso,
}
for model_name, y_hat in predictions.items():
    print(f"{model_name}:")
    print(f"  MSE: {mean_squared_error(y_test, y_hat):.4f}")
    print(f"  R2:  {r2_score(y_test, y_hat):.4f}")