
CO3

1. Program to implement decision trees using a standard dataset available in the public domain and find the
accuracy of the algorithm. (Implement a pruning technique to avoid overfitting and re-evaluate the decision
tree's performance after pruning. Compare the decision tree model's performance with other classification
algorithms, such as k-Nearest Neighbors (k-NN) or Naive Bayes. Use either the ID3, C4.5, or CART (Gini impurity)
algorithm.)

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt

# Load the Iris dataset and hold out 30% of it for testing
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Unpruned decision tree (CART with Gini impurity)
tree_clf = DecisionTreeClassifier(criterion="gini", random_state=42)
tree_clf.fit(X_train, y_train)
y_pred = tree_clf.predict(X_test)
accuracy_before_pruning = accuracy_score(y_test, y_pred)
print("Decision Tree Accuracy (before pruning):", accuracy_before_pruning)

plt.figure(figsize=(15, 8))
plot_tree(tree_clf, filled=True, feature_names=data.feature_names, class_names=data.target_names)
plt.title("Decision Tree before Pruning")
plt.show()

# Pre-pruned tree: capping max_depth limits model complexity to avoid overfitting
pruned_tree_clf = DecisionTreeClassifier(criterion="gini", max_depth=3, random_state=42)
pruned_tree_clf.fit(X_train, y_train)
y_pruned_pred = pruned_tree_clf.predict(X_test)
accuracy_after_pruning = accuracy_score(y_test, y_pruned_pred)
print("Decision Tree Accuracy (after pruning):", accuracy_after_pruning)

plt.figure(figsize=(15, 8))
plot_tree(pruned_tree_clf, filled=True, feature_names=data.feature_names, class_names=data.target_names)
plt.title("Decision Tree after Pruning")
plt.show()

# k-Nearest Neighbors (k-NN)
knn_clf = KNeighborsClassifier(n_neighbors=5)
knn_clf.fit(X_train, y_train)
y_knn_pred = knn_clf.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_knn_pred)
print("k-Nearest Neighbors Accuracy:", knn_accuracy)

# Naive Bayes
nb_clf = GaussianNB()
nb_clf.fit(X_train, y_train)
y_nb_pred = nb_clf.predict(X_test)
nb_accuracy = accuracy_score(y_test, y_nb_pred)
print("Naive Bayes Accuracy:", nb_accuracy)

print("\nSummary of model accuracies:")
print(f"Decision Tree (before pruning): {accuracy_before_pruning:.4f}")
print(f"Decision Tree (after pruning): {accuracy_after_pruning:.4f}")
print(f"k-Nearest Neighbors: {knn_accuracy:.4f}")
print(f"Naive Bayes: {nb_accuracy:.4f}")


Output
Decision Tree Accuracy (before pruning): 1.0
Decision Tree Accuracy (after pruning): 1.0
k-Nearest Neighbors Accuracy: 1.0
Naive Bayes Accuracy: 0.9777777777777777

Summary of model accuracies:
Decision Tree (before pruning): 1.0000
Decision Tree (after pruning): 1.0000
k-Nearest Neighbors: 1.0000
Naive Bayes: 0.9778
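The pruned model above relies on pre-pruning (capping max_depth before training). scikit-learn also supports post-pruning through minimal cost-complexity pruning, exposed via the ccp_alpha parameter. Below is a minimal sketch, assuming the same X_train/X_test split as above; in practice the alpha value would be chosen on a validation set or by cross-validation rather than on the test set.

# Sketch: post-pruning with minimal cost-complexity pruning (CART).
# Assumes X_train, X_test, y_train, y_test from the split above.
path = DecisionTreeClassifier(random_state=42).cost_complexity_pruning_path(X_train, y_train)

best_alpha, best_acc = 0.0, 0.0
for alpha in path.ccp_alphas:
    clf = DecisionTreeClassifier(criterion="gini", ccp_alpha=alpha, random_state=42)
    clf.fit(X_train, y_train)
    acc = accuracy_score(y_test, clf.predict(X_test))
    if acc >= best_acc:  # prefer stronger pruning when accuracy ties
        best_alpha, best_acc = alpha, acc

print("Best ccp_alpha:", best_alpha, "accuracy:", best_acc)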
2. Explore the concepts of simple linear regression, multiple linear regression, and correlation, using the ordinary
least squares estimation method to fit the regression models.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_iris
import seaborn as sns
import matplotlib.pyplot as plt

# Load Iris and rename the columns to simpler identifiers
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

# Pairwise Pearson correlations between the four features
correlation_matrix = df.corr()
print("Correlation Matrix:\n", correlation_matrix)

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title("Correlation Matrix for Iris Dataset Features")
plt.show()

# Predict sepal_length from the remaining three features
X = df.drop(columns="sepal_length")
y = df["sepal_length"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Simple linear regression: a single predictor (petal_length)
X_simple = X_train[["petal_length"]]
X_simple_test = X_test[["petal_length"]]
lr_simple = LinearRegression()
lr_simple.fit(X_simple, y_train)
y_pred_simple = lr_simple.predict(X_simple_test)

mse_simple = mean_squared_error(y_test, y_pred_simple)
r2_simple = r2_score(y_test, y_pred_simple)
print("Simple Linear Regression MSE:", mse_simple)
print("Simple Linear Regression R^2:", r2_simple)

# Multiple linear regression: all three predictors
lr_multiple = LinearRegression()
lr_multiple.fit(X_train, y_train)
y_pred_multiple = lr_multiple.predict(X_test)

mse_multiple = mean_squared_error(y_test, y_pred_multiple)
r2_multiple = r2_score(y_test, y_pred_multiple)
print("Multiple Linear Regression MSE:", mse_multiple)
print("Multiple Linear Regression R^2:", r2_multiple)

Output
Correlation Matrix:
               sepal_length  sepal_width  petal_length  petal_width
sepal_length       1.000000    -0.117570      0.871754     0.817941
sepal_width       -0.117570     1.000000     -0.428440    -0.366126
petal_length       0.871754    -0.428440      1.000000     0.962865
petal_width        0.817941    -0.366126      0.962865     1.000000

Simple Linear Regression MSE: 0.129093146356764
Simple Linear Regression R^2: 0.812980761507489
Multiple Linear Regression MSE: 0.10212647866320387
Multiple Linear Regression R^2: 0.8520477902310163
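LinearRegression fits these models by ordinary least squares internally; the same coefficients can be recovered directly from the normal equation beta = (X^T X)^(-1) X^T y. Below is a minimal sketch with NumPy, assuming the X_train and y_train objects from the split above (the prepended column of ones supplies the intercept term).

# Sketch: OLS coefficients from the normal equation (X^T X) beta = X^T y.
# Assumes X_train and y_train from the split above.
Xb = np.column_stack([np.ones(len(X_train)), X_train])  # prepend intercept column
beta = np.linalg.solve(Xb.T @ Xb, Xb.T @ np.asarray(y_train))
print("Intercept:", beta[0])
print("Coefficients:", beta[1:])  # should match lr_multiple.intercept_ and .coef_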
3. Work with a dataset containing independent variables (features) and a dependent variable (target) to predict
and analyze their relationships. (Implement feature scaling (e.g., standardization or normalization) for the
independent variables and re-evaluate the performance of the multiple linear regression model. Implement
regularization techniques (e.g., Lasso or Ridge regression) to handle potential overfitting in the multiple linear
regression model. Compare the performance of the regularized and non-regularized models.)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_iris

# Load Iris; predict sepal_length from the other three features
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
X = df.drop(columns="sepal_length")
y = df["sepal_length"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the independent variables (fit the scaler on the training set only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline: simple linear regression on a single unscaled feature
X_single_feature = X_train[["petal_length"]]
X_single_test = X_test[["petal_length"]]
lr_single = LinearRegression()
lr_single.fit(X_single_feature, y_train)
y_pred_single = lr_single.predict(X_single_test)
mse_single = mean_squared_error(y_test, y_pred_single)
r2_single = r2_score(y_test, y_pred_single)
print("Simple Linear Regression MSE:", mse_single)
print("Simple Linear Regression R^2:", r2_single)

# Multiple linear regression on the standardized features
lr_multi = LinearRegression()
lr_multi.fit(X_train_scaled, y_train)
y_pred_multi = lr_multi.predict(X_test_scaled)
mse_multi = mean_squared_error(y_test, y_pred_multi)
r2_multi = r2_score(y_test, y_pred_multi)
print("Multiple Linear Regression MSE:", mse_multi)
print("Multiple Linear Regression R^2:", r2_multi)

# Ridge regression (L2 penalty shrinks coefficients toward zero)
ridge = Ridge(alpha=1.0)
ridge.fit(X_train_scaled, y_train)
y_pred_ridge = ridge.predict(X_test_scaled)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)
print("Ridge Regression MSE:", mse_ridge)
print("Ridge Regression R^2:", r2_ridge)

# Lasso regression (L1 penalty can zero out coefficients entirely)
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)
y_pred_lasso = lasso.predict(X_test_scaled)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)
print("Lasso Regression MSE:", mse_lasso)
print("Lasso Regression R^2:", r2_lasso)

# Compare coefficient shrinkage across the three models
print("Multiple Linear Regression Coefficients:", lr_multi.coef_)
print("Ridge Regression Coefficients:", ridge.coef_)
print("Lasso Regression Coefficients:", lasso.coef_)


Output
Simple Linear Regression MSE: 0.129093146356764
Simple Linear Regression R^2: 0.812980761507489
Multiple Linear Regression MSE: 0.10212647866320375
Multiple Linear Regression R^2: 0.8520477902310164
Ridge Regression MSE: 0.09363860271269377
Ridge Regression R^2: 0.8643443074473242
Lasso Regression MSE: 0.12157358128102438
Lasso Regression R^2: 0.8238744717775386
Multiple Linear Regression Coefficients: [ 0.29673801  1.32167517 -0.50506043]
Ridge Regression Coefficients: [ 0.27896087  1.1378305  -0.33189876]
Lasso Regression Coefficients: [0.09182964 0.64697702 0.        ]
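Note that the Lasso penalty drove the third coefficient (petal_width) exactly to zero, which is the feature-selection behavior that distinguishes L1 from L2 regularization. The question also permits normalization in place of standardization; below is a minimal sketch assuming the same X_train/X_test split as above, swapping in MinMaxScaler, which rescales each feature to the [0, 1] range.

# Sketch: min-max normalization as an alternative to standardization.
# Assumes X_train, X_test, y_train, y_test from the split above.
from sklearn.preprocessing import MinMaxScaler

minmax = MinMaxScaler()                       # rescale each feature to [0, 1]
X_train_norm = minmax.fit_transform(X_train)  # fit on the training data only
X_test_norm = minmax.transform(X_test)

lr_norm = LinearRegression()
lr_norm.fit(X_train_norm, y_train)
r2_norm = r2_score(y_test, lr_norm.predict(X_test_norm))
print("Multiple Linear Regression R^2 (normalized):", r2_norm)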
