Print Out ML - Final
Exp 02
# Import libraries
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np
# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
feature_names = iris.feature_names
# For simplicity, use regression on actual flower measurements:
# Simple Linear Regression: Predict sepal width from sepal length
X_simple = X[:, [0]] # Sepal Length
y_simple = X[:, 1] # Sepal Width
# Simple Linear Regression
simple_model = LinearRegression()
simple_model.fit(X_simple, y_simple)
y_simple_pred = simple_model.predict(X_simple)
# Plot Simple Linear Regression
plt.figure(figsize=(8, 5))
plt.scatter(X_simple, y_simple, color='blue', label='Actual')
plt.plot(X_simple, y_simple_pred, color='red', label='Predicted')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.title('Simple Linear Regression (Iris Dataset)')
plt.legend()
plt.grid(True)
plt.show()
# Model details
print("Simple Linear Regression:")
print(" Coefficient (slope):", simple_model.coef_[0])
print(" Intercept:", simple_model.intercept_)
# Multiple Linear Regression: Predict petal width from multiple features
X_multi = X[:, :3] # Sepal Length, Sepal Width, Petal Length
y_multi = X[:, 3] # Petal Width
# Multiple Linear Regression
multi_model = LinearRegression()
multi_model.fit(X_multi, y_multi)
y_multi_pred = multi_model.predict(X_multi)
# Model details
print("\nMultiple Linear Regression:")
print(" Coefficients:", multi_model.coef_)
print(" Intercept:", multi_model.intercept_)
print(" First 5 Predictions:", y_multi_pred[:5])
print(" First 5 Actual Values:", y_multi[:5])
Exp 03
# Import required libraries
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
# Generate binary classification dataset
X, y = make_classification(n_samples=200, n_features=2, n_redundant=0,
                           n_informative=2, n_clusters_per_class=1,
                           flip_y=0.01, random_state=42)
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)
# Predict
y_pred = model.predict(X_test)
# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
# Visualization of decision boundary
import numpy as np
# Create mesh to plot
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, marker='o', edgecolors='k')
plt.title('Logistic Regression Decision Boundary')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.grid(True)
plt.show()
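Beyond the hard class labels used above, logistic regression also exposes class probabilities; a minimal sketch, assuming model and X_test from the code above:
# Each row gives [P(class 0), P(class 1)] for one test sample; rows sum to 1
proba = model.predict_proba(X_test[:5])
print("Predicted probabilities (first 5):\n", proba)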
Exp 04
# Import libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
# Generate binary dataset
X, y = make_classification(n_samples=200, n_features=2, n_redundant=0,
                           n_informative=2, n_clusters_per_class=1,
                           flip_y=0.01, random_state=0)
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Logistic Regression model
log_model = LogisticRegression()
log_model.fit(X_train, y_train)
log_pred = log_model.predict(X_test)
# SVM model
svm_model = SVC(kernel='linear') # Use linear kernel for fair comparison
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
# Accuracy
print("Logistic Regression Accuracy:", accuracy_score(y_test, log_pred))
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
# Classification reports
print("\nLogistic Regression Report:\n", classification_report(y_test, log_pred))
print("\nSVM Report:\n", classification_report(y_test, svm_pred))
# Decision Boundary Plot
def plot_decision_boundary(model, title):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.figure(figsize=(7, 5))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='coolwarm')
    plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k', cmap='coolwarm')
    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.grid(True)
    plt.show()
# Visual comparison
plot_decision_boundary(log_model, "Logistic Regression - Decision Boundary")
plot_decision_boundary(svm_model, "SVM (Linear Kernel) - Decision Boundary")
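A linear SVM's boundary depends only on its support vectors (the training points on or inside the margin); a small optional sketch to inspect them, assuming svm_model from above:
# Number of support vectors per class, and the fitted linear boundary
print("Support vectors per class:", svm_model.n_support_)
print("SVM weights:", svm_model.coef_, "intercept:", svm_model.intercept_)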
Exp 05
# Import libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Generate synthetic dataset
X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
# Apply K-Means clustering (assume 4 clusters)
kmeans = KMeans(n_clusters=4, random_state=0, n_init=10)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)
# Plot clustering results
plt.figure(figsize=(7, 5))
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis', s=50)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            c='red', s=200, alpha=0.75, marker='X', label='Centroids')
plt.title('K-Means Clustering Result')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.grid(True)
plt.show()
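The experiment fixes n_clusters=4 in advance; in practice k is often chosen with the elbow method, sketched below using KMeans inertia (within-cluster sum of squared distances) on the same X:
# Fit KMeans for several k and look for the 'elbow' in the inertia curve
inertias = []
for k in range(1, 9):
    km = KMeans(n_clusters=k, random_state=0, n_init=10).fit(X)
    inertias.append(km.inertia_)
plt.figure(figsize=(7, 5))
plt.plot(range(1, 9), inertias, marker='o')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia')
plt.title('Elbow Method for Choosing k')
plt.grid(True)
plt.show()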
Exp 06
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Generate a synthetic regression dataset
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
# Split dataset into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Fit a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Plotting the regression line and data points
plt.scatter(X_test, y_test, color='blue', label='Actual Data')
plt.plot(X_test, y_pred, color='red', label='Regression Line')
plt.title('Linear Regression')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
# Calculate MSE, RMSE, MAE, and R²
# 1. Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
# 2. Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
# 3. Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
# 4. R² (Coefficient of Determination)
r2 = r2_score(y_test, y_pred)
# Display the calculated metrics
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² (Coefficient of Determination): {r2:.4f}")
Exp 07
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Generate a synthetic regression dataset
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
# Split dataset into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Fit a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Plotting the regression line and data points
plt.scatter(X_test, y_test, color='blue', label='Actual Data')
plt.plot(X_test, y_pred, color='red', label='Regression Line')
plt.title('Linear Regression')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
# Manual Calculation of MSE, RMSE, MAE, and R²
# 1. Mean Squared Error (MSE)
mse_manual = np.mean((y_test - y_pred)**2)
# 2. Root Mean Squared Error (RMSE)
rmse_manual = np.sqrt(mse_manual)
# 3. Mean Absolute Error (MAE)
mae_manual = np.mean(np.abs(y_test - y_pred))
# 4. R² (Coefficient of Determination)
ss_total = np.sum((y_test - np.mean(y_test))**2)
ss_residual = np.sum((y_test - y_pred)**2)
r2_manual = 1 - (ss_residual / ss_total)
# Calculations using scikit-learn
# 1. Mean Squared Error (MSE) using sklearn
mse_sklearn = mean_squared_error(y_test, y_pred)
# 2. Root Mean Squared Error (RMSE) using sklearn
rmse_sklearn = np.sqrt(mse_sklearn)
# 3. Mean Absolute Error (MAE) using sklearn
mae_sklearn = mean_absolute_error(y_test, y_pred)
# 4. R² (Coefficient of Determination) using sklearn
r2_sklearn = r2_score(y_test, y_pred)
# Print the manual and sklearn calculated values for comparison
print("Manual Calculations:")
print(f"Mean Squared Error (MSE): {mse_manual:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_manual:.4f}")
print(f"Mean Absolute Error (MAE): {mae_manual:.4f}")
print(f"R² (Coefficient of Determination): {r2_manual:.4f}")
print("\nSklearn Calculations:")
print(f"Mean Squared Error (MSE): {mse_sklearn:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_sklearn:.4f}")
print(f"Mean Absolute Error (MAE): {mae_sklearn:.4f}")
print(f"R² (Coefficient of Determination): {r2_sklearn:.4f}")
Exp 08
# Import libraries
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_blobs, make_moons, make_circles
# Generate the three datasets used below
# (generation parameters are typical choices and may be tuned)
X1, _ = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=0)
X2, _ = make_moons(n_samples=300, noise=0.05, random_state=0)
X3, _ = make_circles(n_samples=300, noise=0.05, factor=0.5, random_state=0)
# Helper: plot KMeans and DBSCAN labels side by side
def plot_clusters(X, kmeans_labels, dbscan_labels, title):
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    # KMeans plot
    axes[0].scatter(X[:, 0], X[:, 1], c=kmeans_labels, cmap='viridis', s=50)
    axes[0].set_title('KMeans Clustering')
    # DBSCAN plot
    axes[1].scatter(X[:, 0], X[:, 1], c=dbscan_labels, cmap='viridis', s=50)
    axes[1].set_title('DBSCAN Clustering')
    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.show()
# Apply KMeans and DBSCAN on different datasets
# 1. Well-separated clusters
kmeans = KMeans(n_clusters=3, random_state=0, n_init=10)
kmeans_labels_1 = kmeans.fit_predict(X1)
dbscan = DBSCAN(eps=0.3, min_samples=10)
dbscan_labels_1 = dbscan.fit_predict(X1)
plot_clusters(X1, kmeans_labels_1, dbscan_labels_1, "Well-Separated Clusters (make_blobs)")
# 2. Non-globular dataset (moons)
kmeans_labels_2 = kmeans.fit_predict(X2)
dbscan_labels_2 = dbscan.fit_predict(X2)
plot_clusters(X2, kmeans_labels_2, dbscan_labels_2, "Non-Globular Dataset (make_moons)")
# 3. Noisy dataset (circles)
kmeans_labels_3 = kmeans.fit_predict(X3)
dbscan_labels_3 = dbscan.fit_predict(X3)
plot_clusters(X3, kmeans_labels_3, dbscan_labels_3, "Noisy Dataset (make_circles)")
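For a numeric rather than purely visual comparison, the silhouette score can be computed on each result; a sketch for the moons dataset, assuming the labels from above (DBSCAN noise points, labelled -1, are dropped first). Note that the silhouette favours convex clusters, so DBSCAN can score lower here even when its grouping matches the visible structure.
from sklearn.metrics import silhouette_score
import numpy as np
print("KMeans silhouette (moons):", silhouette_score(X2, kmeans_labels_2))
mask = dbscan_labels_2 != -1  # exclude noise points
if len(np.unique(dbscan_labels_2[mask])) > 1:  # silhouette needs >= 2 clusters
    print("DBSCAN silhouette (moons):",
          silhouette_score(X2[mask], dbscan_labels_2[mask]))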