Print Out ML - Finallllllllllllllll

The document outlines various experiments using machine learning techniques, including simple and multiple linear regression, logistic regression, support vector machines (SVM), and K-Means clustering. It demonstrates how to train models, evaluate their performance using metrics like accuracy and loss, and visualize results through plots. Additionally, it covers synthetic dataset generation and model comparison for classification tasks.
Exp 01

# Import libraries
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np
# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
feature_names = iris.feature_names
# For simplicity, use regression on actual flower measurements:
# Simple Linear Regression: Predict sepal width from sepal length
X_simple = X[:, [0]] # Sepal Length
y_simple = X[:, 1] # Sepal Width
# Simple Linear Regression
simple_model = LinearRegression()
simple_model.fit(X_simple, y_simple)
y_simple_pred = simple_model.predict(X_simple)
# Plot Simple Linear Regression
plt.figure(figsize=(8, 5))
plt.scatter(X_simple, y_simple, color='blue', label='Actual')
plt.plot(X_simple, y_simple_pred, color='red', label='Predicted')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.title('Simple Linear Regression (Iris Dataset)')
plt.legend()
plt.grid(True)
plt.show()
# Model details
print("Simple Linear Regression:")
print(" Coefficient (slope):", simple_model.coef_[0])
print(" Intercept:", simple_model.intercept_)
Exp 02

# Multiple Linear Regression: Predict petal width from multiple features
X_multi = X[:, :3] # Sepal Length, Sepal Width, Petal Length
y_multi = X[:, 3] # Petal Width
# Multiple Linear Regression
multi_model = LinearRegression()
multi_model.fit(X_multi, y_multi)
y_multi_pred = multi_model.predict(X_multi)
# Model details
print("\nMultiple Linear Regression:")
print(" Coefficients:", multi_model.coef_)
print(" Intercept:", multi_model.intercept_)
print(" First 5 Predictions:", y_multi_pred[:5])
print(" First 5 Actual Values:", y_multi[:5])
Exp 03

# Import required libraries
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
# Generate binary classification dataset
X, y = make_classification(n_samples=200, n_features=2, n_redundant=0,
                           n_informative=2, n_clusters_per_class=1,
                           flip_y=0.01, random_state=42)
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)
# Predict
y_pred = model.predict(X_test)
# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
# Visualization of decision boundary
import numpy as np
# Create mesh to plot
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, marker='o', edgecolors='k')
plt.title('Logistic Regression Decision Boundary')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.grid(True)
plt.show()
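A small addition (assumed, not in the original printout): the fitted model's class probabilities can be inspected for a few test points to see how confident the classifier is near the boundary.
# Predicted probabilities for the first five test samples
proba = model.predict_proba(X_test[:5])
for p, pred, actual in zip(proba, y_pred[:5], y_test[:5]):
    print(f"P(class 1) = {p[1]:.3f}  predicted = {pred}  actual = {actual}")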
Exp 04

# Import libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
# Generate binary dataset
X, y = make_classification(n_samples=200, n_features=2, n_redundant=0,
                           n_informative=2, n_clusters_per_class=1,
                           flip_y=0.01, random_state=0)
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Logistic Regression model
log_model = LogisticRegression()
log_model.fit(X_train, y_train)
log_pred = log_model.predict(X_test)
# SVM model
svm_model = SVC(kernel='linear') # Use linear kernel for fair comparison
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
# Accuracy
print("Logistic Regression Accuracy:", accuracy_score(y_test, log_pred))
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
# Classification reports
print("\nLogistic Regression Report:\n", classification_report(y_test, log_pred))
print("\nSVM Report:\n", classification_report(y_test, svm_pred))
# Decision Boundary Plot
def plot_decision_boundary(model, title):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure(figsize=(7, 5))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='coolwarm')
    plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k', cmap='coolwarm')
    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.grid(True)
    plt.show()
# Visual comparison
plot_decision_boundary(log_model, "Logistic Regression - Decision Boundary")
plot_decision_boundary(svm_model, "SVM (Linear Kernel) - Decision Boundary")
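As an extra comparison (not in the original printout), both models expose their linear decision boundary directly when the kernel is linear, so the learned weight vectors and intercepts can be printed side by side:
# Compare the learned linear boundaries w·x + b = 0
print("Logistic Regression: w =", log_model.coef_[0], " b =", log_model.intercept_[0])
print("Linear SVM:          w =", svm_model.coef_[0], " b =", svm_model.intercept_[0])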
Exp 05

# Import libraries
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Generate synthetic dataset
X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
# Apply K-Means clustering (assume 4 clusters)
kmeans = KMeans(n_clusters=4, random_state=0, n_init=10)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)
# Plot clustering results
plt.figure(figsize=(7, 5))
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis', s=50)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            c='red', s=200, alpha=0.75, marker='X', label='Centroids')
plt.title('K-Means Clustering Result')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.grid(True)
plt.show()
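An optional elbow check (not part of the original experiment) could justify the assumed choice of 4 clusters by plotting the KMeans inertia_ for a range of k values:
# Elbow method: within-cluster sum of squares vs number of clusters
inertias = []
k_values = range(1, 9)
for k in k_values:
    km = KMeans(n_clusters=k, random_state=0, n_init=10)
    km.fit(X)
    inertias.append(km.inertia_)
plt.plot(k_values, inertias, 'o-')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia (within-cluster sum of squares)')
plt.title('Elbow Method for Choosing k')
plt.grid(True)
plt.show()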
Exp 06

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Generate a synthetic regression dataset
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
# Split dataset into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Fit a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Plotting the regression line and data points
plt.scatter(X_test, y_test, color='blue', label='Actual Data')
plt.plot(X_test, y_pred, color='red', label='Regression Line')
plt.title('Linear Regression')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
# Calculate MSE, RMSE, MAE, and R²
# 1. Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
# 2. Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
# 3. Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
# 4. R² (Coefficient of Determination)
r2 = r2_score(y_test, y_pred)
# Display the calculated metrics
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² (Coefficient of Determination): {r2:.4f}")
Exp 07

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Generate a synthetic regression dataset
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=42)
# Split dataset into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Fit a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Plotting the regression line and data points
plt.scatter(X_test, y_test, color='blue', label='Actual Data')
plt.plot(X_test, y_pred, color='red', label='Regression Line')
plt.title('Linear Regression')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
# Manual Calculation of MSE, RMSE, MAE, and R²
# 1. Mean Squared Error (MSE)
mse_manual = np.mean((y_test - y_pred)**2)
# 2. Root Mean Squared Error (RMSE)
rmse_manual = np.sqrt(mse_manual)
# 3. Mean Absolute Error (MAE)
mae_manual = np.mean(np.abs(y_test - y_pred))
# 4. R² (Coefficient of Determination)
ss_total = np.sum((y_test - np.mean(y_test))**2)
ss_residual = np.sum((y_test - y_pred)**2)
r2_manual = 1 - (ss_residual / ss_total)
# Calculations using scikit-learn
# 1. Mean Squared Error (MSE) using sklearn
mse_sklearn = mean_squared_error(y_test, y_pred)
# 2. Root Mean Squared Error (RMSE) using sklearn
rmse_sklearn = np.sqrt(mse_sklearn)
# 3. Mean Absolute Error (MAE) using sklearn
mae_sklearn = mean_absolute_error(y_test, y_pred)
# 4. R² (Coefficient of Determination) using sklearn
r2_sklearn = r2_score(y_test, y_pred)
# Print the manual and sklearn calculated values for comparison
print("Manual Calculations:")
print(f"Mean Squared Error (MSE): {mse_manual:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_manual:.4f}")
print(f"Mean Absolute Error (MAE): {mae_manual:.4f}")
print(f"R² (Coefficient of Determination): {r2_manual:.4f}")
print("\nSklearn Calculations:")
print(f"Mean Squared Error (MSE): {mse_sklearn:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_sklearn:.4f}")
print(f"Mean Absolute Error (MAE): {mae_sklearn:.4f}")
print(f"R² (Coefficient of Determination): {r2_sklearn:.4f}")
Exp 08

# Import necessary libraries
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Generate a synthetic binary classification dataset
X, y = make_classification(
    n_samples=500,      # Number of samples
    n_features=2,       # Number of total features
    n_informative=2,    # Number of informative features
    n_redundant=0,      # No redundant features
    n_repeated=0,       # No repeated features
    random_state=42
)
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Logistic Regression Model
log_model = LogisticRegression()
log_model.fit(X_train, y_train)
y_pred_log = log_model.predict(X_test)
# SVM Classifier Model
svm_model = SVC(kernel='linear') # Linear kernel
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
# Compare performance
accuracy_log = accuracy_score(y_test, y_pred_log)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"Logistic Regression Accuracy: {accuracy_log:.4f}")
print(f"SVM Accuracy: {accuracy_svm:.4f}")
# 1. Logistic Regression - Cross-Entropy Loss (Objective Function)
def logistic_loss(X, y, model):
    # Predicted probability of the positive class (sigmoid output)
    y_pred = model.predict_proba(X)[:, 1]
    # Clip probabilities to avoid log(0)
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    # Cross-entropy (negative log-likelihood) loss
    loss = -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
    return loss
# Calculate Logistic Regression Loss (cross-entropy)
log_loss = logistic_loss(X_test, y_test, log_model)
print(f"Logistic Regression Cross-Entropy Loss: {log_loss:.4f}")
# 2. SVM - Hinge Loss Calculation (Objective Function)
def hinge_loss(X, y, model):
    # Decision function values (w^T x + b); labels y must be in {-1, +1}
    decision_values = model.decision_function(X)
    # Hinge loss: mean of max(0, 1 - y * f(x))
    hinge = np.maximum(0, 1 - y * decision_values)
    return np.mean(hinge)
# Map binary labels to {-1, 1} for SVM (required by SVM hinge loss)
y_test_svm = 2 * y_test - 1 # Convert {0, 1} to {-1, 1}
# Calculate SVM Hinge Loss
svm_hinge_loss = hinge_loss(X_test, y_test_svm, svm_model)
print(f"SVM Hinge Loss: {svm_hinge_loss:.4f}")
# Plot decision boundaries for both models
plt.figure(figsize=(12, 5))
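The plotting code after plt.figure appears truncated in the printout; a minimal sketch of the intended side-by-side decision-boundary plot (assumed, not from the original) could be:
# Build a mesh over the feature space and plot both decision boundaries
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                     np.linspace(y_min, y_max, 200))
for i, (mdl, name) in enumerate([(log_model, "Logistic Regression"),
                                 (svm_model, "SVM (Linear Kernel)")], start=1):
    Z = mdl.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.subplot(1, 2, i)
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='coolwarm')
    plt.scatter(X[:, 0], X[:, 1], c=y, s=15, edgecolor='k', cmap='coolwarm')
    plt.title(name)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
plt.tight_layout()
plt.show()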
Exp 09

# Import necessary libraries
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns # For heatmap visualization
# Generate a synthetic binary classification dataset
X, y = make_classification(
    n_samples=500,      # Number of samples
    n_features=2,       # Number of total features
    n_informative=2,    # Number of informative features
    n_redundant=0,      # No redundant features
    n_repeated=0,       # No repeated features
    random_state=42
)
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Logistic Regression Model
log_model = LogisticRegression()
log_model.fit(X_train, y_train)
y_pred_log = log_model.predict(X_test)
# Evaluate Logistic Regression Model
accuracy_log = accuracy_score(y_test, y_pred_log)
precision_log = precision_score(y_test, y_pred_log)
recall_log = recall_score(y_test, y_pred_log)
conf_matrix_log = confusion_matrix(y_test, y_pred_log)
# Print the metrics for Logistic Regression
print(f"Logistic Regression Accuracy: {accuracy_log:.4f}")
print(f"Logistic Regression Precision: {precision_log:.4f}")
print(f"Logistic Regression Recall: {recall_log:.4f}")
print("Logistic Regression Confusion Matrix:")
print(conf_matrix_log)
# Plot confusion matrix using heatmap
plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix_log, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title("Logistic Regression Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()
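Precision and recall can also be combined into a single F1-score; a small addition (not in the original printout):
from sklearn.metrics import f1_score
# F1 = harmonic mean of precision and recall
f1_log = f1_score(y_test, y_pred_log)
print(f"Logistic Regression F1-Score: {f1_log:.4f}")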
Exp 10

# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs, make_moons, make_circles
from sklearn.cluster import KMeans, DBSCAN
# Generate Datasets
# 1. Well-separated clusters (ideal for KMeans)
X1, _ = make_blobs(n_samples=300, centers=3, cluster_std=0.60, random_state=0)
# 2. Non-globular dataset (moons) (DBSCAN can handle this well)
X2, _ = make_moons(n_samples=300, noise=0.1, random_state=0)
# 3. Noisy dataset (DBSCAN handles noise well)
X3, _ = make_circles(n_samples=300, noise=0.1, factor=0.5, random_state=0)
# Function to plot the results of clustering
def plot_clusters(X, kmeans_labels, dbscan_labels, title):
    fig, axes = plt.subplots(1, 2, figsize=(14, 7))

    # KMeans plot
    axes[0].scatter(X[:, 0], X[:, 1], c=kmeans_labels, cmap='viridis', s=50)
    axes[0].set_title('KMeans Clustering')

    # DBSCAN plot
    axes[1].scatter(X[:, 0], X[:, 1], c=dbscan_labels, cmap='viridis', s=50)
    axes[1].set_title('DBSCAN Clustering')

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.show()
# Apply KMeans and DBSCAN on different datasets
# 1. Well-separated clusters
kmeans = KMeans(n_clusters=3)
kmeans_labels_1 = kmeans.fit_predict(X1)
dbscan = DBSCAN(eps=0.3, min_samples=10)
dbscan_labels_1 = dbscan.fit_predict(X1)
plot_clusters(X1, kmeans_labels_1, dbscan_labels_1, "Well-Separated Clusters (make_blobs)")
# 2. Non-globular dataset (moons)
kmeans_labels_2 = kmeans.fit_predict(X2)
dbscan_labels_2 = dbscan.fit_predict(X2)
plot_clusters(X2, kmeans_labels_2, dbscan_labels_2, "Non-Globular Dataset (make_moons)")
# 3. Noisy dataset (circles)
kmeans_labels_3 = kmeans.fit_predict(X3)
dbscan_labels_3 = dbscan.fit_predict(X3)
plot_clusters(X3, kmeans_labels_3, dbscan_labels_3, "Noisy Dataset (make_circles)")
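Beyond the visual comparison, a silhouette score for each result could quantify the difference between the two algorithms; this is an optional sketch not in the original printout (DBSCAN noise points, labelled -1, are excluded before scoring):
from sklearn.metrics import silhouette_score
datasets = [
    ("make_blobs", X1, kmeans_labels_1, dbscan_labels_1),
    ("make_moons", X2, kmeans_labels_2, dbscan_labels_2),
    ("make_circles", X3, kmeans_labels_3, dbscan_labels_3),
]
for name, X_d, km_labels, db_labels in datasets:
    km_score = silhouette_score(X_d, km_labels)
    core = db_labels != -1  # ignore DBSCAN noise points
    if len(set(db_labels[core])) > 1:
        db_score = silhouette_score(X_d[core], db_labels[core])
        print(f"{name}: KMeans silhouette = {km_score:.3f}, DBSCAN silhouette = {db_score:.3f}")
    else:
        print(f"{name}: KMeans silhouette = {km_score:.3f}, DBSCAN found fewer than 2 clusters")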
