Tanu Raman ML Lab File
LAB FILE
For
MACHINE LEARNING
(BAI-301)
B.Tech./CSE-AI
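The plotting code below relies on matplotlib and on monthly sales lists defined at the start of the lab; a minimal setup so the snippets run standalone (the sales values here are illustrative placeholders, not the original data):

import matplotlib.pyplot as plt
# Assumed sample data: month numbers and monthly sales volumes for three products
months = list(range(1, 13))
product_A_sales = [120, 135, 150, 160, 155, 170, 180, 175, 190, 200, 210, 220]
product_B_sales = [80, 85, 90, 100, 110, 105, 115, 120, 125, 130, 140, 145]
product_C_sales = [60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115]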
print('\n****************************************************************************************\n')
# 2. Scatter Plot: Product A vs Product B sales
plt.figure(figsize=(7, 4))
plt.scatter(product_A_sales, product_B_sales, color='orange')
plt.title("Scatter Plot: Product A Sales vs Product B Sales")
plt.xlabel("Product A Sales")
plt.ylabel("Product B Sales")
plt.show()
print('\n****************************************************************************************\n')
# 3. Bar Chart: Monthly Sales of Product C
plt.figure(figsize=(7, 4))
plt.bar(months, product_C_sales, color='green')
plt.title("Bar Chart: Monthly Sales of Product C")
plt.xlabel("Month")
plt.ylabel("Sales Volume")
plt.xticks(months)
plt.show()
print('\n****************************************************************************************\n')
# 4. Pie Chart: Percentage of Total Sales by Product
total_sales = [sum(product_A_sales), sum(product_B_sales), sum(product_C_sales)]
products = ['Product A', 'Product B', 'Product C']
plt.figure(figsize=(5, 5))
plt.pie(total_sales, labels=products, autopct='%1.1f%%', startangle=140)
plt.title("Pie Chart: Total Sales Distribution by Product")
plt.show()
Output:
LAB-2
AIM:
Program to perform cleaning of the dataset:
1. Drop a variable
2. Remove null values
3. Remove duplicate values
CODE:
import pandas as pd
# Sample dataset (values assumed for illustration): contains a null value and a duplicate row
data = {'ProductID': [101, 102, 102, 103, 104],
        'ProductName': ['Pen', 'Book', 'Book', 'Bag', None],
        'Price': [10.0, 50.0, 50.0, None, 300.0]}
df = pd.DataFrame(data)
print(df)
print('****************************************************************************************\n\nAfter dropping the "ProductName" column\n')
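The three cleaning steps named in the aim follow directly from the DataFrame above; a minimal sketch (the column names come from the assumed sample data):

# 1. Drop a variable (column)
df = df.drop(columns=['ProductName'])
print(df)
# 2. Remove null values
df = df.dropna()
print(df)
# 3. Remove duplicate values
df = df.drop_duplicates()
print(df)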
Output:
LAB-3
AIM:
Program to perform data preprocessing: oversampling (SMOTE), standardization, and normalization of a sample dataset.
CODE:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import SMOTE
# Sample imbalanced dataset (the 'label' column is an assumption added so SMOTE has a target)
data = {'column1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'column2': [100, 200, 300, 400, 500, 100, 200, 300, 400, 500],
        'label':   [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]}
df = pd.DataFrame(data)
X = df[['column1', 'column2']]
y = df['label']
# 1. Oversampling with SMOTE (k_neighbors reduced to suit the tiny minority class)
smote = SMOTE(random_state=42, k_neighbors=2)
X_resampled, y_resampled = smote.fit_resample(X, y)
# 2. Standardization
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)
# 3. Normalization
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X_resampled)
print("X_resampled:\n", X_resampled,'\n\n')
print("X_scaled:\n", X_scaled,'\n\n')
print("X_normalized:\n", X_normalized)
Output:
LAB-4
AIM:
Program to perform data prediction using any dataset, for example, iris dataset, through:
1. Supervised Learning (Linear Regression)
2. Unsupervised Learning (K-Means Clustering {k=5})
CODE:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes, make_blobs
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Supervised Learning: Linear Regression on the diabetes dataset
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
print("Linear Regression MSE:", mean_squared_error(y_test, y_pred))
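The aim also calls for unsupervised learning with K-Means (k=5); a minimal sketch using make_blobs and KMeans, both already imported above (the blob parameters are assumptions for illustration):

# Unsupervised Learning: K-Means clustering with k = 5 on synthetic blob data
X_blobs, _ = make_blobs(n_samples=300, centers=5, random_state=42)
kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
labels = kmeans.fit_predict(X_blobs)
plt.scatter(X_blobs[:, 0], X_blobs[:, 1], c=labels, cmap='viridis', s=20)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], c='red', marker='x', s=100)
plt.title("K-Means Clustering (k=5)")
plt.show()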
LAB-5
AIM:
Program to implement reinforcement learning on a dataset
CODE:
import numpy as np
import matplotlib.pyplot as plt
# Environment: Define a simple grid-world
class GridWorld:
    def __init__(self, rows, cols, start, goal, obstacles=[]):
        self.rows = rows
        self.cols = cols
        self.start = start
        self.goal = goal
        self.obstacles = obstacles
        self.state = start

    def reset(self):
        self.state = self.start
        return self.state

    def step(self, action):
        # Actions: 0 = up, 1 = down, 2 = left, 3 = right
        row, col = self.state
        if action == 0:
            row = max(row - 1, 0)
        elif action == 1:
            row = min(row + 1, self.rows - 1)
        elif action == 2:
            col = max(col - 1, 0)
        elif action == 3:
            col = min(col + 1, self.cols - 1)
        next_state = (row, col)
        # Obstacles block movement: stay in place
        if next_state in self.obstacles:
            next_state = self.state
        self.state = next_state
        # Check reward
        if next_state == self.goal:
            return next_state, 1, True  # Goal reached
        return next_state, -0.01, False  # Small penalty for each step

    def render(self):
        grid = np.zeros((self.rows, self.cols))
        grid[self.goal] = 2
        for obs in self.obstacles:
            grid[obs] = -1
        grid[self.state] = 1
        print(grid)

# Q-Learning implementation
def q_learning(env, episodes, alpha=0.1, gamma=0.99, epsilon=0.1):
    q_table = np.zeros((env.rows, env.cols, 4))  # Initialize Q-table
    for episode in range(episodes):
        state = env.reset()
        done = False
        while not done:
            # Epsilon-greedy action selection
            if np.random.rand() < epsilon:
                action = np.random.randint(4)  # Explore
            else:
                action = np.argmax(q_table[state[0], state[1]])  # Exploit
            # Take action
            next_state, reward, done = env.step(action)
            # Q-learning update rule
            best_next = np.max(q_table[next_state[0], next_state[1]])
            q_table[state[0], state[1], action] += alpha * (reward + gamma * best_next - q_table[state[0], state[1], action])
            state = next_state
    return q_table

def main():
    # Example configuration (assumed): 4x4 grid, start at top-left, goal at bottom-right
    env = GridWorld(rows=4, cols=4, start=(0, 0), goal=(3, 3), obstacles=[(1, 1), (2, 2)])
    q_table = q_learning(env, episodes=500)
    env.render()
    print("Learned Q-table shape:", q_table.shape)

if __name__ == "__main__":
    main()
OUTPUT:
LAB-6
AIM:
Program to perform data sampling and estimation using a density-based clustering method.
CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
# Generate synthetic dataset (you can replace this with your dataset)
def generate_data():
    # For demonstration, create a simple moon-shaped dataset
    from sklearn.datasets import make_moons
    X, _ = make_moons(n_samples=300, noise=0.05, random_state=42)
    return X

# Summary of results
def print_summary(clusters, sampled_data):
    print("\n--- Clustering Summary ---")
    print(f"Number of clusters (excluding noise): {len(set(clusters)) - (1 if -1 in clusters else 0)}")
    print(f"Noise points: {np.sum(clusters == -1)}")
    print(f"Sampled points: {len(sampled_data)}")

def main():
    X = generate_data()
    X_scaled = StandardScaler().fit_transform(X)
    # Density-based clustering with DBSCAN
    clusters = DBSCAN(eps=0.3, min_samples=5).fit_predict(X_scaled)
    mask = clusters != -1
    if len(set(clusters[mask])) > 1:
        print("Silhouette score:", silhouette_score(X_scaled[mask], clusters[mask]))
    # Estimation via random sampling of the clustered (non-noise) points
    idx = np.random.choice(np.flatnonzero(mask), size=min(50, int(mask.sum())), replace=False)
    sampled_data = X_scaled[idx]
    print_summary(clusters, sampled_data)
    # Visualize the clusters
    plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=clusters, cmap='viridis', s=20)
    plt.title("DBSCAN Clustering")
    plt.show()

if __name__ == "__main__":
    main()
OUTPUT:
LAB-7
AIM:
Program to perform model regularization, PCA, and optimization using
feature scaling methods.
CODE:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge, Lasso
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
np.random.seed(42)
n_samples, n_features = 1000, 20
X = np.random.rand(n_samples, n_features)
y = np.random.choice([0, 1], size=n_samples)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
print(f"Original Features: {X_train.shape[1]}")
print(f"Reduced Features after PCA: {X_train_pca.shape[1]}")
ridge = Ridge(alpha=1.0)
lasso = Lasso(alpha=0.01)
ridge.fit(X_train_pca, y_train)
lasso.fit(X_train_pca, y_train)
selector = SelectFromModel(lasso, prefit=True)
X_train_selected = selector.transform(X_train_pca)
X_test_selected = selector.transform(X_test_pca)
print(f"Features selected by Lasso: {X_train_selected.shape[1]}")
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_selected, y_train)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_selected)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Test Accuracy: {accuracy:.4f}")
OUTPUT:
LAB-8
AIM:
Program to perform kernel-based SVM modelling of the considered dataset.
CODE:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
svm_model = SVC(kernel='rbf', gamma='scale', C=1.0)  # You can experiment with the 'kernel', 'C', and 'gamma' parameters
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print("Accuracy Score: ", accuracy_score(y_test, y_pred))
print("Classi cation Report: \n", classi cation_report(y_test, y_pred))
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_train_2d = pca.fit_transform(X_train)
X_test_2d = pca.transform(X_test)
plt.figure(figsize=(8, 6))
plt.scatter(X_test_2d[:, 0], X_test_2d[:, 1], c=y_test, cmap=plt.cm.Paired, s=30, edgecolors='k')
plt.title("SVM Decision Boundaries (RBF Kernel) with PCA-reduced Data")
plt.show()
OUTPUT:
LAB-9
AIM:
Program to perform the model regularization and optimization using ensemble
methods.
CODE:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error
data = fetch_california_housing()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
random_forest = RandomForestRegressor(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train)
y_pred_rf = random_forest.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest MSE: {mse_rf:.3f}")
gradient_boosting = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
gradient_boosting.fit(X_train, y_train)
y_pred_gb = gradient_boosting.predict(X_test)
mse_gb = mean_squared_error(y_test, y_pred_gb)
print(f"Gradient Boosting MSE: {mse_gb:.3f}")
base_models = [
('ridge', Ridge(alpha=1.0)),
('lasso', Lasso(alpha=0.1)),
('rf', RandomForestRegressor(n_estimators=100, random_state=42))
]
meta_model = GradientBoostingRegressor(n_estimators=50, learning_rate=0.1, random_state=42)
stacking_regressor = StackingRegressor(estimators=base_models, final_estimator=meta_model, cv=5)
stacking_regressor.fit(X_train, y_train)
y_pred_stack = stacking_regressor.predict(X_test)
mse_stack = mean_squared_error(y_test, y_pred_stack)
print(f"Stacking Regressor MSE: {mse_stack:.3f}")
OUTPUT:
LAB-10
AIM:
Program to perform oversampling (SVM SMOTE) and undersampling (random sampling) of a considered dataset using machine learning.
CODE:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter
def evaluate_model(X_train, X_test, y_train, y_test):
    model = SVC(kernel='linear', random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Accuracy Score:", accuracy_score(y_test, y_pred))

def generate_data():
    X, y = make_classification(
        n_samples=1000, n_features=20, n_informative=15, n_redundant=5,
        n_clusters_per_class=1, weights=[0.9, 0.1], flip_y=0, random_state=42
    )
    print("Original class distribution:", Counter(y))
    return X, y

def oversample_smote(X, y):
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    print("After SMOTE Oversampling:", Counter(y_resampled))
    return X_resampled, y_resampled

def undersample_random(X, y):
    undersampler = RandomUnderSampler(random_state=42)
    X_resampled, y_resampled = undersampler.fit_resample(X, y)
    print("After Random Undersampling:", Counter(y_resampled))
    return X_resampled, y_resampled
if __name__ == "__main__":
    X, y = generate_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("\n--- Original Data ---")
evaluate_model(X_train, X_test, y_train, y_test)
X_smote, y_smote = oversample_smote(X_train, y_train)
print("\n--- After SMOTE Oversampling ---")
evaluate_model(X_smote, X_test, y_smote, y_test)
X_under, y_under = undersample_random(X_train, y_train)
print("\n--- After Random Undersampling ---")
evaluate_model(X_under, X_test, y_under, y_test)
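The aim names SVM SMOTE specifically, while the code above uses plain SMOTE; a minimal variant using imblearn's SVMSMOTE class (assuming it is available in the installed imblearn version, and reusing the Counter import above) would be:

from imblearn.over_sampling import SVMSMOTE

def oversample_svm_smote(X, y):
    # SVM SMOTE synthesises minority samples near the SVM decision boundary
    svm_smote = SVMSMOTE(random_state=42)
    X_resampled, y_resampled = svm_smote.fit_resample(X, y)
    print("After SVM-SMOTE Oversampling:", Counter(y_resampled))
    return X_resampled, y_resampled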
OUTPUT: