AML Lab

This document records a series of machine learning lab experiments covering regression, classification, clustering, dimensionality reduction, and deep learning, implemented with Python libraries such as pandas, scikit-learn, hmmlearn, and TensorFlow. Each experiment demonstrates a different algorithm, including Linear Regression, SVM, Decision Tree and Random Forest, K-Means and hierarchical clustering, PCA, HMMs, and CNN/RNN models, along with data preprocessing steps, model evaluation metrics, and visualizations. The experiments use datasets such as the Boston housing dataset, the Iris dataset, the Wine dataset, a Titanic passenger dataset, and MNIST for training and testing the models.


Exp-1: Simple Linear Regression (Salary vs. Experience)

import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

cwd = os.getcwd()
print(cwd)

df = pd.read_csv("C:/Users/User/Downloads/Salary.csv")
df.head(10)
df.tail(7)
df.head()

x = df.iloc[:, :-1].values   # feature: years of experience
y = df.iloc[:, -1:].values   # target: salary
print(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
print("x_train" + str(x_train), "x_test" + str(x_test), "y_train" + str(y_train), "y_test" + str(y_test))

model = LinearRegression()
model.fit(x_train, y_train)
predictions = model.predict(x_test)
print(predictions)

y_test
predictions - y_test
sns.displot(predictions - y_test)   # distribution of residuals

plt.scatter(x_train, y_train, color="red")
plt.plot(x_train, model.predict(x_train))
plt.scatter(x_test, y_test, color="black")
plt.plot(x_test, model.predict(x_test))

r_sq = model.score(x_train, y_train)
print("Coefficient of Determination:", r_sq)
print("Intercept:", model.intercept_)
print("Slope:", model.coef_)

y_pred = model.predict(x_train)
print("Predicted response:", y_pred, sep='\n')
print("y=" + str(float(model.coef_)) + "x+" + str(float(model.intercept_)))

# Manual prediction for 8.6 years of experience using the fitted line
y = 8726.213298429322 * (8.6) + 28719.755759162297
y
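The hand-computed estimate above can be cross-checked against the fitted model itself; a minimal sketch, assuming the `model` fitted in this experiment:

import numpy as np

# Cross-check the manual salary estimate for 8.6 years of experience.
# model.predict expects a 2-D array of shape (n_samples, n_features).
print("model.predict:", model.predict(np.array([[8.6]])))  # should match the manual y above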
Exp-2: Linear Regression on the Boston Housing Dataset

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

boston = pd.read_csv("C:/Users/User/Downloads/boston-housing-dataset.csv")
boston.head()

boston.target = boston['MEDV']   # MEDV (median home value) is the target
boston.target
boston.isnull().sum()

sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.distplot(boston['MEDV'], bins=30)
plt.show()

correlation_matrix = boston.corr().round(2)
sns.heatmap(data=correlation_matrix, annot=True)

plt.figure(figsize=(20, 5))
features = ['LSTAT', 'RM']
target = boston['MEDV']
for i, col in enumerate(features):
    plt.subplot(1, len(features), i + 1)
    x = boston[col]
    y = target
    plt.scatter(x, y, marker='o')
    plt.title(col)
    plt.xlabel(col)
    plt.ylabel('MEDV')

X = pd.DataFrame(np.c_[boston['LSTAT']], columns=['LSTAT'])
print(X)
Y = boston['MEDV']
print(Y)

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=5)
print(X_train.shape), print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

lin_model = LinearRegression()
lin_model.fit(X_train, Y_train)

y_train_predict = lin_model.predict(X_train)
rmse = np.sqrt(mean_squared_error(Y_train, y_train_predict))
r2 = r2_score(Y_train, y_train_predict)
print("The model performance for training set")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, lin_model.predict(X_train))
plt.scatter(X_test, Y_test, color='black')
plt.plot(X_test, lin_model.predict(X_test))
EXP-3 SVM

from sklearn import datasets

iris = datasets.load_iris()
print("Feature Names:", iris.feature_names)
print("Target Names:", iris.target_names)
print("\nDataset Description:")
print(iris.DESCR)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.decomposition import PCA

iris = datasets.load_iris()
X = iris.data  # All four features (sepal length, sepal width, petal length, petal width)
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Project to 2 components so the decision boundary can be plotted
pca = PCA(n_components=2)
X_train_2d = pca.fit_transform(X_train)
X_test_2d = pca.transform(X_test)

def train_and_evaluate_svm(kernel_type):
    svm_model = SVC(kernel=kernel_type)
    svm_model.fit(X_train_2d, y_train)
    y_pred = svm_model.predict(X_test_2d)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for {kernel_type} kernel: {accuracy * 100:.2f}%")
    print(f"Classification Report for {kernel_type} kernel:")
    print(classification_report(y_test, y_pred))

def plot_decision_boundary(kernel_type):
    svm_model = SVC(kernel=kernel_type)
    svm_model.fit(X_train_2d, y_train)
    h = .02  # Step size in meshgrid
    x_min, x_max = X_train_2d[:, 0].min() - 1, X_train_2d[:, 0].max() + 1
    y_min, y_max = X_train_2d[:, 1].min() - 1, X_train_2d[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.Paired)
    plt.scatter(X_train_2d[:, 0], X_train_2d[:, 1], c=y_train, edgecolors='k', marker='o', s=100, cmap=plt.cm.Paired, label="Train data")
    plt.scatter(X_test_2d[:, 0], X_test_2d[:, 1], c=y_test, edgecolors='k', marker='x', s=100, cmap=plt.cm.Paired, label="Test data")
    plt.title(f"SVM with {kernel_type} kernel on Iris Dataset (PCA Reduced)")
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend()
    plt.show()

kernels = ['linear', 'poly', 'rbf']
for kernel in kernels:
    train_and_evaluate_svm(kernel)
    plot_decision_boundary(kernel)
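Note that the SVMs here are fit on unscaled PCA components. Kernel SVMs are sensitive to feature scale, so standardizing first can change the results; a minimal sketch using a pipeline (an addition for illustration, not part of the original experiment):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Standardize, project to 2 PCs, then fit an RBF-kernel SVM as one estimator
pipe = make_pipeline(StandardScaler(), PCA(n_components=2), SVC(kernel='rbf'))
pipe.fit(X_train, y_train)
print("Scaled-pipeline accuracy:", accuracy_score(y_test, pipe.predict(X_test)))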
EXP-4: Decision Tree and Random Forest on Titanic Data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

td = pd.read_csv("/content/drive/MyDrive/tested.csv")
td = td.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
td

# Fill missing values with median/mode
td['Age'].fillna(td['Age'].median(), inplace=True)
td['Embarked'].fillna(td['Embarked'].mode()[0], inplace=True)
td['Fare'].fillna(td['Fare'].median(), inplace=True)

X = td.drop(['Survived'], axis=1)
y = td['Survived']
X
y

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Encode categorical columns; fit on the training split only to avoid data leakage
label_encoders = {}
categorical_cols = ['Sex', 'Embarked']
for col in categorical_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_test[col] = le.transform(X_test[col])
    label_encoders[col] = le

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train_scaled

pca = PCA(n_components=2)  # Reducing to 2 components for visualization
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
X_test_pca

# Decision Tree (needed for dt_pred below)
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)
dt_pred = dt_classifier.predict(X_test)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
rf_pred = rf_classifier.predict(X_test)

dt_accuracy = accuracy_score(y_test, dt_pred)
rf_accuracy = accuracy_score(y_test, rf_pred)
print("Decision Tree Accuracy:", dt_accuracy)
print("Random Forest Accuracy:", rf_accuracy)
print("\nDecision Tree Classification Report:")
print(classification_report(y_test, dt_pred))
print("\nRandom Forest Classification Report:")
print(classification_report(y_test, rf_pred))

plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
sns.heatmap(confusion_matrix(y_test, dt_pred), annot=True, cmap="Blues", fmt='d')
plt.title("Decision Tree Confusion Matrix")
plt.subplot(1, 2, 2)
sns.heatmap(confusion_matrix(y_test, rf_pred), annot=True, cmap="Blues", fmt='d')
plt.title("Random Forest Confusion Matrix")
plt.show()
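With the random forest already fitted, its impurity-based feature importances show which inputs drive the predictions; a minimal sketch, assuming `rf_classifier` and `X_train` from above:

# Rank features by the forest's impurity-based importance
importances = pd.Series(rf_classifier.feature_importances_, index=X_train.columns)
importances.sort_values().plot(kind='barh', title='Random Forest Feature Importances')
plt.xlabel('Importance')
plt.show()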
Exp-5: K-Means Clustering on the Iris Dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

iris = datasets.load_iris()
X = iris.data    # Feature matrix
y = iris.target  # True labels (for evaluation purposes)

kmeans = KMeans(n_clusters=3, random_state=42)  # 3 clusters (since Iris has 3 classes)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)

print(f'Inertia (Sum of squared distances of samples to their closest cluster center): {kmeans.inertia_}')
sil_score = silhouette_score(X, y_kmeans)
print(f'Silhouette Score: {sil_score}')

plt.figure(figsize=(10, 6))
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis', marker='o', s=100, alpha=0.6)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], c='red', marker='x', s=200, label='Centroids')
plt.title('K-Means Clustering on Iris Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()

df = pd.DataFrame(X, columns=iris.feature_names)
df['Cluster'] = y_kmeans
sns.pairplot(df, hue='Cluster', palette='viridis', markers=["o", "s", "D"])
plt.show()
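k is fixed at 3 above because Iris has 3 species. When k is unknown, the elbow method is a common heuristic; a minimal sketch (an addition for illustration), assuming `X` and the imports from this experiment:

# Plot inertia for k = 1..10; the "elbow" in the curve suggests a reasonable k
inertias = [KMeans(n_clusters=k, random_state=42).fit(X).inertia_ for k in range(1, 11)]
plt.plot(range(1, 11), inertias, marker='o')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia')
plt.title('Elbow Method for K-Means')
plt.show()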
EXP-6: Hierarchical (Agglomerative) Clustering on Iris

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X = iris.data    # Features
y = iris.target  # True labels (not used in clustering)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

linked = linkage(X_scaled, method='ward')
plt.figure(figsize=(10, 5))
dendrogram(linked, labels=iris.target)
plt.title("Hierarchical Clustering Dendrogram")
plt.xlabel("Sample Index")
plt.ylabel("Distance")
plt.show()

n_clusters = 3  # Choosing 3 clusters since Iris has 3 species
hc = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
clusters = hc.fit_predict(X_scaled)

plt.figure(figsize=(8, 5))
sns.scatterplot(x=X_scaled[:, 0], y=X_scaled[:, 1], hue=clusters, palette="viridis")
plt.title("Clusters Found by Hierarchical Clustering")
plt.xlabel("Feature 1 (Scaled)")
plt.ylabel("Feature 2 (Scaled)")
plt.legend(title="Cluster")
plt.show()
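The agglomerative clusters can be scored with the same silhouette metric used in Exp-5, which also allows comparing linkage criteria; a minimal sketch (an addition for illustration), assuming `X_scaled` from above:

from sklearn.metrics import silhouette_score

# Compare common linkage criteria by silhouette score (closer to 1 is better separated)
for method in ['ward', 'complete', 'average', 'single']:
    labels = AgglomerativeClustering(n_clusters=3, linkage=method).fit_predict(X_scaled)
    print(f"{method:>8} linkage silhouette: {silhouette_score(X_scaled, labels):.3f}")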
EXP-7 PCA

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_wine

wine_data = load_wine()
X = wine_data.data    # Features
y = wine_data.target  # Target labels

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA()
X_pca = pca.fit_transform(X_scaled)
explained_variance = pca.explained_variance_ratio_

plt.figure(figsize=(8, 5))
plt.plot(np.cumsum(explained_variance), marker='o', color='b')
plt.title('Cumulative Explained Variance vs. Number of Principal Components')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid(True)
plt.show()

# Smallest number of components that retains at least 95% of the variance
n_components = np.argmax(np.cumsum(explained_variance) >= 0.95) + 1
pca = PCA(n_components=n_components)
X_pca_95 = pca.fit_transform(X_scaled)
print(f"Original dataset shape: {X.shape}")
print(f"Reduced dataset shape (95% variance retained): {X_pca_95.shape}")
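To visualize what the leading components capture, the samples can be plotted in the plane of the first two PCs, colored by class; a minimal sketch, assuming `X_scaled`, `y`, and `wine_data` from above:

# Scatter the wine samples over the first two principal components
X_2d = PCA(n_components=2).fit_transform(X_scaled)
for label in np.unique(y):
    plt.scatter(X_2d[y == label, 0], X_2d[y == label, 1], label=wine_data.target_names[label])
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Wine Dataset Projected onto the First Two PCs')
plt.legend()
plt.show()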
EXP-8 HMM

import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import MultinomialHMM

np.random.seed(42)

transition_matrix = np.array([
    [0.7, 0.3],  # State 0 -> State 0, State 0 -> State 1
    [0.4, 0.6]   # State 1 -> State 0, State 1 -> State 1
])
emission_matrix = np.array([
    [0.9, 0.1],  # State 0 emits symbol 0 with probability 0.9, symbol 1 with probability 0.1
    [0.2, 0.8]   # State 1 emits symbol 0 with probability 0.2, symbol 1 with probability 0.8
])
start_probabilities = np.array([0.6, 0.4])
n_samples = 100

model = MultinomialHMM(n_components=2, random_state=42, n_trials=1)  # n_trials=1: each sample is one draw over the 2 observation symbols
model.startprob_ = start_probabilities
model.transmat_ = transition_matrix
model.emissionprob_ = emission_matrix

X, Z = model.sample(n_samples)  # X are the observations, Z are the hidden states

model_fit = MultinomialHMM(n_components=2, random_state=42, n_trials=1)
model_fit.fit(X)
hidden_states = model_fit.predict(X)

plt.figure(figsize=(10, 6))
plt.subplot(2, 1, 1)
plt.title('Generated Observations')
plt.plot(X, label='Observations')
plt.xlabel('Time')
plt.ylabel('Observation Value')
plt.legend()
plt.subplot(2, 1, 2)
plt.title('Predicted Hidden States')
plt.plot(hidden_states, label='Hidden States', color='red')
plt.xlabel('Time')
plt.ylabel('Hidden State')
plt.legend()
plt.tight_layout()
plt.show()

accuracy = np.sum(hidden_states == Z) / n_samples
print(f"Accuracy of hidden state prediction: {accuracy * 100:.2f}%")

from hmmlearn.hmm import GaussianHMM

np.random.seed(42)
# Two well-separated Gaussian regimes (means 0 and 5)
X = np.concatenate([np.random.normal(0, 1, (100, 1)), np.random.normal(5, 1, (100, 1))])
X = X.reshape(-1, 1)

model = GaussianHMM(n_components=2, covariance_type="diag", random_state=42)
model.fit(X)
hidden_states = model.predict(X)
print(f"Predicted hidden states: {hidden_states[:10]}")

plt.plot(X, label="Observations")
plt.plot(hidden_states, label="Predicted Hidden States", linestyle="--")
plt.legend()
plt.show()
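Beyond predict, hmmlearn can decode the most likely state path with the Viterbi algorithm and report the model's log-likelihood; a minimal sketch, assuming the GaussianHMM `model` and `X` from above:

# Viterbi decoding returns the log-likelihood of the single best state path
log_prob, state_path = model.decode(X, algorithm="viterbi")
print(f"Log-likelihood of the Viterbi path: {log_prob:.2f}")
print(f"First 10 Viterbi states: {state_path[:10]}")
# score gives the total log-likelihood of the observations under the model
print(f"Observation log-likelihood: {model.score(X):.2f}")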
EXP-9 CNN

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale pixel values to [0, 1] and add a channel dimension
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

model = keras.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))

test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc:.4f}")

plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
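Individual predictions can be inspected by running the trained network on a few test digits; a minimal sketch, assuming `model` and `x_test` from above:

import numpy as np

# Show the first five test digits with the model's predicted labels
preds = np.argmax(model.predict(x_test[:5]), axis=1)
for i in range(5):
    plt.subplot(1, 5, i + 1)
    plt.imshow(x_test[i].reshape(28, 28), cmap='gray')
    plt.title(f"pred: {preds[i]}")
    plt.axis('off')
plt.show()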
EXP-10 RNN

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

def generate_time_series(batch_size, n_steps):
    freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)
    time = np.linspace(0, 1, n_steps)
    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))   # Wave 1
    series += 0.5 * np.sin((time - offsets2) * (freq2 * 20 + 20))  # Wave 2
    series += 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)    # Noise
    return series[..., np.newaxis]

n_steps = 50
batch_size = 1000
X_train = generate_time_series(batch_size, n_steps)
y_train = X_train[:, -1]  # target: value at the final time step
X_valid = generate_time_series(200, n_steps)
y_valid = X_valid[:, -1]

model = Sequential([
    SimpleRNN(20, return_sequences=False, input_shape=[n_steps, 1]),
    Dense(1)
])
model.compile(loss="mse", optimizer="adam")
model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

X_test = generate_time_series(1, n_steps)
y_pred = model.predict(X_test)
print("Predicted value:", y_pred)
