AML Lab
Exp-1
import pandas as pd
import os
cwd = os.getcwd()
print(cwd)
df = pd.read_csv("C:/Users/User/Downloads/Salary.csv")
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
df.head(10)
df.tail(7)
df.head()
x=df.iloc[:,:-1].values
y=df.iloc[:,-1:].values
print(x)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
print("x_train"
+str(x_train),"x_test"+str(x_test),"y_train"+str(
y_train),"y_test"+str(y_test))
model=LinearRegression()
model.fit(x_train,y_train)
predictions=model.predict(x_test)
print(predictions)
y_test
predictions-y_test
import seaborn as sns
sns.displot(predictions-y_test)
plt.scatter(x_train,y_train,color="red")
plt.plot(x_train,model.predict(x_train))
plt.scatter(x_test,y_test,color="black")
plt.plot(x_test,model.predict(x_test))
r_sq = model.score(x_train, y_train)
print("Coefficient of Determination:", r_sq)
print('Intercept:', model.intercept_)
print('Slope:', model.coef_)
y_pred = model.predict(x_train)
print('Predict response:' , y_pred ,sep='\n')
print('y=' + str(float(model.coef_)) + 'x+' + str(float(model.intercept_)))
y = 8726.213298429322 * 8.6 + 28719.755759162297
y
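As a cross-check (a minimal sketch; 8.6 is the experience value substituted by hand above), the fitted model should return the same value:
print(model.predict([[8.6]]))  # should match the y computed manually from slope and intercept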
Exp-2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
boston=pd.read_csv("C:/Users/User/Download
s/boston-housing-dataset.csv")
boston.head()
target = boston['MEDV']  # assigning to boston.target would create a DataFrame attribute, not a column
target
boston.isnull().sum()
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.histplot(boston['MEDV'], bins=30, kde=True)  # histplot replaces the deprecated distplot
plt.show()
correlation_matrix = boston.corr().round(2)
sns.heatmap(data=correlation_matrix, annot=True)
plt.figure(figsize=(20, 5))
features = ['LSTAT', 'RM']
target = boston['MEDV']
for i, col in enumerate(features):
    plt.subplot(1, len(features), i + 1)
    x = boston[col]
    y = target
    plt.scatter(x, y, marker='o')
    plt.title(col)
    plt.xlabel(col)
    plt.ylabel('MEDV')
X = pd.DataFrame(np.c_[boston['LSTAT']], columns=['LSTAT'])
print(X)
Y = boston['MEDV']
print(Y)
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=5)
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
lin_model=LinearRegression()
lin_model.fit(X_train,Y_train)
y_train_predict = lin_model.predict(X_train)
rmse = np.sqrt(mean_squared_error(Y_train, y_train_predict))
r2 = r2_score(Y_train, y_train_predict)
print("The model performance for training set")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, lin_model.predict(X_train))
plt.scatter(X_test, Y_test, color='black')
plt.plot(X_test, lin_model.predict(X_test))
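The same metrics can be reported for the test split, mirroring the training-set evaluation above (a minimal sketch using the imports already present):
y_test_predict = lin_model.predict(X_test)
rmse_test = np.sqrt(mean_squared_error(Y_test, y_test_predict))
r2_test = r2_score(Y_test, y_test_predict)
print("The model performance for testing set")
print('RMSE is {}'.format(rmse_test))
print('R2 score is {}'.format(r2_test))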
EXP-3 SVM
from sklearn import datasets
iris=datasets.load_iris()
print("Feature Names:",iris.feature_names)
print("Target Name:",iris.target_names)
print("\nDataset Description:")
print(iris.DESCR)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.decomposition import PCA
iris = datasets.load_iris()
X = iris.data # All four features (sepal length, sepal width, petal length, petal width)
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
pca = PCA(n_components=2)
X_train_2d = pca.fit_transform(X_train)
X_test_2d = pca.transform(X_test)
def train_and_evaluate_svm(kernel_type):
    svm_model = SVC(kernel=kernel_type)
    svm_model.fit(X_train_2d, y_train)
    y_pred = svm_model.predict(X_test_2d)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for {kernel_type} kernel: {accuracy * 100:.2f}%")
    print(f"Classification Report for {kernel_type} kernel:")
    print(classification_report(y_test, y_pred))
def plot_decision_boundary(kernel_type):
    svm_model = SVC(kernel=kernel_type)
    svm_model.fit(X_train_2d, y_train)
    h = .02  # Step size in meshgrid
    x_min, x_max = X_train_2d[:, 0].min() - 1, X_train_2d[:, 0].max() + 1
    y_min, y_max = X_train_2d[:, 1].min() - 1, X_train_2d[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.Paired)
    plt.scatter(X_train_2d[:, 0], X_train_2d[:, 1], c=y_train, edgecolors='k', marker='o', s=100, cmap=plt.cm.Paired, label="Train data")
    plt.scatter(X_test_2d[:, 0], X_test_2d[:, 1], c=y_test, edgecolors='k', marker='x', s=100, cmap=plt.cm.Paired, label="Test data")
    plt.title(f"SVM with {kernel_type} kernel on Iris Dataset (PCA Reduced)")
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend()
    plt.show()
kernels = ['linear', 'poly', 'rbf']
for kernel in kernels:
    train_and_evaluate_svm(kernel)
    plot_decision_boundary(kernel)
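Kernel hyperparameters can also be tuned rather than left at their defaults; a minimal sketch with GridSearchCV (the C and gamma grids below are illustrative assumptions, not part of the lab):
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 0.1, 1]}
grid = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5)
grid.fit(X_train_2d, y_train)
print("Best parameters:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)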
EXP-4
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
td=pd.read_csv("/content/drive/MyDrive/tested.csv")
td = td.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
td
td['Age'] = td['Age'].fillna(td['Age'].median())
td['Embarked'] = td['Embarked'].fillna(td['Embarked'].mode()[0])
td['Fare'] = td['Fare'].fillna(td['Fare'].median())  # direct assignment; chained inplace fillna is deprecated in recent pandas
X = td.drop(['Survived'], axis=1)
y = td['Survived']
X
y
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
label_encoders = {}
categorical_cols = ['Sex', 'Embarked']
for col in categorical_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_test[col] = le.transform(X_test[col])  # Use transform (not fit_transform) to avoid data leakage
    label_encoders[col] = le
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_train_scaled
pca = PCA(n_components=2) # Reducing to 2 components for visualization
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
X_test_pca
dt_classifier = DecisionTreeClassifier(random_state=42)  # decision tree, evaluated alongside the random forest below
dt_classifier.fit(X_train, y_train)
dt_pred = dt_classifier.predict(X_test)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
rf_pred = rf_classifier.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)
rf_accuracy = accuracy_score(y_test, rf_pred)
print("Decision Tree Accuracy:", dt_accuracy)
print("Random Forest Accuracy:", rf_accuracy)
print("\nDecision Tree Classification Report:")
print(classification_report(y_test, dt_pred))
print("\nRandom Forest Classification Report:")
print(classification_report(y_test, rf_pred))
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
sns.heatmap(confusion_matrix(y_test, dt_pred), annot=True, cmap="Blues", fmt='d')
plt.title("Decision Tree Confusion Matrix")
plt.subplot(1, 2, 2)
sns.heatmap(confusion_matrix(y_test, rf_pred), annot=True, cmap="Blues", fmt='d')
plt.title("Random Forest Confusion Matrix")
plt.show()
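A minimal follow-up sketch: the random forest's feature importances indicate which columns drive the predictions (column order follows X_train):
importances = pd.Series(rf_classifier.feature_importances_, index=X_train.columns)
print(importances.sort_values(ascending=False))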
Exp-5
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
iris = datasets.load_iris()
X = iris.data # Feature matrix
y = iris.target  # True labels (for evaluation purposes)
kmeans = KMeans(n_clusters=3, random_state=42)  # 3 clusters (since Iris has 3 classes)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)
print(f'Inertia (Sum of squared distances of samples to their closest cluster center): {kmeans.inertia_}')
sil_score = silhouette_score(X, y_kmeans)
print(f'Silhouette Score: {sil_score}')
plt.figure(figsize=(10, 6))
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis', marker='o', s=100, alpha=0.6)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], c='red', marker='x', s=200, label='Centroids')
plt.title('K-Means Clustering on Iris Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()
df = pd.DataFrame(X, columns=iris.feature_names)
df['Cluster'] = y_kmeans
sns.pairplot(df, hue='Cluster', palette='viridis', markers=["o", "s", "D"])
plt.show()
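The choice of n_clusters=3 can be supported with the elbow method (a minimal sketch; k = 1..10 is an illustrative range):
inertias = []
for k in range(1, 11):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    km.fit(X)
    inertias.append(km.inertia_)
plt.plot(range(1, 11), inertias, marker='o')
plt.title('Elbow Method for Choosing k')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia')
plt.show()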
EXP-6
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
iris = load_iris()
X = iris.data # Features
y = iris.target  # True labels (not used in clustering)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
linked = linkage(X_scaled, method='ward')
plt.figure(figsize=(10, 5))
dendrogram(linked, labels=iris.target)
plt.title("Hierarchical Clustering
Dendrogram")
plt.xlabel("Sample Index")
plt.ylabel("Distance")
plt.show()
n_clusters = 3  # Choosing 3 clusters since Iris has 3 species
hc = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
clusters = hc.fit_predict(X_scaled)
plt.figure(figsize=(8, 5))
sns.scatterplot(x=X_scaled[:, 0], y=X_scaled[:, 1], hue=clusters, palette="viridis")
plt.title("Clusters Found by Hierarchical Clustering")
plt.xlabel("Feature 1 (Scaled)")
plt.ylabel("Feature 2 (Scaled)")
plt.legend(title="Cluster")
plt.show()
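Since the true species labels are available, agreement with them can be quantified by a permutation-invariant score (a minimal sketch using the adjusted Rand index):
from sklearn.metrics import adjusted_rand_score
print("Adjusted Rand Index:", adjusted_rand_score(y, clusters))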
EXP-7 PCA
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_wine
wine_data = load_wine()
X = wine_data.data # Features
y = wine_data.target # Target labels
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
pca = PCA()
X_pca = pca.fit_transform(X_scaled)
explained_variance = pca.explained_variance_ratio_
plt.figure(figsize=(8, 5))
plt.plot(np.cumsum(explained_variance), marker='o', color='b')
plt.title('Cumulative Explained Variance vs. Number of Principal Components')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid(True)
plt.show()
n_components = np.argmax(np.cumsum(explained_variance) >= 0.95) + 1
pca = PCA(n_components=n_components)
X_pca_95 = pca.fit_transform(X_scaled)
print(f"Original dataset shape: {X.shape}")
print(f"Reduced dataset shape (95% variance retained): {X_pca_95.shape}")
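For a visual check, the first two principal components (from the full PCA fit above) can be plotted with the class labels (a minimal sketch):
plt.figure(figsize=(8, 5))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis', alpha=0.7)
plt.title('Wine Dataset Projected onto the First Two Principal Components')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(label='Class')
plt.show()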
EXP-8 HMM
import numpy as np
from hmmlearn.hmm import MultinomialHMM
import matplotlib.pyplot as plt
np.random.seed(42)
transition_matrix = np.array([
    [0.7, 0.3],  # State 0 -> State 0, State 0 -> State 1
    [0.4, 0.6]   # State 1 -> State 0, State 1 -> State 1
])
emission_matrix = np.array([
    [0.9, 0.1],  # State 0 emits 0 with probability 0.9, 1 with probability 0.1
    [0.2, 0.8]   # State 1 emits 0 with probability 0.2, 1 with probability 0.8
])
start_probabilities = np.array([0.6, 0.4])
n_samples = 100
model = MultinomialHMM(n_components=2, random_state=42, n_trials=1)  # n_trials=1: one draw per step, i.e. categorical observations
model.startprob_ = start_probabilities
model.transmat_ = transition_matrix
model.emissionprob_ = emission_matrix
X, Z = model.sample(n_samples)  # X are the observations, Z are the hidden states
model_fit = MultinomialHMM(n_components=2, random_state=42, n_trials=1)
model_fit.fit(X)
hidden_states = model_fit.predict(X)
plt.figure(figsize=(10, 6))
plt.subplot(2, 1, 1)
plt.title('Generated Observations')
plt.plot(X, label='Observations')
plt.xlabel('Time')
plt.ylabel('Observation Value')
plt.legend()
plt.subplot(2, 1, 2)
plt.title('Predicted Hidden States')
plt.plot(hidden_states, label='Hidden States', color='red')
plt.xlabel('Time')
plt.ylabel('Hidden State')
plt.legend()
plt.tight_layout()
plt.show()
accuracy = np.sum(hidden_states == Z) / n_samples  # note: the fitted model's state labels may be permuted relative to Z
print(f"Accuracy of hidden state prediction: {accuracy * 100:.2f}%")
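The most likely state path and its log-likelihood can also be obtained directly via Viterbi decoding (a minimal sketch using hmmlearn's decode):
log_prob, viterbi_states = model_fit.decode(X, algorithm="viterbi")
print(f"Log-likelihood of Viterbi path: {log_prob:.2f}")
print("First 10 Viterbi states:", viterbi_states[:10])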
import numpy as np
from hmmlearn.hmm import GaussianHMM
np.random.seed(42)
X = np.concatenate([np.random.normal(0, 1, (100, 1)), np.random.normal(5, 1, (100, 1))])
X = X.reshape(-1, 1)
model = GaussianHMM(n_components=2, covariance_type="diag", random_state=42)
model.fit(X)
hidden_states = model.predict(X)
print(f"Predicted hidden states:
{hidden_states[:10]}")
import matplotlib.pyplot as plt
plt.plot(X, label="Observations")
plt.plot(hidden_states, label="Predicted
Hidden States", linestyle="--")
plt.legend()
plt.show()
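The learned Gaussian parameters show which state captured which mode of the data (a minimal sketch; the means should land near 0 and 5):
print("State means:", model.means_.ravel())
print("State covariances:", model.covars_.ravel())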
EXP-9 CNN
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
model = keras.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc:.4f}")
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
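A single test digit can be classified as a sanity check (a minimal sketch; index 0 is an arbitrary choice):
import numpy as np
probs = model.predict(x_test[:1])
print("Predicted digit:", np.argmax(probs), "True digit:", y_test[0])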
EXP-10 RNN
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
def generate_time_series(batch_size, n_steps):
    freq1, freq2, offsets1, offsets2 = np.random.rand(4, batch_size, 1)
    time = np.linspace(0, 1, n_steps)
    series = 0.5 * np.sin((time - offsets1) * (freq1 * 10 + 10))   # Wave 1
    series += 0.5 * np.sin((time - offsets2) * (freq2 * 20 + 20))  # Wave 2
    series += 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)    # Noise
    return series[..., np.newaxis]
n_steps = 50
batch_size = 1000
series = generate_time_series(batch_size, n_steps + 1)  # generate one extra step so the target is not part of the input window
X_train, y_train = series[:, :n_steps], series[:, -1]
series_valid = generate_time_series(200, n_steps + 1)
X_valid, y_valid = series_valid[:, :n_steps], series_valid[:, -1]
model = Sequential([
    SimpleRNN(20, return_sequences=False, input_shape=[n_steps, 1]),
    Dense(1)
])
model.compile(loss="mse",
optimizer="adam")
model.fit(X_train, y_train, epochs=20,
validation_data=(X_valid, y_valid))
series_test = generate_time_series(1, n_steps + 1)
X_test, y_test = series_test[:, :n_steps], series_test[:, -1]
y_pred = model.predict(X_test)
print("Predicted value:", y_pred, "Actual value:", y_test)