8&9 Assignment ADS
Problem:
a) Naive Bayes classification of the attached example without using predefined functions
CODE:
class NaiveBayesClassifier:
    def __init__(self):
        self.priors = {}      # P(label)
        self.posteriors = {}  # per label, per feature: {value: P(value | label)}

    def fit(self, X, y):
        for label in set(y):
            rows = [x for x, t in zip(X, y) if t == label]
            self.priors[label] = len(rows) / len(y)
            self.posteriors[label] = [{} for _ in X[0]]
            for row in rows:
                for feature, value in enumerate(row):
                    counts = self.posteriors[label][feature]
                    counts[value] = counts.get(value, 0) + 1 / len(rows)

    def predict(self, X):
        predictions = []
        for sample in X:
            max_prob = -1
            pred_label = None
            for label in self.priors:
                prob = self.priors[label]
                for feature, value in enumerate(sample):
                    if value in self.posteriors[label][feature]:
                        prob *= self.posteriors[label][feature][value]
                    else:
                        prob *= 0.1  # small fallback probability for unseen values
                if prob > max_prob:
                    max_prob = prob
                    pred_label = label
            predictions.append(pred_label)
        return predictions
# Training tuples from the attached example; rows R7-R14 are assumed from the
# standard AllElectronics table so the data matches its 14 class labels.
X = [
    ['R1', 'youth', 'high', 'no', 'fair'], ['R2', 'youth', 'high', 'no', 'excellent'],
    ['R3', 'middle-aged', 'high', 'no', 'fair'], ['R4', 'senior', 'medium', 'no', 'fair'],
    ['R5', 'senior', 'low', 'yes', 'fair'], ['R6', 'senior', 'low', 'yes', 'excellent'],
    ['R7', 'middle-aged', 'low', 'yes', 'excellent'], ['R8', 'youth', 'medium', 'no', 'fair'],
    ['R9', 'youth', 'low', 'yes', 'fair'], ['R10', 'senior', 'medium', 'yes', 'fair'],
    ['R11', 'youth', 'medium', 'yes', 'excellent'], ['R12', 'middle-aged', 'medium', 'no', 'excellent'],
    ['R13', 'middle-aged', 'high', 'yes', 'fair'], ['R14', 'senior', 'medium', 'no', 'excellent'],
]
y = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes',
     'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
model = NaiveBayesClassifier()
model.fit([row[1:] for row in X], y)  # drop the RID column; it is not a feature
test_data = [['R15', 'youth', 'medium', 'yes', 'fair']]
predictions = model.predict([row[1:] for row in test_data])
print("Predictions:", predictions)
OUTPUT:
Problem:
b) The attached example using predefined functions
CODE:
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
# Training tuples; rows R7-R14 are assumed from the standard AllElectronics
# table so the data matches the 14 labels in y_train below.
data = [
    ['R1', 'youth', 'high', 'no', 'fair'],
    ['R2', 'youth', 'high', 'no', 'excellent'],
    ['R3', 'middle-aged', 'high', 'no', 'fair'],
    ['R4', 'senior', 'medium', 'no', 'fair'],
    ['R5', 'senior', 'low', 'yes', 'fair'],
    ['R6', 'senior', 'low', 'yes', 'excellent'],
    ['R7', 'middle-aged', 'low', 'yes', 'excellent'],
    ['R8', 'youth', 'medium', 'no', 'fair'],
    ['R9', 'youth', 'low', 'yes', 'fair'],
    ['R10', 'senior', 'medium', 'yes', 'fair'],
    ['R11', 'youth', 'medium', 'yes', 'excellent'],
    ['R12', 'middle-aged', 'medium', 'no', 'excellent'],
    ['R13', 'middle-aged', 'high', 'yes', 'fair'],
    ['R14', 'senior', 'medium', 'no', 'excellent'],
]
X_train = [row[1:] for row in data]
y_train = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes',
           'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
test_data = [
    ['R15', 'youth', 'medium', 'yes', 'fair']
]
# Encode train and test together so every category level is seen
combined_data = X_train + [row[1:] for row in test_data]
encoder = LabelEncoder()
combined_encoded = []
for i in range(len(combined_data[0])):
    combined_encoded.append(encoder.fit_transform([row[i] for row in combined_data]))
combined_encoded = list(zip(*combined_encoded))
model = GaussianNB()
model.fit(combined_encoded[:len(X_train)], y_train)
predictions = model.predict(combined_encoded[len(X_train):])
print("Predictions:", predictions)
OUTPUT:
Problem:
c) Naive Bayes on any popular dataset using predefined functions
CODE:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import pandas as pd
data = pd.read_csv('/content/Iris (1).csv')
if 'Id' in data.columns:
    data.drop(columns=['Id'], inplace=True)
X = data.drop(columns=['Species']).values
y = data['Species'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
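Beyond the single 80/20 split, a k-fold cross-validation gives a steadier accuracy estimate; a small optional addition:

# Optional: 5-fold cross-validated accuracy on the full dataset.
from sklearn.model_selection import cross_val_score
scores = cross_val_score(GaussianNB(), X, y, cv=5)
print("CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))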
OUTPUT:
ASSIGNMENT-9
Problem:
Implement PCA and SVD without using inbuilt functions.
Apply PCA and SVD on two real-world datasets, clearly mentioning your observations on how PCA and SVD help the data science process.
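Before the implementations, it helps to state the link being exploited: the principal components of a centered matrix are the eigenvectors of its covariance matrix, and the same directions are the right singular vectors of its SVD, with eigenvalues equal to the squared singular values divided by n. A quick numerical check of this identity (numpy's built-ins are used here only for verification, not in the from-scratch code below):

import numpy as np

# Check: for centered X with SVD X = U S V^T, the covariance matrix
# X^T X / n has eigenvalues S**2 / n (and eigenvectors V).
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4))
X_centered = X - X.mean(axis=0)
U, S, Vt = np.linalg.svd(X_centered, full_matrices=False)
cov_eigvals = np.linalg.eigvalsh(X_centered.T @ X_centered / len(X_centered))[::-1]
print(np.allclose(cov_eigvals, S**2 / len(X_centered)))  # True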
CODE:
import numpy as np

def svd(X):
    # Right singular vectors: eigenvectors of the Gram matrix X^T X,
    # whose eigenvalues are the squared singular values of X
    gram_matrix = np.dot(X.T, X)
    eigenvalues, eigenvectors = np.linalg.eig(gram_matrix)
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]
    singular_values = np.sqrt(np.abs(eigenvalues[sorted_indices]))
    # Left singular vectors: U = X V / sigma, keeping only components with
    # non-negligible singular values (this 4x3 matrix has rank 2)
    keep = singular_values > 1e-10
    U = np.dot(X, sorted_eigenvectors[:, keep] / singular_values[keep])
    return U

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
svd_data = svd(data)
print("SVD:\n", svd_data)
--This is the SVD implementation on a 4x3 matrix--
SVD:
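As a quick sanity check (not part of the from-scratch requirement), the result can be compared against numpy's reference SVD:

# Validation only: compare with numpy's built-in SVD.
U_ref, S_ref, Vt_ref = np.linalg.svd(data, full_matrices=False)
print(S_ref)  # the third singular value is ~0, confirming rank 2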
def standardize_data(X):
    # Zero-mean, unit-variance columns
    mean = np.mean(X, axis=0)
    std_dev = np.std(X, axis=0)
    return (X - mean) / std_dev, mean, std_dev

def covariance_matrix(X):
    n_samples = X.shape[0]
    cov_matrix = np.dot(X.T, X) / n_samples
    return cov_matrix

def svd(X):
    standardized_X, _, _ = standardize_data(X)
    # Compute covariance matrix
    cov_matrix = covariance_matrix(standardized_X)
    # Eigen decomposition
    eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
    # Sort by decreasing eigenvalue
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]
    singular_values = np.sqrt(np.abs(eigenvalues[sorted_indices]))
    singular_value_matrix = np.diag(singular_values)
    # U holds the component scores (standardized data projected onto the
    # eigenvectors); V holds the principal directions as rows
    U = standardized_X.dot(sorted_eigenvectors)
    V = sorted_eigenvectors.T
    # Reconstruct the standardized matrix: U.V = Z W W^T = Z
    reconstructed_X = np.dot(U, V)
    return U, singular_value_matrix, V, reconstructed_X

# Load Iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data

# SVD
U, singular_value_matrix, V, reconstructed_data_svd = svd(X)
print("\nSVD U matrix:")
print(U)
print("\nSVD Singular Value Matrix:")
print(singular_value_matrix)
print("\nSVD V matrix:")
print(V)
SVD U matrix:
[[-2.26470281e+00 -4.80026597e-01 -1.27706022e-01  2.41682039e-02]
 ...]
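One consistency check on the Iris decomposition: multiplying the scores back by the principal directions should recover the standardized data exactly.

# Check: U (scores) times V (directions) reproduces the standardized data.
standardized_X, _, _ = standardize_data(X)
print(np.allclose(np.dot(U, V), standardized_X))  # True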
import pandas as pd

# Second real-world dataset: the same from-scratch SVD is applied to the
# Wine Quality data; standardize_data, covariance_matrix and svd() are
# reused from the Iris section above.
# data_path is assumed to point to the Wine Quality CSV used in the notebook
wine_quality = pd.read_csv(data_path)
X = wine_quality.values
# SVD
U, singular_value_matrix, V, reconstructed_data_svd = svd(X)
print("\nSVD U matrix:")
print(U)
print("\nSVD Singular Value Matrix:")
print(singular_value_matrix)
print("\nSVD V matrix:")
print(V)
print("\nSVD reconstructed data:")
print(reconstructed_data_svd)
SVD U matrix:
[[ ...                                                     -0.16234897]
 ...
 [-2.50698935  0.46368472  1.04675403 ...  0.7372593  -0.19267716
  -0.02013498]
 [-2.48395074 -0.61722305  2.20531812 ...  0.69776756 -0.35662953
  -0.19522711]
 [-2.611506    0.55873545  1.1233558  ...  0.36613367 -0.59729112
   0.08976279]]
SVD Singular Value Matrix (13x13 diagonal matrix; diagonal entries):
[1.80969666 1.51370316 1.30464529 1.10278853 0.98400443 0.96229396
 0.79217884 0.74718617 0.6987322  0.63809232 0.55735222 ...]
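This is where SVD earns its keep in the data science process: the squared singular values measure how much variance each component carries, so a small number of components that retains most of the variance can stand in for all 13 columns.

# The squared singular values give each component's share of the variance.
variances = np.diag(singular_value_matrix) ** 2
explained = variances / variances.sum()
print(np.cumsum(explained))  # cumulative variance explained per component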
PCA Implementation

def pca(X, num_components):
    # Center the data, eigendecompose its covariance, keep the top-k directions
    X_centered = X - np.mean(X, axis=0)
    eigenvalues, eigenvectors = np.linalg.eig(np.dot(X_centered.T, X_centered) / len(X))
    sorted_indices = np.argsort(eigenvalues)[::-1]
    # Project the centered data onto the leading principal components
    return np.dot(X_centered, eigenvectors[:, sorted_indices[:num_components]])

n_components = 1
pca_data = pca(data, n_components)
print("PCA:\n", pca_data)
--This is the PCA implementation on a 4x3 matrix--
PCA:
[[-7.79422863]
[-2.59807621]
[ 2.59807621]
[ 7.79422863]]
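A one-component projection is lossless here: after centering, the four rows of the toy matrix all lie along a single direction in R^3, so the first component carries all of the variance.

# Check: the centered toy matrix has rank 1, so PC1 explains ~100% of it.
X_centered = data - data.mean(axis=0)
eigvals = np.linalg.eigvalsh(np.dot(X_centered.T, X_centered) / len(data))
print(eigvals[::-1] / eigvals.sum())  # ~[1, 0, 0]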
1) IRIS dataset
import numpy as np
from sklearn.datasets import load_iris

# standardize_data and covariance_matrix are reused from the SVD section
# above. Unlike the 4x3 toy example, this version standardizes the
# features before projecting.
def pca(X, num_components):
    standardized_X, _, _ = standardize_data(X)
    cov_matrix = covariance_matrix(standardized_X)
    # Eigen decomposition
    eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
    # Sort eigenvectors based on eigenvalues
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]
    # Select top k eigenvectors
    principal_components = sorted_eigenvectors[:, :num_components]
    # Transform data
    transformed_data = np.dot(standardized_X, principal_components)
    return transformed_data

# Load Iris dataset
iris = load_iris()
X = iris.data

# PCA
num_components = 3
transformed_data_pca = pca(X, num_components)
print("PCA transformed data:")
print(transformed_data_pca)
PCA transformed data:
2) Wine Quality dataset
# Apply the same pca() to the Wine Quality data loaded in the SVD section.
X = wine_quality.values
# PCA
num_components = 3
transformed_data_pca = pca(X, num_components)
print("PCA transformed data:")
print(transformed_data_pca)
PCA transformed data:
[[-1.2151326 1.79455548 -1.53760979]
[-0.46743535 2.4779897 0.07017969]
[-0.40169488 1.87194873 -0.73697061]
...
[-2.50698935 0.46368472 1.04675403]
[-2.48395074 -0.61722305 2.20531812]
[-2.611506 0.55873545 1.1233558 ]]
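Tying back to the problem statement: the practical value of PCA/SVD in the data science process is controlled compression. The rank-k reconstruction error below shows how little of the standardized Wine Quality data is lost when only a few components are kept (a small sketch reusing the helpers defined above):

# Relative reconstruction error of a rank-k PCA approximation.
standardized_X, _, _ = standardize_data(X)
eigvals, eigvecs = np.linalg.eigh(covariance_matrix(standardized_X))
order = np.argsort(eigvals)[::-1]
for k in (1, 2, 3):
    top = eigvecs[:, order[:k]]
    approx = np.dot(np.dot(standardized_X, top), top.T)  # rank-k approximation
    err = np.linalg.norm(standardized_X - approx) / np.linalg.norm(standardized_X)
    print(k, round(err, 3))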