PCA
PCA
# Notebook-wide options read by the image helpers in this file.
opt = dict(
    image_size=32,       # side length (pixels) that load_image resizes to
    is_grayscale=False,  # True -> load as 'L' and plot with a gray colormap
    val_split=0.75,      # not used in the visible code; presumably a train/val ratio
)
def load_image(path):
    """Load one image file as a scaled floating-point array.

    The image is converted to 'L' when ``opt['is_grayscale']`` is truthy and
    to 'RGB' otherwise, resized to a square of side ``opt['image_size']``,
    turned into a NumPy array, and divided by 256.

    Args:
        path: Location of the image file, passed to ``Image.open``.

    Returns:
        The resized image as a NumPy array of pixel values divided by 256.
    """
    picture = Image.open(path)
    picture = picture.convert('RGB' if not opt['is_grayscale'] else 'L')
    side = opt['image_size']
    pixels = np.array(picture.resize((side, side)))
    # Divisor is 256 (not 255), so the largest possible value is 255/256.
    return pixels / 256
def display_images(imgs, classes, row=1, col=2, w=32, h=32):
    """Show the first ``row * col`` images in a grid, titled with their class.

    Args:
        imgs: Indexable collection of images accepted by ``plt.imshow``.
        classes: Indexable collection of labels, aligned with ``imgs``.
        row: Number of grid rows.
        col: Number of grid columns.
        w: Unused by the body; kept for interface compatibility.
        h: Unused by the body; kept for interface compatibility.
    """
    fig = plt.figure(figsize=(8, 8))
    for idx in range(col * row):
        img = imgs[idx]
        # Subplot positions are 1-based.
        fig.add_subplot(row, col, idx + 1)
        # Grayscale images need an explicit colormap.
        extra = {'cmap': 'gray'} if opt['is_grayscale'] else {}
        plt.imshow(img, **extra)
        plt.title("Class:{}".format(classes[idx]))
        plt.axis('off')
    plt.show()
def load_data(dir_path):
    """Load every ``.png`` in a directory together with its class label.

    Files are visited in sorted name order. The text before the first ``_``
    in each file name is looked up in ``cfw_dict`` to obtain the label. The
    shapes of both resulting arrays are printed before returning.

    Args:
        dir_path: Directory containing the image files.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays built from the loaded
        images and their looked-up labels.
    """
    label_dict = cfw_dict
    images = []
    labels = []
    for filename in sorted(os.listdir(dir_path)):
        # Guard clause: anything that is not a PNG is ignored.
        if not filename.endswith(".png"):
            continue
        picture = load_image(os.path.join(dir_path, filename))
        label = label_dict[filename.split('_')[0]]
        images.append(picture)
        labels.append(label)
    image_list = np.array(images)
    y_list = np.array(labels)
    print("Dataset shape:",image_list.shape)
    print("Label shape:",y_list.shape)
    return image_list, y_list
# Dataset directory. The literal is split with implicit string concatenation
# so the long path stays a single valid string (a raw line break inside the
# quotes is an unterminated-string syntax error).
dirpath = (
    '/home/nasir/python-ws/assingments/assingment2/SMAI-Dataset/IIIT-CFW/'
    'IIIT-CFW'
)
# X holds the loaded images and y their labels, as returned by load_data.
X, y = load_data(dirpath)
Dataset shape: (672, 32, 32, 3)
Label shape: (672,)
X[0].dtype
dtype('float64')
# Leading three dimensions of X, unpacked as count, height and width.
N, H, W = X.shape[:3]
# Channel count: fixed to 1 for grayscale, otherwise read from the 4th axis.
if opt['is_grayscale']:
    C = 1
else:
    C = X.shape[3]
# Draw six random sample indices (with replacement) and preview them in a 2x3 grid.
ind = np.random.randint(low=0, high=y.shape[0], size=6)
display_images(X[ind, ...], y[ind], row=2, col=3)
X[0]
array([[[0.00390625, 0. , 0. ],
[0. , 0.00390625, 0.00390625],
[0. , 0.01171875, 0.015625 ],
...,
[0.0234375 , 0.046875 , 0.01171875],
[0.02734375, 0.0390625 , 0.0078125 ],
[0.0234375 , 0.03515625, 0.0078125 ]],
...,
plt.imshow(X[0])
<matplotlib.image.AxesImage at 0x7f9ba3bb0250>
# Flatten every image into one feature row. The sample count is taken from X
# itself and the feature count is inferred (-1), so this no longer depends on
# the dataset having exactly 672 images of 3072 values.
X = X.reshape(X.shape[0], -1)
# Sanity check: a flattened row must reshape back into a displayable image.
# A dedicated name is used so the label array `y` from load_data is not
# overwritten by pixel data.
first_flat = X[0]
side = opt['image_size']
y1 = first_flat.reshape(side, side, -1)
plt.imshow(y1)
<matplotlib.image.AxesImage at 0x7f9ba1833520>
len(X)
672
data=X
print(X)
[[0.00390625 0. 0. ... 0.859375 0.578125 0.27734375]
[0.00390625 0. 0.08203125 ... 0.28515625 0.24609375 0.27734375]
[0.9765625 0.98046875 0.9609375 ... 0.64453125 0.8203125 0.45703125]
...
[0.95703125 0.7421875 0.54296875 ... 0.9921875 0.99609375 0.9765625 ]
[0.96484375 0.98046875 0.28125 ... 0.50390625 0.51171875 0.19140625]
[0.796875 0.8203125 0.8046875 ... 0.2109375 0.44140625 0.52734375]]
import pandas as pd
# Wrap the data matrix X in a DataFrame for tabular inspection.
# NOTE(review): this rebinds `y`, the name that load_data's labels were
# assigned to earlier in the file — confirm the labels are not needed later,
# or use a different name here.
y=pd.DataFrame(X)
y
0 1 2 3 4 5 6 \
0 0.003906 0.000000 0.000000 0.000000 0.003906 0.003906 0.000000
1 0.003906 0.000000 0.082031 0.003906 0.000000 0.070312 0.007812
2 0.976562 0.980469 0.960938 0.980469 0.984375 0.945312 0.976562
3 0.656250 0.656250 0.687500 0.652344 0.656250 0.683594 0.644531
4 0.000000 0.546875 0.562500 0.000000 0.542969 0.542969 0.074219
.. ... ... ... ... ... ... ...
667 0.570312 0.472656 0.417969 0.292969 0.167969 0.093750 0.351562
668 0.992188 0.992188 0.996094 0.996094 0.988281 0.996094 0.996094
669 0.957031 0.742188 0.542969 0.949219 0.746094 0.550781 0.949219
670 0.964844 0.980469 0.281250 0.972656 0.988281 0.285156 0.968750
671 0.796875 0.820312 0.804688 0.789062 0.812500 0.796875 0.851562
def getvectors(self):
    """Return the leading principal directions of ``self.X``.

    The data is mean-centred per column, the (unnormalised) scatter matrix
    ``Xc.T @ Xc`` is formed, and its eigenvectors are ordered by decreasing
    eigenvalue. The shape of the centred data is printed.

    Reads:
        self.X: 2-D data matrix, one sample per row.
        self.p_comps: Number of eigenvectors to keep.

    Returns:
        Real-valued array with one column per selected eigenvector, ordered
        from largest to smallest eigenvalue.
    """
    # calculating the mean
    X_mean = np.mean(self.X, axis=0)
    # mean centering the data; uses the instance's data, not a module-level X
    X_meaned = self.X - X_mean
    print(X_meaned.shape)
    # Scatter matrix (covariance up to a constant factor, which does not
    # change the eigenvectors or their ordering).
    cov_matrix = np.dot(X_meaned.T, X_meaned)
    # The matrix is symmetric, so use eigh: it guarantees real eigenvalues
    # and eigenvectors, whereas the general eig solver can return spurious
    # complex pairs for the near-zero eigenvalues.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    # eigh yields ascending eigenvalues; reverse to get descending order.
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]
    # Choose the desired number of principal components
    num_components = self.p_comps
    selected_eigenvectors = sorted_eigenvectors[:, :num_components]
    return selected_eigenvectors
def reducedimage(self, sel_eigen):
    """Project the instance's data onto the selected eigenvectors.

    Args:
        sel_eigen: Array whose columns are the eigenvectors to project onto
            (its row count must match the number of columns of ``self.X``).

    Returns:
        ``sel_eigen.T @ self.X.T`` — one row per eigenvector, one column per
        sample.
    """
    # Project the instance's own data (self.X) rather than whatever the
    # module-level name X happens to hold.
    # NOTE(review): the data is projected without subtracting the column mean
    # that getvectors removes before the eigen-decomposition — confirm whether
    # centred scores are wanted here.
    reduced_image = np.dot(sel_eigen.T, self.X.T)
    return reduced_image
# Build a Pca object from the data matrix X.
# NOTE(review): the Pca class definition is not visible here; the second
# argument is presumably the number of components (p_comps) — confirm.
# 1000 is larger than the 672 samples reported when the data was loaded, so
# the trailing components cannot carry real variance — verify this is intended.
pca1=Pca(X,1000)
(672, 3072)
# Tabulate the Pca result for inspection.
# NOTE(review): `finalimg` is not set anywhere in the visible code; presumably
# the constructor stores the output of reducedimage there — confirm.
pca1_df=pd.DataFrame(pca1.finalimg)
pca1_df
0 1 2 \
0 -17.347318+0.000000j -21.032250+0.000000j -30.016021+0.000000j
1 12.921849+0.000000j 21.694817+0.000000j 7.841285+0.000000j
2 -2.303765+0.000000j -2.031538+0.000000j -3.672945+0.000000j
3 -7.927858+0.000000j -3.187595+0.000000j -6.401104+0.000000j
4 4.367666+0.000000j 3.044460+0.000000j 5.532498+0.000000j
.. ... ... ...
995 0.013505-0.012052j 0.013505-0.012052j 0.013505-0.012052j
996 0.001471+0.003997j 0.001471+0.003997j 0.001471+0.003997j
997 0.001471-0.003997j 0.001471-0.003997j 0.001471-0.003997j
998 0.011971-0.007727j 0.011971-0.007727j 0.011971-0.007727j
999 0.011971+0.007727j 0.011971+0.007727j 0.011971+0.007727j
3 4 5 \
0 -28.662534+0.000000j -22.801657+0.000000j -34.877382+0.000000j
1 12.228595+0.000000j 12.725849+0.000000j 13.395109+0.000000j
2 -1.210359+0.000000j -9.264425+0.000000j -11.560975+0.000000j
3 -3.125037+0.000000j -2.366442+0.000000j -3.183381+0.000000j
4 7.434119+0.000000j 5.799617+0.000000j 6.497211+0.000000j
.. ... ... ...
995 0.013505-0.012052j 0.013505-0.012052j 0.013505-0.012052j
996 0.001471+0.003997j 0.001471+0.003997j 0.001471+0.003997j
997 0.001471-0.003997j 0.001471-0.003997j 0.001471-0.003997j
998 0.011971-0.007727j 0.011971-0.007727j 0.011971-0.007727j
999 0.011971+0.007727j 0.011971+0.007727j 0.011971+0.007727j
6 7 8 \
0 -37.541784+0.000000j -28.97449+0.00000j -33.228114+0.000000j
1 1.213471+0.000000j 4.896505+0.000000j 10.467162+0.000000j
2 -6.044526+0.000000j -8.382431+0.000000j -1.671362+0.000000j
3 -0.786509+0.000000j 3.510783+0.000000j 0.382807+0.000000j
4 6.364132+0.000000j 5.524970+0.000000j 1.530844+0.000000j
.. ... ... ...
995 0.013505-0.012052j 0.013505-0.012052j 0.013505-0.012052j
996 0.001471+0.003997j 0.001471+0.003997j 0.001471+0.003997j
997 0.001471-0.003997j 0.001471-0.003997j 0.001471-0.003997j
998 0.011971-0.007727j 0.011971-0.007727j 0.011971-0.007727j
999 0.011971+0.007727j 0.011971+0.007727j 0.011971+0.007727j
670 671
0 -31.785884+0.000000j -39.974815+0.000000j
1 5.976527+0.000000j 6.554488+0.000000j
2 -1.000672+0.000000j -0.959378+0.000000j
3 -6.426645+0.000000j -4.059933+0.000000j
4 10.063658+0.000000j 2.951960+0.000000j
.. ... ...
995 0.013505-0.012052j 0.013505-0.012052j
996 0.001471+0.003997j 0.001471+0.003997j
997 0.001471-0.003997j 0.001471-0.003997j
998 0.011971-0.007727j 0.011971-0.007727j
999 0.011971+0.007727j 0.011971+0.007727j
# Step 7: Visualization
# Visualize the PCA-transformed images
# NOTE(review): num_components_90_percent, pca_projection, eigenvectors,
# data_std and data_mean are not defined in the visible part of the file;
# presumably they come from earlier steps of this cell — confirm.
fig, axes = plt.subplots(1, num_components_90_percent, figsize=(15, 2))
for i in range(num_components_90_percent):
# Inverse transform from PCA space to original space
# NOTE(review): the recorded run fails on the next statement with
# "shapes (672,) and (3072,) not aligned": np.dot receives one column of
# pca_projection and one column of eigenvectors, two 1-D vectors of different
# lengths. Decide what should be drawn per component before changing it.
pca_image = np.dot(pca_projection[:, i], eigenvectors[:, i].T) * data_std +
data_mean
# NOTE(review): clipping to [0, 255] and casting to uint8 assumes data_mean and
# data_std are on a 0-255 scale — verify against how the data was scaled.
pca_image = np.clip(pca_image, 0, 255).astype(np.uint8)
pca_image = pca_image.reshape(32, 32, 3)
# NOTE(review): `axes` is created above but nothing is drawn onto it in the
# visible code.
plt.show()
ValueError: shapes (672,) and (3072,) not aligned: 672 (dim 0) != 3072 (dim 0)
import numpy as np
from PIL import Image
from PIL import Image
import numpy as np
# Convert each element in the array to Pillow Image objects and store them in
# a new array
gray_image_array = []
for rgb_image in X:
    # Convert RGB image to Pillow Image (convert back to uint8 for Pillow)
    pil_rgb_image = Image.fromarray((rgb_image * 255).astype(np.uint8))
    # Convert to grayscale and explicitly set the data type to float32 or
    # float64
    pil_gray_image = pil_rgb_image.convert('L')
    # Or dtype=np.float32 if you prefer
    gray_image = np.array(pil_gray_image, dtype=np.float64)
    # Normalize pixel values to the range [0, 1] for float data types
    gray_image /= 255.0
    # NOTE(review): the visible snippet never appends gray_image to
    # gray_image_array; the elided part below presumably does — confirm.
    ...
#code by phind
import numpy as np
import matplotlib.pyplot as plt
# Step 5: Select the first k eigenvectors, which will be the new k dimensions
# NOTE(review): eigenvectors_sorted, images_inverse and images_flattened are
# not defined in the visible part of the file; presumably earlier steps of
# this snippet create them — confirm.
k = 2
eigenvectors_subset = eigenvectors_sorted[:, :k]
# Step 2: Unstandardize the data by multiplying with the standard deviation
# and adding the mean
images_reconstructed = (
    images_inverse * np.std(images_flattened, axis=0)
    + np.mean(images_flattened, axis=0)
)
# Plot the first few images
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(10, 5))
for i, ax in enumerate(axes.flatten()):
# Reshape the image to its original shape
#
# This task requires you to implement the EM algorithm for GMM and perform
# clustering operations on a given dataset(s). The list of subtasks is given below.
# • Find the parameters of GMM associated with the customer-dataset, using
#   the EM method. Vary the number of components, and observe the results.
#   Implement GMM in a class which has the routines to fit data (e.g.
#   gmm.fit(data, number of clusters)), a routine to obtain the parameters, a
#   routine to calculate the likelihoods for a given set of samples and a routine
#   to obtain the membership values of data samples.
#
# This task requires you to implement the EM algorithm for GMM and perform
# clustering operations on a given dataset(s). The list of subtasks is given below.
# • Find the parameters of GMM associated with the customer-dataset, using
#   the EM method. Vary the number of components, and observe the results.
#   Implement GMM in a class which has the routines to fit data (e.g.
#   gmm.fit(data, number of clusters)), a routine to obtain the parameters, a
#   routine to calculate the likelihoods for a given set of samples and a routine
#   to obtain the membership values of data samples.
#
# This task requires you to implement the EM algorithm for GMM and perform
# clustering operations on a given dataset(s). The list of subtasks is given below.
# • Find the parameters of GMM associated with the customer-dataset, using
#   the EM method. Vary the number of components, and observe the results.
#   Implement GMM in a class which has the routines to fit data (e.g.
#   gmm.fit(data, number of clusters)), a routine to obtain the parameters, a
#   routine to calculate the likelihoods for a given set of samples and a routine
#   to obtain the membership values of data samples.
image = images_reconstructed[i].reshape(32,32)
plt.tight_layout()
plt.show()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[55], line 7
3 import matplotlib.pyplot as plt
5 # Assume we have a variable `images` where each element is a 2D array
representing an image
6 # e.g., images = [image1, image2, ..., imageN] where image1 is an 8x8 numpy
array
----> 7 images_flattened = images_flattened = np.array([img.flatten().convert('L')
for img in X])
8 # Step 1: Standardize the dataset
9 images_standardized = (images_flattened - np.mean(images_flattened,
axis=0)) / np.std(images_flattened, axis=0)