# Data Science Exercise Medium
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
# Read the comma-separated data file into a NumPy array.
X_original = np.loadtxt(fname='2d_span_data.csv', delimiter=',')
def center(X):
    """Subtract from every row of ``X`` the mean of that row.

    Parameters
    ----------
    X : array_like
        Data with at least two dimensions; means are taken along axis 1.

    Returns
    -------
    numpy.ndarray
        A new array of the same shape as ``X`` whose mean along axis 1 is
        zero. ``X`` itself is not modified.
    """
    # keepdims=True keeps the reduced axis with length 1 so the means
    # broadcast against X without manual re-indexing.
    X_means = np.mean(X, axis=1, keepdims=True)
    X_centered = X - X_means
    return X_centered
#compute pcs
def compute_pcs(X, lam=1e-7):
    """Eigendecompose the regularized matrix ``(1 / P) * X @ X.T + lam * I``.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional array of shape ``(N, P)``. ``P`` (the number of
        columns) is used as the normalizing count. The function does not
        center ``X`` itself.
    lam : float, optional
        Value added to the diagonal before the decomposition.

    Returns
    -------
    D : numpy.ndarray
        The ``N`` eigenvalues, in ascending order (as returned by
        ``np.linalg.eigh``).
    V : numpy.ndarray
        ``(N, N)`` array whose column ``V[:, i]`` is the eigenvector
        belonging to ``D[i]``.
    """
    P = float(X.shape[1])
    Cov = (1 / P) * np.dot(X, X.T) + lam * np.eye(X.shape[0])
    # eigh is the symmetric-matrix solver; Cov is symmetric by construction.
    D, V = np.linalg.eigh(Cov)
    return D, V
# Center the loaded data before the decomposition. The name `X` was used
# below without ever being assigned, which raised a NameError.
X = center(X_original)
# Compute eigenvalues/eigenvectors
D, V = compute_pcs(X)
# Take the last two eigenvector columns as the principal components.
# np.linalg.eigh orders eigenvalues ascending, so the final column belongs
# to the largest eigenvalue.
PCs = V[:, -2:]  # shape (N, 2); (2, 2) when the data has two rows
# Encode the data: project every column of X onto the principal components
W = np.dot(PCs.T, X)  # shape (2, P)
1
# Visualization: open a 12x5 figure and define a one-row, two-column grid.
fig = plt.figure(figsize=(12, 5))
gs = gridspec.GridSpec(nrows=1, ncols=2)
# NOTE(review): no axes or plot calls are visible in this section, so the
# figure is shown empty - confirm whether plotting code is missing.
plt.tight_layout()
plt.show()
2
3