ML II Lab
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Generate 100 random points; label the first 50 by thresholding at 0.5
x = np.random.rand(100, 1)
y = np.array(["class1" if xi[0] <= 0.5 else "class2" for xi in x[:50]])
print("classes for x1 to x50 ->\n", y)

# First 50 points are labeled (train); the remaining 50 are classified by k-NN
x_train, y_train = x[:50], y
x_test = x[50:]

for k in [1, 2, 3, 4, 5, 20]:
    model = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
    pred = model.predict(x_test)
    print(f"\nk = {k}: Predicted classes for x51 to x100 ->\n{pred}")
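Because the training labels come from a known rule (x <= 0.5 gives class1), the k-NN predictions can be sanity-checked against that same rule. The check below is an optional addition to the program above, not part of the original exercise; `true_test` is a name introduced here for illustration.

# Optional sanity check, appended to the program above.
# Assumption: the 0.5 threshold also defines the "true" class of the test points.
true_test = np.array(["class1" if xi[0] <= 0.5 else "class2" for xi in x_test])
for k in [1, 3, 5, 20]:
    model = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
    acc = (model.predict(x_test) == true_test).mean()
    print(f"k = {k}: agreement with threshold rule = {acc:.2f}")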
PGM 6
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

# Load the dataset and build the design matrix with a bias column
df = pd.read_csv("lwr_dataset.csv")
X = df[['X']].values
y = df['Y'].values
X = np.hstack([np.ones((X.shape[0], 1)), X])

# Gaussian kernel: weight of each training point relative to the query x
def kernel(x, X, tau):
    return np.exp(-cdist([[x]], X, 'sqeuclidean') / (2 * tau**2))

x_range = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
y_pred = []

# Fit a separate weighted least-squares model at every query point (tau = 0.5)
for x in x_range:
    w = np.diag(kernel(x, X[:, 1:], 0.5).flatten())
    theta = np.linalg.pinv(X.T @ w @ X) @ (X.T @ w @ y)
    y_pred.append(np.array([1, x]) @ theta)

plt.scatter(X[:, 1], y)
plt.plot(x_range, y_pred, 'r')
plt.show()
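The lab does not include lwr_dataset.csv. If the file is missing, a stand-in with the same column names can be generated first; the noisy-sine shape below is an assumption chosen only so the locally weighted fit shows visible curvature.

# Hypothetical stand-in for lwr_dataset.csv (columns 'X' and 'Y' assumed)
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
x_syn = np.sort(rng.uniform(0, 10, 200))
y_syn = np.sin(x_syn) + rng.normal(0, 0.2, 200)  # noisy sine, arbitrary choice
pd.DataFrame({'X': x_syn, 'Y': y_syn}).to_csv("lwr_dataset.csv", index=False)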
PGM 7
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.datasets import fetch_california_housing

# Load California Housing Data
california = fetch_california_housing(as_frame=True)
X, y = california.data[['MedInc']], california.target  # 'MedInc' (median income) as predictor

# Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Linear Regression ---
linear_model = LinearRegression().fit(X_train, y_train)
y_linear_pred = linear_model.predict(X_test)

# --- Polynomial Regression (degree 2) ---
poly = PolynomialFeatures(degree=2)
X_train_poly, X_test_poly = poly.fit_transform(X_train), poly.transform(X_test)
poly_model = LinearRegression().fit(X_train_poly, y_train)
y_poly_pred = poly_model.predict(X_test_poly)

# Plot Linear Regression
plt.scatter(X_test, y_test, color='blue', label='Actual Prices')
plt.plot(X_test, y_linear_pred, color='red', linewidth=2, label='Linear Fit')
plt.xlabel('Median Income')
plt.ylabel('House Price')
plt.title('Linear Regression on California Housing Data')
plt.legend()
plt.show()

# Plot Polynomial Regression (sort by income so the curve draws left to right)
plt.scatter(X_test, y_test, color='blue', label='Actual Prices')
plt.plot(*zip(*sorted(zip(X_test['MedInc'], y_poly_pred))), color='green', linewidth=2, label='Polynomial Fit')
plt.xlabel('Median Income')
plt.ylabel('House Price')
plt.title('Polynomial Regression on California Housing Data')
plt.legend()
plt.show()
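The plots compare the two fits only visually. To make the comparison quantitative, test-set error metrics can be printed as well; the snippet below continues the program above and uses standard sklearn.metrics functions, as an optional addition.

from sklearn.metrics import mean_squared_error, r2_score

# Compare test-set fit quality of the linear and polynomial models
for name, pred in [("Linear", y_linear_pred), ("Polynomial", y_poly_pred)]:
    print(f"{name}: MSE = {mean_squared_error(y_test, pred):.3f}, "
          f"R^2 = {r2_score(y_test, pred):.3f}")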
PGM 8
import numpy as np, matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score

# Load Data & Train Model
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
dt_model = DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=42).fit(X_train, y_train)

# Predict & Print Accuracy
print(f'Accuracy: {accuracy_score(y_test, dt_model.predict(X_test)):.2f}')

# Visualize Decision Tree
plot_tree(dt_model, feature_names=data.feature_names, filled=True)
plt.show()

# Classify New Sample (30 values, one per dataset feature)
sample = np.array([[14.5, 20.1, 96.5, 666.1, 0.1, 0.15, 0.2, 0.1, 0.2, 0.05,
                    0.5, 1.2, 3.0, 50.0, 0.007, 0.04, 0.05, 0.02, 0.03, 0.005,
                    18.5, 25.1, 120.3, 900.5, 0.15, 0.25, 0.3, 0.15, 0.3, 0.08]])
predicted_class = dt_model.predict(sample)[0]
print(f'Predicted Class: {data.target_names[predicted_class]}')
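The fitted tree can also be inspected as plain-text rules, which is often easier to read than the plot. The snippet below continues the program above using sklearn's export_text, as an optional addition.

from sklearn.tree import export_text

# Print the learned decision rules as indented text
print(export_text(dt_model, feature_names=list(data.feature_names)))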
PGM 9
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

# Load Data & Split
data = fetch_olivetti_faces()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.3, random_state=46)

# Train & Predict
nb = GaussianNB().fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Print Results
print(f'Accuracy: {accuracy_score(y_test, y_pred):.2f}')
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Display Faces
plt.figure(figsize=(10, 5))
for i in range(10):
    plt.subplot(2, 5, i + 1)
    plt.imshow(data.images[i], cmap='gray')
    plt.axis('off')
    plt.title(f"Person {data.target[i]}")
plt.suptitle("Sample Faces from Olivetti Dataset")
plt.show()
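With 40 subjects and only 10 images each, a plain random split can leave some subjects under-represented in training. A stratified split is a common refinement; the sketch below is an optional variation on the program above, not part of the original exercise.

# Optional variation: stratify so every subject appears in both splits in proportion
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=46,
    stratify=data.target)
y_pred = GaussianNB().fit(X_train, y_train).predict(X_test)
print(f'Stratified accuracy: {accuracy_score(y_test, y_pred):.2f}')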
PGM 10
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Load & Process Data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/breast-cancer-wisconsin.data"
data = pd.read_csv(url, header=None)
data.drop([0], axis=1, inplace=True)      # drop the sample-ID column
data.replace('?', np.nan, inplace=True)   # '?' marks missing values
data.dropna(inplace=True)
data.iloc[:, -1] = data.iloc[:, -1].astype(int)
data_scaled = StandardScaler().fit_transform(data.iloc[:, :-1])

# Optimal k using Elbow Method
inertia = [KMeans(n_clusters=k, random_state=42).fit(data_scaled).inertia_ for k in range(1, 11)]
plt.plot(range(1, 11), inertia, marker='o', linestyle='--')
plt.xlabel('Clusters')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k')
plt.show()

# K-Means Clustering Visualization (PCA to 2D for plotting only)
data['Cluster'] = KMeans(n_clusters=2, random_state=42).fit_predict(data_scaled)
data_pca = PCA(n_components=2).fit_transform(data_scaled)
sns.scatterplot(x=data_pca[:, 0], y=data_pca[:, 1], hue=data['Cluster'], palette='coolwarm')
plt.title("K-Means Clustering")
plt.show()
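This dataset actually carries ground-truth labels (class 2 = benign, 4 = malignant), so the clustering can be scored against them. The adjusted Rand index is one standard choice; the snippet below continues the program above as an optional check, and assumes the class column sits second-to-last after the 'Cluster' column was appended.

from sklearn.metrics import adjusted_rand_score

# Compare cluster assignments with the true class labels (2 = benign, 4 = malignant)
true_labels = data.iloc[:, -2]  # class column, second-to-last after adding 'Cluster'
print(f"Adjusted Rand Index: {adjusted_rand_score(true_labels, data['Cluster']):.2f}")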