ML Minimized Programs

The document contains ten short Python programs that demonstrate data analysis and machine learning techniques using pandas, matplotlib, seaborn, and scikit-learn. Topics covered include data visualization (histograms, box plots, heatmaps, pair plots), principal component analysis (PCA), the Find-S concept-learning algorithm, k-nearest-neighbour classification, locally weighted regression, linear and polynomial regression, decision trees, Gaussian Naive Bayes classification, and KMeans clustering. Each program loads a specific dataset, processes the data, applies a model, and visualizes or prints the results.


1. prgm

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Load dataset once and build the DataFrame
housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['Target'] = housing.target

# Histograms
df.hist(figsize=(12, 10), bins=30, edgecolor='black')
plt.suptitle('Histograms')
plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.show()

# Box Plots
plt.figure(figsize=(12, 10))
sns.boxplot(data=df, orient='h', palette='Set2')
plt.title('Box Plots')
plt.show()
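
The box plots flag outliers visually; a quick numeric companion is the standard 1.5×IQR rule. A minimal sketch (an illustrative addition, not part of the original program), assuming the df built above is still in scope:

# Count values outside the 1.5*IQR whiskers for each column
Q1, Q3 = df.quantile(0.25), df.quantile(0.75)
IQR = Q3 - Q1
outliers = ((df < Q1 - 1.5 * IQR) | (df > Q3 + 1.5 * IQR)).sum()
print(outliers)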

2. prgm

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the dataset once
housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['target'] = housing.target

# Correlation matrix heatmap
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
plt.show()

# Pair plot
sns.pairplot(df)
plt.suptitle('Pairwise Relationships', y=1.02)
plt.show()
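
Note that sns.pairplot over all ~20,000 rows of the California Housing data can be slow to render. A hedged variation, assuming df from above (the sample size is arbitrary):

# Pair plot on a random subsample for speed
sns.pairplot(df.sample(n=1000, random_state=42))
plt.suptitle('Pairwise Relationships (1,000-row sample)', y=1.02)
plt.show()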

3. prgm

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load and standardize the Iris dataset
iris = load_iris()
X_scaled = StandardScaler().fit_transform(iris.data)

# Compute the covariance matrix and perform eigendecomposition
cov_matrix = np.cov(X_scaled.T)
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)  # eigh: the covariance matrix is symmetric

# Sort eigenvectors by eigenvalue (descending) so the first two columns really are the top components
order = np.argsort(eigenvalues)[::-1]
eigenvectors = eigenvectors[:, order]

# Project data onto the top 2 principal components
X_pca = X_scaled.dot(eigenvectors[:, :2])

# Plot the results
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=iris.target, cmap='viridis')
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.title('PCA of Iris Dataset')
plt.colorbar(label='Target Class')
plt.show()
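
The manual eigendecomposition can be sanity-checked against scikit-learn's built-in PCA; the two projections should agree up to the sign of each component. A minimal check (an illustrative addition), assuming X_scaled and X_pca from above:

# Cross-check with sklearn's PCA; component signs may be flipped, so compare magnitudes
from sklearn.decomposition import PCA
X_sk = PCA(n_components=2).fit_transform(X_scaled)
print(np.allclose(np.abs(X_pca), np.abs(X_sk)))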

4. prgm

import csv

# Find-S: keep the most specific hypothesis consistent with all positive examples
h = ['0' for i in range(6)]
with open("C:\\Users\\SKSVMACET\\Desktop\\kl.csv") as f:
    data = list(csv.reader(f))

for row in data:
    if row[-1] == "Yes":
        for j in range(6):
            if h[j] == '0':
                h[j] = row[j]
            elif h[j] != row[j]:
                h[j] = '?'

print(h)
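
The program generalizes an attribute to '?' whenever positive examples disagree on it. It expects kl.csv to hold six attribute columns plus a class label; the actual file contents are not shown in the source, but a hypothetical EnjoySport-style file would look like:

Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes

For that input the program would print ['Sunny', 'Warm', '?', 'Strong', '?', '?'].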

5. prgm

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

# Generate 100 random points between 0 and 1
X = np.random.rand(100, 1)

# Label the first 50 points: class 1 if x <= 0.5, else class 2
y = np.array([1 if x[0] <= 0.5 else 2 for x in X[:50]])

# The remaining points X[50:] are treated as unlabeled
X_new = X[50:]

# k-NN classification for different k values
k_values = [1, 2, 3, 4, 5, 20, 30]
plt.figure(figsize=(10, 8))

for i, k in enumerate(k_values, 1):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X[:50], y)
    y_pred = knn.predict(X_new)

    plt.subplot(3, 3, i)
    plt.scatter(X[:50], y, c=y, marker='o')
    plt.scatter(X_new, y_pred, c=y_pred, marker='x')
    plt.title(f'k={k}')
    plt.xlabel('x')
    plt.ylabel('Class')

plt.tight_layout()
plt.show()
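
Because the "unlabeled" points were generated by a known rule (class 1 iff x <= 0.5), their true labels can be reconstructed to score each k. A small illustrative check, reusing X, y, X_new and k_values from above:

# Accuracy of each k against the generating rule
y_true = np.array([1 if x[0] <= 0.5 else 2 for x in X_new])
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k).fit(X[:50], y)
    print(f"k={k}: accuracy={(knn.predict(X_new) == y_true).mean():.2f}")
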
6. prgm

import numpy as np
import matplotlib.pyplot as plt

# Generate synthetic data: a noisy sine wave
X = np.sort(np.random.rand(100, 1), axis=0)
y = np.sin(2 * np.pi * X).ravel() + 0.1 * np.random.randn(100)

# Locally Weighted Regression: fit a separate weighted linear model per query point
def lwr(X, y, query_points, tau=0.1):
    predictions = []
    for x in query_points:
        # Gaussian kernel weights centered on the query point x
        W = np.diag([np.exp(-np.linalg.norm(x - xi) ** 2 / (2 * tau ** 2)) for xi in X])
        X_bias = np.hstack((np.ones((len(X), 1)), X))
        theta = np.linalg.pinv(X_bias.T @ W @ X_bias) @ (X_bias.T @ W @ y)
        predictions.append(np.hstack(([1], x)) @ theta)
    return predictions

# Visualize data and LWR result
query_points = np.linspace(0, 1, 100).reshape(-1, 1)
predictions = lwr(X, y, query_points)

plt.scatter(X, y, color='blue')
plt.plot(query_points, predictions, color='red', lw=2)
plt.title('Locally Weighted Regression')
plt.xlabel('X')
plt.ylabel('y')
plt.show()
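
The bandwidth tau controls how local the fit is: a small tau chases the noise, while a large tau approaches an ordinary global linear fit. A quick way to see this, reusing lwr, X, y and query_points from above (the tau values are illustrative):

# Overlay LWR fits for several bandwidths
plt.scatter(X, y, color='blue', alpha=0.3)
for tau in [0.01, 0.1, 1.0]:
    plt.plot(query_points, lwr(X, y, query_points, tau=tau), label=f'tau={tau}')
plt.legend()
plt.show()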

7. prgm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

def linear_regression_california():
    housing = fetch_california_housing()
    # Use the AveRooms feature (column 2) as the single predictor
    X, y = housing.data[:, 2].reshape(-1, 1), housing.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    plt.scatter(X_test, y_test, color="blue")
    plt.plot(X_test, y_pred, color="red")
    plt.xlabel("AveRooms")
    plt.ylabel("House Price ($100K)")
    plt.show()
    print("MSE:", mean_squared_error(y_test, y_pred), "R2:", r2_score(y_test, y_pred))

def polynomial_regression_auto_mpg():
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
    # The file has nine whitespace-separated fields; the ninth is the quoted car name
    cols = ["mpg", "cylinders", "displacement", "horsepower", "weight",
            "acceleration", "model_year", "origin", "car_name"]
    data = pd.read_csv(url, sep=r'\s+', names=cols, na_values="?").dropna()
    X, y = data["displacement"].values.reshape(-1, 1), data["mpg"].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression()).fit(X_train, y_train)
    y_pred = model.predict(X_test)

    plt.scatter(X_test, y_test, color="blue")
    plt.scatter(X_test, y_pred, color="red")
    plt.xlabel("Displacement")
    plt.ylabel("MPG")
    plt.show()
    print("MSE:", mean_squared_error(y_test, y_pred), "R2:", r2_score(y_test, y_pred))

if __name__ == "__main__":
    linear_regression_california()
    polynomial_regression_auto_mpg()
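
The choice of degree=2 for the Auto MPG model is a judgment call; comparing a few degrees on the same split makes the choice visible. A sketch that could be dropped into polynomial_regression_auto_mpg after the train/test split (the degrees listed are arbitrary):

# Compare polynomial degrees on the held-out test set
for d in [1, 2, 3, 4]:
    m = make_pipeline(PolynomialFeatures(degree=d), LinearRegression()).fit(X_train, y_train)
    print(f"degree={d}: R2={r2_score(y_test, m.predict(X_test)):.3f}")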

8. prgm
# Import necessary libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load the Breast Cancer dataset
data = load_breast_cancer()
X = data.data  # Features
y = data.target  # Labels (0 for malignant, 1 for benign)

# Step 2: Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Create and train the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Step 4: Evaluate the model on the test data
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the Decision Tree classifier: {accuracy:.2f}")

# Step 5: Classify a new sample (here, the first sample from the test set)
sample = X_test[0].reshape(1, -1)  # Reshape to 2D for prediction

# Predict the class of the new sample and report it by name
predicted_class = clf.predict(sample)
class_names = data.target_names
print(f"The new sample is classified as: {class_names[predicted_class[0]]}")

9. prgm

import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Load the Olivetti Faces dataset
data = fetch_olivetti_faces(shuffle=True, random_state=42)
X = data.data
y = data.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the Gaussian Naive Bayes classifier
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Make predictions on the test set
y_pred = gnb.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Print the classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Print the confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Cross-validation accuracy
cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')
print(f'\nCross-validation accuracy: {cross_val_accuracy.mean() * 100:.2f}%')

# Plot some sample images from the test set with true and predicted labels
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for ax, image, label, prediction in zip(axes.ravel(), X_test, y_test, y_pred):
    ax.imshow(image.reshape(64, 64), cmap=plt.cm.gray)
    ax.set_title(f"True: {label}, Pred: {prediction}")
    ax.axis('off')

plt.show()
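
Gaussian Naive Bayes treats each of the 4,096 pixels as an independent feature, which is a strong assumption for face images. A hedged variation worth trying, assuming the split from above (150 components is an arbitrary choice):

# Reduce dimensionality with PCA before Naive Bayes
from sklearn.decomposition import PCA
pca = PCA(n_components=150, whiten=True, random_state=42).fit(X_train)
gnb_pca = GaussianNB().fit(pca.transform(X_train), y_train)
print("PCA + GNB accuracy:", accuracy_score(y_test, gnb_pca.predict(pca.transform(X_test))))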

10. prgm

import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load and standardize the data
X = load_breast_cancer().data
X = StandardScaler().fit_transform(X)

# Apply KMeans clustering with two clusters
y_kmeans = KMeans(n_clusters=2, random_state=42).fit_predict(X)

# PCA for 2D visualization
X_pca = PCA(n_components=2).fit_transform(X)

# Plot the clustering result in PCA space
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_kmeans, cmap='viridis', s=50, alpha=0.6)
plt.title('K-Means Clustering on Breast Cancer Dataset')
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.show()
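
KMeans assigns arbitrary cluster ids (cluster 0 is not necessarily "malignant"), so a fair comparison with the true diagnosis uses a permutation-invariant score. A short check, assuming y_kmeans from above:

# Compare cluster assignments with the true labels, ignoring label permutation
from sklearn.metrics import adjusted_rand_score
y_true = load_breast_cancer().target
print("Adjusted Rand index:", adjusted_rand_score(y_true, y_kmeans))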
