
ML Short Code - Under Updating

This document collects short machine learning experiments in Python: loading datasets, exploratory data analysis, regression, classification, clustering, and dimensionality reduction. Each experiment demonstrates different techniques and libraries, including Pandas, NumPy, Scikit-learn, Matplotlib, and Seaborn.


#Exp-1:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
------------------------------------------------------------------------
# Step 1: Load the California Housing dataset
data = fetch_california_housing(as_frame=True)
housing_df = data.frame

# Step 2: Create histograms for numerical features
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Plot histograms
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

# Step 3: Generate box plots for numerical features
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Box Plot of {feature}')
plt.tight_layout()
plt.show()

# Step 4: Identify outliers using the IQR method
print("Outliers Detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = housing_df[(housing_df[feature] < lower_bound) | (housing_df[feature] > upper_bound)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")

# Optional: Print a summary of the dataset
print("\nDataset Summary:")
print(housing_df.describe())
#Exp-2:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
--------------------------------------------------------------
# Step 1: Load the California Housing Dataset
california_data = fetch_california_housing(as_frame=True)
data = california_data.frame

# Step 2: Compute the correlation matrix
correlation_matrix = data.corr()

# Step 3: Visualize the correlation matrix using a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Correlation Matrix of California Housing Features')
plt.show()

# Step 4: Create a pair plot to visualize pairwise relationships
sns.pairplot(data, diag_kind='kde', plot_kws={'alpha': 0.5})
plt.suptitle('Pair Plot of California Housing Features', y=1.02)
plt.show()
#EXP-3:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
-------------------------------------------------------------------
# Load the Iris dataset
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

# Convert to a DataFrame for better visualization
iris_df = pd.DataFrame(data, columns=iris.feature_names)

# Perform PCA to reduce dimensionality to 2
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(data)

# Create a DataFrame for the reduced data
reduced_df = pd.DataFrame(data_reduced, columns=['Principal Component 1', 'Principal Component 2'])
reduced_df['Label'] = labels

# Plot the reduced data
plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for i, label in enumerate(np.unique(labels)):
    plt.scatter(
        reduced_df[reduced_df['Label'] == label]['Principal Component 1'],
        reduced_df[reduced_df['Label'] == label]['Principal Component 2'],
        label=label_names[label],
        color=colors[i]
    )
plt.title('PCA on Iris Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.grid()
plt.show()
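A quick optional check (not in the original code, but using the pca object fitted above) shows how much of the total variance the two components retain:

# Optional: variance retained by the two principal components
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance retained:", pca.explained_variance_ratio_.sum())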
#EXP-4:
import pandas as pd
------------------------------------------------------------------------------------
def find_s_algorithm(data_file):
    # Step 1: Load the dataset
    data = pd.read_csv(data_file)
    print("data_file:")
    print(data)

    # Step 2: Identify the columns and the target attribute
    target_column = data.columns[-1]   # Assuming the last column is the target
    features = data.columns[:-1]       # All columns except the target

    # Step 3: Initialize the hypothesis with the first positive example
    positive_data = data[data[target_column] == 'Yes']   # Assuming 'Yes' represents positive examples
    hypothesis = positive_data.iloc[0, :-1].values       # The first positive example

    # Step 4: Update the hypothesis based on all positive examples
    for i, example in positive_data.iterrows():
        # Compare and generalize the hypothesis if necessary
        for j in range(len(hypothesis)):
            if hypothesis[j] != example[features[j]]:
                hypothesis[j] = '?'   # Use '?' to generalize when mismatches occur

    # Step 5: Output the final hypothesis
    return hypothesis

# Example usage
data_file = r'C:/Users/91943/Desktop/KITM/ML LAB/training_data.csv'   # Path to the CSV file
final_hypothesis = find_s_algorithm(data_file)
print("\nFinal Hypothesis:", final_hypothesis)
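find_s_algorithm expects a CSV whose last column holds a 'Yes'/'No' target. The training_data.csv referenced above is not included here, so the rows below are a hypothetical EnjoySport-style illustration of the expected format:

# Hypothetical input for find_s_algorithm (invented example; last column is the target)
sample_rows = [
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Warm', 'High', 'Strong', 'Yes'],
    ['Rainy', 'Cold', 'High', 'Strong', 'No'],
]
pd.DataFrame(sample_rows, columns=['Sky', 'Temp', 'Humidity', 'Wind', 'EnjoySport']).to_csv('training_data.csv', index=False)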
#EXP-5:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
---------------------------------------------------------------
# 1. Generate 100 random x values in the range [0, 1]
x = np.random.rand(100).reshape(-1, 1)

# 2. Assign labels to the first 50 values
y = np.array(['Class 1' if xi <= 0.5 else 'Class 2' for xi in x[:50].flatten()])

# 3. Prepare training and test data
X_train = x[:50]
X_test = x[50:]

# 4. Test for different k values
k_values = [1, 2, 3, 4, 5, 20, 30]
results = {}
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y)
    y_pred = knn.predict(X_test)
    results[k] = y_pred

# 5. Display results
for k in k_values:
    print(f"\nK = {k} classification results:")
    for i, pred in enumerate(results[k]):
        print(f"x{51 + i}: {X_test[i][0]:.3f} -> {pred}")

# Optional: Visualization
plt.figure(figsize=(10, 6))
plt.scatter(X_train, [0]*50, c=['red' if yi == 'Class 1' else 'blue' for yi in y], label='Training Data')
for k in k_values:
    y_pred_k = results[k]
    plt.scatter(X_test, [k]*50, c=['red' if yi == 'Class 1' else 'blue' for yi in y_pred_k], marker='x', label=f'Test (k={k})')
plt.yticks(k_values + [0])
plt.xlabel("x value")
plt.ylabel("k (for visualization only)")
plt.legend()
plt.title("KNN Classification of x values")
plt.grid(True)
plt.show()
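Note that this experiment draws its x values without seeding, so every run classifies a different random sample. If reproducible output is wanted, one could seed NumPy before generating the data (an optional addition, not in the original):

# Optional: make the random x values reproducible across runs
np.random.seed(42)   # place before x = np.random.rand(100).reshape(-1, 1)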
#EXP-6
import numpy as np
import matplotlib.pyplot as plt
-----------------------------------------------------------------
np.random.seed(42)
X = np.linspace(-3, 3, 100).reshape(-1, 1)
y = np.sin(X).ravel() + np.random.normal(scale=0.1, size=X.shape[0])

def locally_weighted_regression(x_query, X_train, y_train, tau):
    # Gaussian kernel weights centred on the query point
    W = np.exp(-((X_train - x_query) ** 2) / (2 * tau ** 2))
    X_bias = np.c_[np.ones_like(X_train), X_train]
    theta = np.linalg.pinv(X_bias.T @ np.diag(W.ravel()) @ X_bias) @ X_bias.T @ np.diag(W.ravel()) @ y_train
    return np.array([1, x_query]) @ theta   # Predict y for x_query

X_test = np.linspace(-3, 3, 100)
y_pred = np.array([locally_weighted_regression(x, X, y, tau=0.5) for x in X_test])

plt.scatter(X, y, color="gray", alpha=0.5, label="Training Data")
plt.plot(X_test, y_pred, color="red", linewidth=2, label="LWR Fit (τ=0.5)")
plt.legend()
plt.show()
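For reference, locally_weighted_regression solves the weighted least-squares normal equations at each query point x: with diagonal weights W_ii = exp(-(x_i - x)^2 / (2 tau^2)), it computes theta = (X^T W X)^(-1) X^T W y via the pseudo-inverse and predicts [1, x] @ theta. A smaller tau trusts only nearby points and gives a wigglier local fit, while a larger tau approaches ordinary linear regression.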
#EXP-7: Linear Regression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
----------------------------------------------------------------------------------
# Load the Boston Housing Dataset
boston_df = pd.read_csv(r"C:\Users\91943\Downloads\boston_housing_data.csv")
print("Linear Regression on Boston Housing Dataset")

X = boston_df[['RM']]    # Using 'RM' (average number of rooms) as the feature
y = boston_df['MEDV']    # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Linear Regression model
LR_model = LinearRegression()
LR_model.fit(X_train, y_train)

# Make predictions
y_pred = LR_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")

# Plot the results
plt.scatter(X_test, y_test, color='green', label='Actual')
plt.plot(X_test, y_pred, color='red', label='Predicted')
plt.xlabel('Average Number of Rooms (RM)')
plt.ylabel('House Price (MEDV)')
plt.title('Linear Regression on Boston Housing Dataset')
plt.legend()
plt.show()
#EXP-7: Polynomial Regression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression   # needed for poly_reg below
from sklearn.metrics import mean_squared_error
-------------------------------------------------------------------
auto_mpg = pd.read_csv(r"C:\Users\91943\Downloads\auto-mpg.csv")
auto_mpg.dropna(inplace=True)   # Remove missing values

auto_mpg = auto_mpg[auto_mpg['horsepower'] != '?']
auto_mpg['horsepower'] = auto_mpg['horsepower'].astype(float)
----------------------------------------------------------------
X_auto = auto_mpg[['horsepower']]
y_auto = auto_mpg['mpg']
-------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X_auto, y_auto, test_size=0.2, random_state=42)

poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
y_pred_poly = poly_reg.predict(X_test_poly)
-----------------------------------------------------------------
plt.scatter(X_test, y_test, color="blue", label="Actual Data", alpha=0.5)
plt.scatter(X_test, y_pred_poly, color="red", label="Predicted Data", alpha=0.5)
plt.xlabel("Horsepower")
plt.ylabel("MPG")
plt.legend()
plt.title("Polynomial Regression on Auto MPG Dataset")
plt.show()
#EXP-8:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
----------------------------------------------------------------------
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

model = DecisionTreeClassifier(max_depth=4, random_state=42)
model.fit(X_train, y_train)
print(f"Accuracy: {accuracy_score(y_test, model.predict(X_test)):.2f}")

plot_tree(model, filled=True, feature_names=data.feature_names, class_names=data.target_names)
plt.show()

sample = [[15.3, 20.5, 85.2, 521, 0.1, 0.1, 0.08, 0.18, 0.19, 0.06,
           0.32, 0.43, 2.5, 20.3, 0.007, 0.018, 0.016, 0.003, 0.015, 0.002,
           17.5, 25.0, 110.5, 900, 0.14, 0.25, 0.18, 0.25, 0.27, 0.09]]
print("Predicted Class:", data.target_names[model.predict(sample)[0]])
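As an optional follow-up (not in the original), the fitted tree can also report class probabilities for the same sample:

# Optional: class probabilities for the sample above
proba = model.predict_proba(sample)[0]
for name, p in zip(data.target_names, proba):
    print(f"P({name}) = {p:.3f}")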
#EXP-9
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
----------------------------------------------------------------------
# Step 1: Load the Olivetti face dataset
data = fetch_olivetti_faces()
X = data.data     # Each image is flattened (64x64 = 4096 pixels)
y = data.target   # Labels: person IDs (0–39)

# Step 2: Split data into training and test sets
# Use 7 images per person for training, 3 for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Step 3: Train the Naive Bayes classifier
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Step 4: Predict and evaluate
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Naive Bayes Classifier Accuracy: {accuracy * 100:.2f}%")

# Optional: Show a few test faces with predictions
plt.figure(figsize=(10, 4))
for i in range(8):
    plt.subplot(2, 4, i+1)
    plt.imshow(X_test[i].reshape(64, 64), cmap='gray')
    plt.title(f"Pred: {y_pred[i]}\nTrue: {y_test[i]}")
    plt.axis('off')
plt.tight_layout()
plt.figtext(0.3, 1.01, "\nBCSL606/KIT/Student Name(4KM22CS0025)/2025/Exp-9:", ha="center", fontsize=12)
plt.show()
#EXP-10
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
------------------------------------------------------------------
data = load_breast_cancer()
X = data.data
y = data.target
-------------------------------------------------------------
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

kmeans = KMeans(n_clusters=2, random_state=42)
y_kmeans = kmeans.fit_predict(X_scaled)

print("Confusion Matrix:")
print(confusion_matrix(y, y_kmeans))
print("\nClassification Report:")
print(classification_report(y, y_kmeans))

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df['Cluster'] = y_kmeans
df['True Label'] = y
-----------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100, edgecolor='black', alpha=0.7)
plt.title('K-Means Clustering of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()
-----------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Label', palette='coolwarm', s=100, edgecolor='black', alpha=0.7)
plt.title('True Labels of Breast Cancer Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="True Label")
plt.show()
-----------------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=100, edgecolor='black', alpha=0.7)
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title('K-Means Clustering with Centroids')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(title="Cluster")
plt.show()
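One caveat about this experiment: K-Means cluster ids 0/1 are assigned arbitrarily, so the confusion matrix and classification report above can come out inverted. A minimal sketch of a common fix, mapping each cluster to its majority true label before scoring (assumes the variables defined above):

# Align arbitrary cluster ids with the majority true label before scoring
y_aligned = np.zeros_like(y_kmeans)
for cluster in np.unique(y_kmeans):
    mask = (y_kmeans == cluster)
    y_aligned[mask] = np.bincount(y[mask]).argmax()   # majority true label in this cluster
print(classification_report(y, y_aligned))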
