ML Spy Programs
Program 1

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

file_path = 'housing.csv'  # placeholder; the actual CSV path is not shown in this listing

if os.path.exists(file_path):
    housing_df = pd.read_csv(file_path)
else:
    print(f"File not found: {file_path}")
    exit()

# Work only with the numerical columns
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Plot histograms
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    plt.hist(housing_df[feature].dropna(), bins=30)
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

# Plot box plots
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
plt.tight_layout()
plt.show()

# Detect outliers with the 1.5 * IQR rule
print("\nOutliers Detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    outliers = housing_df[(housing_df[feature] < Q1 - 1.5 * IQR) |
                          (housing_df[feature] > Q3 + 1.5 * IQR)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")

print("\nDataset Summary:")
print(housing_df.describe())
Program 2

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the California Housing dataset as a DataFrame
california_data = fetch_california_housing(as_frame=True)
data = california_data.frame

# Correlation heatmap of all features
correlation_matrix = data.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.show()

# Pair plot of pairwise feature relationships
sns.pairplot(data, diag_kind='kde')
plt.show()
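To read the heatmap numerically, the correlation of each feature with the target can be printed as a ranked list. A small follow-up sketch, reusing correlation_matrix from above (the target column is MedHouseVal in the frame returned by fetch_california_housing):

# Rank the features by their correlation with the median house value target
target_corr = correlation_matrix['MedHouseVal'].drop('MedHouseVal')
print(target_corr.sort_values(ascending=False))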
Program 3

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Load the Iris dataset
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

# Reduce the 4-dimensional feature space to 2 principal components
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(data)

# Scatter plot of the two components, one colour per Iris class
plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for i, label in enumerate(np.unique(labels)):
    plt.scatter(data_reduced[labels == label, 0],
                data_reduced[labels == label, 1],
                label=label_names[label],
                color=colors[i])
plt.legend()
plt.grid()
plt.show()
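A quick check after the projection is how much of the original variance the two components keep; PCA exposes this through explained_variance_ratio_. A short addition, reusing the fitted pca object above:

# Fraction of the total variance captured by each principal component
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance retained:", pca.explained_variance_ratio_.sum())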
Program 4
import pandas as pd

def find_s_algorithm(file_path):
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)

    attributes = data.columns[:-1]
    class_label = data.columns[-1]

    # Initialise the hypothesis; it is filled in from the first positive example
    hypothesis = ['?' for _ in attributes]

    # Generalise the hypothesis using only the positive ('Yes') examples
    for _, row in data.iterrows():
        if row[class_label] == 'Yes':
            for i, value in enumerate(row[attributes]):
                if hypothesis[i] == '?' or hypothesis[i] == value:
                    hypothesis[i] = value
                else:
                    hypothesis[i] = '?'
    return hypothesis

file_path = 'C:/Users/Admin/Desktop/datasets/data1.csv'
hypothesis = find_s_algorithm(file_path)
print("\nThe final hypothesis is:", hypothesis)
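The contents of data1.csv are not shown above. Purely as an illustration, the classic EnjoySport table is the kind of input Find-S expects; the hypothetical snippet below writes it to a CSV and notes the hypothesis the function should return for it:

# Hypothetical example input for find_s_algorithm (not the actual data1.csv)
import pandas as pd
example = pd.DataFrame(
    [
        ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same', 'Yes'],
        ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'Yes'],
        ['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'No'],
        ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'Yes'],
    ],
    columns=['Sky', 'AirTemp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport'],
)
example.to_csv('data1_example.csv', index=False)
# find_s_algorithm('data1_example.csv') returns ['Sunny', 'Warm', '?', 'Strong', '?', '?']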
Program 5
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

# Generate 100 random points in [0, 1]
data = np.random.rand(100)

# k-NN helper: keep the k closest training points and take a majority vote
def knn_classifier(test_point, train_data, train_labels, k):
    distances = sorted((abs(train_point - test_point), label)
                       for train_point, label in zip(train_data, train_labels))
    k_nearest_neighbors = distances[:k]
    k_nearest_labels = [label for _, label in k_nearest_neighbors]
    return Counter(k_nearest_labels).most_common(1)[0][0]

# First 50 points are labelled by the rule stated in the print below
labels = ["Class1" if x <= 0.5 else "Class2" for x in data[:50]]
train_data = data[:50]
train_labels = labels
test_data = data[50:]

print("Training dataset: First 50 points labeled based on the rule (x <= 0.5 -> Class1, x > 0.5 -> Class2)")

results = {}
k_values = [1, 2, 3, 4, 5, 20, 30]  # values of k to try (chosen for illustration)
for k in k_values:
    classified_labels = [knn_classifier(point, train_data, train_labels, k) for point in test_data]
    results[k] = classified_labels
    print("\n")
print("Classification complete.\n")

# Plot training points (level 0) and classified test points (level 1) for each k
for k in k_values:
    classified_labels = results[k]
    plt.figure(figsize=(10, 6))
    plt.scatter(train_data, [0] * len(train_data),
                c=["blue" if label == "Class1" else "red" for label in train_labels],
                label="Training Data", marker="o")
    plt.scatter(test_data, [1] * len(test_data),
                c=["blue" if label == "Class1" else "red" for label in classified_labels],
                label=f"Test Data (k={k})", marker="x")
    plt.xlabel("Data Points")
    plt.ylabel("Classification Level")
    plt.legend()
    plt.grid(True)
    plt.show()
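Because the labelling rule is known, the predictions for the unlabelled half can also be scored directly. A short follow-up sketch that reuses test_data, k_values and results from above:

# Accuracy of each k against the rule that generated the training labels
true_test_labels = ["Class1" if x <= 0.5 else "Class2" for x in test_data]
for k in k_values:
    correct = sum(p == t for p, t in zip(results[k], true_test_labels))
    print(f"k={k}: accuracy = {correct / len(test_data):.2f}")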
Program 6
import numpy as np
import matplotlib.pyplot as plt

# Gaussian weighting: points near the query x get weights close to 1
def gaussian_kernel(x, xi, tau):
    return np.exp(-np.sum((x - xi) ** 2) / (2 * tau ** 2))

# Weighted least squares solved in closed form at the query point x
def locally_weighted_regression(x, X, y, tau):
    m = X.shape[0]
    weights = np.array([gaussian_kernel(x, X[i], tau) for i in range(m)])
    W = np.diag(weights)
    X_transpose_W = X.T @ W
    theta = np.linalg.inv(X_transpose_W @ X) @ X_transpose_W @ y
    return x @ theta

# Noisy sine-wave training data
np.random.seed(42)
X = np.linspace(0, 2 * np.pi, 100)
y = np.sin(X) + 0.1 * np.random.randn(100)
X_bias = np.c_[np.ones(X.shape), X]

# Query points for the fitted curve
x_test = np.linspace(0, 2 * np.pi, 200)
x_test_bias = np.c_[np.ones(x_test.shape), x_test]
tau = 0.5
y_pred = np.array([locally_weighted_regression(xi, X_bias, y, tau)
                   for xi in x_test_bias])

plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='red', label='Training Data', alpha=0.7)
plt.plot(x_test, y_pred, color='blue', label=f'LWR Fit (tau={tau})', linewidth=2)
plt.xlabel('X', fontsize=12)
plt.ylabel('y', fontsize=12)
plt.legend(fontsize=10)
plt.grid(alpha=0.3)
plt.show()
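The bandwidth tau controls how local the fit is: small values follow the noise, large values approach an ordinary straight-line fit. A brief sketch comparing a few values, reusing the functions and data defined above (the specific tau values are arbitrary):

# Compare several bandwidths on the same noisy sine data
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='gray', alpha=0.5, label='Training Data')
for t in [0.1, 0.5, 2.0]:
    y_t = np.array([locally_weighted_regression(xi, X_bias, y, t) for xi in x_test_bias])
    plt.plot(x_test, y_t, linewidth=2, label=f'tau={t}')
plt.legend()
plt.show()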
Program 7
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# Linear Regression on the Boston Housing dataset
column_names = [
    "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
    "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"
]
boston_data = pd.read_csv("C:/Users/Admin/Desktop/3vc22cs077/hello/BostonHousing.csv",
                          header=None, names=column_names)
X = boston_data.drop(columns=["MEDV"]).values
y = boston_data["MEDV"].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Plot predicted vs. actual prices
plt.figure(figsize=(8, 5))
plt.scatter(y_test, y_pred, alpha=0.7)
plt.xlabel("Actual MEDV")
plt.ylabel("Predicted MEDV")
plt.grid(True)
plt.show()

# Evaluation
print("Linear Regression:")
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 score:", r2_score(y_test, y_pred))

# Load Auto MPG dataset (make sure the correct file is used here)
auto_data = pd.read_csv("C:/Users/Admin/Desktop/3vc22cs077/hello/auto-mpg.csv")

# Clean data: non-numeric horsepower entries ('?') become NaN and are dropped
auto_data['horsepower'] = pd.to_numeric(auto_data['horsepower'], errors='coerce')
auto_data.dropna(inplace=True)
X_auto = auto_data[['horsepower']].values
y_auto = auto_data['mpg'].values

# Polynomial features
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X_auto)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_poly, y_auto, test_size=0.2, random_state=42)
poly_reg = LinearRegression()
poly_reg.fit(X_train, y_train)

# Predict
y_pred = poly_reg.predict(X_test)

# Sort by horsepower so the fitted curve plots smoothly
sort_idx = np.argsort(X_test[:, 1])
X_sorted = X_test[sort_idx][:, 1]
y_sorted = y_pred[sort_idx]

plt.figure(figsize=(8, 5))
plt.scatter(X_auto, y_auto, alpha=0.4, label="Data")
plt.plot(X_sorted, y_sorted, color='red', linewidth=2, label="Degree-2 fit")
plt.xlabel("Horsepower")
plt.ylabel("MPG")
plt.legend()
plt.grid(True)
plt.show()

# Evaluation
print("Polynomial Regression:")
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 score:", r2_score(y_test, y_pred))
Program 8
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Load the Breast Cancer dataset and split it
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a decision tree and predict on the test set
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Classify a new sample (here: the first test sample)
new_sample = np.array([X_test[0]])
prediction = clf.predict(new_sample)
print("Predicted class:", data.target_names[prediction[0]])

# Visualise the learned tree
plt.figure(figsize=(12, 8))
plot_tree(clf, filled=True, feature_names=data.feature_names, class_names=data.target_names)
plt.show()
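The listing above only visualises the full tree; its test accuracy can be reported too, and a depth-limited tree is easier to read and often generalises better. A small follow-up sketch reusing the split above (max_depth=3 is an arbitrary choice):

from sklearn.metrics import accuracy_score

print("Accuracy:", accuracy_score(y_test, y_pred))

# Depth-limited tree for comparison
clf_small = DecisionTreeClassifier(max_depth=3, random_state=42)
clf_small.fit(X_train, y_train)
print("Accuracy (max_depth=3):", accuracy_score(y_test, clf_small.predict(X_test)))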
Program 9
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix

# Features and labels from the dataset loaded for this exercise
# (the load call is not part of this listing; see the sketch below)
X = data.data
y = data.target

# Train/test split and Gaussian Naive Bayes
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# 5-fold cross-validated accuracy
cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')
print("Cross-validation accuracy:", cross_val_accuracy.mean())

# Per-image display of predictions (the plotting loop is not shown here; see the sketch below)
ax.axis('off')
plt.show()
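The fragment above does not show how data is loaded or how the prediction images are drawn; the per-image ax.axis('off') call suggests an image dataset. A self-contained sketch assuming scikit-learn's Olivetti Faces dataset (an assumption, since the load call is not part of the listing):

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

# Assumed dataset: Olivetti Faces (400 images of 40 people, 64x64 pixels each)
faces = fetch_olivetti_faces(shuffle=True, random_state=42)
X, y = faces.data, faces.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)
print("Test accuracy:", accuracy_score(y_test, y_pred))
print("Cross-validation accuracy:", cross_val_score(gnb, X, y, cv=5, scoring='accuracy').mean())

# Show a few test faces with their true and predicted person IDs
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for ax, image, true_label, pred_label in zip(axes.ravel(), X_test, y_test, y_pred):
    ax.imshow(image.reshape(64, 64), cmap='gray')
    ax.set_title(f"True: {true_label}  Pred: {pred_label}", fontsize=8)
    ax.axis('off')
plt.show()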
Program 10
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report

# Load and standardise the Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K-Means with two clusters
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
y_kmeans = kmeans.fit_predict(X_scaled)

# Compare cluster assignments with the true labels
print("Confusion Matrix:")
print(confusion_matrix(y, y_kmeans))
print("\nClassification Report:")
print(classification_report(y, y_kmeans))

# Project to two principal components for plotting
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df['Cluster'] = y_kmeans
df['True Label'] = y

# Cluster assignments in PCA space
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=60, alpha=0.7)
plt.legend(title="Cluster")
plt.show()

# True labels in PCA space
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Label', palette='coolwarm', s=60, alpha=0.7)
plt.legend(title="True Label")
plt.show()

# Cluster assignments together with the cluster centroids
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1', s=60, alpha=0.7)
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.legend(title="Cluster")
plt.show()
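One caveat about the confusion matrix above: K-Means cluster IDs (0/1) are arbitrary, so they may come out swapped relative to the true labels and make the matrix look inverted. A small sketch that flips the IDs when needed before reporting accuracy:

from sklearn.metrics import accuracy_score

# If the raw agreement is below 50%, the two cluster IDs are simply swapped; flip them.
aligned = y_kmeans if accuracy_score(y, y_kmeans) >= 0.5 else 1 - y_kmeans
print("Clustering accuracy (after aligning cluster IDs):", accuracy_score(y, aligned))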