ML Programs
program-1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
housing_df = pd.read_csv("C:/Users/Mohammed sadiq/OneDrive/Desktop/python1/Datasets/housing (1).csv")

# Numerical columns only
numerical_features = housing_df.select_dtypes(include=[np.number]).columns

# Plot histograms
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.histplot(housing_df[feature], kde=True, bins=30, color='blue')
    plt.title(f'Distribution of {feature}')
plt.tight_layout()
plt.show()

# Plot boxplots
plt.figure(figsize=(15, 10))
for i, feature in enumerate(numerical_features):
    plt.subplot(3, 3, i + 1)
    sns.boxplot(x=housing_df[feature], color='orange')
    plt.title(f'Boxplot of {feature}')
plt.tight_layout()
plt.show()

# Detect outliers with the 1.5 * IQR rule
print("Outliers Detection:")
outliers_summary = {}
for feature in numerical_features:
    Q1 = housing_df[feature].quantile(0.25)
    Q3 = housing_df[feature].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = housing_df[(housing_df[feature] < lower_bound) | (housing_df[feature] > upper_bound)]
    outliers_summary[feature] = len(outliers)
    print(f"{feature}: {len(outliers)} outliers")

# Dataset summary statistics
print("\nDataset Summary:")
print(housing_df.describe())
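As a quick illustration of the 1.5 * IQR rule used above (this toy example is an addition, not part of the original program):

import pandas as pd

s = pd.Series([1, 2, 3, 4, 100])
q1, q3 = s.quantile(0.25), s.quantile(0.75)
iqr = q3 - q1
# 100 lies outside [q1 - 1.5*iqr, q3 + 1.5*iqr] and is the only value flagged
print(s[(s < q1 - 1.5 * iqr) | (s > q3 + 1.5 * iqr)])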
program-2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
data = pd.read_csv("C:/Users/Mohammed sadiq/OneDrive/Desktop/python1/Datasets/housing (1).csv")

# Correlation matrix of the numeric columns
correlation_matrix = data.corr(numeric_only=True)

# Heatmap of the correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix Heatmap')
plt.show()

# Pair plot of the numeric features (assumed for the second figure in the original)
sns.pairplot(data.select_dtypes(include='number'), diag_kind='kde')
plt.show()
program-3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Load the Iris dataset
iris = load_iris()
data = iris.data
labels = iris.target
label_names = iris.target_names

# Reduce the 4 features to 2 principal components
pca = PCA(n_components=2)
data_reduced = pca.fit_transform(data)

# Scatter plot of the two components, coloured by class
plt.figure(figsize=(8, 6))
colors = ['red', 'green', 'blue']
for i, label in enumerate(np.unique(labels)):
    plt.scatter(
        data_reduced[labels == label, 0],
        data_reduced[labels == label, 1],
        label=label_names[label],
        color=colors[i]
    )
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA on the Iris dataset')
plt.legend()
plt.grid()
plt.show()
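If desired, the share of variance retained by the two components can also be printed; this check is an addition and reuses the pca object fitted above:

print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance retained:", pca.explained_variance_ratio_.sum())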
program-4
import pandas as pd

def find_s_algorithm(file_path):
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)

    attributes = data.columns[:-1]
    class_label = data.columns[-1]

    # Start with the most general placeholder and specialise on positive examples
    hypothesis = ['?' for _ in attributes]
    for _, row in data.iterrows():
        if row[class_label] == 'yes':
            for i, value in enumerate(row[attributes]):
                if hypothesis[i] == '?' or hypothesis[i] == value:
                    hypothesis[i] = value
                else:
                    hypothesis[i] = '?'
    return hypothesis

file_path = "training_data.csv"  # placeholder: the original path is not shown in the listing
hypothesis = find_s_algorithm(file_path)
print("\nThe final hypothesis is:", hypothesis)
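The training file itself is not included with the listing, so the attribute names below are only an example (the classic EnjoySport data from Mitchell); Find-S only assumes that the last column is the class label with 'yes'/'no' values:

sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes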
program-5
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

def knn_classify(test_point, train_data, train_labels, k):
    # Distance of the test point to every training point, paired with the label
    distances = [(abs(test_point - train_data[i]), train_labels[i]) for i in range(len(train_data))]
    distances.sort(key=lambda x: x[0])
    k_nearest_neighbors = distances[:k]
    # Majority vote among the k nearest labels
    return Counter(label for _, label in k_nearest_neighbors).most_common(1)[0][0]

# 100 random points in [0, 1]
data = np.random.rand(100)

# First 50 points are labeled, the remaining 50 are classified
labels = ["Class1" if x <= 0.5 else "Class2" for x in data[:50]]
train_data = data[:50]
train_labels = labels
test_data = data[50:]

# k values to test (the exact values are not shown in the original listing)
k_values = [1, 2, 3, 4, 5, 20, 30]

print("Training dataset: First 50 points labeled based on the rule (x <= 0.5 -> Class1, x > 0.5 -> Class2)")
print("Testing dataset: Remaining 50 points classified with k-NN\n")

results = {}
for k in k_values:
    classified_labels = [knn_classify(x, train_data, train_labels, k) for x in test_data]
    results[k] = classified_labels
    print(f"Results for k = {k}: {classified_labels}")
    print("\n")
print("Classification complete.\n")

# Plot results
for k in k_values:
    classified_labels = results[k]
    plt.figure(figsize=(10, 6))
    plt.scatter(train_data, [0] * len(train_data),
                c=["blue" if label == "Class1" else "red" for label in train_labels],
                label="Training data", marker="o")
    plt.scatter(test_data, [1] * len(test_data),
                c=["blue" if label == "Class1" else "red" for label in classified_labels],
                label="Test data (classified)", marker="x")
    plt.title(f"k-NN classification results for k = {k}")
    plt.xlabel("Data Points")
    plt.ylabel("Classification Level")
    plt.legend()
    plt.grid(True)
    plt.show()
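As a sanity check (an addition, not part of the original listing), scikit-learn's KNeighborsClassifier can be run on the same split; this snippet reuses train_data, train_labels, test_data and results from the program above:

from sklearn.neighbors import KNeighborsClassifier

for k in [1, 3, 5]:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data.reshape(-1, 1), train_labels)
    sk_labels = knn.predict(test_data.reshape(-1, 1))
    matches = sum(a == b for a, b in zip(sk_labels, results[k]))
    print(f"k = {k}: sklearn matches the manual classifier on {matches} of {len(test_data)} points")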
program-6
import numpy as np
import matplotlib.pyplot as plt

def local_regression(x, X, y, tau):
    # Gaussian-weighted least squares solved at the query point x
    w = np.exp(-np.sum((X - x) ** 2, axis=1) / (2 * tau ** 2))
    theta = np.linalg.pinv(X.T @ (w[:, None] * X)) @ (X.T @ (w * y))
    return x @ theta

# Data (assumed: noisy sine wave, since the original arrays are not shown)
np.random.seed(42)
X = np.c_[np.ones(100), np.linspace(0, 2 * np.pi, 100)]
y = np.sin(X[:, 1]) + 0.1 * np.random.randn(100)

# Prediction
tau = 0.5
y_pred = np.array([local_regression(x, X, y, tau) for x in X])

# Plot
plt.scatter(X[:, 1], y, color='red', alpha=0.6, label='Noisy data')
plt.plot(X[:, 1], y_pred, color='blue', label=f'LWR fit (tau = {tau})')
plt.legend()
plt.show()
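For reference, the estimate returned by local_regression above is the standard locally weighted least-squares solution at each query point x, with W the diagonal matrix of the Gaussian weights:

w_i = \exp\left(-\frac{(x - x_i)^2}{2\tau^2}\right), \qquad
\hat{\theta} = (X^\top W X)^{-1} X^\top W y, \qquad
\hat{y}(x) = x^\top \hat{\theta}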
program-7
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# ----- Linear Regression on the Boston Housing dataset -----
# Standard Boston Housing column names (the file is read without a header row)
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
boston_data = pd.read_csv("C:/Users/Mohammed sadiq/OneDrive/Desktop/python1/Datasets/BostonHousing.csv",
                          header=None, names=column_names)

# Single predictor assumed: average number of rooms (RM) vs. median house value (MEDV)
X = boston_data[['RM']].values
y = boston_data['MEDV'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Visualization
plt.figure(figsize=(8, 5))
plt.scatter(X_test, y_test, color='blue', label='Actual')
plt.plot(X_test, y_pred, color='red', label='Predicted')
plt.xlabel('Average number of rooms (RM)')
plt.ylabel('Median house value (MEDV)')
plt.legend()
plt.grid(True)
plt.show()

# Evaluation
print("Linear Regression:")
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R^2 Score:", r2_score(y_test, y_pred))

# ----- Polynomial Regression on the Auto MPG dataset -----
# Placeholder path: the line that loads the Auto MPG data is missing from the original listing
auto_data = pd.read_csv("auto-mpg.csv", na_values='?')

# Clean data
auto_data.dropna(inplace=True)
auto_data['horsepower'] = auto_data['horsepower'].astype(float)
X_auto = auto_data[['horsepower']].values
y_auto = auto_data['mpg'].values

# Polynomial Features
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X_auto)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_poly, y_auto, test_size=0.2,
                                                    random_state=42)

# Train model
poly_reg = LinearRegression()
poly_reg.fit(X_train, y_train)

# Predict
y_pred = poly_reg.predict(X_test)

# Sort by horsepower so the fitted curve plots smoothly
sort_idx = np.argsort(X_test[:, 1])
X_sorted = X_test[sort_idx][:, 1]
y_sorted = y_pred[sort_idx]

# Plot results
plt.figure(figsize=(8, 5))
plt.scatter(X_auto, y_auto, color='lightgray', label='Data')
plt.plot(X_sorted, y_sorted, color='red', label='Polynomial fit (degree 2)')
plt.xlabel("Horsepower")
plt.ylabel("MPG")
plt.legend()
plt.grid(True)
plt.show()

# Evaluation
print("Polynomial Regression:")
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R^2 Score:", r2_score(y_test, y_pred))
program-8
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Classify a new sample (here: the first test sample)
sample = X_test[0].reshape(1, -1)
result = model.predict(sample)
print("Predicted class:", data.target_names[result[0]])

# Visualize the decision tree
plt.figure(figsize=(12, 8))
plot_tree(model, filled=True, feature_names=data.feature_names, class_names=data.target_names)
plt.show()
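A plain-text view of the fitted tree can also be printed; this uses scikit-learn's export_text and is an addition that reuses model and data from the listing above:

from sklearn.tree import export_text

print(export_text(model, feature_names=list(data.feature_names)))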
program-9
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load dataset (assumed: Olivetti faces, consistent with the image grid plotted below)
data = fetch_olivetti_faces(shuffle=True, random_state=42)
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Cross-validation accuracy
cv_scores = cross_val_score(model, X, y, cv=5)
print("Cross-validation accuracy:", cv_scores.mean())

# Plot predictions
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
for ax, image, pred in zip(axes.ravel(), X_test, y_pred):
    ax.imshow(image.reshape(64, 64), cmap='gray')
    ax.set_title(f"Predicted: {pred}")
    ax.axis('off')
plt.show()
program-10
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report

# Load and scale the dataset
data = load_breast_cancer()
X = data.data
y = data.target
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K-Means clustering with 2 clusters (benign / malignant)
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
y_kmeans = kmeans.fit_predict(X_scaled)

print("Confusion Matrix:")
print(confusion_matrix(y, y_kmeans))
print("\nClassification Report:")
print(classification_report(y, y_kmeans))

# Project to 2 dimensions for visualization
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
df['Cluster'] = y_kmeans
df['True Label'] = y

# Clusters found by K-Means
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1',
                edgecolor='black', alpha=0.7)
plt.title('K-Means Clustering')
plt.legend(title="Cluster")
plt.show()

# True labels
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='True Label', palette='coolwarm',
                edgecolor='black', alpha=0.7)
plt.title('True Labels')
plt.legend(title="True Label")
plt.show()

# Clusters with centroids
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1',
                edgecolor='black', alpha=0.7)
centers = pca.transform(kmeans.cluster_centers_)
plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X', label='Centroids')
plt.title('K-Means Clustering with Centroids')
plt.legend(title="Cluster")
plt.show()
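One caveat about the evaluation above: K-Means assigns arbitrary cluster IDs, so the confusion matrix and classification report are only meaningful when cluster 0 happens to correspond to label 0. A small check (an addition, reusing y and y_kmeans from the listing) flips the IDs when they are anti-aligned:

from sklearn.metrics import accuracy_score

if accuracy_score(y, y_kmeans) < 0.5:
    y_kmeans = 1 - y_kmeans  # relabel so the clusters line up with the true classes
print("Accuracy after aligning cluster labels:", accuracy_score(y, y_kmeans))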