Wa0003
Wa0003
# Make predictions
# NOTE(review): `model` and `X_test` are not defined anywhere in this listing;
# presumably a fitted estimator and a held-out feature matrix - confirm.
y_pred = model.predict(X_test)
21 A
import pandas as pd
import numpy as np
from scipy import stats
def univariate_analysis(dataset):
    """Compute per-column descriptive statistics for a DataFrame.

    For every column the mean, median, mode, variance, standard deviation,
    skewness and kurtosis are computed with the corresponding pandas Series
    methods.

    Args:
        dataset: pandas DataFrame whose columns support those methods.

    Returns:
        A DataFrame with one column per input column and one row per
        statistic ('Mean', 'Median', 'Mode', 'Variance',
        'Standard Deviation', 'Skewness', 'Kurtosis'). When a column has
        several modes, the first one returned by ``Series.mode()`` is
        reported; when it has none (empty or all-NaN column), the mode is
        NaN.
    """
    univariate_stats = {}
    for column in dataset.columns:
        series = dataset[column]
        # mode() returns an empty Series for an empty or all-NaN column;
        # indexing it with [0] would raise, whereas the other statistics
        # simply report NaN in that situation.
        modes = series.mode()
        univariate_stats[column] = {
            'Mean': series.mean(),
            'Median': series.median(),
            'Mode': modes.iloc[0] if not modes.empty else float('nan'),
            'Variance': series.var(),
            'Standard Deviation': series.std(),
            'Skewness': series.skew(),
            'Kurtosis': series.kurt(),
        }
    return pd.DataFrame(univariate_stats)
21 B
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
# X_pima is the frame with the 'Outcome' column dropped; y_pima is the
# 'Outcome' column on its own.
# NOTE(review): `pima_diabetes` is not defined in this listing - presumably a
# pandas DataFrame loaded earlier; confirm.
X_pima = pima_diabetes.drop(columns='Outcome')
y_pima = pima_diabetes['Outcome']
21 C
# Multiple Regression
def multiple_regression(X, y):
    """Fit an OLS model of ``y`` on ``X`` with an added constant column.

    Args:
        X: Regressors; passed through ``sm.add_constant`` before fitting.
        y: Response; passed as the first argument of ``sm.OLS``.

    Returns:
        The object returned by ``.summary()`` on the fitted results.
    """
    design = sm.add_constant(X)
    fitted = sm.OLS(y, design).fit()
    return fitted.summary()
21 D
# Compare Univariate results
# NOTE(review): `uci_univariate` and `pima_univariate` are not assigned in this
# listing - presumably the results of univariate_analysis() on each dataset;
# confirm.
print("Comparing Univariate Results:\n")
print("UCI Diabetes Stats:\n", uci_univariate)
print("\nPima Diabetes Stats:\n", pima_univariate)
# Compare Bivariate and Multiple Regression outputs by analyzing the summaries
# printed earlier
EXPERIMENT 5
27
import numpy as np
from collections import Counter
# KNN function
def knn_classify(data, labels, query_point, k=3):
# Builds a list of (distance, label) pairs: one per point in `data`, each
# paired with the entry of `labels` at the same position.
# NOTE(review): indentation was lost in this listing, and the body stops after
# the loop - `k` is never used and nothing is returned in the visible code.
# The neighbor selection / voting / return steps are presumably missing;
# restore them from the original before running.
# NOTE(review): `euclidean_distance` is not defined in this listing.
# Calculate distances between the query point and all points in the dataset
distances = []
for i, point in enumerate(data):
distance = euclidean_distance(point, query_point)
distances.append((distance, labels[i]))
# NOTE(review): the dataset, the labels and the call that would consume these
# two values are not in this listing - presumably arguments for
# knn_classify(); confirm.
# Query point
query_point = [3, 3]
# Number of neighbors
k=3
28
# Install the dependency first (shell command, not Python): pip install scikit-learn
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Predict a new query point (example point from the Iris dataset)
# NOTE(review): `clf` and `data` are not defined in this listing - presumably
# the fitted DecisionTreeClassifier and the loaded dataset; confirm.
query_point = [5.1, 3.5, 1.4, 0.2]  # You can change this to any sample
# The sample is wrapped in a list so predict() receives a 2-D, one-row input.
predicted_class = clf.predict([query_point])
# The message used to be one string literal split across two physical lines,
# which is a syntax error; adjacent f-strings keep it a single message.
print(
    f"The predicted class for the query point {query_point} is: "
    f"{data.target_names[predicted_class][0]}"
)
29
import numpy as np
# NOTE(review): orphaned statement - the function this `return` belongs to is
# missing from this listing; restore the enclosing definition before running.
return clusters
30
import numpy as np
# DBSCAN function
def dbscan(data, epsilon, min_points):
# Returns `clusters`, a list with one entry per point in `data`, initialised
# to -1 (noise).
# NOTE(review): indentation was lost and the listing is incomplete:
# `point_idx` and `neighbor_idx` are never bound, so the loop over points and
# the loop over `neighbors` are presumably missing, along with the steps that
# write `cluster_id` into `clusters` and advance it. As shown, `clusters` is
# never modified after initialisation. Restore from the original before use.
# NOTE(review): `find_neighbors` is not defined in this listing.
# Initialize all points as unvisited
visited = [False] * len(data)
clusters = [-1] * len(data) # -1 means noise
cluster_id = 0
# Find neighbors
neighbors = find_neighbors(data, point_idx, epsilon)
if not visited[neighbor_idx]:
visited[neighbor_idx] = True
new_neighbors = find_neighbors(data, neighbor_idx, epsilon)
# Only a neighbor with at least `min_points` neighbors of its own adds them to
# the list.
if len(new_neighbors) >= min_points:
neighbors.extend(new_neighbors)
return clusters
# Parameters
# NOTE(review): presumably the `epsilon` and `min_points` arguments for
# dbscan(); the call itself is not in this listing.
epsilon = 2
min_points = 2
31
import numpy as np
# Perform PCA
# NOTE(review): `pca`, `data` and `num_components` are not defined in this
# listing. The call unpacks exactly three return values from pca().
reduced_data, eigenvalues, eigenvectors = pca(data, num_components)