# ML Lab Works
# Filename: find_s.py
"""
Implements the FIND-S algorithm to find the most specific hypothesis
that fits the given training data.
"""
import csv


def find_s(data):
    """Run FIND-S and return the most specific hypothesis consistent with the data.

    Each row of `data` is a list of attribute values followed by a
    'yes'/'no' class label in the last column; only positive rows are used.
    """
    n = len(data[0]) - 1
    print(" No. of attributes are:", n)
    hypothesis = ['0'] * n  # '0' = maximally specific, matches nothing yet
    print("Initial Hypothesis: ")
    print(hypothesis)
    print("\nAfter every iteration: ")
    for row in data:
        if row[n] == 'yes':
            for j in range(n):
                if hypothesis[j] == '0':
                    # First positive example: adopt its attribute values.
                    # (The original jumped straight to '?' here, which always
                    # produced the all-'?' hypothesis — a bug.)
                    hypothesis[j] = row[j]
                elif hypothesis[j] != row[j]:
                    hypothesis[j] = '?'  # conflicting value: generalize
            print(hypothesis)
    print("\nFinal Hypothesis:")
    print(hypothesis)
    return hypothesis


if __name__ == "__main__":
    # NOTE(review): the original referenced `data` without ever loading it.
    # The CSV file name is assumed — confirm against the lab hand-out.
    with open('finds.csv') as f:
        find_s(list(csv.reader(f)))
def candidate_elimination(concepts, target):
    """Candidate-elimination: return the (specific_h, general_h) boundaries.

    `concepts` is a list of attribute-value rows and `target` the parallel
    list of 'yes'/'no' labels.  The specific boundary starts at the first
    example; the general boundary starts fully general, one row per attribute.
    NOTE(review): the original fragment lacked the `def` line and both
    initializations — reconstructed from the standard algorithm; confirm.
    """
    n = len(concepts[0])
    specific_h = list(concepts[0])
    general_h = [['?'] * n for _ in range(n)]
    for i, h in enumerate(concepts):
        print("\nInstance", i + 1, "is", h)
        if target[i] == "yes":
            print("Instance is positive")
            # Positive example: generalize specific_h where it disagrees and
            # retract the corresponding general_h constraint.
            for x in range(n):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            print("Instance is negative")
            # Negative example: specialize general_h on every attribute where
            # the example disagrees with the specific hypothesis.
            for x in range(n):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
    # Drop rows that remain fully general.  Sized from the data instead of the
    # original's hard-coded five '?' entries, and rebuilt in one pass instead
    # of calling remove() while iterating.
    general_h = [g for g in general_h if g != ['?'] * n]
    return specific_h, general_h
# Fit a decision tree on a tiny hand-made binary dataset.
# NOTE(review): `np` and `tree` are not defined in this fragment — presumably
# `import numpy as np` and `tree = DecisionTreeClassifier(...)` appear earlier
# in the full script; confirm.
X = np.array([[1, 1, 1],
[1, 0, 1],
[0, 1, 0],
[0, 0, 1],
[1, 1, 0]])
y = np.array([1, 1, 0, 0, 1])
tree.fit(X, y)
# Simple linear regression: predict price from diameter.
# NOTE(review): `data` and `LinearRegression` are not defined in this
# fragment — presumably loaded/imported earlier in the full script; confirm.
diameter = data['diameter'].values.reshape(-1, 1)  # reshape: sklearn expects 2-D X
price = data['price'].values
model = LinearRegression()
model.fit(diameter, price)
intercept = model.intercept_
slope = model.coef_[0]
print("Intercept:", intercept)
print("Slope:", slope)
predictions = model.predict(diameter)
# Logistic regression on study hours vs. exam result (first of two
# near-identical copies in this file; see the second copy below).
# NOTE(review): X_train/X_test/y_train/y_test and `new_predictions` are never
# created in this fragment — a train_test_split call and a new-data predict
# step are missing, and the loop body below lost its indentation in the
# paste; confirm against the full lab script.
data = pd.read_csv('labs.csv')
print(data)
X = data[['studyhours']]
y = data['examresult']
model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
print("New predictions:")
for i, prediction in enumerate(new_predictions):
print("Instance {}: predicted Result: {}".format(i+1, prediction))
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Logistic regression: predict exam result from study hours.
data = pd.read_csv('lab3.csv')
print(data)
X = data[['studyhours']]
Y = data['examresult']  # Corrected target variable name

# The original fit on X_train/y_train without ever splitting — add the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0)

model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: ", accuracy)

# The original printed `new_predictions` without ever computing it; predict
# for a few unseen study-hour values.
# NOTE(review): sample values assumed — confirm against the lab hand-out.
new_data = pd.DataFrame({'studyhours': [2, 5, 8]})
new_predictions = model.predict(new_data)
print("New predictions:")
for i, prediction in enumerate(new_predictions):
    print("Instance {}: Predicted Result {}".format(i + 1, prediction))
import pandas as pd
from sklearn.model_selection import cross_val_score
# The import below was broken across two lines in the paste; rejoined.
from sklearn.linear_model import LinearRegression, LogisticRegression
from statistics import mean, stdev

# 10-fold cross-validation of linear regression on the wine-quality data.
data = pd.read_csv("wineQT.csv")
X_set = data.drop('quality', axis=1)  # Corrected to X_set
y_set = data['quality']

# Linear Regression
model = LinearRegression()
scores = cross_val_score(model, X_set, y_set, cv=10)  # one R^2 score per fold
print("Linear Regression Scores:", scores)
# NOTE(review): the labels are loose — mean/stdev of fold scores are not the
# statistical bias/variance of the model, and stdev is a standard deviation,
# not a variance.
print("Linear Regression Bias (Mean):", mean(scores))
print("Linear Regression Variance (StDev):", stdev(scores))
# NOTE(review): incomplete fragment — `k_values` is undefined here and the
# loop body only instantiates a model; the per-k fit/score step is missing.
# Confirm against the full lab script.
for k in k_values:
model = LinearRegression() # Create new model for each K
# Report which iris test instances the classifier predicted right and wrong.
# NOTE(review): `y_test` and `y_pred` come from a classifier fitted earlier in
# the full script — not visible in this fragment; confirm.
iris = load_iris()
X = iris.data
y = iris.target
print("Correct predictions:")
for i in range(len(y_test)):
    if y_test[i] == y_pred[i]:
        # This print (and the one below) was broken across two lines without a
        # continuation in the paste — rejoined into a single call.
        print("True label:", iris.target_names[y_test[i]],
              "-Predicted label:", iris.target_names[y_pred[i]])
print("\nWrong Predictions:")
for i in range(len(y_test)):
    if y_test[i] != y_pred[i]:
        print("True label:", iris.target_names[y_test[i]],
              "-predicted label:", iris.target_names[y_pred[i]])
# Synthetic data for locally weighted regression: a noisy sine over [0, 10].
np.random.seed(42)  # reproducible noise
x_train = np.linspace(0, 10, 100)
y_train = 2 * np.sin(x_train) + np.random.normal(0, 0.2, 100)
x_test = np.linspace(0, 10, 50)
# Kernel bandwidth for the locally weighted fit; smaller tau -> more local.
tau = 0.1 # Corrected tau value (0.01 was likely too small)
# Gaussian naive Bayes on the iris dataset.
# NOTE(review): X_train/X_test/y_train/y_test, `accuracy` and `precision` are
# never created in this fragment — the train_test_split and the metric
# calculations are missing here; confirm against the full lab script.
iris = load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred = nb_classifier.predict(X_test)
print('Accuracy:', accuracy)
print("Precision:", precision)
# Cluster the data and plot original labels vs. K-Means vs. EM side by side.
# NOTE(review): `X`, `X_scaled`, `data`, `em_labels`, `plt`, `np` and `KMeans`
# are created/imported earlier in the full script — not visible in this
# fragment; in particular the Gaussian-mixture (EM) fit that produces
# `em_labels` is missing here. Confirm.
# K-Means Clustering
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
kmeans.fit(X_scaled)
kmeans_labels = kmeans.labels_
# Create a colormap: index 0 -> red, 1 -> green, keyed by cluster/target label
colormap = np.array(['red', 'green'])
# Original Data
plt.subplot(1, 3, 1)
plt.scatter(X['trestbps'], X['chol'], c=colormap[data['target']], s=40)
plt.title('Original Data')
plt.xlabel('trestbps')
plt.ylabel('chol')
# K-Means Clustering
plt.subplot(1, 3, 2)
plt.scatter(X['trestbps'], X['chol'], c=colormap[kmeans_labels], s=40)
plt.title('K-Means Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')
# EM Clustering
plt.subplot(1, 3, 3)
plt.scatter(X['trestbps'], X['chol'], c=colormap[em_labels], s=40)
plt.title('EM Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')
plt.show()
# SVM on the first two iris features: split, scale, predict, report accuracy.
# Load data
iris = load_iris()
X = iris.data[:, :2]  # Use only the first two features for visualization
y = iris.target

# Split data (this call was broken across two lines in the paste; rejoined)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)

# Scale data: fit the scaler on the training split only, then apply to both
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# NOTE(review): `svm_classifier` is never constructed or fitted in this
# fragment — an `svm_classifier = SVC(...).fit(X_train, y_train)` step is
# missing here; likewise the `xx`/`yy` meshgrid used below. Confirm.
# Make predictions
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
print("Training accuracy:", accuracy_train)
print("Testing accuracy:", accuracy_test)

# Decision-boundary grid. The original called an undefined `classifier`;
# use the same svm_classifier as above for consistency.
Z = svm_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Project the data onto two principal components and plot per-class scatter.
# NOTE(review): `PCA`, `X`, `y`, `iris` and `plt` come from earlier in the
# full script — not visible in this fragment; confirm.
pca = PCA(n_components=2)
pca.fit(X)
X_transformed = pca.transform(X)
print("Original data:\n", X)
print("\nTransformed data:\n", X_transformed)
plt.figure(figsize=(8, 6))
# The original plotted an undefined `X_pca`; bind it to the transform
# computed above so the loop below actually runs.
X_pca = X_transformed
for i in range(len(iris.target_names)):
    plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], label=iris.target_names[i])