Codes & Outputs
Exp 1 Code:
import pandas as pd

def read_training_data(file_path):
    data = pd.read_csv(file_path)
    features = data.iloc[:, :-1].values.tolist()  # All columns except the last (attributes)
    target = data.iloc[:, -1].values.tolist()     # Last column (class label)
    return features, target

def find_s(features, target):
    hypothesis = None
    for i, example in enumerate(features):
        if target[i] == "Yes":             # FIND-S considers only positive examples
            if hypothesis is None:
                hypothesis = example[:]    # Initialize with the first positive example
            else:
                # Update the hypothesis by generalizing it
                for j in range(len(hypothesis)):
                    if hypothesis[j] != features[i][j]:
                        hypothesis[j] = '?'  # Generalize attributes that disagree

    return hypothesis

file_path = "enjoysport.csv"  # assumed name for the sample CSV shown below
features, target = read_training_data(file_path)
# Apply FIND-S algorithm
most_specific_hypothesis = find_s(features, target)
print("Most Specific Hypothesis:", most_specific_hypothesis)
Output:
Most Specific Hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']
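Sky, Temp and Wind keep their concrete values because every positive row in the sample file below agrees on them; Humidity, Water and Forecast are generalized to '?' because the positive rows disagree on those attributes.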
Sample CSV File:
Sky,Temp,Humidity,Wind,Water,Forecast,EnjoySport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes
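To reproduce the output without typing the file by hand, the sample table can be written to disk first; a minimal sketch (the filename "enjoysport.csv" is the one assumed in the Exp 1 code above):

import pandas as pd

# Recreate the sample CSV shown above; "enjoysport.csv" is an assumed filename
rows = [
    ["Sunny", "Warm", "Normal", "Strong", "Warm", "Same",   "Yes"],
    ["Sunny", "Warm", "High",   "Strong", "Warm", "Same",   "Yes"],
    ["Rainy", "Cold", "High",   "Strong", "Warm", "Change", "No"],
    ["Sunny", "Warm", "High",   "Strong", "Cool", "Change", "Yes"],
]
columns = ["Sky", "Temp", "Humidity", "Wind", "Water", "Forecast", "EnjoySport"]
pd.DataFrame(rows, columns=columns).to_csv("enjoysport.csv", index=False)
# Running the Exp 1 script afterwards prints the hypothesis shown above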
Exp 2 Code:
import pandas as pd

def candidate_elimination(filename):
    # Load data from CSV file
    data = pd.read_csv(filename)
    attributes = data.iloc[:, :-1].values
    target = data.iloc[:, -1].values

    # Initialize S and G
    num_attributes = attributes.shape[1]
    S = ["ϕ"] * num_attributes        # Most specific hypothesis
    G = [["?"] * num_attributes]      # Most general hypothesis

    # Process each example in the dataset
    for i, example in enumerate(attributes):
        if target[i] == "Yes":  # Positive example
            # Generalize S to include the example
            for attr_idx in range(num_attributes):
                if S[attr_idx] == "ϕ":  # Empty hypothesis
                    S[attr_idx] = example[attr_idx]
                elif S[attr_idx] != example[attr_idx]:
                    S[attr_idx] = "?"
            # Remove inconsistent hypotheses from G
            G = [g for g in G
                 if all(g[attr_idx] == "?" or g[attr_idx] == example[attr_idx] or g[attr_idx] == "ϕ"
                        for attr_idx in range(num_attributes))]
        elif target[i] == "No":  # Negative example
            # Remove inconsistent hypotheses from S
            S = ["ϕ" if S[attr_idx] != "?" and S[attr_idx] == example[attr_idx] else S[attr_idx]
                 for attr_idx in range(num_attributes)]
            # Specialize G to exclude the example
            new_G = []
            for g in G:
                for attr_idx in range(num_attributes):
                    if g[attr_idx] == "?":
                        for value in set(attributes[:, attr_idx]):
                            if value != example[attr_idx]:
                                specialized_hypothesis = g[:]
                                specialized_hypothesis[attr_idx] = value
                                new_G.append(specialized_hypothesis)
            G = new_G

    # Remove hypotheses in G that are more specific than S
    G = [g for g in G
         if all(g[attr_idx] == "?" or g[attr_idx] == S[attr_idx] or S[attr_idx] == "ϕ"
                for attr_idx in range(num_attributes))]

    print("Final Specific Boundary (S):", S)
    print("Final General Boundary (G):", G)

# Example usage
candidate_elimination("training_data.csv")
Sample CSV File (training_data.csv):
Weather,Temperature,Humidity,Wind,Target
Sunny,Hot,High,Weak,No
Sunny,Hot,High,Strong,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Weak,Yes
Rain,Cool,Normal,Weak,Yes
Rain,Cool,Normal,Strong,No
Overcast,Cool,Normal,Strong,Yes
Sunny,Mild,High,Weak,No
Output:
Final Specific Boundary (S): ['?', '?', '?', '?']
Final General Boundary (G): []
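Both boundaries degenerate on this dataset: the positive rows disagree on every attribute, so S generalizes to ['?', '?', '?', '?']; that hypothesis also covers the negative rows, so no purely conjunctive hypothesis is consistent with all eight examples and G ends up empty.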
____________________________________________________________________________________
Exp 3 Code:
import pandas as pd
import numpy as np
from math import log2

class DecisionTreeID3:
    def __init__(self):
        self.tree = None

    def fit(self, data, target_attribute):
        attributes = list(data.columns)
        attributes.remove(target_attribute)
        self.tree = self._id3(data, attributes, target_attribute)

    def _id3(self, data, attributes, target_attribute):
        target_values = data[target_attribute]
        # Base cases
        if len(set(target_values)) == 1:  # All examples have the same class
            return target_values.iloc[0]
        if not attributes:  # No attributes left
            return target_values.mode()[0]