0% found this document useful (0 votes)
5 views9 pages

Codes & Outputs

code and output copy format

Uploaded by

koxey22172
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views9 pages

Codes & Outputs

code and output copy format

Uploaded by

koxey22172
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 9

Exp 1 Code:

import pandas as pd
def read_training_data(file_path):
    """Read a CSV training file and split it into features and target.

    Args:
        file_path: Path to a CSV file whose last column is the class label.

    Returns:
        (features, target): ``features`` is a list of rows containing all
        columns except the last; ``target`` is a list of the last-column
        values.
    """
    data = pd.read_csv(file_path)
    features = data.iloc[:, :-1].values.tolist()  # all columns except the last one
    target = data.iloc[:, -1].values.tolist()     # the last column
    return features, target


def find_s_algorithm(features, target):
    """FIND-S: compute the most specific hypothesis consistent with the
    positive training examples.

    Args:
        features: List of attribute-value rows.
        target: List of class labels; only rows labelled "Yes" are used.

    Returns:
        The most specific hypothesis as a list, with '?' marking an
        attribute generalized away, or None if there is no positive example.
    """
    # Initialize the most specific hypothesis (none seen yet).
    hypothesis = None
    for i, label in enumerate(target):
        if label != "Yes":  # consider only positive examples
            continue
        if hypothesis is None:
            # First positive example. Copy it so the caller's row is not
            # mutated when the hypothesis is generalized later (the original
            # aliased features[i] and clobbered it in place).
            hypothesis = list(features[i])
        else:
            # Update the hypothesis by generalizing every attribute that
            # disagrees with this positive example.
            for j in range(len(hypothesis)):
                if hypothesis[j] != features[i][j]:
                    hypothesis[j] = '?'  # generalize attribute
    return hypothesis


# Demonstration
if __name__ == "__main__":
    file_path = "D:/training_data.csv"  # replace with the path to your CSV file
    # Read training data
    features, target = read_training_data(file_path)
    # Apply the FIND-S algorithm
    most_specific_hypothesis = find_s_algorithm(features, target)
    # Display the result
    print("Most Specific Hypothesis:", most_specific_hypothesis)
Output:
Most Specific Hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Sample CSV File:
Sky,Temp,Humidity,Wind,Water,Forecast,EnjoySport
Sunny,Warm,Normal,Strong,Warm,Same,Yes
Sunny,Warm,High,Strong,Warm,Same,Yes
Rainy,Cold,High,Strong,Warm,Change,No
Sunny,Warm,High,Strong,Cool,Change,Yes

Exp 2 Code:
import pandas as pd
def candidate_elimination(filename):
    """Run the Candidate-Elimination algorithm over a CSV training set.

    The CSV's last column is the class label ("Yes"/"No"); all other
    columns are categorical attributes. Prints the final specific boundary
    S and general boundary G. Returns None.

    Args:
        filename: Path to the CSV training file.
    """
    # Load data from CSV file.
    data = pd.read_csv(filename)
    attributes = data.iloc[:, :-1].values
    target = data.iloc[:, -1].values

    # Initialize S (most specific) and G (most general) boundaries.
    num_attributes = attributes.shape[1]
    S = ["ϕ"] * num_attributes
    G = [["?"] * num_attributes]

    # Process each example in the dataset.
    for i, example in enumerate(attributes):
        if target[i] == "Yes":  # positive example
            # Generalize S just enough to include the example.
            for attr_idx in range(num_attributes):
                if S[attr_idx] == "ϕ":  # empty hypothesis: adopt the value
                    S[attr_idx] = example[attr_idx]
                elif S[attr_idx] != example[attr_idx]:
                    S[attr_idx] = "?"
            # Remove hypotheses from G that reject the positive example.
            G = [g for g in G
                 if all(g[attr_idx] == "?"
                        or g[attr_idx] == example[attr_idx]
                        or g[attr_idx] == "ϕ"
                        for attr_idx in range(num_attributes))]
        elif target[i] == "No":  # negative example
            # Reset any S attribute that matches the negative example.
            S = ["ϕ" if S[attr_idx] != "?" and S[attr_idx] == example[attr_idx]
                 else S[attr_idx]
                 for attr_idx in range(num_attributes)]
            # Specialize every hypothesis in G to exclude the example.
            new_G = []
            for g in G:
                for attr_idx in range(num_attributes):
                    if g[attr_idx] == "?":
                        for value in set(attributes[:, attr_idx]):
                            if value != example[attr_idx]:
                                specialized_hypothesis = g[:]
                                specialized_hypothesis[attr_idx] = value
                                new_G.append(specialized_hypothesis)
            G = new_G
        # Remove hypotheses in G that are more specific than S.
        G = [g for g in G
             if all(g[attr_idx] == "?"
                    or g[attr_idx] == S[attr_idx]
                    or S[attr_idx] == "ϕ"
                    for attr_idx in range(num_attributes))]

    print("Final Specific Boundary (S):", S)
    print("Final General Boundary (G):", G)


# Example usage — guarded so importing this module does not require the
# training file to exist.
if __name__ == "__main__":
    candidate_elimination("training_data.csv")
CSV File:
Weather,Temperature,Humidity,Wind,Target
Sunny,Hot,High,Weak,No
Sunny,Hot,High,Strong,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Weak,Yes
Rain,Cool,Normal,Weak,Yes
Rain,Cool,Normal,Strong,No
Overcast,Cool,Normal,Strong,Yes
Sunny,Mild,High,Weak,No
Output:
Final Specific Boundary (S): ['?', '?', '?', '?']
Final General Boundary (G): []
____________________________________________________________
________________________
Exp 3 Code:
import pandas as pd
import numpy as np
from math import log2
class DecisionTreeID3:
def __init__(self):
self.tree = None
def fit(self, data, target_attribute):
attributes = list(data.columns)
attributes.remove(target_attribute)
self.tree = self._id3(data, attributes, target_attribute)
def _id3(self, data, attributes, target_attribute):
target_values = data[target_attribute]
# Base cases
if len(set(target_values)) == 1: # All examples have the same class
return target_values.iloc[0]
if not attributes: # No attributes left
return target_values.mode()[0]

# Select the attribute with the highest information gain


best_attribute = self._select_best_attribute(data, attributes,
target_attribute)
tree = {best_attribute: {}}

# Split data by the best attribute


for value in data[best_attribute].unique():
subset = data[data[best_attribute] == value]
if subset.empty:
tree[best_attribute][value] = target_values.mode()[0]
else:
remaining_attributes = [attr for attr in attributes if attr !=
best_attribute]
tree[best_attribute][value] = self._id3(subset,
remaining_attributes, target_attribute)
return tree

def _select_best_attribute(self, data, attributes, target_attribute):


target_entropy = self._entropy(data[target_attribute])
information_gains = {}
for attribute in attributes:
attribute_entropy = 0
for value in data[attribute].unique():
subset = data[data[attribute] == value]
weight = len(subset) / len(data)
attribute_entropy += weight *
self._entropy(subset[target_attribute])
information_gains[attribute] = target_entropy - attribute_entropy
return max(information_gains, key=information_gains.get)
def _entropy(self, values):
counts = values.value_counts()
probabilities = counts / len(values)
return -sum(probabilities * np.log2(probabilities))
def predict(self, sample):
return self._classify(sample, self.tree)
def _classify(self, sample, tree):
if not isinstance(tree, dict):
return tree
attribute = next(iter(tree))
value = sample.get(attribute)
subtree = tree.get(value, None)
if subtree is None:
return "Unknown" # Handle unseen attribute values
return self._classify(sample, subtree)
# Example usage
if __name__ == "__main__":
    # Sample dataset (play-tennis style, 8 examples).
    data = {
        "Weather": ["Sunny", "Sunny", "Overcast", "Rain", "Rain", "Rain",
                    "Overcast", "Sunny"],
        "Temperature": ["Hot", "Hot", "Hot", "Mild", "Cool", "Cool", "Mild",
                        "Mild"],
        "Humidity": ["High", "High", "High", "High", "Normal", "Normal",
                     "Normal", "High"],
        "Wind": ["Weak", "Strong", "Weak", "Weak", "Weak", "Strong",
                 "Strong", "Weak"],
        "PlayTennis": ["No", "No", "Yes", "Yes", "Yes", "No", "Yes", "No"]
    }
    df = pd.DataFrame(data)
    target_attribute = "PlayTennis"
    # Train decision tree
    tree_model = DecisionTreeID3()
    tree_model.fit(df, target_attribute)
    # Display the decision tree
    print("Decision Tree:", tree_model.tree)
    # Classify a new sample
    sample = {"Weather": "Rain", "Temperature": "Cool", "Humidity":
              "Normal", "Wind": "Strong"}
    prediction = tree_model.predict(sample)
    print(f"Prediction for {sample}: {prediction}")
Output:
Decision Tree: {'Weather': {'Sunny': 'No', 'Overcast': 'Yes', 'Rain': {'Wind':
{'Weak': 'Yes', 'Strong': 'No'}}}}
Prediction for {'Weather': 'Rain', 'Temperature': 'Cool', 'Humidity': 'Normal',
'Wind': 'Strong'}: No

You might also like