Lab 3

Building a decision tree classifier with the ID3 algorithm

import pandas as pd
import math

# Load the training data and identify the target attribute (last column).
df = pd.read_csv('1d3.csv')
print("\n Input Data Set is:\n", df)

t = df.keys()[-1]
print('Target Attribute is: ', t)

attribute_names = list(df.keys())
attribute_names.remove(t)
print('Predicting Attributes:', attribute_names)

def entropy(probs):
    # Entropy of a class distribution given as a list of probabilities.
    return sum([-prob * math.log(prob, 2) for prob in probs])

def entropy_of_list(ls, value):
    # Entropy of a list of target values (e.g. the Yes/No column of one subset).
    from collections import Counter
    cnt = Counter(x for x in ls)
    print('Target attribute class count(Yes/No)', dict(cnt))
    total_instances = len(ls)
    print("Total no of instances/records associated with {0} is: {1}".format(value, total_instances))
    probs = [x / total_instances for x in cnt.values()]
    return entropy(probs)

def information_gain(df, split_attribute, target_attribute, battr):
    # Information gain obtained by splitting df on split_attribute.
    print("\n\n-----Information Gain Calculation of ", split_attribute, "--------")
    df_split = df.groupby(split_attribute)
    glist = []
    for gname, group in df_split:
        print('Grouped Attribute Values \n', group)
        glist.append(gname)
    glist.reverse()                      # so glist.pop() yields group names in group order
    nobs = len(df.index)                 # total number of records in this subset

    df_agg1 = df_split.agg({target_attribute: lambda x: entropy_of_list(x, glist.pop())})
    df_agg1.columns = ['Entropy']
    df_agg2 = df_split.agg({target_attribute: lambda x: len(x) / nobs})
    df_agg2.columns = ['Proportion']

    # Weighted entropy after the split, and entropy before the split.
    new_entropy = sum(df_agg1['Entropy'] * df_agg2['Proportion'])
    if battr != 'S':
        old_entropy = entropy_of_list(df[target_attribute], 'S-' + str(df[battr].iloc[0]))
    else:
        old_entropy = entropy_of_list(df[target_attribute], battr)
    return old_entropy - new_entropy

def id3(df, target_attribute, attribute_names, default_class=None, default_attr='S'):
    from collections import Counter
    cnt = Counter(x for x in df[target_attribute])    # class counts of Yes/No
    if len(cnt) == 1:                                 # pure node: only one class remains
        return next(iter(cnt))
    elif df.empty or (not attribute_names):           # no data or no attributes left
        return default_class
    else:
        default_class = max(cnt, key=cnt.get)         # majority class at this node
        gainz = []
        for attr in attribute_names:
            ig = information_gain(df, attr, target_attribute, default_attr)
            gainz.append(ig)
            print("Information gain of ", attr, " is: ", ig)
        index_of_max = gainz.index(max(gainz))
        best_attr = attribute_names[index_of_max]     # attribute with the highest gain
        print("\nAttribute with the maximum gain is: ", best_attr)
        tree = {best_attr: {}}
        remaining_attribute_names = [i for i in attribute_names if i != best_attr]
        for attr_val, data_subset in df.groupby(best_attr):
            subtree = id3(data_subset, target_attribute, remaining_attribute_names,
                          default_class, best_attr)
            tree[best_attr][attr_val] = subtree
        return tree

from pprint import pprint
tree = id3(df, t, attribute_names)
print("\nThe Resultant Decision Tree is:")
pprint(tree)

The code above implements a decision tree classifier using the ID3 (Iterative Dichotomiser 3)
algorithm. Let's walk through the process and explain what the code is doing:

1. Reading the Dataset:

The dataset is read using pandas.read_csv():


df = pd.read_csv('1d3.csv')

print("\n Input Data Set is:\n", df)

This loads the data from the CSV file 1d3.csv into a DataFrame, and prints the dataset.
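The contents of 1d3.csv are not shown in this lab, but ID3 walkthroughs of this kind typically use a small categorical dataset such as the classic PlayTennis table. A minimal sketch of what such a file could look like, purely as an assumption (the real column names and rows may differ), using the pd alias imported above:

# Hypothetical PlayTennis-style data standing in for 1d3.csv (illustrative only).
sample = pd.DataFrame({
    'Outlook':     ['Sunny', 'Sunny',  'Overcast', 'Rain', 'Rain'],
    'Temperature': ['Hot',   'Hot',    'Hot',      'Mild', 'Cool'],
    'Humidity':    ['High',  'High',   'High',     'High', 'Normal'],
    'Wind':        ['Weak',  'Strong', 'Weak',     'Weak', 'Weak'],
    'PlayTennis':  ['No',    'No',     'Yes',      'Yes',  'Yes'],
})
# sample.to_csv('1d3.csv', index=False)   # uncomment to generate a test file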

2. Extracting Target and Predicting Attributes:

The target attribute (i.e., the attribute to be predicted) is the last column of the dataset:


t = df.keys()[-1]

print('Target Attribute is: ', t)

The remaining columns are the attributes that are used to predict the target:

attribute_names = list(df.keys())

attribute_names.remove(t)

print('Predicting Attributes:', attribute_names)
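Assuming the PlayTennis-style columns sketched earlier (an assumption, since the real file is not shown), these two steps would resolve as follows; note that df.keys() on a DataFrame is simply another name for df.columns.

# With the hypothetical columns above:
# t                -> 'PlayTennis'
# attribute_names  -> ['Outlook', 'Temperature', 'Humidity', 'Wind']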

3. Entropy Calculation:

The function entropy(probs) calculates the entropy of a set of probabilities. The entropy is a
measure of the uncertainty or impurity of a set of classes. A lower entropy means the set is more
pure.


def entropy(probs):
    return sum([-prob * math.log(prob, 2) for prob in probs])

The function entropy_of_list(ls, value) calculates the entropy of a list of target values for a
particular attribute value.


def entropy_of_list(ls, value):
    from collections import Counter
    cnt = Counter(x for x in ls)
    print('Target attribute class count(Yes/No)', dict(cnt))
    total_instances = len(ls)
    print("Total no of instances/records associated with {0} is: {1}".format(value, total_instances))
    probs = [x / total_instances for x in cnt.values()]
    return entropy(probs)
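As a quick sanity check of these two functions (a small illustrative example, not part of the lab output): a perfectly mixed 50/50 set has the maximum entropy of 1 bit, a pure set has entropy 0, and the textbook 9-Yes/5-No split comes out at roughly 0.94 bits.

print(entropy([0.5, 0.5]))                            # 1.0 -> perfectly mixed set
print(entropy([1.0]))                                 # 0.0 -> pure set
print(entropy_of_list(['Yes'] * 9 + ['No'] * 5, 'S')) # ~0.940 for 9 Yes / 5 No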

4. Information Gain:

The Information Gain (IG) is used to determine which attribute to split on at each step of the tree.
It measures the reduction in entropy when splitting the data based on a particular attribute. The
function information_gain() calculates the information gain of splitting the dataset on a specific
attribute:


def information_gain(df, split_attribute, target_attribute, battr):
    print("\n\n-----Information Gain Calculation of ", split_attribute, "--------")
    df_split = df.groupby(split_attribute)
    glist = []
    for gname, group in df_split:
        print('Grouped Attribute Values \n', group)
        glist.append(gname)
    glist.reverse()                      # so glist.pop() yields group names in group order
    nobs = len(df.index)                 # total number of records in this subset

    df_agg1 = df_split.agg({target_attribute: lambda x: entropy_of_list(x, glist.pop())})
    df_agg1.columns = ['Entropy']
    df_agg2 = df_split.agg({target_attribute: lambda x: len(x) / nobs})
    df_agg2.columns = ['Proportion']

    new_entropy = sum(df_agg1['Entropy'] * df_agg2['Proportion'])
    if battr != 'S':
        old_entropy = entropy_of_list(df[target_attribute], 'S-' + str(df[battr].iloc[0]))
    else:
        old_entropy = entropy_of_list(df[target_attribute], battr)
    return old_entropy - new_entropy
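Concretely, the function computes the standard ID3 quantity Gain(S, A) = Entropy(S) - sum over values v of (|S_v| / |S|) * Entropy(S_v). A small hand computation with the textbook PlayTennis numbers (illustrative only, not necessarily the values in 1d3.csv) shows the idea:

# Gain for a hypothetical 'Outlook' split over 14 records (9 Yes / 5 No):
# Sunny: 2 Yes / 3 No, Overcast: 4 Yes / 0 No, Rain: 3 Yes / 2 No.
old = entropy([9/14, 5/14])                       # entropy before the split, ~0.940
new = (5/14) * entropy([2/5, 3/5]) \
    + (4/14) * entropy([1.0]) \
    + (5/14) * entropy([3/5, 2/5])                # weighted entropy after the split
print(old - new)                                  # ~0.246, the information gain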

5. ID3 Algorithm:

The main function, id3(), implements the ID3 algorithm for building a decision tree. This function
works recursively to split the data based on the attribute that maximizes the information gain.


def id3(df, target_attribute, attribute_names, default_class=None, default_attr='S'):
    from collections import Counter
    cnt = Counter(x for x in df[target_attribute])    # class counts of Yes/No
    if len(cnt) == 1:                                 # pure node: only one class remains
        return next(iter(cnt))
    elif df.empty or (not attribute_names):           # no data or no attributes left
        return default_class
    else:
        default_class = max(cnt, key=cnt.get)         # majority class at this node
        gainz = []
        for attr in attribute_names:
            ig = information_gain(df, attr, target_attribute, default_attr)
            gainz.append(ig)
            print("Information gain of ", attr, " is: ", ig)
        index_of_max = gainz.index(max(gainz))
        best_attr = attribute_names[index_of_max]
        print("\nAttribute with the maximum gain is: ", best_attr)
        tree = {best_attr: {}}
        remaining_attribute_names = [i for i in attribute_names if i != best_attr]
        for attr_val, data_subset in df.groupby(best_attr):
            subtree = id3(data_subset, target_attribute, remaining_attribute_names,
                          default_class, best_attr)
            tree[best_attr][attr_val] = subtree
        return tree

Steps in the id3() Function:

1. Base Case: If all the instances in the dataset belong to the same class (e.g., all "YES" or all
"NO"), return that class.

2. Base Case: If the dataset is empty or there are no attributes left to split on, return the
default class.

3. Choose the Best Attribute: Calculate the information gain for each attribute, and choose
the attribute with the highest information gain to split on.

4. Recursive Case: Split the dataset based on the chosen attribute and recursively build
subtrees for each subset of data; a tiny end-to-end sketch follows this list.
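A minimal end-to-end sketch of this recursion on an invented four-row dataset (the data and column names here are assumptions chosen only to keep the trace short):

# Tiny illustrative run of id3(); 'toy' is invented data, not 1d3.csv.
toy = pd.DataFrame({
    'Outlook': ['Sunny', 'Sunny',  'Overcast', 'Overcast'],
    'Wind':    ['Weak',  'Strong', 'Weak',     'Strong'],
    'Play':    ['No',    'No',     'Yes',      'Yes'],
})
print(id3(toy, 'Play', ['Outlook', 'Wind']))
# -> {'Outlook': {'Overcast': 'Yes', 'Sunny': 'No'}}   (Outlook alone separates the classes)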

6. Printing the Decision Tree:

After running the id3() function, the decision tree is printed using the pprint() function for a better
visual representation:


from pprint import pprint

tree = id3(df, t, attribute_names)

print("\nThe Resultant Decision Tree is:")

pprint(tree)
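The exact tree depends on the contents of 1d3.csv. For the classic PlayTennis data, the printed dictionary would typically take a shape like the following (shown only to illustrate the nesting, not as actual lab output):

# Illustrative shape of the pprint() output for PlayTennis-style data:
# {'Outlook': {'Overcast': 'Yes',
#              'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
#              'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}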

Conclusion:
The code will generate a decision tree based on the ID3 algorithm. The tree will be constructed
step-by-step by evaluating which attribute (from the available ones) provides the highest
information gain, and then recursively applying this process to each subset of the data.

The result is a hierarchical decision tree structure that can be used to make predictions based on
the values of the attributes.
