Program 4
Write a program to demonstrate the working of the decision tree-based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.
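ID3 grows the tree greedily: at every node it splits on the attribute with the highest information gain. Writing S for the current set of samples and S_v for the subset in which attribute A takes value v, the two quantities the program computes are

Entropy(S) = - sum over classes c of p_c * log2(p_c)
Gain(S, A) = Entropy(S) - sum over values v of (|S_v| / |S|) * Entropy(S_v)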
Program:

import pandas as pd
import numpy as np

dataset = pd.read_csv('pgm4_dataset.csv',
                      names=['outlook', 'temperature', 'humidity', 'wind', 'class'])

def entropy(target_col):
    # Entropy of a column of class labels: -sum(p * log2(p))
    elements, counts = np.unique(target_col, return_counts=True)
    return np.sum([(-counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts))
                   for i in range(len(elements))])

def InfoGain(data, split_attribute_name, target_name="class"):
    # Gain = entropy of the whole set minus the weighted entropy of the split subsets
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    weighted_entropy = np.sum(
        [(counts[i] / np.sum(counts)) *
         entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
         for i in range(len(vals))])
    return total_entropy - weighted_entropy

def ID3(data, originaldata, features, target_attribute_name="class", parent_node_class=None):
    # All samples share one class: return that class as a leaf
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    # Empty subset: fall back to the majority class of the original data
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[
            np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
    # No features left: fall back to the parent node's majority class
    elif len(features) == 0:
        return parent_node_class
    else:
        # Majority class of the current subset, passed down as the fallback
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        # Split on the feature with the highest information gain
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature = features[np.argmax(item_values)]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        # Grow one subtree per value of the chosen feature
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, dataset, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree

tree = ID3(dataset, dataset, list(dataset.columns[:-1]))
print(' \nDisplay Tree\n', tree)
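The listing above only builds the tree; to classify a new sample, as the problem statement asks, the tree has to be walked from the root to a leaf. The helper below is a minimal sketch of that lookup: predict, the dictionary query format, and the default fallback for unseen attribute values are additions, not part of the original program.

def predict(query, tree, default='yes'):
    # Walk the nested dictionary from the root attribute down to a leaf.
    # 'predict' and the 'default' fallback are hypothetical additions.
    for attribute in query:
        if attribute in tree:
            try:
                result = tree[attribute][query[attribute]]
            except KeyError:
                return default          # attribute value never seen in training
            if isinstance(result, dict):
                return predict(query, result, default)  # internal node: descend
            return result               # leaf: the predicted class
    return default

sample = {'outlook': 'sunny', 'temperature': 'Hot', 'humidity': 'normal', 'wind': 'weak'}
print(predict(sample, tree))  # 'yes' for this sample, per the tree shown below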
Dataset: pgm4_dataset.csv (columns: outlook, temperature, humidity, wind, class)

sunny     Hot   high    weak    No
sunny     Hot   high    strong  No
overcast  Hot   high    weak    Yes
rain      Mild  high    weak    Yes
rain      Cool  normal  weak    Yes
rain      Cool  normal  strong  No
overcast  Cool  normal  strong  Yes
sunny     Mild  high    weak    No
sunny     Cool  normal  weak    Yes
rain      Mild  normal  weak    Yes
sunny     Mild  normal  strong  Yes
overcast  Mild  high    strong  Yes
overcast  Hot   normal  weak    Yes
rain      Mild  high    strong  No
Display Tree
{'outlook': {'overcast': 'yes', 'rain': {'wind': {'strong': 'no', 'weak': 'yes'}}, 'sunny': {'humidity':
{'high': 'no', 'normal': 'yes'}}}}
The decision tree for the dataset, as built by the ID3 algorithm:

outlook
 |- overcast -> yes
 |- rain     -> wind
 |              |- weak   -> yes
 |              |- strong -> no
 |- sunny    -> humidity
                |- normal -> yes
                |- high   -> no
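As a quick sanity check on why outlook ends up at the root, the per-feature gains can be printed directly. This snippet assumes the Program 4 listing above has already been run; the figures quoted in the comment are the classic play-tennis values.

for f in ['outlook', 'temperature', 'humidity', 'wind']:
    print(f, round(InfoGain(dataset, f), 3))
# Classic play-tennis figures: outlook ~0.247, temperature ~0.029,
# humidity ~0.152, wind ~0.048 -- outlook has the largest gain, so it is the root.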
Program 5
Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.
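Backpropagation trains the network by the delta rule. Each pass computes the output error (y - y_hat), scales it by the sigmoid derivative sigma'(x) = sigma(x) * (1 - sigma(x)) to get an output-layer delta, propagates that delta backwards through the output weights to get a hidden-layer delta, and finally moves every weight matrix (and bias) by lr * (layer_input^T . delta). The training loop in the program below performs exactly these steps.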
Program:
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalize each feature by its column maximum
y = y / 100                 # scale targets into [0, 1]

# Sigmoid activation and its derivative (in terms of the sigmoid output)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def derivatives_sigmoid(x):
    return x * (1 - x)

epoch = 5000  # number of training iterations
lr = 0.1      # learning rate
# Weights and biases: 2 input neurons -> 3 hidden neurons -> 1 output neuron
wh = np.random.uniform(size=(2, 3))
bh = np.random.uniform(size=(1, 3))
wout = np.random.uniform(size=(3, 1))
bout = np.random.uniform(size=(1, 1))

for i in range(epoch):
    # Forward propagation
    hlayer_act = sigmoid(np.dot(X, wh) + bh)
    output = sigmoid(np.dot(hlayer_act, wout) + bout)
    # Backpropagation: scale the error by the sigmoid gradient at each layer
    d_output = (y - output) * derivatives_sigmoid(output)
    d_hiddenlayer = d_output.dot(wout.T) * derivatives_sigmoid(hlayer_act)
    # Update weights and biases
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Normalized Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" + str(output))
OUTPUT:
Normalized Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.89648872]
[0.87592696]
[0.89713304]]
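The program only reports the fit on its three training rows; to "test the same" on an unseen input, a single forward pass with the trained weights suffices. A minimal sketch, assuming the hypothetical sample below and reuse of the training maxima [3, 9] for normalization:

x_new = np.array([[2, 7]], dtype=float) / np.array([3, 9], dtype=float)  # same normalization as training
pred = sigmoid(np.dot(sigmoid(np.dot(x_new, wh) + bh), wout) + bout)     # forward pass with trained weights
print(pred * 100)  # rescale back to the original 0-100 score range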