Programs 4 and 5


Program 4

Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.

Program:
import pandas as pd
import numpy as np

dataset = pd.read_csv('pgm4_dataset.csv',
                      names=['outlook', 'temperature', 'humidity', 'wind', 'class'])

# Entropy of a target column: -sum(p * log2(p)) over the distinct class labels
def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy = np.sum([(-counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts))
                      for i in range(len(elements))])
    return entropy

# Information gain of a split = entropy(parent) - weighted entropy of the children
def InfoGain(data, split_attribute_name, target_name="class"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    Weighted_Entropy = np.sum(
        [(counts[i] / np.sum(counts)) *
         entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
         for i in range(len(vals))])
    Information_Gain = total_entropy - Weighted_Entropy
    return Information_Gain

def ID3(data, originaldata, features, target_attribute_name="class", parent_node_class=None):
    # If all remaining samples share one class, return that class
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    # If the subset is empty, return the majority class of the original dataset
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[
            np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
    # If no features remain to split on, return the parent node's majority class
    elif len(features) == 0:
        return parent_node_class
    else:
        # Majority class of the current subset becomes the fallback for its children
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        # Information gain values for the remaining features in the dataset
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        # Grow a subtree for every value of the best feature
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, dataset, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree

tree = ID3(dataset, dataset, list(dataset.columns[:-1]))
print('\nDisplay Tree\n', tree)
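The problem statement also asks us to classify a new sample with the learned tree. A minimal sketch, assuming the nested-dictionary tree produced above; the predict helper and the new_sample query are illustrative additions, not part of the original listing:

def predict(query, tree, default='yes'):
    # Walk the nested dict: find the attribute this node splits on,
    # then follow the branch matching the query's value for it.
    # 'default' is returned when the tree cannot route the query
    # (an illustrative fallback choice).
    for attribute in query:
        if attribute in tree:
            try:
                result = tree[attribute][query[attribute]]
            except KeyError:
                return default  # attribute value never seen during training
            if isinstance(result, dict):
                return predict(query, result, default)  # internal node: recurse
            return result  # leaf: predicted class label
    return default

# Classify a new, unseen sample
new_sample = {'outlook': 'sunny', 'temperature': 'hot',
              'humidity': 'normal', 'wind': 'weak'}
print('Prediction for new sample:', predict(new_sample, tree))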

Dataset: pgm4_dataset.csv
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes
rain,cool,normal,weak,yes
rain,cool,normal,strong,no
overcast,cool,normal,strong,yes
sunny,mild,high,weak,no
sunny,cool,normal,weak,yes
rain,mild,normal,weak,yes
sunny,mild,normal,strong,yes
overcast,mild,high,strong,yes
overcast,hot,normal,weak,yes
rain,mild,high,strong,no
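As a quick check of the entropy function on this data: the class column holds 9 yes and 5 no out of 14 rows, so entropy(dataset['class']) = -(9/14)·log2(9/14) - (5/14)·log2(5/14) ≈ 0.940, which is the root entropy the first InfoGain calls subtract from.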

OUTPUT:
Display Tree
{'outlook': {'overcast': 'yes', 'rain': {'wind': {'strong': 'no', 'weak': 'yes'}}, 'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}}}
The decision tree for the dataset using the ID3 algorithm is:

outlook
├── overcast → yes
├── rain → wind
│   ├── weak → yes
│   └── strong → no
└── sunny → humidity
    ├── normal → yes
    └── high → no

Program 5

Build an Artificial Neural Network by implementing the Backpropagation algorithm and test it using appropriate data sets.

Program:

import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # input features
y = np.array(([92], [86], [89]), dtype=float)        # target outputs
X = X / np.amax(X, axis=0)  # normalize each feature by its column maximum
y = y / 100                 # scale targets into [0, 1]

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid, written in terms of the sigmoid's output
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000  # number of training iterations
lr = 0.1      # learning rate

# Weight and bias initialization: values drawn uniformly from [0, 1)
wh = np.random.uniform(size=(2, 3))    # input layer (2 features) to hidden layer (3 neurons)
bh = np.random.uniform(size=(1, 3))    # hidden layer biases
wout = np.random.uniform(size=(3, 1))  # hidden layer to output layer (1 neuron)
bout = np.random.uniform(size=(1, 1))  # output layer bias

for i in range(epoch):
    # Forward propagation
    hlayer_act = sigmoid(np.dot(X, wh) + bh)
    outinp = np.dot(hlayer_act, wout) + bout
    output = sigmoid(outinp)

    # Backpropagation
    d_output = (y - output) * derivatives_sigmoid(output)
    EH = d_output.dot(wout.T)  # how much the hidden layer contributed to the error
    d_hiddenlayer = EH * derivatives_sigmoid(hlayer_act)

    # Update weights and biases with the layer gradients
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Normalized Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)

OUTPUT:
Normalized Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.89648872]
[0.87592696]
[0.89713304]]
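The problem statement also asks to test the trained network. A minimal sketch, assuming the weights wh, bh, wout, bout left behind by the training loop above; forward and x_test are illustrative names, not part of the original listing:

def forward(x_new):
    # One forward pass through the trained two-layer network
    h = sigmoid(np.dot(x_new, wh) + bh)
    return sigmoid(np.dot(h, wout) + bout)

# Training-set error after 5000 epochs
print('Mean Squared Error:', np.mean((y - output) ** 2))

# Prediction for a new, already-normalized sample (hypothetical test point)
x_test = np.array([[0.5, 0.8]])
print('Prediction for new sample:', forward(x_test))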
