Practical 3
Practical 3
Write a program to demonstrate the working of the decision tree based ID3 Algorithm.
Use an appropriate data set for building the decision tree and apply this knowledge to
classify a new sample.
Code
import numpy as np
import math
import csv
def read_data(filename):
with open(filename, 'r') as csvfile:
datareader = csv.reader(csvfile, delimiter=',')
headers = next(datareader)
metadata = []
traindata = []
for name in headers:
metadata.append(name)
for row in datareader:
traindata.append(row)
class Node:
def __init__(self, attribute):
self.attribute = attribute
self.children = []
self.answer = ""
def __str__(self):
return self.attribute
for x in range(items.shape[0]):
for y in range(data.shape[0]):
if data[y, col] == items[x]:
count[x] += 1
for x in range(items.shape[0]):
dict[items[x]] = np.empty((int(count[x]), data.shape[1]), dtype="|S32")
pos = 0
for y in range(data.shape[0]):
if data[y, col] == items[x]:
dict[items[x]][pos] = data[y]
pos += 1
if delete:
dict[items[x]] = np.delete(dict[items[x]], col, 1)
def entropy(S):
items = np.unique(S)
if items.size == 1:
return 0
for x in range(items.shape[0]):
counts[x] = sum(S == items[x]) / (S.size * 1.0)
total_size = data.shape[0]
entropies = np.zeros((items.shape[0], 1))
intrinsic = np.zeros((items.shape[0], 1))
for x in range(items.shape[0]):
ratio = dict[items[x]].shape[0]/(total_size * 1.0)
entropies[x] = ratio * entropy(dict[items[x]][:, -1])
intrinsic[x] = ratio * math.log(ratio, 2)
for x in range(entropies.shape[0]):
total_entropy -= entropies[x]
return total_entropy / iv
split = np.argmax(gains)
node = Node(metadata[split])
metadata = np.delete(metadata, split, 0)
for x in range(items.shape[0]):
child = create_node(dict[items[x]], metadata)
node.children.append((items[x], child))
return node
def empty(size):
s = ""
for x in range(size):
s += " "
return s