ML Exp 3
ID3 ALGORITHM
Program:
import math
import csv
def load_csv(filename):
    """Read a CSV file and split it into data rows and a header row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(dataset, headers)`` tuple. ``headers`` is the first row of the
        file and ``dataset`` is the list of all remaining rows. Every row is
        a list of strings, as produced by ``csv.reader``.

    Raises:
        IndexError: If the file contains no rows at all (there is no header
            row to pop).
    """
    # newline="" leaves newline handling to the csv module, and the ``with``
    # block guarantees the file is closed even if parsing fails.
    with open(filename, "r", newline="") as handle:
        dataset = list(csv.reader(handle))
    headers = dataset.pop(0)
    return dataset, headers
class Node:
    """A single node of the decision tree.

    Args:
        attribute: Value stored on the node as ``attribute``.
    """

    def __init__(self, attribute):
        self.attribute = attribute
        # Fresh list per instance, so nodes never share their children.
        self.children = []
        # Starts as a single space; presumably overwritten with a class
        # label on leaf nodes -- TODO confirm against build_tree.
        self.answer = " "
def subtables(data, col, delete):
    """Partition the rows of ``data`` by the value found in column ``col``.

    Args:
        data: List of rows; each row is a list that can be indexed by
            ``col``. An empty list is accepted.
        col: Index of the column whose values define the partition.
        delete: When true, column ``col`` is removed from every row. The
            removal happens in place, so the rows held by ``data`` are
            shortened as well.

    Returns:
        An ``(attr, dic)`` tuple. ``attr`` lists the distinct values of the
        column in order of first appearance, and ``dic`` maps each of those
        values to the list of rows carrying it. The rows in ``dic`` are the
        same list objects as the rows in ``data``, not copies.
    """
    # Snapshot the column before touching any row. Once a row has had its
    # column deleted, row[col] is the *next* column's value, so grouping must
    # never re-read the (possibly already shortened) rows.
    coldata = [row[col] for row in data]
    attr = []
    dic = {}
    for row, value in zip(data, coldata):
        if value not in dic:
            dic[value] = []
            attr.append(value)
        if delete:
            del row[col]
        dic[value].append(row)
    return attr, dic
def entropy(s):
    """Return the base-2 Shannon entropy of the labels in ``s``.

    Args:
        s: Sized collection of hashable class labels. Any number of
            distinct labels is supported.

    Returns:
        ``0`` when ``s`` is empty or holds a single distinct label;
        otherwise the float ``-sum(p * log2(p))`` taken over the relative
        frequency ``p`` of every distinct label.
    """
    # Count every distinct label instead of assuming exactly two classes.
    tally = {}
    for label in s:
        tally[label] = tally.get(label, 0) + 1
    if len(tally) <= 1:
        return 0
    total = len(s)
    sums = 0
    for count in tally.values():
        share = count / total
        sums += -1 * share * math.log(share, 2)
    return sums
def compute_gain(data, col):
    """Return the information gain of splitting ``data`` on column ``col``.

    The gain is the entropy of the last column over all rows, minus the
    size-weighted entropy of the last column within each group of rows that
    share a value in column ``col``.

    Args:
        data: List of rows; the last element of each row is used as the
            label passed to ``entropy``.
        col: Index of the column to evaluate as a split.

    Returns:
        The gain as computed above.
    """
    # delete=False: this is only an evaluation, the rows must stay intact.
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    gain = entropy([row[-1] for row in data])
    for value in attr:
        subset = dic[value]
        weight = len(subset) / (total_size * 1.0)
        gain -= weight * entropy([row[-1] for row in subset])
    return gain
def build_tree(data,features):
Test data:
Day Outlook Temperature Humidity Wind
T1 Rain Cool Normal Strong
T2 Sunny Mild Normal Strong