DM (LP1) 1
DM (LP1) 1
Fig:Decision Tree
Decision tree classification:
Fig:1
Fig 2
Tree Pruning:
Tree pruning is performed in order to remove anomalies in the training data due to noise or
outliers. The pruned trees are smaller and less complex.
Entropy:
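A decision tree is built top-down from a root node and involves partitioning the data into subsets
that contain instances with similar values (homogeneous). The algorithm uses entropy to calculate the
homogeneity of a sample. If the sample is completely homogeneous, the entropy is zero; if the
sample is equally divided between two classes, the entropy is one (using base-2 logarithms).
Note: the program below computes entropy with base-10 logarithms, which scales every value by the
same constant and therefore does not change which attribute has the highest information gain.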
Information Gain:
The information gain is based on the decrease in entropy after a dataset is split on an attribute.
Constructing a decision tree is all about finding the attribute that returns the highest information
gain (i.e., the most homogeneous branches).
Algorithm:
(1) create a node N;
(2) if tuples in D are all of the same class, C, then
(3)     return N as a leaf node labeled with the class C;
(4) if attribute_list is empty then
(5)     return N as a leaf node labeled with the majority class in D; // majority voting
(6) apply Attribute_selection_method(D, attribute_list) to find the “best” splitting_criterion;
(7) label node N with splitting_criterion;
(8) if splitting_attribute is discrete-valued and multiway splits allowed then // not restricted to binary trees
(9)     attribute_list ← attribute_list − splitting_attribute; // remove splitting_attribute
(10) for each outcome j of splitting_criterion // partition the tuples and grow subtrees for each partition
(11)     let Dj be the set of data tuples in D satisfying outcome j; // a partition
(12)     if Dj is empty then
(13)         attach a leaf labeled with the majority class in D to node N;
(14)     else attach the node returned by Generate_decision_tree(Dj, attribute_list) to node N;
     endfor
(15) return N;
Program-
Aim: To implement classification using the Decision Tree algorithm.
import java.util.*;
class tree
{
public double hd;
public double p;
char ch;
String parent;
tree(char c,int x)
{
ch=c;
if(x==1)
parent="outlook";
else if(x==2)
parent="temperature";
else if(x==3)
parent="humidity";
else if(x==4)
parent="windy";
}
}
class d_tree
{
static char outlook[]={'S','S','O','R','R','R','O','S','S','R','S','O','O','R'};
static char temperature[]={'B','B','B','M','C','C','C','M','C','M','M','M','B','M'};
static char humidity[]={'H','H','H','H','N','N','N','H','N','N','N','H','N','H'};
static char windy[]={'F','T','F','F','F','T','T','F','F','F','T','T','F','T'};
static char class1[]={'N','N','P','P','P','N','P','N','P','P','P','P','P','N'};
static double G1,G2,G3,G4;
static double HD;
static double play=9.0/14.0;
static double nplay=5.0/14.0;
static double temp1,temp2;
static int row=0,column=0;
static char classify[][]=new char[10][10];
if(choice==2)
{for(int i=0;i<14;++i)
{if(t.ch==temperature[i] && class1[i]=='P')
++count1;
else
if(t.ch==temperature[i] && class1[i]=='N')
++count2;
}}
if(choice==3)
{for(int i=0;i<14;++i)
{if(t.ch==humidity[i] && class1[i]=='P')
++count1;
else
if(t.ch==humidity[i] && class1[i]=='N')
++count2;
}}
if(choice==4)
{for(int i=0;i<14;++i)
{if(t.ch==windy[i] && class1[i]=='P')
++count1;
else
if(t.ch==windy[i] && class1[i]=='N')
++count2;
}}
temp1=count1/(count1+count2);
temp2=count2/(count1+count2);
t.p=(count1+count2)/14;
if(temp1==0 || temp2==0)
t.hd=0;
else
t.hd=temp1*(Math.log(1/temp1)/Math.log(10))+temp2*(Math.log(1/temp2)/Math.log(10));
return t;
}
for(int i=0;i<cnt;++i)
{
if(ca[i][0]==c1)
++count1;
if(ca[i][1]==c2)
++count2;
if(ca[i][2]==c3)
++count3;
}
if(count1==cnt)
return 0;
else
if(count2==cnt)
return 1;
else
if(count3==cnt)
return 2;
else
return -1;
}
int check1=0,check2=0;
for(int i=0;i<cnt1;++i)
{
if(ca1[i][3]=='P')
++check1;
else
if(ca1[i][3]=='N')
++check2;
if(check1==cnt1)
{
classify[row][column++]=ch;
classify[row][column++]='P';
System.out.print("--------->PLAY");
return;
}
else
if(check2==cnt1)
{
classify[row][column++]=ch;
classify[row][column++]='N';
System.out.print("--------->NO PLAY");
return;
}
}
classify[row][column++]=ch;
int z=check_if_equal(ca2,cnt1);
/*
System.out.println("The array containing ch and play");
for(int i=0;i<9;++i)
{
for(int j=0;j<9;++j)
{
System.out.print(" "+ca2[i][j]);
}
System.out.println();
}
*/
//The array containing outlook value ch and class1=N
cnt1=0;
for(int i=0;i<10;++i)
{
if(ca1[i][3]=='N')
{
ca3[cnt1][0]=ca1[i][0];
ca3[cnt1][1]=ca1[i][1];
ca3[cnt1][2]=ca1[i][2];
ca3[cnt1][3]=ca1[i][3];
++cnt1;
}}
++row;
column=0;
classify[row][column++]=ch;
z=check_if_equal(ca3,cnt1);
if(z==0)
{
System.out.print("--------->TEMPERATURE("+ca3[0][0]+")--------->NO PLAY");
classify[row][column++]=ca3[0][0];
classify[row][column++]='N';
}
else
if(z==1)
{
System.out.print("--------->HUMIDITY("+ca3[0][1]+")--------->NO PLAY");
classify[row][column++]=ca3[0][1];
classify[row][column++]='N';
}
else
if(z==2)
{
System.out.print("--------->WINDY("+ca3[0][2]+")--------->NO PLAY");
classify[row][column++]=ca3[0][2];
classify[row][column++]='N';
}
}
/**
 * Program entry point.
 *
 * Computes the entropy H(D) of the whole data set, the per-value entropies
 * via cal_hd and the information gain of each of the four attributes, then
 * looks the user's tuple up in the classify table and prints its class.
 *
 * Logarithms are taken to base 10 (Math.log(x)/Math.log(10)), so every
 * entropy and gain is a constant multiple of its base-2 counterpart.
 *
 * The lookup scans at most classify.length rows; the earlier unbounded
 * do/while ran past the end of the table (ArrayIndexOutOfBoundsException)
 * whenever no row matched the tuple.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String args[])
{
    Scanner scr=new Scanner(System.in);

    // H(D): entropy of the class distribution over all 14 training tuples.
    HD=play*(Math.log(1/play)/Math.log(10))+nplay*(Math.log(1/nplay)/Math.log(10));
    System.out.println("\nThe value of H(D) is "+HD);

    // Second argument selects the attribute column:
    // 1 = outlook, 2 = temperature, 3 = humidity, 4 = windy.
    sunny=cal_hd(sunny,1);
    overcast=cal_hd(overcast,1);
    rain=cal_hd(rain,1);
    hot=cal_hd(hot,2);
    mild=cal_hd(mild,2);
    cool=cal_hd(cool,2);
    high=cal_hd(high,3);
    normal=cal_hd(normal,3);
    tru=cal_hd(tru,4);
    fal=cal_hd(fal,4);

    // Gain(D, A) = H(D) - sum over the values v of A of p(v) * H(D_v).
    G1=HD-(sunny.p*sunny.hd+overcast.p*overcast.hd+rain.p*rain.hd);
    G2=HD-(hot.p*hot.hd+mild.p*mild.hd+cool.p*cool.hd);
    G3=HD-(high.p*high.hd+normal.p*normal.hd);
    G4=HD-(tru.p*tru.hd+fal.p*fal.hd);
    int gflag=0;
    /* Display classify
    for(int i=0;i<9;++i)
    {
    for(int j=0;j<9;++j)
    {
    System.out.print(" "+classify[i][j]);
    }
    System.out.println();
    }
    */
    System.out.print("Your input is ");
    for(int i=0;i<4;++i)
        System.out.print(" "+input[i]);

    // Each used row of classify is either {rootValue, class} (a leaf directly
    // under the root) or {rootValue, secondLevelValue, class}.
    boolean classified=false;
    for(int r=0;r<classify.length && !classified;++r)
    {
        if(input[0]!=classify[r][0])
            continue;

        char second=classify[r][1];
        char third=classify[r][2];

        if(second=='P')
        {
            System.out.println("\n\nTuple classified as PLAY");
            classified=true;
        }
        else if(second=='N' && third=='\u0000')
        {
            // Two-cell NO PLAY leaf: 'N' with nothing after it is a class label,
            // not the humidity value "normal" (which is always followed by a class).
            System.out.println("\n\nTuple classified as NO PLAY");
            classified=true;
        }
        else if(input[1]==second || input[2]==second || input[3]==second)
        {
            if(third=='P')
            {
                System.out.println("\n\nTuple classified as PLAY");
                classified=true;
            }
            else if(third=='N')
            {
                System.out.println("\n\nTuple classified as NO PLAY");
                classified=true;
            }
        }
    }

    if(!classified)
        System.out.println("\n\nTuple could not be classified");
}
}
/*OUTPUT1
C:\Users\varsha\Desktop\varsha>java d_tree
Gain(D,Outlook)= 0.07427909717678749
Gain(D,Temperature)= 0.00879686881360553
Gain(D,Humidity)= 0.04570704031674405
Gain(D,Windy)= 0.014487679755121663
OUTLOOK(S)
--------->HUMIDITY(N)--------->PLAY
--------->HUMIDITY(H)--------->NO PLAY
OUTLOOK(O)--------->PLAY
OUTLOOK(R)
--------->WINDY(F)--------->PLAY
--------->WINDY(T)--------->NO PLAY
Menu:
Outlook(S/O/R)= S
Temperature(B/M/C)= M
Humidity(H/N)= N
Windy(T/F)= T
Your input is S M N T
Tuple classified as PLAY
------------------------------------------------------------------------
OUTPUT2
C:\Users\varsha\Desktop\varsha>java d_tree
Gain(D,Outlook)= 0.07427909717678749
Gain(D,Temperature)= 0.00879686881360553
Gain(D,Humidity)= 0.04570704031674405
Gain(D,Windy)= 0.014487679755121663
OUTLOOK(S)
--------->HUMIDITY(N)--------->PLAY
--------->HUMIDITY(H)--------->NO PLAY
OUTLOOK(O)--------->PLAY
OUTLOOK(R)
--------->WINDY(F)--------->PLAY
--------->WINDY(T)--------->NO PLAY
Menu:
Temperature(B/M/C)= M
Humidity(H/N)= H
Windy(T/F)= T
Your input is S M H T