Decision Tree

In [12]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the PlayTennis dataset (raw string avoids backslash escapes in the Windows path)
data=pd.read_csv(r'z:\ML\playtennis.csv')
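
If the CSV file is not available locally, the same 14-row PlayTennis table (shown in Out [2] below) can be built directly in memory; a minimal sketch, using the column values listed there:

data = pd.DataFrame({
    'outlook':     ['Sunny','Sunny','Overcast','Rain','Rain','Rain','Overcast','Sunny','Sunny','Rain','Sunny','Overcast','Overcast','Rain'],
    'Temperature': ['Hot','Hot','Hot','Mild','Cool','Cool','Cool','Mild','Cool','Mild','Mild','Mild','Hot','Mild'],
    'Humidity':    ['High','High','High','High','Normal','Normal','Normal','High','Normal','Normal','Normal','High','Normal','High'],
    'Wind':        ['Weak','Strong','Weak','Weak','Weak','Strong','Strong','Weak','Weak','Weak','Strong','Strong','Weak','Strong'],
    'PlayTennis':  ['No','No','Yes','Yes','Yes','No','Yes','No','Yes','Yes','Yes','Yes','Yes','No']
})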

In [2]: data

Out [2]:
     outlook Temperature Humidity    Wind PlayTennis
0      Sunny         Hot     High    Weak         No
1      Sunny         Hot     High  Strong         No
2   Overcast         Hot     High    Weak        Yes
3       Rain        Mild     High    Weak        Yes
4       Rain        Cool   Normal    Weak        Yes
5       Rain        Cool   Normal  Strong         No
6   Overcast        Cool   Normal  Strong        Yes
7      Sunny        Mild     High    Weak         No
8      Sunny        Cool   Normal    Weak        Yes
9       Rain        Mild   Normal    Weak        Yes
10     Sunny        Mild   Normal  Strong        Yes
11  Overcast        Mild     High  Strong        Yes
12  Overcast         Hot   Normal    Weak        Yes
13      Rain        Mild     High  Strong         No

In [3]:
from sklearn.preprocessing import LabelEncoder
Le=LabelEncoder()

In [6]:
# Encode each categorical column to integers (labels are assigned in alphabetical order)
data['outlook']=Le.fit_transform(data['outlook'])
data['Temperature']=Le.fit_transform(data['Temperature'])
data['Humidity']=Le.fit_transform(data['Humidity'])
data['Wind']=Le.fit_transform(data['Wind'])
data['PlayTennis']=Le.fit_transform(data['PlayTennis'])
data

Out [6]:
    outlook  Temperature  Humidity  Wind  PlayTennis
0         2            1         0     1           0
1         2            1         0     0           0
2         0            1         0     1           1
3         1            2         0     1           1
4         1            0         1     1           1
5         1            0         1     0           0
6         0            0         1     0           1
7         2            2         0     1           0
8         2            0         1     1           1
9         1            2         1     1           1
10        2            2         1     0           1
11        0            2         0     0           1
12        0            1         1     1           1
13        1            2         0     0           0
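
Note that a single LabelEncoder instance is reused above, so each fit_transform overwrites its classes_ and the original strings cannot be recovered afterwards. A minimal sketch of the same encoding step that keeps one fitted encoder per column (run on the original string DataFrame; the encoders dict name is only illustrative):

encoders = {}
for col in data.columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])   # labels assigned in alphabetical order
    encoders[col] = le                        # keep the fitted encoder for this column
# e.g. encoders['outlook'].inverse_transform([0, 1, 2]) -> ['Overcast', 'Rain', 'Sunny']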

In [7]: x=data.drop(['PlayTennis'],axis=1)   # feature columns
y=data['PlayTennis']                         # target column
from sklearn import tree
import matplotlib
clf=tree.DecisionTreeClassifier(criterion='entropy')   # use entropy (information gain) to choose splits
clf=clf.fit(x,y)
tree.plot_tree(clf)

Matplotlib is building the font cache; this may take a moment.

Out [7]: [Text(0.4444444444444444, 0.9, 'x[0] <= 0.5\nentropy = 0.94\nsamples = 14\nvalue = [5, 9]'),
Text(0.3333333333333333, 0.7, 'entropy = 0.0\nsamples = 4\nvalue = [0, 4]'),
Text(0.5555555555555556, 0.7, 'x[2] <= 0.5\nentropy = 1.0\nsamples = 10\nvalue = [5, 5]'),
Text(0.3333333333333333, 0.5, 'x[0] <= 1.5\nentropy = 0.722\nsamples = 5\nvalue = [4, 1]'),
Text(0.2222222222222222, 0.3, 'x[3] <= 0.5\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]'),
Text(0.1111111111111111, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [1, 0]'),
Text(0.3333333333333333, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [0, 1]'),
Text(0.4444444444444444, 0.3, 'entropy = 0.0\nsamples = 3\nvalue = [3, 0]'),
Text(0.7777777777777778, 0.5, 'x[3] <= 0.5\nentropy = 0.722\nsamples = 5\nvalue = [1, 4]'),
Text(0.6666666666666666, 0.3, 'x[1] <= 1.0\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]'),
Text(0.5555555555555556, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [1, 0]'),
Text(0.7777777777777778, 0.1, 'entropy = 0.0\nsamples = 1\nvalue = [0, 1]'),
Text(0.8888888888888888, 0.3, 'entropy = 0.0\nsamples = 3\nvalue = [0, 3]')]
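
The root node's entropy of 0.94 reported above can be verified by hand from the class counts at the root (value = [5, 9], i.e. 5 No and 9 Yes); a minimal sketch:

counts = np.array([5, 9])                 # class counts at the root node
p = counts / counts.sum()                 # class proportions
root_entropy = -(p * np.log2(p)).sum()    # Shannon entropy in bits
print(round(root_entropy, 3))             # 0.94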

Decision tree using the iris dataset


In [1]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
iris=load_iris()
X=iris.data[:,2:]   # petal length and petal width only
Y=iris.target
tree_clf=DecisionTreeClassifier(max_depth=2,random_state=42)
tree_clf.fit(X,Y)

# Export the fitted tree to DOT format and render it with graphviz
from sklearn.tree import export_graphviz
export_graphviz(
    tree_clf,
    out_file="iris_tree.dot",
    feature_names=iris.feature_names[2:],
    class_names=iris.target_names,
    rounded=True,
    filled=True)
from graphviz import Source
Source.from_file('iris_tree.dot')

Out [1]: (rendered decision tree, reproduced as text)
petal length (cm) <= 2.45 | gini = 0.667 | samples = 150 | value = [50, 50, 50] | class = setosa
|-- True:  gini = 0.0 | samples = 50 | value = [50, 0, 0] | class = setosa
|-- False: petal width (cm) <= 1.75 | gini = 0.5 | samples = 100 | value = [0, 50, 50] | class = versicolor
    |-- True:  gini = 0.168 | samples = 54 | value = [0, 49, 5] | class = versicolor
    |-- False: gini = 0.043 | samples = 46 | value = [0, 1, 45] | class = virginica
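
The fitted tree can also be queried directly for predictions; a minimal sketch (the sample [5.0, 1.5] is only an illustration):

sample = [[5.0, 1.5]]                   # hypothetical petal length / petal width in cm
print(tree_clf.predict(sample))         # predicted class index (1 = versicolor for this sample)
print(tree_clf.predict_proba(sample))   # class probabilities from the matching leaf's value [0, 49, 5]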

In [8]:
%pip install graphviz

Defaulting to user installation because normal site-packages is not writeable


Requirement already satisfied: graphviz in c:\programdata\anaconda3\lib\site-packages (0.20.1)
Note: you may need to restart the kernel to use updated packages.

In [9]: import graphviz


# Export the PlayTennis tree as DOT text and render it inline
dot_data=tree.export_graphviz(clf,out_file=None)
graph=graphviz.Source(dot_data)
graph
Out [9]: (rendered decision tree, reproduced as text)
x[0] <= 0.5 | entropy = 0.94 | samples = 14 | value = [5, 9]
|-- True:  entropy = 0.0 | samples = 4 | value = [0, 4]
|-- False: x[2] <= 0.5 | entropy = 1.0 | samples = 10 | value = [5, 5]
    |-- True:  x[0] <= 1.5 | entropy = 0.722 | samples = 5 | value = [4, 1]
    |   |-- True:  x[3] <= 0.5 | entropy = 1.0 | samples = 2 | value = [1, 1]
    |   |   |-- True:  entropy = 0.0 | samples = 1 | value = [1, 0]
    |   |   |-- False: entropy = 0.0 | samples = 1 | value = [0, 1]
    |   |-- False: entropy = 0.0 | samples = 3 | value = [3, 0]
    |-- False: x[3] <= 0.5 | entropy = 0.722 | samples = 5 | value = [1, 4]
        |-- True:  x[1] <= 1.0 | entropy = 1.0 | samples = 2 | value = [1, 1]
        |   |-- True:  entropy = 0.0 | samples = 1 | value = [1, 0]
        |   |-- False: entropy = 0.0 | samples = 1 | value = [0, 1]
        |-- False: entropy = 0.0 | samples = 3 | value = [0, 3]
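
The same Source object can also be written to disk instead of only being displayed inline; a minimal sketch (the output filename is arbitrary):

graph.render('playtennis_tree', format='png', cleanup=True)   # writes playtennis_tree.png; cleanup removes the temporary DOT file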

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)   # 70/30 split
y_pred=clf.predict(x_test)   # clf was already fitted on the full dataset above
cf_matrix=confusion_matrix(y_test, y_pred)
print("Confusion Matrix:",cf_matrix)
print ("Accuracy : ",accuracy_score(y_test,y_pred)*100)

Confusion Matrix: [[3 0]
 [0 2]]
Accuracy :  100.0
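
Note that clf was fitted on all 14 rows before the split, so the test rows here were also seen during training and the 100% accuracy is optimistic. A minimal sketch of a stricter evaluation that refits on the training split only (the score will vary with the random split):

clf_holdout = tree.DecisionTreeClassifier(criterion='entropy', random_state=0)
clf_holdout.fit(x_train, y_train)               # train on the 70% split only
y_pred_holdout = clf_holdout.predict(x_test)    # evaluate on rows the model has not seen
print("Holdout accuracy:", accuracy_score(y_test, y_pred_holdout) * 100)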

In [13]:
import seaborn as sns
ax=sns.heatmap(cf_matrix,annot=True,cmap='Blues')
ax.set_title('Confusion Matrix with labels\n\n')
ax.set_xlabel('\nPredicted values')
ax.set_ylabel('Actual values')
ax.xaxis.set_ticklabels(['False','True'])
ax.yaxis.set_ticklabels(['False','True'])
plt.show()
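
scikit-learn also provides a built-in confusion-matrix plot that produces a similar figure; a minimal sketch using ConfusionMatrixDisplay (available in recent scikit-learn versions):

from sklearn.metrics import ConfusionMatrixDisplay
disp = ConfusionMatrixDisplay(confusion_matrix=cf_matrix, display_labels=['No', 'Yes'])
disp.plot(cmap='Blues')    # annotated counts with a colour bar, like the seaborn heatmap
plt.show()
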
In [ ]:
