# Python
# Python
# Draw the tree model.
# NOTE(review): clf, iris, plt and plot_tree are not defined in the visible
# part of the file; presumably they are set up earlier - confirm.
plt.figure(figsize=(10, 8))
plot_tree(clf, feature_names=['sepal length'], class_names=iris.target_names,
filled=True, rounded=True)
# Plain variant of the same call, without the filled/rounded styling:
#plot_tree(clf, feature_names=['sepal length'], class_names=iris.target_names)
plt.show()
import math
import numpy as np
# The Node class is used to create the tree nodes; RenderTree is used to
# display the tree.
from anytree import Node, RenderTree
# Column names of the training table. The last column ("play") is the class
# label that the helper functions read through index -1.
data_properties = ["Outlook", "Temperature", "Humidity", "Wind", "play"]

# Training samples: one row per observation, columns in data_properties order.
data_Base = [
    ["Sunny", "Hot", "High", "Weak", "No"],
    ["Sunny", "Hot", "High", "Strong", "No"],
    ["Overcast", "Hot", "High", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Strong", "No"],
    ["Overcast", "Cool", "Normal", "Strong", "Yes"],
    ["Sunny", "Mild", "High", "Weak", "No"],
    ["Sunny", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "Normal", "Weak", "Yes"],
    ["Sunny", "Mild", "Normal", "Strong", "Yes"],
    ["Overcast", "Mild", "High", "Strong", "Yes"],
    ["Overcast", "Hot", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Strong", "No"],
]
#zeps = np.finfo(float).eps
def return_name_properties(data_case, ID_properties):
    """Return the distinct values found in one column, in first-seen order.

    Args:
        data_case: Sequence of rows; each row is an indexable sequence.
        ID_properties: Index of the column to scan.

    Returns:
        A list with every value that occurs in that column, without
        duplicates, ordered by first appearance. Empty for empty input.

    Example:
        >>> return_name_properties([["young"], ["middle"], ["young"], ["old"]], 0)
        ['young', 'middle', 'old']
    """
    list_properties = []
    for row in data_case:
        value = row[ID_properties]
        # List membership (not a set) keeps first-seen order and also works
        # for unhashable cell values.
        if value not in list_properties:
            list_properties.append(value)
    return list_properties
def return_number_properties(data_case, ID_properties, properties):
    """Count the rows whose value in one column equals a given value.

    Args:
        data_case: Sequence of rows; each row is an indexable sequence.
        ID_properties: Index of the column to inspect.
        properties: The value to count (e.g. "young").

    Returns:
        The number of rows with ``row[ID_properties] == properties``
        (0 when there is no match or the input is empty).
    """
    return sum(1 for row in data_case if row[ID_properties] == properties)
def number_label_in_properties(data_case, name_properties, ID_properties):
    """Count the class labels among the rows that have a given attribute value.

    The class label is the last element of each row. Labels are considered in
    the order in which they first appear in ``data_case`` as a whole, and
    labels with a zero count for the selected rows are left out.

    Args:
        data_case: Sequence of rows; the last element of each row is the label.
        name_properties: Attribute value that selects the rows (e.g. "Sunny").
        ID_properties: Index of the attribute column to compare against.

    Returns:
        A tuple ``(list_label, return_name)``: the non-zero counts and the
        label names they belong to, index-aligned.

    Example:
        >>> number_label_in_properties(
        ...     [["Sunny", "No"], ["Sunny", "Yes"], ["Rain", "Yes"]], "Sunny", 0)
        ([1, 1], ['No', 'Yes'])
    """
    # Distinct labels of the whole data set, first-seen order. The label
    # column is always the last one, so it is addressed with -1 throughout
    # instead of going through the global column-name list.
    label_names = []
    for row in data_case:
        if row[-1] not in label_names:
            label_names.append(row[-1])

    list_label = []
    return_name = []
    for label in label_names:
        num_label = sum(
            1
            for row in data_case
            if row[ID_properties] == name_properties and row[-1] == label
        )
        # Labels that never occur together with this attribute value are
        # omitted from both result lists.
        if num_label != 0:
            list_label.append(num_label)
            return_name.append(label)
    return list_label, return_name
def _entropi(S_label):
    """Return the base-2 entropy of a list of label counts.

    Args:
        S_label: Iterable-once-safe sequence of non-negative counts, one per
            label, e.g. ``[2, 3]``.

    Returns:
        ``-sum(p * log2(p))`` over the labels, where ``p`` is each count
        divided by the total. Returns 0.0 when the list is empty or all
        counts are zero.
    """
    sum_label = sum(S_label)
    # Nothing to measure: avoid dividing by zero.
    if sum_label == 0:
        return 0.0
    entropi = 0.0
    for num_label in S_label:
        # A zero count contributes nothing (p * log2(p) -> 0 as p -> 0);
        # skipping it avoids log2(0) without perturbing the other terms.
        if num_label == 0:
            continue
        fraction = num_label / sum_label
        entropi -= fraction * math.log2(fraction)
    return entropi
# NOTE(review): this block appears damaged. The header and the first two
# statements belong to catualtoin_entropi, but from the result_label.append(...)
# line onward the code uses names that are never assigned in the visible
# source (result_label, name_label), calls catualtoin_entropi itself once per
# attribute column, and returns a (column index, column name) pair. It looks
# like the rest of catualtoin_entropi and the header of a second,
# gain-selecting function were lost - confirm against the original file.
# NOTE(review): block indentation is missing here, as in the rest of the file.
def catualtoin_entropi(data_case,ID_properties):
# Distinct values of column ID_properties, in first-seen order.
name_properties = return_name_properties(data_case,ID_properties)
# Accumulator; it is never updated or returned in the visible code.
retun_entroipi = 0.00
# Record how many rows have name_label in column len(data_properties)-1.
# NOTE(review): this counts over the global data_Base, while the fractions
# below divide by len(data_case) - presumably data_case was intended; confirm.
result_label.append(return_number_properties(data_Base,len(data_properties)-1,
name_label))
# result_label holds the count of each value of the yes/no attribute.
entropi_label = 0
for value in result_label:
fraction = value/len(data_case)
entropi_label += -fraction*np.log2(fraction)
# entropi_label holds the entropy of the yes/no class.
# Gain per attribute column (every column except the last one): the class
# entropy minus the value catualtoin_entropi returns for that column.
gain = []
for num_properties in range(len(data_properties)-1):
gain.append(entropi_label - catualtoin_entropi(data_case,num_properties))
# Index of the largest gain and the matching column name.
local_gain = gain.index(max(gain))
name_gain_max = data_properties[gain.index(max(gain))]
return local_gain,name_gain_max
def cover_data(data_case, name_properties, ID_properties):
    """Return the rows whose value in one column equals a given value.

    Args:
        data_case: Sequence of rows; each row is an indexable sequence.
        name_properties: Attribute value to keep (e.g. "Sunny").
        ID_properties: Index of the column to compare against.

    Returns:
        A new list holding the matching rows in their original order. The
        rows themselves are not copied.
    """
    return [row for row in data_case if row[ID_properties] == name_properties]
# NOTE(review): build_tree has no body in this copy of the file; the statement
# that follows the header is a module-level assignment, not part of the
# function. The body appears to have been lost - restore it from the original.
def build_tree(data_case,tree = None):
# Sample test case; the same values are shown as option (2) in the interactive
# prompt further down.
test={'Outlook':'Sunny','Temperature':'Hot','Humidity':'High','Wind':'Weak'}
def input_data_test():
    """Ask the user for a test sample on stdin and return it as a dict.

    The Outlook value is re-requested until it is exactly one of
    "Overcast", "Rain" or "Sunny".

    Returns:
        A dict keyed by attribute name (taken from ``data_properties``)
        holding the value the user typed.
    """
    data_input = {}
    outlook_key = str(data_properties[0])
    data_input[outlook_key] = input("Outlook(Overcast, Rain, Sunny): ")
    # Keep asking until one of the suggested values is entered verbatim.
    while data_input[outlook_key] not in ("Overcast", "Rain", "Sunny"):
        print("Nhap dung dinh dang goi y trong ngoac!")
        data_input[outlook_key] = input("Outlook(Overcast, Rain, Sunny): ")
    # NOTE(review): only the Outlook prompt is present in this copy of the
    # file; prompts for the remaining attributes appear to be missing.
    # The caller assigns and prints this function's result, so return it.
    return data_input
import pprint

# Build the tree from the training table and print it node by node.
tree = build_tree(data_Base)
print("Show Tree Print")
for pre, _, node in RenderTree(tree):
    print("%s%s" % (pre, node.name))

# Same data rendered through build_tree_list and pretty-printed.
list_tree = build_tree_list(data_Base)
print("Show Tree LIST")
pprint.pprint(list_tree)
print("========================================================")

# Option 1: type a sample by hand. Option 2: use the predefined sample `test`.
sel = input(
    "Nhập dữ liệu : (1) \n Lấy dữ liệu mẫu : "
    "('Outlook':'Sunny','Temperature':'Hot','Humidity':'High','Wind':'Weak') => (2) \n "
    "Lựa chọn : "
)
# Non-numeric input is treated as an invalid choice instead of crashing.
try:
    choice = int(sel)
except ValueError:
    choice = None

if choice == 1:
    test = input_data_test()

if choice in (1, 2):
    # For option 2 `test` is the sample dict defined earlier in the file.
    print("=====================DATA TEST====================")
    pprint.pprint(test)
    print("========================================================")
    # NOTE(review): the step that classifies `test` with the tree appears to
    # be missing from this copy of the file.
else:
    print("Loi Thuan Toan")