0% found this document useful (0 votes)
6 views

Pyth

hpbd

Uploaded by

yuonhboikk
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
6 views

Pyth

hpbd

Uploaded by

yuonhboikk
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 5

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Script: train a decision tree on a single iris feature (sepal length),
# print its accuracy on a held-out split, and draw the fitted tree.

# Load the iris data set.
iris = load_iris()
X = iris.data  # feature matrix
y = iris.target  # array of class labels

# Split the data into a training set and a test set (20% held out).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the decision-tree model.
clf = DecisionTreeClassifier()

# Use only the sepal-length feature.
sepal_length_index = 0  # column index of sepal length in the feature matrix X
# reshape(-1, 1) turns the selected 1-D column into a matrix of shape (n, 1).
X_train_sepal_length = X_train[:, sepal_length_index].reshape(-1, 1)
X_test_sepal_length = X_test[:, sepal_length_index].reshape(-1, 1)

# Train the model on the sepal-length feature.
clf.fit(X_train_sepal_length, y_train)

# Evaluate the model on the test set.
accuracy = clf.score(X_test_sepal_length, y_test)
print("Accuracy:", accuracy)

# Draw the fitted tree.
plt.figure(figsize=(10, 8))
plot_tree(
    clf,
    feature_names=['sepal length'],
    # Pass a plain list: iris.target_names is a NumPy array, and some
    # scikit-learn releases validate class_names as a list and reject arrays.
    class_names=list(iris.target_names),
    filled=True,
    rounded=True,
)
plt.show()

import math
import numpy as np
from anytree import Node, RenderTree  # Node creates the tree nodes; RenderTree renders the tree as text.

# Column names: four weather attributes followed by the class label ("play").
data_properties = ["Outlook", "Temperature", "Humidity", "Wind", "play"]

# Training examples; every row lists its values in data_properties order,
# so the last item of a row is its "play" label.
data_Base = [
    ["Sunny", "Hot", "High", "Weak", "No"],
    ["Sunny", "Hot", "High", "Strong", "No"],
    ["Overcast", "Hot", "High", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Strong", "No"],
    ["Overcast", "Cool", "Normal", "Strong", "Yes"],
    ["Sunny", "Mild", "High", "Weak", "No"],
    ["Sunny", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "Normal", "Weak", "Yes"],
    ["Sunny", "Mild", "Normal", "Strong", "Yes"],
    ["Overcast", "Mild", "High", "Strong", "Yes"],
    ["Overcast", "Hot", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Strong", "No"],
]
#zeps = np.finfo(float).eps
def return_name_properties(data_case, ID_properties):
    """Return the distinct values of one column, in first-seen order.

    Args:
        data_case: sequence of rows; each row is indexable by column.
        ID_properties: index of the column to scan.

    Returns:
        A list holding each distinct value of that column exactly once,
        ordered by first appearance, e.g. a column containing
        'young', 'middle', 'young', 'old' yields ['young', 'middle', 'old'].
    """
    list_properties = []
    for row in data_case:
        value = row[ID_properties]
        # List membership (not a set) so unhashable cell values keep working.
        if value not in list_properties:
            list_properties.append(value)
    return list_properties
def return_number_properties(data_case, ID_properties, properties):
    """Count the rows whose value in one column equals a given value.

    Args:
        data_case: sequence of rows; each row is indexable by column.
        ID_properties: index of the column to inspect.
        properties: the value to count.

    Returns:
        The number of rows with ``row[ID_properties] == properties``.
    """
    return sum(1 for row in data_case if row[ID_properties] == properties)
def number_label_in_properties(data_case, name_properties, ID_properties):
    """Count the class labels among the rows that carry a given attribute value.

    The class label is the last item of every row.

    Args:
        data_case: sequence of rows; each row is indexable by column.
        name_properties: attribute value that selects the rows to count.
        ID_properties: index of the column compared with ``name_properties``.

    Returns:
        A ``(counts, names)`` pair of parallel lists. ``names`` holds the
        labels that occur at least once among the selected rows, ordered by
        their first appearance in the whole of ``data_case``; ``counts``
        holds the matching occurrence counts. Example: ``([4, 2], ['Yes', 'No'])``.
    """
    # One pass: register every label in first-seen order (dicts keep insertion
    # order) and count it only for rows that match the requested value.
    # Labels are used as dict keys, so they must be hashable.
    counts_by_label = {}
    for row in data_case:
        label = row[-1]
        counts_by_label.setdefault(label, 0)
        if row[ID_properties] == name_properties:
            counts_by_label[label] += 1

    list_label = []
    return_name = []
    for label, num_label in counts_by_label.items():
        # Labels that never occur with this attribute value are left out.
        if num_label != 0:
            list_label.append(num_label)
            return_name.append(label)
    return list_label, return_name
def _entropi(S_label): # npj vaof liss ch?a th�ng tin v? s? lu?ng label c?a thu?
c t�nh VD : [2,3] rooif trar veef gias trij entropi
sum_label = 0.0
entropi = 0.0
for num_label in S_label:
sum_label = sum_label + num_label
for num_label in S_label:
fraction = num_label / (sum_label + 0.00000000000000001)#k? thu?t nh?m
tr�nh l?i chia cho 0 trong t�nh to�n
entropi += -fraction*math.log2(fraction+ 0.00000000000000001)
return entropi
def catualtoin_entropi(data_case, ID_properties):
    """Return the weighted label entropy left after splitting on one column.

    For every distinct value of column ``ID_properties`` the entropy of the
    class labels among the matching rows is computed and weighted by the
    share of rows carrying that value.

    Args:
        data_case: sequence of rows whose last item is the class label.
        ID_properties: index of the column to split on.

    Returns:
        The weighted sum of the per-value entropies, a non-negative float
        (0.0 for empty ``data_case``).
    """
    total_rows = len(data_case)
    weighted_entropi = 0.0
    for scan_name_p in return_name_properties(data_case, ID_properties):
        d_label, _ = number_label_in_properties(data_case, scan_name_p, ID_properties)
        # sum(d_label) is the number of rows that carry this attribute value.
        weighted_entropi += (sum(d_label) / total_rows) * _entropi(d_label)
    return weighted_entropi
def catualtion_Gain(data_case):
    """Pick the attribute with the highest information gain for ``data_case``.

    Column names come from the module-level ``data_properties`` list, whose
    last entry is the class label; all earlier entries are candidate
    attributes.

    Args:
        data_case: sequence of rows laid out in ``data_properties`` order.

    Returns:
        ``(index, name)`` of the attribute with the largest gain. On ties the
        attribute with the lowest index wins.
    """
    label_column = len(data_properties) - 1

    # Count every label inside data_case itself. Counting in the full data
    # set instead would produce fractions above 1 for subsets.
    liss_label_name = return_name_properties(data_case, label_column)
    result_label = [
        return_number_properties(data_case, label_column, name_label)
        for name_label in liss_label_name
    ]
    # Entropy of the class label before any split.
    entropi_label = _entropi(result_label)

    gain = [
        entropi_label - catualtoin_entropi(data_case, num_properties)
        for num_properties in range(label_column)
    ]
    local_gain = gain.index(max(gain))
    return local_gain, data_properties[local_gain]
def cover_data(data_case, name_properties, ID_properties):
    """Return the rows whose value in one column equals a given value.

    Args:
        data_case: sequence of rows; each row is indexable by column.
        name_properties: the value to keep.
        ID_properties: index of the column to compare.

    Returns:
        A new list with the matching rows in their original order. The rows
        themselves are not copied.
    """
    return [row for row in data_case if row[ID_properties] == name_properties]
def build_tree(data_case, tree=None):
    """Build the decision tree for ``data_case`` as anytree ``Node`` objects.

    The attribute with the highest gain becomes a node, each of its values
    becomes a child node, and below every value node sits either a leaf
    (named with the list of label names) or the sub-tree built from the rows
    carrying that value.

    Args:
        data_case: sequence of rows laid out in ``data_properties`` order.
        tree: node to attach the new attribute node to; ``None`` starts a new
            tree (and prints "New tree").

    Returns:
        The root node when ``tree`` is ``None``, otherwise ``tree`` itself.
    """
    # Index and name of the attribute with the largest gain.
    loc_gain_max, name_gain_max = catualtion_Gain(data_case)
    name_properties_max = return_name_properties(data_case, loc_gain_max)

    if tree is None:
        print("New tree")
        tree = Node(name_gain_max)
        tree_properties_name = tree
    else:
        tree_properties_name = Node(name_gain_max, parent=tree)

    for value in name_properties_max:
        tree_properties = Node(value, parent=tree_properties_name)  # attribute value
        new_data = cover_data(data_case, value, loc_gain_max)
        val_label, str_label = number_label_in_properties(new_data, value, loc_gain_max)
        # Stop when the subset is pure. Also stop when the split removed no
        # rows (the chosen attribute has a single value here): recursing on
        # identical data would never terminate, so the remaining label names
        # become the leaf. This replaces the former bare ``except`` retry.
        if len(val_label) == 1 or len(new_data) == len(data_case):
            Node(str_label, parent=tree_properties)
        else:
            build_tree(new_data, tree_properties)
    return tree

def build_tree_list(data_case, tree=None):
    """Build the decision tree for ``data_case`` as nested dictionaries.

    The result has the shape ``{attribute: {value: subtree_or_labels}}``: a
    branch holds either the list of label names (a leaf) or another
    dictionary of the same shape built from the rows carrying that value.

    Args:
        data_case: sequence of rows laid out in ``data_properties`` order.
        tree: dictionary to add the attribute entry to; ``None`` creates one.

    Returns:
        The dictionary holding the entry for the chosen attribute.
    """
    # Index and name of the attribute with the largest gain.
    loc_gain_max, name_gain_max = catualtion_Gain(data_case)
    name_properties_max = return_name_properties(data_case, loc_gain_max)

    if tree is None:
        tree = {}
    branches = tree[str(name_gain_max)] = {}

    for value in name_properties_max:
        new_data = cover_data(data_case, value, loc_gain_max)
        val_label, str_label = number_label_in_properties(new_data, value, loc_gain_max)
        # Stop when the subset is pure. Also stop when the split removed no
        # rows (the chosen attribute has a single value here): recursing on
        # identical data would never terminate, so the remaining label names
        # become the leaf.
        if len(val_label) == 1 or len(new_data) == len(data_case):
            branches[value] = str_label
        else:
            branches[value] = build_tree_list(new_data)
    return tree

# Built-in sample used when the user does not type a test case.
test = {'Outlook': 'Sunny', 'Temperature': 'Hot', 'Humidity': 'High', 'Wind': 'Weak'}


def input_data_test():
    """Ask the user for one value per attribute and return them as a dict.

    Every prompt is repeated until the answer is exactly one of the values
    listed in its parentheses. The collected dictionary is printed before it
    is returned.

    Returns:
        A dict keyed by the first four names of the module-level
        ``data_properties`` list, each mapped to the validated answer.
    """
    # (prompt, accepted answers), in the same order as data_properties.
    questions = (
        ("Outlook(Overcast, Rain, Sunny): ", ("Overcast", "Rain", "Sunny")),
        ("Temperature (Hot,Mild,Cool): ", ("Hot", "Mild", "Cool")),
        ("Humidity(High,Normal): ", ("High", "Normal")),
        ("Wind (Weak,Strong): ", ("Weak", "Strong")),
    )
    data_input = {}
    for name, (prompt, allowed) in zip(data_properties, questions):
        answer = input(prompt)
        while answer not in allowed:
            # Message: "enter a value in the format suggested in the parentheses".
            print("Nhap dung dinh dang goi y trong ngoac!")
            answer = input(prompt)
        data_input[str(name)] = answer
    print("SHOW data input : ")
    print(data_input)
    return data_input

# Build the tree as anytree nodes and print it.
tree = build_tree(data_Base)
print("Show Tree Print")
for pre, fill, node in RenderTree(tree):
    print(f"{pre}{node.name}")

# Build the same tree as nested dictionaries and print it.
list_tree = build_tree_list(data_Base)
print("Show Tree LIST")
import pprint
pprint.pprint(list_tree)

print("========================================================")
# Menu: (1) type a test case by hand, (2) use the built-in sample.
sel = input(
    "Nhập dữ liệu : (1) \n Lấy dữ liệu mẫu : "
    "('Outlook':'Sunny','Temperature':'Hot','Humidity':'High','Wind':'Weak') => (2) \n "
    "Lựa chọn : "
)
# Compare as text so a non-numeric answer falls back to the sample instead
# of raising ValueError.
if sel.strip() == "1":
    test = input_data_test()
# NOTE(review): the extracted source lost its indentation; these three prints
# are assumed to run for both menu choices - confirm against the original.
print("=====================DATA TEST====================")
pprint.pprint(test)
print("========================================================")

def try_test(test, tree, default=None):
    """Classify one sample by walking a nested-dictionary decision tree.

    Args:
        test: dict mapping attribute names to the sample's values.
        tree: dict of the form ``{attribute: {value: subtree_or_result}}``.
        default: returned when the sample cannot be classified.

    Returns:
        The non-dict value reached at the end of the walk, or ``default``
        when the tree is empty, the sample lacks the tested attribute, or its
        value has no branch - at any depth of the tree.
    """
    if not tree:
        return default
    attribute = next(iter(tree))  # attribute tested at the root of this (sub)tree
    branches = tree[attribute]
    if attribute not in test or test[attribute] not in branches:
        return default
    result = branches[test[attribute]]  # sub-tree or final result for the sample's value
    if isinstance(result, dict):
        # Another attribute is tested below: recurse, keeping the caller's default.
        return try_test(test, result, default)
    return result
# Classify the selected sample with the dictionary tree and report the outcome.
ketquathuantoan = try_test(test, list_tree)
if ketquathuantoan is None:
    # No branch matched the sample.
    print("Loi Thuan Toan")
else:
    print("Ket qua cua du lieu la : ")
    print(ketquathuantoan)

You might also like