0% found this document useful (0 votes)
6 views

Pyth

hpbd

Uploaded by

yuonhboikk
Copyright
© © All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views

Pyth

hpbd

Uploaded by

yuonhboikk
Copyright
© © All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 5

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Demo: fit a decision tree on a single iris feature (sepal length),
# report its accuracy on a held-out split, and plot the fitted tree.

# Load the iris dataset.
iris = load_iris()
X = iris.data  # feature matrix
y = iris.target  # class-label array

# Split the data into a training set and a test set (20% held out).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the decision-tree model.
clf = DecisionTreeClassifier()

# Use only the sepal-length feature.
sepal_length_index = 0  # column index of sepal length in the feature matrix X
# reshape(-1, 1) turns the selected column into a matrix of shape (n, 1),
# i.e. n samples with one feature each.
X_train_sepal_length = X_train[:, sepal_length_index].reshape(-1, 1)
X_test_sepal_length = X_test[:, sepal_length_index].reshape(-1, 1)

# Train the model on the sepal-length feature.
clf.fit(X_train_sepal_length, y_train)

# Evaluate the model on the test set.
accuracy = clf.score(X_test_sepal_length, y_test)
print("Accuracy:", accuracy)

# Draw the fitted tree.
plt.figure(figsize=(10, 8))
plot_tree(
    clf,
    feature_names=['sepal length'],
    class_names=iris.target_names,
    filled=True,
    rounded=True,
)
plt.show()

import math
import pprint

import numpy as np
# Node creates the tree nodes; RenderTree is used to display the tree.
from anytree import Node, RenderTree

# Column names for the rows of data_Base, in column order. The functions
# below treat the last column ("play") as the class label.
data_properties = ["Outlook","Temperature","Humidity","Wind","play"]
# Training samples: one row per observation, values ordered as in
# data_properties.
data_Base = [
["Sunny","Hot","High","Weak","No"],
["Sunny","Hot","High","Strong","No"],
["Overcast","Hot","High","Weak","Yes"],
["Rain","Mild","High","Weak","Yes"],
["Rain","Cool","Normal","Weak","Yes"],
["Rain","Cool","Normal","Strong","No"],
["Overcast","Cool","Normal","Strong","Yes"],
["Sunny","Mild","High","Weak","No"],
["Sunny","Cool","Normal","Weak","Yes"],
["Rain","Mild","Normal","Weak","Yes"],
["Sunny","Mild","Normal","Strong","Yes"],
["Overcast","Mild","High","Strong","Yes"],
["Overcast","Hot","Normal","Weak","Yes"],
["Rain","Mild","High","Strong","No"]
]
#zeps = np.finfo(float).eps
def return_name_properties(data_case, ID_properties):
    """Return the distinct values of one column, in first-seen order.

    Args:
        data_case: Sequence of rows; each row is an indexable sequence.
        ID_properties: Index of the column to scan.

    Returns:
        A list with each value of that column exactly once, ordered by first
        appearance (e.g. a column holding "a", "b", "a" yields ["a", "b"]).
    """
    unique_values = []
    for row in data_case:
        value = row[ID_properties]
        # List membership (==) rather than a set keeps first-seen order and
        # places no hashability requirement on the cell values.
        if value not in unique_values:
            unique_values.append(value)
    return unique_values
def return_number_properties(data_case, ID_properties, properties):
    """Count the rows whose value in a given column equals ``properties``.

    Args:
        data_case: Sequence of rows; each row is an indexable sequence.
        ID_properties: Index of the column to inspect.
        properties: Value to look for in that column.

    Returns:
        The number of matching rows (0 when there are none).
    """
    return sum(1 for row in data_case if row[ID_properties] == properties)
def number_label_in_properties(data_case, name_properties, ID_properties):
    """Count the class labels among rows having a given attribute value.

    The class label is the last element of each row.

    Args:
        data_case: Sequence of rows; each row is an indexable sequence.
        name_properties: Attribute value that selects the rows to count.
        ID_properties: Index of the column holding that attribute.

    Returns:
        A tuple ``(counts, labels)`` of two parallel lists: ``counts[i]`` is
        the number of selected rows whose label is ``labels[i]``. Labels are
        ordered by first appearance in ``data_case``; labels that do not occur
        among the selected rows are omitted.
    """
    # Label order follows the whole data set, not just the selected rows.
    label_names = []
    for row in data_case:
        if row[-1] not in label_names:
            label_names.append(row[-1])

    selected_labels = [
        row[-1] for row in data_case if row[ID_properties] == name_properties
    ]

    counts = []
    present_labels = []
    for label in label_names:
        occurrences = selected_labels.count(label)
        if occurrences:
            counts.append(occurrences)
            present_labels.append(label)
    return counts, present_labels
def _entropi(S_label): # npj vaof liss ch?a th�ng tin v? s? lu?ng label c?a thu?
c t�nh VD : [2,3] rooif trar veef gias trij entropi
sum_label = 0.0
entropi = 0.0
for num_label in S_label:
sum_label = sum_label + num_label
for num_label in S_label:
fraction = num_label / (sum_label + 0.00000000000000001)#k? thu?t nh?m
tr�nh l?i chia cho 0 trong t�nh to�n
entropi += -fraction*math.log2(fraction+ 0.00000000000000001)
return entropi
def catualtoin_entropi(data_case, ID_properties):
    """Return the weighted label entropy after splitting on one column.

    For every distinct value of column ``ID_properties`` the label entropy of
    the rows holding that value is computed and weighted by the share of rows
    holding it.

    Args:
        data_case: Sequence of rows; the last element of a row is its label.
        ID_properties: Index of the column to split on.

    Returns:
        The sum of the weighted entropies as a non-negative float
        (``0.0`` for empty data).
    """
    total_rows = len(data_case)
    weighted_entropy = 0.0
    for value in return_name_properties(data_case, ID_properties):
        label_counts, _ = number_label_in_properties(
            data_case, value, ID_properties
        )
        weight = sum(label_counts) / total_rows
        weighted_entropy += weight * _entropi(label_counts)
    return weighted_entropy
def catualtion_Gain(data_case):
    """Find the attribute with the highest information gain.

    Reads the module-level ``data_properties`` to know how many columns exist
    and what they are called; the last column is the class label.

    Args:
        data_case: Sequence of rows to evaluate.

    Returns:
        A tuple ``(index, name)``: the column index of the attribute with the
        largest gain (the first one on ties) and its name from
        ``data_properties``.
    """
    label_column = len(data_properties) - 1

    # Count each label within data_case itself. Counting in the global
    # data_Base (as before) mixed counts of the full data set with the size
    # of the current subset and produced a meaningless entropy on recursion.
    label_counts = [
        return_number_properties(data_case, label_column, label)
        for label in return_name_properties(data_case, label_column)
    ]
    label_entropy = _entropi(label_counts)

    gains = [
        label_entropy - catualtoin_entropi(data_case, column)
        for column in range(label_column)
    ]
    best_column = gains.index(max(gains))
    return best_column, data_properties[best_column]
def cover_data(data_case, name_properties, ID_properties):
    """Return the rows whose value in a given column equals ``name_properties``.

    Args:
        data_case: Sequence of rows; each row is an indexable sequence.
        name_properties: Attribute value to keep.
        ID_properties: Index of the column to compare.

    Returns:
        A new list holding the matching rows (the row objects themselves, not
        copies), in their original order.
    """
    return [row for row in data_case if row[ID_properties] == name_properties]
def build_tree(data_case, tree=None):
    """Build the decision tree for ``data_case`` as an anytree hierarchy.

    The attribute with the highest gain becomes a node, each of its values
    becomes a child node, and below every value sits either a leaf carrying
    the label list or a recursively built subtree.

    Args:
        data_case: Sequence of rows; the last element of a row is its label.
        tree: Parent node to attach to. When ``None`` a new root is created
            (and "New tree" is printed).

    Returns:
        The root node when called without ``tree``; otherwise the ``tree``
        argument that was passed in.
    """
    # Index and name of the attribute with the largest gain.
    loc_gain_max, name_gain_max = catualtion_Gain(data_case)
    name_properties_max = return_name_properties(data_case, loc_gain_max)

    if tree is None:
        print("New tree")
        tree = Node(name_gain_max)
        tree_properties_name = tree
    else:
        tree_properties_name = Node(name_gain_max, parent=tree)

    for value in name_properties_max:
        tree_properties = Node(value, parent=tree_properties_name)  # attribute value
        new_data = cover_data(data_case, value, loc_gain_max)
        val_label, str_label = number_label_in_properties(
            new_data, value, loc_gain_max
        )
        # Stop when the subset is pure, or when the split removed no rows:
        # recursing on identical data would pick the same attribute again and
        # never terminate.
        if len(val_label) == 1 or len(new_data) == len(data_case):
            Node(str_label, parent=tree_properties)
        else:
            build_tree(new_data, tree_properties)
    return tree

def build_tree_list(data_case, tree=None):
    """Build the decision tree for ``data_case`` as nested dictionaries.

    The result has the shape ``{attribute: {value: subtree_or_labels}}``,
    where a leaf is the list of label names found for that value.

    Args:
        data_case: Sequence of rows; the last element of a row is its label.
        tree: Dictionary to add the attribute entry to. When ``None`` a new
            dictionary is created.

    Returns:
        The dictionary holding the tree (``tree`` itself when one was given).
    """
    # Index and name of the attribute with the largest gain.
    loc_gain_max, name_gain_max = catualtion_Gain(data_case)
    name_properties_max = return_name_properties(data_case, loc_gain_max)

    if tree is None:
        tree = {}
    branches = {}
    tree[str(name_gain_max)] = branches

    for value in name_properties_max:
        new_data = cover_data(data_case, value, loc_gain_max)
        val_label, str_label = number_label_in_properties(
            new_data, value, loc_gain_max
        )
        # Stop when the subset is pure, or when the split removed no rows:
        # recursing on identical data would pick the same attribute again and
        # never terminate.
        if len(val_label) == 1 or len(new_data) == len(data_case):
            branches[value] = str_label
        else:
            branches[value] = build_tree_list(new_data)
    return tree

# Default sample to classify; the script further down replaces it with
# interactive input when the user selects option 1.
test={'Outlook':'Sunny','Temperature':'Hot','Humidity':'High','Wind':'Weak'}
def _prompt_choice(prompt, choices):
    """Ask with ``prompt`` until the reply is exactly one of ``choices``."""
    answer = input(prompt)
    while answer not in choices:
        print("Nhap dung dinh dang goi y trong ngoac!")
        answer = input(prompt)
    return answer


def input_data_test():
    """Interactively read one sample from standard input.

    Each of the first four attributes named in the module-level
    ``data_properties`` is prompted for in turn; a prompt is repeated until
    the reply matches one of the values it lists (case-sensitive).

    Returns:
        A dict mapping each attribute name to the value entered. The dict is
        also printed before being returned.
    """
    # One (prompt, accepted values) pair per attribute, in column order.
    prompts = (
        ("Outlook(Overcast, Rain, Sunny): ", ("Overcast", "Rain", "Sunny")),
        ("Temperature (Hot,Mild,Cool): ", ("Hot", "Mild", "Cool")),
        ("Humidity(High,Normal): ", ("High", "Normal")),
        ("Wind (Weak,Strong): ", ("Weak", "Strong")),
    )
    data_input = {}
    for column, (prompt, choices) in enumerate(prompts):
        data_input[str(data_properties[column])] = _prompt_choice(prompt, choices)
    print("SHOW data input : ")
    print(data_input)
    return data_input

# Build the tree as an anytree hierarchy and print it.
tree = build_tree(data_Base)
print("Show Tree Print")
for pre, _fill, node in RenderTree(tree):
    print("%s%s" % (pre, node.name))

# Build the tree again as nested dicts; this form is used for classification.
list_tree = build_tree_list(data_Base)
print("Show Tree LIST")
pprint.pprint(list_tree)

print("========================================================")
# Prompt: "Enter data: (1) / Use the sample data (...) => (2) / Choice:".
# The Vietnamese diacritics are restored here; the literal had been reduced
# to '?' placeholders by an encoding loss.
sel = input(
    "Nhập dữ liệu : (1) \n Lấy dữ liệu mẫu : "
    "('Outlook':'Sunny','Temperature':'Hot','Humidity':'High','Wind':'Weak')"
    " => (2) \n Lựa chọn : "
)
try:
    use_manual_input = int(sel) == 1
except ValueError:
    # Anything that is not a number falls back to the built-in sample
    # instead of aborting the script.
    use_manual_input = False
if use_manual_input:
    test = input_data_test()
print("=====================DATA TEST====================")
pprint.pprint(test)
print("========================================================")

def try_test(test, tree, default=None):
    """Classify one sample by walking a nested-dict decision tree.

    Args:
        test: Dict mapping attribute names to the sample's values.
        tree: Tree of the shape ``{attribute: {value: subtree_or_result}}``;
            only the first key of each level is used as that level's
            attribute.
        default: Value returned when the sample cannot be classified.

    Returns:
        The non-dict value reached by following the sample's attribute
        values, or ``default`` when the tree is empty, the sample lacks the
        attribute, or its value has no branch at any level.
    """
    if not tree:
        return default

    attribute = next(iter(tree))  # attribute tested at this level
    branches = tree[attribute]
    if attribute not in test or test[attribute] not in branches:
        return default

    result = branches[test[attribute]]
    if isinstance(result, dict):
        # A dict is a subtree: keep descending, and forward ``default`` so a
        # miss deeper down reports the caller's default too.
        return try_test(test, result, default)
    return result
# Classify the selected sample with the nested-dict tree and report the
# outcome; None means no branch of the tree matched the sample.
ketquathuantoan = try_test(test, list_tree)
if ketquathuantoan is not None:
    print("Ket qua cua du lieu la : ")
    print(ketquathuantoan)
else:
    print("Loi Thuan Toan")

You might also like