ML Record
ML Record
DEPARTMENT OF CSE
REG NO:211421104223
Land.csv
area price
2600 550000
3000 565000
3200 610000
3600 680000
4000 725000
1
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
15
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
RESULT:
16
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
PROGRAM:(CANDIDATE-ELIMINATION ALGORITHM)
import numpy as np
import pandas as pd
# Candidate-Elimination update pass: walk every training instance and adjust
# the specific hypothesis (specific_h) and the general boundary (general_h).
# NOTE(review): the enclosing function header and the initialisation of
# specific_h / general_h are not part of this excerpt; concepts and target
# are presumably the attribute rows and their "yes"/"no" labels - confirm.
for i, h in enumerate(concepts):
    print("\nInstance", i+1 , "is ", h)
    if target[i] == "yes":
        print("Instance is Positive ")
        # Positive example: every attribute that disagrees with the current
        # specific hypothesis is generalised to '?' in both structures.
        for x in range(len(specific_h)):
            if h[x] != specific_h[x]:
                specific_h[x] = '?'
                general_h[x][x] = '?'
    elif target[i] == "no":
        print("Instance is Negative ")
        # Negative example: attributes that differ from the specific
        # hypothesis are copied onto the diagonal of general_h; matching
        # attributes are reset to '?'.
        for x in range(len(specific_h)):
            if h[x] != specific_h[x]:
                general_h[x][x] = specific_h[x]
            else:
                general_h[x][x] = '?'

# Positions of general_h rows that consist of six '?' entries.
indices = [
    i for i, val in enumerate(general_h)
    if val == ['?', '?', '?', '?', '?', '?']
]
17
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
for i in indices:
general_h.remove(['?', '?', '?', '?', '?', '?'])
return specific_h, general_h
enjoysports.csv
18
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
OUTPUT:
Instances are:
[['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
['sunny' 'warm' 'high' 'strong' 'warm' 'same']
['rainy' 'cold' 'high' 'strong' 'warm' 'change']
['sunny' 'warm' 'high' 'strong' 'cool' 'change']]
Generic Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
19
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
Generic Boundary after 4 Instance is [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Final Specific_h:
['sunny' 'warm' '?' 'strong' '?' '?']
Final General_h:
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
RESULT:
20
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
21
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
if instance[attribute] in tree[attribute].keys():
result = tree[attribute][instance[attribute]]
if isinstance(result,dict):
return classify(instance,result)
else:
return result
else:
return default
# Build a decision tree on the whole data set, using every column except
# the target column 'PlayTennis' as a candidate attribute.
attribute_names = list(df_tennis.columns)
attribute_names.remove('PlayTennis')
tree = id3(df_tennis, 'PlayTennis', attribute_names)
print("\n\nThe Resultant Decision Tree is :\n")
print(tree)

# Hold out the last four rows for testing and train on the rows before them.
# NOTE(review): iloc[1:-4] also leaves out row 0 of the training data -
# confirm whether iloc[:-4] was intended.
training_data = df_tennis.iloc[1:-4]
# Copy the slice so that adding a column below does not write into a view
# of df_tennis.
test_data = df_tennis.iloc[-4:].copy()
train_tree = id3(training_data, 'PlayTennis', attribute_names)
print("\n\nThe Resultant Decision train_tree is :\n")
print(train_tree)

# The fallback label is lowercase 'yes' so that it can match the lowercase
# yes/no labels in the PlayTennis column.
test_data['predicted2'] = test_data.apply(
    classify, axis=1, args=(train_tree, 'yes')
)
print('\n\n Training the model for a few samples, and again predicting \'Playtennis\' for remaining attribute')

# Fraction of held-out rows whose prediction equals the true label.
accuracy = (test_data['PlayTennis'] == test_data['predicted2']).mean()
print('The Accuracy for new trained data is : ' + str(accuracy))
tennis.csv
outlook temp humidity windy PlayTennis
sunny hot high FALSE no
sunny hot high TRUE no
overcast hot high FALSE yes
rainy mild high FALSE yes
rainy cool normal FALSE yes
rainy cool normal TRUE no
overcast cool normal TRUE yes
sunny mild high FALSE no
sunny cool normal FALSE yes
rainy mild normal FALSE yes
sunny mild normal TRUE yes
overcast mild high TRUE yes
overcast hot normal FALSE yes
rainy mild high TRUE no
22
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
OUTPUT:
The Resultant Decision Tree is :
{'outlook': {'overcast': 'yes', 'rainy': {'windy': {False: 'yes', True: 'no'}}, 'sunny': {'humidity':
{'high': 'no', 'normal': 'yes'}}}}
Training the model for a few samples, and again predicting 'Playtennis' for remaining
attribute
The Accuracy for new trained data is : 0.75
RESULT:
23
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
# Load the driver data and pull out the two feature columns as arrays.
data = pd.read_csv("Desktop//clusterdata.csv")
df1 = pd.DataFrame(data)
print(df1)
f1 = df1['Distance_Feature'].values
f2 = df1['Speeding_Feature'].values

# Lay out a 2x3 grid of labelled placeholder subplots.
# NOTE(review): plt, KMeans and the subplot selections that precede each
# section below are not part of this excerpt.
fig = plt.figure()
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for i in range(1, 7):
    plt.subplot(2, 3, i)
    plt.text(0.5, 0.5, str((2, 3, i)), fontsize=18, ha='center')

# One row per driver, columns (distance, speeding), as a plain 2-D ndarray.
X = np.column_stack((f1, f2))

# Raw data scatter plot.
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Dataset')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
plt.scatter(f1, f2)

# Axes set-up for the K-Means plot, then fit three clusters on X.
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('K- Means')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
kmeans_model = KMeans(n_clusters=3).fit(X)
for i, l in enumerate(kmeans_model.labels_):
24
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
# Gaussian-mixture section: fit three components on X and plot every point
# with the colour and marker of its predicted component.
# NOTE(review): X, f1, f2, colors, markers, plt and GaussianMixture are
# defined outside this excerpt.
# NOTE(review): plt.plot(3) plots the single value 3 on the current axes; it
# looks like a section counter and not a meaningful plot - confirm.
plt.plot(3)
plt.subplot(515)
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Gaussian Mixture')
plt.ylabel('speeding_feature')
plt.xlabel('distance_feature')
gmm = GaussianMixture(n_components=3).fit(X)
labels = gmm.predict(X)
for distance, speeding, label in zip(f1, f2, labels):
    plt.plot(distance, speeding, color=colors[label], marker=markers[label])
clusterdata.csv
Driver_ID Distance_Feature Speeding_Feature
3423311935 71.24 28
3423313212 52.53 25
3423313724 64.54 27
3423311373 55.69 22
3423310999 54.58 25
3423313857 41.91 10
3423312432 58.64 20
3423311434 52.02 8
3423311328 31.25 34
3423312488 44.31 19
3423311254 49.35 40
3423312943 58.07 45
3423312536 44.22 22
3423311542 55.73 19
3423312176 46.63 43
3423314176 52.97 32
3423314202 46.25 35
3423311346 51.55 27
3423310666 57.05 26
3423313527 58.45 30
3423312182 43.42 23
3423313590 55.68 37
3423312268 55.15 18
25
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
OUTPUT:
Driver_ID Distance_Feature Speeding_Feature
0 3423311935 71.24 28
1 3423313212 52.53 25
2 3423313724 64.54 27
3 3423311373 55.69 22
4 3423310999 54.58 25
5 3423313857 41.91 10
6 3423312432 58.64 20
7 3423311434 52.02 8
8 3423311328 31.25 34
9 3423312488 44.31 19
10 3423311254 49.35 40
11 3423312943 58.07 45
12 3423312536 44.22 22
13 3423311542 55.73 19
14 3423312176 46.63 43
15 3423314176 52.97 32
16 3423314202 46.25 35
17 3423311346 51.55 27
18 3423310666 57.05 26
19 3423313527 58.45 30
20 3423312182 43.42 23
21 3423313590 55.68 37
22 3423312268 55.15 18
26
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
RESULT:
27
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223
OUTPUT:
Confusion matrix is as follows
[[15 0 0]
[ 0 13 2]
[ 0 0 15]]
Accuracy Matrices
Precision recall f1-score support
accuracy 0.96 45
macro avg 0.96 0.96 0.96 45
Weighted avg 0.96 0.96 0.96 45
RESULT:
28