0% found this document useful (0 votes)
16 views15 pages

ML Record

Ml

Uploaded by

Shereyas T.N
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views15 pages

ML Record

Ml

Uploaded by

Shereyas T.N
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 15

PANIMALAR ENGINEERING COLLEGE

DEPARTMENT OF CSE
REG NO:211421104223

PROGRAM: (LINEAR REGRESSION WITH DATASET)


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# Load the land-price table; the path is relative to the working directory.
model = pd.read_csv("Desktop//Land.csv")
print(model)

# scikit-learn expects a 2-D feature matrix, so the single 'area' column is
# reshaped to (n_samples, 1); the target stays 1-D.
x = np.array(model['area']).reshape(-1, 1)
y = np.array(model.price)
print(x)
print(y)

# Fit price as a linear function of area.
reg = linear_model.LinearRegression()
reg.fit(x, y)

# Prediction for an area of 3300, followed by the fitted slope and intercept.
# These were bare notebook expressions; as a script they must be printed to
# be visible.
print(reg.predict([[3300]]))
print(reg.coef_)
print(reg.intercept_)

# Plot the observations together with the fitted regression line.
plt.scatter(x, y, color='red', label='Data Points')
plt.plot(x, reg.predict(x), color='blue', label='Linear Regression')
plt.xlabel('Area')
plt.ylabel('Price')
plt.legend()  # the label= arguments above only appear once a legend is drawn
plt.show()

Land.csv

area price
2600 550000
3000 565000
3200 610000
3600 680000
4000 725000

1
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

OUTPUT: (LINEAR REGRESSION WITH DATASET)

PROGRAM: (LINEAR REGRESSION WITHOUT DATASET)


import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Hard-coded sample: one sales figure per temperature reading.
temperature = [20, 25, 30, 35, 40]
icesales = [13, 21, 25, 35, 38]

# np.array([temperature]) is (1, 5); transposing gives the (5, 1) column
# vector that LinearRegression expects as its feature matrix.
X = np.array([temperature]).T
Y = np.array(icesales)

rmodel = LinearRegression()
rmodel = rmodel.fit(X, Y)
rmodel_slope = rmodel.coef_
rmodel_intercept = rmodel.intercept_
print('Model Slope', rmodel_slope)
print('Model Intercept', rmodel_intercept)

# Both metrics are computed on the training points themselves.
Y_predict = rmodel.predict(X)
rmse = np.sqrt(mean_squared_error(Y, Y_predict))
r2 = rmodel.score(X, Y)
print('Model RMSE', rmse)
print('R-Squared Error', r2)

# Observations as stars, fitted values as a connected blue line.
plt.scatter(temperature, icesales, marker='*', edgecolors='r')
plt.plot(temperature, Y_predict, '-bo')
plt.show()

15
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

OUTPUT: (LINEAR REGRESSION WITHOUT DATASET)


Model Slope [1.28]
Model Intercept -12.0
Model RMSE 1.3856406460551007
R-Squared Error 0.9770992366412214

RESULT:

16
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

PROGRAM:(CANDIDATE-ELIMINATION ALGORITHM)
import numpy as np
import pandas as pd

# Load the training examples; the path is relative to the working directory.
data = pd.read_csv('desktop\\enjoysports.csv')

# Every column except the last is an attribute; the last column is the label.
concepts = np.array(data.iloc[:, 0:-1])
print("\nInstances are:\n", concepts)
target = np.array(data.iloc[:, -1])
print("\nTarget Values are: ", target)

def learn(concepts, target):
    """Compute the specific and general boundaries for the training examples.

    Args:
        concepts: 2-D array of attribute values, one row per instance. The
            first row seeds the specific hypothesis.
        target: Sequence of labels aligned with ``concepts``. Rows labelled
            "yes" are treated as positive and rows labelled "no" as negative;
            any other label leaves both boundaries untouched.

    Returns:
        A ``(specific_h, general_h)`` tuple. ``specific_h`` is a copy of the
        first instance with generalised attributes replaced by '?'.
        ``general_h`` is a list of hypotheses (lists), with rows consisting
        only of '?' removed.

    The progress of both boundaries is printed after every instance.
    """
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and genearal_h")
    print("\nSpecific Boundary: ", specific_h)

    n_attrs = len(specific_h)
    # One candidate general hypothesis per attribute, all fully general.
    general_h = [["?" for _ in range(n_attrs)] for _ in range(n_attrs)]
    print("\nGeneric Boundary: ", general_h)

    for i, h in enumerate(concepts):
        print("\nInstance", i+1 , "is ", h)
        if target[i] == "yes":
            print("Instance is Positive ")
            # Generalise every attribute on which the positive example
            # disagrees with the specific hypothesis.
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            print("Instance is Negative ")
            # Specialise the general boundary on attributes that tell the
            # negative example apart from the specific hypothesis.
            for x in range(n_attrs):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("Specific Bundary after ", i+1, "Instance is ", specific_h)
        print("Generic Boundary after ", i+1, "Instance is ", general_h)
        print("\n")

    # Drop hypotheses that constrain nothing. The check is on the row content
    # rather than a fixed six-element pattern, so any attribute count works.
    general_h = [row for row in general_h if any(v != '?' for v in row)]
    return specific_h, general_h

# Run the learner on the loaded examples and report both final boundaries,
# each under its own heading line.
s_final, g_final = learn(concepts, target)

for heading, boundary in (
    ("Final Specific_h: ", s_final),
    ("Final General_h: ", g_final),
):
    print(heading, boundary, sep="\n")

enjoysports.csv

sky airtemp humidity wind water forcast enjoysport


sunny warm normal strong warm same yes
sunny warm high strong warm same yes
rainy cold high strong warm change no
sunny warm high strong cool change yes

18
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

OUTPUT:
Instances are:
[['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
['sunny' 'warm' 'high' 'strong' 'warm' 'same']
['rainy' 'cold' 'high' 'strong' 'warm' 'change']
['sunny' 'warm' 'high' 'strong' 'cool' 'change']]

Target Values are: ['yes' 'yes' 'no' 'yes']

Initialization of specific_h and genearal_h

Specific Boundary: ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']

Generic Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Instance 1 is ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']


Instance is Positive
Specific Bundary after 1 Instance is ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
Generic Boundary after 1 Instance is [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Instance 2 is ['sunny' 'warm' 'high' 'strong' 'warm' 'same']


Instance is Positive
Specific Bundary after 2 Instance is ['sunny' 'warm' '?' 'strong' 'warm' 'same']
Generic Boundary after 2 Instance is [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?',
'?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Instance 3 is ['rainy' 'cold' 'high' 'strong' 'warm' 'change']


Instance is Negative
Specific Bundary after 3 Instance is ['sunny' 'warm' '?' 'strong' 'warm' 'same']
Generic Boundary after 3 Instance is [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]

Instance 4 is ['sunny' 'warm' 'high' 'strong' 'cool' 'change']


Instance is Positive
Specific Bundary after 4 Instance is ['sunny' 'warm' '?' 'strong' '?' '?']

19
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

Generic Boundary after 4 Instance is [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

Final Specific_h:
['sunny' 'warm' '?' 'strong' '?' '?']
Final General_h:
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]

RESULT:

20
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

PROGRAM:(DECISION TREE BASED ID3 ALGORITHM)


import pandas as pd
import numpy as np
# Load the tennis examples into a DataFrame. The path uses a Windows-style
# separator and is relative to the current working directory.
df_tennis = pd.read_csv('desktop\\tennis.csv')
from collections import Counter
def entropy_list(a_list):
    """Return the entropy of the value distribution found in *a_list*.

    The values are counted, converted to relative frequencies, and passed to
    ``entropy``. An empty input yields no frequencies at all.
    """
    # Count through a generator so the input is always consumed as a plain
    # sequence of values, whatever container type it is.
    cnt = Counter(x for x in a_list)
    num_instance = len(a_list) * 1.0
    probs = [count / num_instance for count in cnt.values()]
    return entropy(probs)
import math
def entropy(probs):
    """Return the base-2 entropy of the probabilities in *probs*.

    Zero probabilities are skipped: their contribution is taken as 0 (the
    limit of p*log2(p) as p approaches 0), whereas evaluating the logarithm
    at 0 would raise ValueError. An empty iterable gives 0.
    """
    return sum(-prob * math.log2(prob) for prob in probs if prob != 0)
def info_gain(df, split, target, trace=0):
    """Return the information gain of splitting *df* on column *split*.

    Args:
        df: DataFrame holding the examples.
        split: Name of the column to group by.
        target: Name of the label column whose entropy is measured.
        trace: Accepted for compatibility; not used by the computation.

    Returns:
        Entropy of ``df[target]`` minus the size-weighted entropy of
        ``target`` within each group of ``split``.
    """
    nobs = len(df.index) * 1.0
    # Entropy after the split: each group's entropy weighted by its row share.
    new_entropy = sum(
        (len(group.index) / nobs) * entropy_list(group[target])
        for _, group in df.groupby(split)
    )
    old_entropy = entropy_list(df[target])
    return old_entropy - new_entropy
def id3(df, target, attribute_name, default_class=None):
    """Build a decision tree for *df* by recursive information-gain splits.

    Args:
        df: DataFrame holding the examples.
        target: Name of the label column.
        attribute_name: List of candidate attribute column names.
        default_class: Label returned when no split is possible (empty data
            or no attributes left).

    Returns:
        A single label when every row shares it, ``default_class`` when the
        data or the attribute list is exhausted, otherwise a nested dict of
        the form ``{attribute: {value: subtree_or_label}}``.
    """
    cnt = Counter(x for x in df[target])
    if len(cnt) == 1:
        # Pure node: every example carries the same label.
        return next(iter(cnt))
    if df.empty or (not attribute_name):
        return default_class

    # Fallback for the children is the most frequent label at this node
    # (not the lexicographically largest one).
    default_class = cnt.most_common(1)[0][0]

    gains = [info_gain(df, attr, target) for attr in attribute_name]
    best_attr = attribute_name[gains.index(max(gains))]

    tree = {best_attr: {}}
    remaining_attr = [x for x in attribute_name if x != best_attr]
    for attr_val, data_subset in df.groupby(best_attr):
        tree[best_attr][attr_val] = id3(
            data_subset, target, remaining_attr, default_class
        )
    return tree
def classify(instance, tree, default=None):
    """Return the label that *tree* assigns to *instance*.

    Args:
        instance: Mapping (or row) indexable by attribute name.
        tree: Nested dict of the form ``{attribute: {value: subtree_or_label}}``.
        default: Value returned when the instance's attribute value has no
            branch at any level of the tree.

    Returns:
        The leaf label reached by following the instance's attribute values,
        or ``default`` if a value is not present in the tree.
    """
    attribute = next(iter(tree))
    branches = tree[attribute]
    if instance[attribute] not in branches:
        return default

    result = branches[instance[attribute]]
    if isinstance(result, dict):
        # Forward `default` so a miss deeper in the tree falls back to it
        # as well instead of silently returning None.
        return classify(instance, result, default)
    return result
# Every column except the label is a candidate split attribute.
attribute_names = list(df_tennis.columns)
attribute_names.remove('PlayTennis')

# Tree built from the complete dataset.
tree = id3(df_tennis, 'PlayTennis', attribute_names)
print("\n\nThe Resultant Decision Tree is :\n")
print(tree)

# Hold out the last four rows for testing.
# NOTE(review): iloc[1:-4] also leaves row 0 out of the training set; kept
# as-is because the recorded output depends on it - confirm it is intended.
training_data = df_tennis.iloc[1:-4]
# Copy the slice so adding a column does not write into a view of df_tennis.
test_data = df_tennis.iloc[-4:].copy()
train_tree = id3(training_data, 'PlayTennis', attribute_names)
print("\n\nThe Resultant Decision train_tree is :\n")
print(train_tree)

# The fallback label matches the lowercase labels used in the data.
test_data['predicted2'] = test_data.apply(classify, axis=1, args=(train_tree, 'yes'))
print('\n\n Training the model for a few samples, and again predicting \'Playtennis\' for remaining attribute')
print('The Accuracy for new trained data is : ' + str(
    sum(test_data['PlayTennis'] == test_data['predicted2']) / (1.0 * len(test_data.index))))

tennis.csv
outlook temp humidity windy PlayTennis
sunny hot high FALSE no
sunny hot high TRUE no
overcast hot high FALSE yes
rainy mild high FALSE yes
rainy cool normal FALSE yes
rainy cool normal TRUE no
overcast cool normal TRUE yes
sunny mild high FALSE no
sunny cool normal FALSE yes
rainy mild normal FALSE yes
sunny mild normal TRUE yes
overcast mild high TRUE yes
overcast hot normal FALSE yes
rainy mild high TRUE no

22
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

OUTPUT:
The Resultant Decision Tree is :
{'outlook': {'overcast': 'yes', 'rainy': {'windy': {False: 'yes', True: 'no'}}, 'sunny': {'humidity':
{'high': 'no', 'normal': 'yes'}}}}

The Resultant Decision train_tree is :


{'outlook': {'overcast': 'yes', 'rainy': {'windy': {False: 'yes', True: 'no'}}, 'sunny': {'temp': {'cool':
'yes', 'hot': 'no', 'mild': 'no'}}}}

Training the model for a few samples, and again predicting 'Playtennis' for remaining
attribute
The Accuracy for new trained data is : 0.75

RESULT:

23
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

PROGRAM:(EM ALGORITHM AND K-MEANS ALGORITHM)


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

# Load the driver data; the path is relative to the working directory.
data = pd.read_csv("Desktop//clusterdata.csv")
df1 = pd.DataFrame(data)
print(df1)
f1 = df1['Distance_Feature'].values
f2 = df1['Speeding_Feature'].values

# One (distance, speeding) row per record: the feature matrix for both models.
X = np.array(list(zip(f1, f2)))

# Colour and marker per cluster index (three clusters/components are fitted).
colors = ['b', 'g', 'r']
markers = ['o', 'v', 's']

# One panel each for the raw data, the K-Means result and the Gaussian-mixture
# result, all sharing the same axis limits and labels.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for ax, title in zip(axes, ('Dataset', 'K- Means', 'Gaussian Mixture')):
    ax.set_xlim([0, 100])
    ax.set_ylim([0, 50])
    ax.set_title(title)
    ax.set_ylabel('speeding_feature')
    ax.set_xlabel('distance_feature')
ax_data, ax_kmeans, ax_gmm = axes

ax_data.scatter(f1, f2)

# K-Means: draw every point with the style of its assigned cluster.
kmeans_model = KMeans(n_clusters=3).fit(X)
for i, label in enumerate(kmeans_model.labels_):
    ax_kmeans.plot(f1[i], f2[i], color=colors[label], marker=markers[label])

# Gaussian mixture (fitted by EM): draw every point with the style of its
# predicted component.
gmm = GaussianMixture(n_components=3).fit(X)
labels = gmm.predict(X)
for i, label in enumerate(labels):
    ax_gmm.plot(f1[i], f2[i], color=colors[label], marker=markers[label])

plt.show()

clusterdata.csv
Driver_ID Distance_Feature Speeding_Feature
3423311935 71.24 28
3423313212 52.53 25
3423313724 64.54 27
3423311373 55.69 22
3423310999 54.58 25
3423313857 41.91 10
3423312432 58.64 20
3423311434 52.02 8
3423311328 31.25 34
3423312488 44.31 19
3423311254 49.35 40
3423312943 58.07 45
3423312536 44.22 22
3423311542 55.73 19
3423312176 46.63 43
3423314176 52.97 32
3423314202 46.25 35
3423311346 51.55 27
3423310666 57.05 26
3423313527 58.45 30
3423312182 43.42 23
3423313590 55.68 37
3423312268 55.15 18

25
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

OUTPUT:
Driver_ID Distance_Feature Speeding_Feature
0 3423311935 71.24 28
1 3423313212 52.53 25
2 3423313724 64.54 27
3 3423311373 55.69 22
4 3423310999 54.58 25
5 3423313857 41.91 10
6 3423312432 58.64 20
7 3423311434 52.02 8
8 3423311328 31.25 34
9 3423312488 44.31 19
10 3423311254 49.35 40
11 3423312943 58.07 45
12 3423312536 44.22 22
13 3423311542 55.73 19
14 3423312176 46.63 43
15 3423314176 52.97 32
16 3423314202 46.25 35
17 3423311346 51.55 27
18 3423310666 57.05 26
19 3423313527 58.45 30
20 3423312182 43.42 23
21 3423313590 55.68 37
22 3423312268 55.15 18

26
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

RESULT:

27
PANIMALAR ENGINEERING COLLEGE
DEPARTMENT OF CSE
REG NO:211421104223

PROGRAM:(K-NEAREST NEIGHBOUR ALGORITHM)


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn import datasets
# Load the Iris dataset and pull out its features and class labels.
iris = datasets.load_iris()
iris_data, iris_labels = iris.data, iris.target

# Hold out 30% of the samples for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    iris_data, iris_labels, test_size=0.30
)

# Fit a 5-nearest-neighbour classifier and predict the held-out samples.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# Report the confusion matrix and the per-class metrics, each under its heading.
for heading, report in (
    ('Confusion matrix is as follows', confusion_matrix(y_test, y_pred)),
    ('Accuracy Matrics', classification_report(y_test, y_pred)),
):
    print(heading)
    print(report)

OUTPUT:
Confusion matrix is as follows
[[15 0 0]
[ 0 13 2]
[ 0 0 15]]

Accuracy Matrices
precision recall f1-score support

0 1.00 1.00 1.00 15


1 1.00 0.87 0.93 15
2 0.88 1.00 0.94 15

accuracy 0.96 45
macro avg 0.96 0.96 0.96 45
weighted avg 0.96 0.96 0.96 45

RESULT:

28

You might also like