ML Programs 1
ML Programs 1
In [1]:
#Program 1
# Find-S style pass over prg1.csv: every row whose last column is 'yes'
# generalises the hypothesis; all other rows leave it untouched.
# The file is read once inside a `with` block so the handle is always closed.
with open('prg1.csv', 'r') as f:
    # Strip the line terminator up front so the label compares equal to 'yes'
    # even on the last line of a file without a trailing newline (or with CRLF).
    rows = [line.rstrip('\r\n').split(',') for line in f]

# Column count is taken from the first line; an empty file yields an empty hypothesis.
length = len(rows[0]) if rows else 1
hypo = ['0'] * (length - 1)
print('Intital Hypo = ', hypo)

for lst in rows:
    if lst[-1].strip() == 'yes':
        for i in range(length - 1):
            # '0' marks "no value seen yet"; a conflicting value widens the slot to '?'.
            if hypo[i] != '0' and lst[i] != hypo[i]:
                hypo[i] = '?'
            else:
                hypo[i] = lst[i]
    # The hypothesis is printed after every row, positive or not.
    print('Hypo ', hypo)
print('final hypo ', hypo)
In [2]:
#program2
# Candidate-elimination style pass over prg1.csv.
# shypo is the specific hypothesis (one slot per attribute), ghypo collects
# general hypotheses that each constrain exactly one attribute.
# The file is read once inside a `with` block so the handle is always closed.
with open('prg1.csv', 'r') as f:
    # Strip the line terminator so the label matches 'yes'/'no' even on a final
    # line without a trailing newline (or with CRLF endings).
    rows = [line.rstrip('\r\n').split(',') for line in f]

# Number of attribute columns (everything except the trailing label column).
length = (len(rows[0]) if rows else 1) - 1
shypo = ['0'] * length
ghypo = ['?'] * length
print('Intital Specific hypothesis', shypo)
count = 1
print('Intital General hypothesis', ghypo)
# From here on ghypo is used as a list of hypotheses, so start it empty.
ghypo.clear()

for lst in rows:
    label = lst[-1].strip()
    for i in range(length):
        if label == 'yes':
            # Positive row: widen the specific hypothesis where it disagrees.
            if shypo[i] != '0' and shypo[i] != lst[i]:
                shypo[i] = '?'
            else:
                shypo[i] = lst[i]
        elif label == 'no':
            if '0' in shypo:
                # Specific hypothesis still has unseen slots: record the
                # negative row's own value at position i.
                temp_lst = ['?'] * i
                temp_lst += [lst[i]]
                temp_lst += ['?'] * (length - 1 - i)
                ghypo.append(temp_lst)
            elif shypo[i] != '?' and shypo[i] != lst[i]:
                # Attribute i separates this negative row from the specific
                # hypothesis: record the specific value at position i (deduplicated).
                temp_lst = ['?'] * i
                temp_lst = temp_lst + [shypo[i]]
                temp_lst = temp_lst + ['?'] * (length - 1 - i)
                if temp_lst not in ghypo:
                    ghypo.append(temp_lst)
    print('SHYPO ', count, " ", shypo)
    print('GHYPO ', count, " ", ghypo)
    count += 1

# Keep only the general hypotheses whose single constrained attribute still
# agrees with the final specific hypothesis.
f_ghypo = list()
for i in range(len(ghypo)):
    for j in range(len(ghypo[i])):
        if ghypo[i][j] != '?' and ghypo[i][j] == shypo[j]:
            f_ghypo.append(ghypo[i])
print(f_ghypo)
In [3]:
#program3
import numpy as np
import pandas as pd
def entropy(target_col):
    """Return the base-2 Shannon entropy of the values in ``target_col``.

    The distinct values are counted with ``np.unique`` and each value
    contributes ``-p * log2(p)``, where ``p`` is its relative frequency.
    An empty input yields ``0``.
    """
    _, freq = np.unique(target_col, return_counts=True)
    probs = freq / np.sum(freq)
    # Built-in sum keeps the left-to-right accumulation order.
    return sum(-p * np.log2(p) for p in probs)
def infoGain(data,features,target):
    """Return the information gain of splitting ``data`` on column ``features``.

    Computed as the entropy of ``data[target]`` minus the weighted sum of the
    target entropies of each subset that shares one value of ``data[features]``;
    each subset is weighted by its share of the rows.
    """
    total_entropy = entropy(data[target])
    levels, level_counts = np.unique(data[features], return_counts=True)
    n_rows = sum(level_counts)
    split_entropy = 0
    for level, cnt in zip(levels, level_counts):
        subset_target = data[data[features] == level][target]
        split_entropy = split_entropy + (cnt / n_rows) * entropy(subset_target)
    return total_entropy - split_entropy
def ID3(data,features,target,pnode):
    """Recursively build a decision tree with the ID3 procedure.

    Parameters:
        data: table indexable by column name (a pandas DataFrame in this notebook).
        features: sequence of candidate column names still available for splitting.
        target: name of the label column.
        pnode: majority label of the parent node, returned when no further
            split is possible.

    Returns:
        A single label for a leaf, or a nested dict of the form
        ``{column: {value: subtree_or_label, ...}}``.
    """
    labels, label_counts = np.unique(data[target], return_counts=True)
    if len(labels) == 0:
        # No rows left in this branch: fall back to the parent's majority label.
        return pnode
    if len(labels) == 1:
        return labels[0]
    elif len(features) == 0:
        return pnode
    else:
        # Majority label of this node. The counts must come from
        # return_counts=True; indexing the plain unique-values array with [1]
        # would hand argmax a label value instead of the frequencies.
        pnode = labels[np.argmax(label_counts)]
        IG = [infoGain(data, f, target) for f in features]
        index = np.argmax(IG)
        col = features[index]
        tree = {col: {}}
        features = [f for f in features if f != col]
        for val in np.unique(data[col]):
            sub_data = data[data[col] == val].dropna()
            subtree = ID3(sub_data, features, target, pnode)
            tree[col][val] = subtree
        return tree
# Load the data set and hold out a random 10% sample of rows.
data = pd.read_csv('PlayTennis.csv')
testData = data.sample(frac=0.1)
# Keep only the rows that were not sampled for testing.
data = data.drop(testData.index)
print(data)

# Every column except the label is a candidate split feature.
target = 'PlayTennis'
is_feature = data.columns != target
features = data.columns[is_feature]

tree = ID3(data, features, target, None)
print(tree)

# Show the first held-out record together with its actual label.
held_out_records = testData.to_dict('records')
test = held_out_records[0]
print(test, '=>', test['PlayTennis'])
In [4]:
#program4
import numpy as np # numpy is commonly used to process number array
X = np.array([[2,9], [3,6], [4,8]]) # Features ( Hrs Slept, Hrs Studied)
y = np.array([[92], [86], [89]]) # Labels(Marks obtained)
X = X/np.amax(X,axis=0) # Normalize each feature column by its maximum
y = y/100 # Scale the marks into the sigmoid's output range


def sigmoid(x):
    """Logistic activation 1 / (1 + e^-x)."""
    return 1/(1 + np.exp(-x))


def sigmoid_grad(x):
    """Derivative of the sigmoid, expressed in terms of the sigmoid OUTPUT x."""
    return x * (1 - x)


# Variable initialization
epoch=1000 #Setting training iterations
eta =0.1 #Setting learning rate (eta)
input_neurons = 2 #number of features in data set
hidden_neurons = 3 #number of hidden layers neurons
output_neurons = 1 #number of neurons at output layer

# Weight and bias - Random initialization
wh=np.random.uniform(size=(input_neurons,hidden_neurons)) # 2x3
bh=np.random.uniform(size=(1,hidden_neurons)) # 1x3
wout=np.random.uniform(size=(hidden_neurons,output_neurons)) # 3x1
bout=np.random.uniform(size=(1,output_neurons)) # 1x1

for i in range(epoch):
    # Forward propagation
    h_ip=np.dot(X,wh) + bh # Dot product + bias
    h_act = sigmoid(h_ip) # Activation function
    o_ip=np.dot(h_act,wout) + bout
    output = sigmoid(o_ip)

    # Error at output layer
    Eo = y-output # Error at o/p
    outgrad = sigmoid_grad(output)
    d_output = Eo* outgrad # Errj=Oj(1-Oj)(Tj-Oj)

    # Error at hidden layer
    Eh = np.dot(d_output,wout.T) # .T means transpose
    hiddengrad = sigmoid_grad(h_act) # How much hidden layer wts contributed to error
    d_hidden = Eh * hiddengrad

    # Apply the computed deltas. Without this step the loop only recomputes
    # the same forward pass and the network never learns.
    # NOTE(review): the original cell appears truncated after d_hidden; these
    # updates and the prints below are reconstructed - confirm against the
    # original notebook.
    wout += np.dot(h_act.T, d_output) * eta
    bout += np.sum(d_output, axis=0, keepdims=True) * eta
    wh += np.dot(X.T, d_hidden) * eta
    bh += np.sum(d_hidden, axis=0, keepdims=True) * eta

# Headings follow the output recorded for this cell.
print("Normalized Input:\n" + str(X))
print("Actual Output:\n" + str(y))
print("Predicted Output:\n" + str(output))
Normalized Input:
[[0.5 1. ]
[0.75 0.66666667]
[1. 0.88888889]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.88719705]
[0.88752842]
[0.89654925]]
In [5]:
#program5
import pandas as pd
# Load the data, hold out a random 30% of the rows for testing and estimate
# the frequency tables on the remaining rows.
mush = pd.read_csv('mushrooms.csv')
target = 'class'
classes = mush[target].unique()
features = mush.columns[mush.columns!=target]
testData = mush.sample(frac=0.3)
mush.drop(testData.index,inplace = True)

# first[cls][column][value] -> share of class-`cls` training rows having `value` in `column`
# fourth[cls]               -> share of all training rows that belong to `cls`
first ={}
fourth ={}
for x in classes:
    mushcl = mush[mush[target]==x][features]
    tot = len(mushcl)
    second={}
    for col in mushcl.columns:
        third={}
        # Series.items() replaces iteritems(), which was removed in pandas 2.0.
        for val,cnt in mushcl[col].value_counts().items():
            prob = cnt/tot
            third[val]=prob
        second[col]=third
    first[x]=second
    fourth[x]=len(mushcl)/len(mush)
def proabs(params):
    """Score one sample against every class.

    Parameters:
        params: mapping-like object with ``.items()`` yielding (column, value)
            pairs (a pandas Series row in this notebook).

    Returns:
        dict mapping each class in ``classes`` to ``fourth[cls]`` multiplied by
        ``first[cls][column][value]`` for every (column, value) pair. A pair
        missing from the tables contributes a factor of 0.
    """
    proab={}
    for x in classes:
        calc = fourth[x]
        # Series.items() replaces iteritems(), which was removed in pandas 2.0.
        for col, val in params.items():
            try:
                # Accumulate the product; plain assignment here would discard
                # the prior and every earlier feature.
                calc *= first[x][col][val]
            except KeyError:
                calc = 0
        proab[x]=calc
    return proab
def maxx(params):
    """Return the class with the highest score from ``proabs(params)``.

    Only strictly positive scores can win; if none is positive the empty
    string is returned. Ties keep the class encountered first.
    """
    scores = proabs(params)
    best_label, best_score = '', 0
    for label in scores:
        score = scores[label]
        if score > best_score:
            best_label, best_score = label, score
    return best_label
# Accuracy on the rows that were used to build the frequency tables.
b = [maxx(mush.loc[i, features]) == mush.loc[i, target] for i in mush.index]
print(sum(b),'correct of',len(b))
print('Accuracy =',sum(b)/len(b))

# Accuracy on the held-out rows.
b = [maxx(testData.loc[i, features]) == testData.loc[i, target] for i in testData.index]
print(sum(b),'correct of',len(b))
print('Accuracy =',sum(b)/len(b))
In [6]:
#program6
import pandas as pd
# Read (message, label) pairs and encode the label as 1 (pos) / 0 (neg).
msg=pd.read_csv('naive.csv',names=['message','label'])
print('The dimensions of the dataset',msg.shape)
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
X=msg.message
y=msg.labelnum
print(X)
print(y)

#splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

#output of count vectoriser is a sparse matrix
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# is its replacement. tolist() keeps the printed form a plain list of strings.
feature_names = count_vect.get_feature_names_out().tolist()
print(feature_names)
df=pd.DataFrame(xtrain_dtm.toarray(),columns=feature_names)
print(df)#tabular representation
print(xtrain_dtm) #sparse matrix representation

# Training Naive Bayes (NB) classifier on training data.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)

#printing accuracy metrics
from sklearn import metrics
print('Accuracy metrics')
print('Accuracy of the classifer is',metrics.accuracy_score(ytest,predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
print('Recall and Precison ')
print(metrics.recall_score(ytest,predicted))
print(metrics.precision_score(ytest,predicted))

# Disabled example: classify unseen sentences. It was a bare triple-quoted
# string in the original cell and is kept here as comments so the cell no
# longer echoes it as its output value.
# NOTE(review): msg.labelnum[category] looks up row 0 or 1 of the label column
# rather than decoding the predicted class - confirm before enabling.
# docs_new = ['I like this place', 'My boss is not my saviour']
# X_new_counts = count_vect.transform(docs_new)
# predictednew = clf.predict(X_new_counts)
# for doc, category in zip(docs_new, predictednew):
#     print('%s->%s' % (doc, msg.labelnum[category]))
11 0 0 0 0 0 0 0 0 0 0 ... 0
12 0 0 0 0 0 0 0 0 0 0 ... 1
(11, 13) 1
(11, 15) 1
(11, 41) 1
(11, 39) 1
(12, 27) 1
(12, 20) 1
(12, 24) 1
(12, 10) 1
(12, 34) 1
Accuracy metrics
Accuracy of the classifer is 1.0
Confusion matrix
[[2 0]
[0 3]]
Recall and Precison
1.0
1.0
Out[6]:
In [7]:
#program7
import pandas as pd
from pgmpy.estimators import BayesianEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
# Column names live on the first line of a separate file. Read them inside a
# `with` block so the handle is closed, and strip whitespace so the last name
# does not keep its trailing newline (it must match the 'heartdisease' node).
with open('data7_name.csv','r') as f:
    attributes = [name.strip() for name in f.readline().split(',')]
heartDisease=pd.read_csv('data7.csv',names=attributes)
print("\nAttributes and datatypes")
print(heartDisease.dtypes)

# Network structure: each tuple is a directed edge (parent, child).
model=BayesianModel([
    ('age','trestbps'),
    ('age','fbs'),
    ('sex','trestbps'),
    ('exang','trestbps'),
    ('trestbps','heartdisease'),
    ('fbs','heartdisease'),
])
model.fit(heartDisease,BayesianEstimator)
HeartDisease_infer=VariableElimination(model)

print("\n 1. Probability heart disease given age=28")
q=HeartDisease_infer.query(['heartdisease'],{'age':28})
print(q['heartdisease'])
print("\n 2. Probability of heart disease for male")
q=HeartDisease_infer.query(['heartdisease'],{'sex':1})
print(q['heartdisease'])
In [9]:
#program8
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the two feature columns and pair them row by row into one array.
data = pd.read_csv('8-kmeansdata.csv')
f1 = data['Distance_Feature']
f2 = data['Speeding_Feature']
X = np.array(list(zip(f1, f2)))

# Raw points, before any clustering.
plt.scatter(f1, f2, color='black')
plt.show()

# K-means with 3 clusters; points are coloured by predicted cluster.
kmeans = KMeans(3)
kmeans = kmeans.fit(X)
labels = kmeans.predict(X)
plt.scatter(f1, f2, c=labels)
plt.show()

# Gaussian mixture with 3 components on the same data, plotted the same way.
gm = GaussianMixture(3)
gm = gm.fit(X)
labels = gm.predict(X)
plt.scatter(f1, f2, c=labels)
plt.show()
In [10]:
#program9
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
# k-nearest-neighbours classification of the iris data set.
iris=datasets.load_iris()
print("Iris Data set loaded...")
x_train, x_test, y_train, y_test = train_test_split(iris.data,iris.target)

# Single source of truth for k, so the printed heading cannot drift from the
# value actually used by the classifier.
n_neighbors = 3
classifier = KNeighborsClassifier(n_neighbors).fit(x_train, y_train)
y_pred=classifier.predict(x_test)

print("Results of Classification using K-nn with K=%d " % n_neighbors)
for r in range(0,len(x_test)):
    print(" Sample:", str(x_test[r]), " Actual-label:", str(y_test[r]), " Predicted-label:",str(y_pred[r]))
print("Classification Accuracy :" , classifier.score(x_test,y_test))
In [11]:
#program10
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def localWeigh(point,X,ymat,k):
    """Solve the locally weighted least-squares problem around ``point``.

    Parameters:
        point: 1 x n ``np.matrix`` row, the query point.
        X: m x n ``np.matrix`` of inputs (``*`` is used as matrix product).
        ymat: 1 x m ``np.matrix`` of targets (it is transposed before use).
        k: bandwidth of the Gaussian kernel.

    Returns:
        n x 1 ``np.matrix`` ``(X.T W X)^-1 X.T W y`` where ``W`` is diagonal with
        ``exp(-||point - X[i]||^2 / (2 k^2))`` as the i-th entry.
    """
    m = np.shape(X)[0]
    # np.asmatrix instead of np.mat: the np.mat alias was removed in NumPy 2.0.
    weights = np.asmatrix(np.eye(m))
    for i in range(m):
        diff = point - X[i]
        # diff * diff.T is a 1x1 matrix; take the scalar explicitly rather than
        # relying on implicit matrix-to-scalar conversion on assignment.
        sq_dist = (diff * diff.T)[0, 0]
        weights[i, i] = np.exp(sq_dist / (-2.0 * k**2))
    W = (X.T * (weights * X)).I * (X.T * (weights * ymat.T))
    return W
def localWeightReg(X,ymat,k):
    """Predict every row of ``X`` with its own locally weighted regression fit.

    Parameters:
        X: m x n ``np.matrix`` of inputs.
        ymat: 1 x m ``np.matrix`` of targets.
        k: kernel bandwidth forwarded to ``localWeigh``.

    Returns:
        1-D float array of length m with one prediction per row of ``X``.
    """
    m = np.shape(X)[0]
    ypred = np.zeros(m)
    for i in range(m):
        # X[i] * W is a 1x1 matrix; extract the scalar explicitly because
        # implicit conversion of a non-0-d array to a scalar is deprecated.
        ypred[i] = (X[i] * localWeigh(X[i], X, ymat, k))[0, 0]
    return ypred
def plott(X,pred):
    """Scatter the raw data and overlay the fitted curve.

    ``X`` and ``pred`` are reordered by column 1 of ``X`` before the curve is
    drawn. The scatter uses the module-level ``x`` and ``y``, not the arguments.
    """
    # assumes X is the np.matrix built by this notebook (bias column, then the
    # feature) - TODO confirm; the chained indexing below relies on that.
    order = X[:,1].argsort(0)
    xsort = X[order][:,0][:,1]
    ysort = pred[order]

    plt.xlabel('Total bill')
    plt.ylabel('Tips')
    plt.scatter(x, y, color='green')
    plt.plot(xsort, ysort, color="red", linewidth=5)
    plt.show()
# Locally weighted regression of tip against total bill.
data = pd.read_csv('data10.csv')
x=data['total_bill']
y = data['tip']

# np.asmatrix instead of np.mat: the np.mat alias was removed in NumPy 2.0.
xmat = np.asmatrix(x)
ymat = np.asmatrix(y)

# Prepend a column of ones so the fit includes an intercept term.
size = np.shape(xmat)[1]
ones = np.asmatrix(np.ones(size))
X=np.hstack((ones.T,xmat.T))

pred = localWeightReg(X,ymat,3)
plott(X,pred)
In [ ]: