Programs Lab Bca
Programs Lab Bca
8. Write a program to demonstrate Regression analysis with residual plots on a given data set.
SOURCE CODE:
import numpy as np
import matplotlib.pyplot as plt
def estimate_coef(x, y):
# number of observations/points
n = np.size(x)
# putting labels
plt.xlabel('x')
plt.ylabel('y')
OUTPUT:
Estimated coefficients:
b0 = 1.2363636363636363
b1 = 1.1696969696969697
9. Write a program to demonstrate the working of the decision tree-based ID3 algorithm.
SOURCE CODE:
# Performing training
clf_gini.fit(X_train, y_train)
return clf_gini
# Function to perform training with entropy.
def tarin_using_entropy(X_train, X_test, y_train):
# Performing training
clf_entropy.fit(X_train, y_train)
return clf_entropy
# Operational Phase
print("Results Using Gini Index:")
OUTPUT:
a .CSV file.
SOURCE CODE:
.CSVfile.
class NaiveBayesClassifier:
    """Count-based classifier over categorical feature rows.

    NOTE(review): this class was restored from a whitespace-mangled listing.
    The header of the prediction method, the initial value of ``max_arg`` and
    the final ``return`` were not present in the extracted text; the name
    ``classify``, the start value ``-1`` and ``return solve`` are
    reconstructions -- confirm against the original listing.
    """

    def __init__(self, X, y):
        """Store the training set and tabulate attribute values and class counts.

        Args:
            X: Sequence of feature rows; each row is indexed ``0..dim-1``.
            y: Sequence of target labels, parallel to ``X``.
        """
        self.X, self.y = X, y
        self.N = len(self.X)        # length of the training set
        self.dim = len(self.X[0])   # dimension of the feature vector
        # One list per feature column holding the distinct values seen in it.
        self.attrs = [[] for _ in range(self.dim)]
        # Output classes mapped to their number of occurrences in the training set.
        self.output_dom = {}
        # Every training row stored as [Xi, yi]; read back by classify().
        self.data = []

        for i, features in enumerate(self.X):
            for j in range(self.dim):
                # First time this value shows up for attribute j: record it.
                if features[j] not in self.attrs[j]:
                    self.attrs[j].append(features[j])
            # Count one more occurrence of this output class.
            label = self.y[i]
            self.output_dom[label] = self.output_dom.get(label, 0) + 1
            self.data.append([features, label])

    def classify(self, entry):
        """Return the class with the highest score for ``entry``.

        Args:
            entry: Feature row indexed ``0..dim-1``.

        Returns:
            The winning class label, or ``None`` when no class was seen
            during training.
        """
        solve = None   # final result
        max_arg = -1   # best score so far; every score is >= 0

        for y in self.output_dom:
            prob = self.output_dom[y] / self.N  # P(y)
            for i in range(self.dim):
                # Number of training rows of class y whose i-th feature matches.
                n = sum(
                    1 for row in self.data
                    if row[0][i] == entry[i] and row[1] == y
                )
                # NOTE(review): the listing divides by N, i.e. the joint
                # frequency of (Xi = xi, y); a textbook likelihood
                # P(Xi = xi | y) would divide by the class count instead.
                prob *= n / self.N
            # Keep this class if it beats the best score so far.
            if prob > max_arg:
                max_arg = prob
                solve = y
        return solve
OUTPUT:
array(['Iris_virginica', 'Iris_versicolor', 'Iris_setosa', 'Iris_virginica', 'Iris_setosa',
'Iris_virginica', 'Iris_setosa', 'Iris_virginica', 'Iris_versicolor', 'Iris_virginica', 'Iris_virginica',
'Iris_virginica', 'Iris_virginica', 'Iris_versicolor', 'Iris_setosa', 'Iris_virginica', 'Iris_setosa',
'Iris_virginica', 'Iris_setosa', 'Iris_virginica', 'Iris_versicolor', 'Iris_virginica', 'Iris_virginica',
'Iris_virginica'], dtype='<U15')
11. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set.
SOURCE CODE:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']


def _draw_unique(column, min_val, max_val):
    """Draw a random value scaled into [min_val, max_val], rounded to 2 decimals, not yet in *column*."""
    v = round(np.random.rand() * (max_val - min_val) + min_val, 2)
    # `in` on a pandas Series tests the index labels, so look at .values
    # to compare against the numbers already stored in the column.
    while v in column.values:
        v = round(np.random.rand() * (max_val - min_val) + min_val, 2)
    return v


# Data generation
train_num = 200
test_num = 100
# Per class: one [min, max] range for each of the three feature columns.
config = {
    'Corn': [[150, 190], [40, 70], [2, 4]],
    'Potato': [[30, 60], [7, 10], [1, 2]],
    'grass': [[10, 40], [10, 40], [0, 1]],
}
# NOTE(review): the extracted listing never defines `dataset` or `plants`.
# The code below needs `dataset` to be a DataFrame with three feature columns
# followed by one label column, and `plants` to be a sequence of three class
# names indexable by 0..2. Restore both definitions from the original listing.
index = 0

# Natural data: rows whose label matches the class the features were drawn from.
for p in config:
    for _ in range(int(train_num / 3 - 3)):
        row = []
        for j, [min_val, max_val] in enumerate(config[p]):
            row.append(_draw_unique(dataset[dataset.columns[j]], min_val, max_val))
        row.append(p)
        dataset.loc[index] = row
        index += 1

# Wrong data: fill up to train_num rows, labelling each with the *next* class.
for _ in range(train_num - index):
    k = np.random.randint(3)
    p = plants[k]
    row = []
    for j, [min_val, max_val] in enumerate(config[p]):
        row.append(_draw_unique(dataset[dataset.columns[j]], min_val, max_val))
    row.append(plants[(k + 1) % 3])
    dataset.loc[index] = row
    index += 1

# dataset=dataset.infer_objects()
# Shuffle the rows, then renumber them 0..n-1.
dataset = dataset.reindex(np.random.permutation(len(dataset)))
dataset.reset_index(drop=True, inplace=True)
# Features (all but the last column) and labels (last column) go to separate files.
dataset.iloc[:int(train_num), :-1].to_csv('potato_train_data.csv', index=False)
dataset.iloc[:int(train_num):, [-1]].to_csv('potato_train_label.csv', index=False)
def visualize(dataset, labels, features, classes, fig_size=(10, 10), layout=None):
    """Scatter-plot every pair of feature columns, one subplot per pair.

    Args:
        dataset: 2-D array indexed as ``dataset[mask, column]``.
        labels: Array compared element-wise with the class positions
            ``0..len(classes)-1`` to select the rows of each class.
        features: Feature names; used for subplot titles and axis labels.
        classes: Class names; used as legend entries.
        fig_size: Figure size forwarded to ``plt.figure``.
        layout: ``[rows, cols]`` of the subplot grid. Defaults to a single
            column with one row per feature pair.
    """
    plt.figure(figsize=fig_size)
    pairs = [
        (i, j)
        for i in range(len(features))
        for j in range(i + 1, len(features))
    ]
    if layout is None:
        # One row per pair: sizing by len(features) only fits up to 3 features.
        layout = [len(pairs), 1]
    for index, (i, j) in enumerate(pairs, start=1):
        p = plt.subplot(layout[0], layout[1], index)
        plt.subplots_adjust(hspace=0.4)
        p.set_title(features[i] + '&' + features[j])
        p.set_xlabel(features[i])
        p.set_ylabel(features[j])
        for k in range(len(classes)):
            # Rows of class k, plotted as feature i against feature j.
            p.scatter(dataset[labels == k, i], dataset[labels == k, j], label=classes[k])
        p.legend()
    plt.show()
# Load the generated training data and plot every feature pair by class.
dataset = pd.read_csv('potato_train_data.csv')
labels = pd.read_csv('potato_train_label.csv')
features = list(dataset.keys())
classes = np.array(['Corn', 'Potato', 'grass'])
# Replace each class name by its position in `classes`; mapping the column in
# one step avoids writing integers into a string column cell by cell.
class_codes = {name: code for code, name in enumerate(classes)}
dataset = dataset.values
labels = labels['type'].map(class_codes).values
visualize(dataset, labels, features, classes)
OUTPUT:
12. Write a program to implement k-Means clustering algorithm to cluster the set of
data stored in .CSV file.
SOURCE CODE:
SOURCE CODE:
import turtle
def drawBar(t, height):
    """Get turtle t to draw one bar, of height.

    The turtle goes up the left edge, writes the height at the top, crosses
    40 units to the right, comes back down, and ends facing its original
    heading, 40 units to the right of where it started.

    Args:
        t: Turtle-like object providing ``begin_fill``, ``end_fill``,
            ``left``, ``right``, ``forward`` and ``write``.
        height: Length of the bar's vertical edges; also the text written
            at the top of the bar.
    """
    t.begin_fill()          # start filling this shape
    t.left(90)
    t.forward(height)
    t.write(str(height))
    t.right(90)
    t.forward(40)
    t.right(90)
    t.forward(height)
    t.left(90)
    t.end_fill()            # stop filling this shape
# Draw a bar chart of `xs` in a turtle window, one bar per value.
xs = [48, 117, 200, 240, 160, 260, 220]  # here is the data
maxheight = max(xs)
numbars = len(xs)
border = 10

wn = turtle.Screen()  # Set up the window and its attributes
# World coordinates span 40 units per bar horizontally and the tallest bar
# vertically, with `border` units of margin on every side.
wn.setworldcoordinates(0 - border, 0 - border, 40 * numbars + border, maxheight + border)
wn.bgcolor("lightgreen")

tess = turtle.Turtle()  # create tess and set some attributes
tess.color("blue")
tess.fillcolor("red")
tess.pensize(3)

for a in xs:
    drawBar(tess, a)

wn.exitonclick()
OUTPUT:
14. To implement the program to plot a histogram of the given data.
5 6 9 8 2
SOURCE CODE:
import matplotlib.pyplot as plt
# Show the plot
plt.show()
OUTPUT:
15. To implement the program to draw line plot and bar chart of the given data.
Speed(m/s) 0 3 7 12 20 30 45.6
SOURCE CODE:
import matplotlib.pyplot as plt
OUTPUT: