0% found this document useful (0 votes)
16 views

Programs Lab Bca

Uploaded by

Gayu Gayu
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views

Programs Lab Bca

Uploaded by

Gayu Gayu
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 16

Programs

8. Write a program to demonstrate Regression analysis with residual plots on a given data set.

SOURCE CODE:

import numpy as np
import matplotlib.pyplot as plt
def estimate_coef(x, y):
    """Estimate the coefficients of a simple linear regression y = b0 + b1*x.

    Uses the closed-form least-squares solution based on the
    cross-deviation of x and y and the deviation of x about its mean.

    Args:
        x: 1-D sequence or array of predictor observations.
        y: 1-D sequence or array of response observations, same length as x.

    Returns:
        A tuple ``(b0, b1)`` holding the intercept and the slope.

    Raises:
        ValueError: if x and y differ in shape, are empty, or if every x
            value is identical (the slope is undefined in that case).
    """
    # Work in floating point so integer inputs cannot overflow and plain
    # Python lists are accepted as well as arrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError("x and y must have the same shape")

    # number of observations/points
    n = np.size(x)
    if n == 0:
        raise ValueError("at least one observation is required")
    if x.max() == x.min():
        # Zero deviation about x would make the slope a division by zero.
        raise ValueError("x values are all identical; slope is undefined")

    # mean of x and y vector
    mx = np.mean(x)
    my = np.mean(y)

    # calculating cross-deviation and deviation about x
    sxy = np.sum(y * x) - n * my * mx
    sxx = np.sum(x * x) - n * mx * mx

    # calculating regression coefficients
    b1 = sxy / sxx
    b0 = my - b1 * mx
    return (b0, b1)
def plot_regression_line(x, y, b):
    """Show the observations as a scatter plot with the fitted line on top.

    Args:
        x: predictor observations (array-like).
        y: response observations (array-like), same length as x.
        b: pair ``(b0, b1)`` of intercept and slope.
    """
    # Array conversion lets the prediction below work for plain lists too.
    x = np.asarray(x)

    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1] * x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')

    # function to show plot
    plt.show()
def main():
    """Fit a regression line to the sample data, print it and plot it."""
    # observations or data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb0 = {} \nb1 = {}".format(b[0], b[1]))

    # plotting regression line
    plot_regression_line(x, y, b)


# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()

OUTPUT:
Estimated coefficients:
b0 = 1.2363636363636363
b1 = 1.1696969696969697
9. Write a program to demonstrate the working of the decision tree-based ID3 algorithm.

SOURCE CODE:

# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# from sklearn.cross_validation import train_test_split  (module removed from scikit-learn)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Function importing Dataset


def importdata():
    """Download the UCI balance-scale data set and print a short summary.

    Returns:
        A pandas DataFrame read without a header row, so the columns are
        numbered from 0.
    """
    balance_data = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-' +
        'databases/balance-scale/balance-scale.data',
        sep=',', header=None)

    # Printing the dataset shape
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)

    # Printing the dataset observations
    print("Dataset: ", balance_data.head())
    return balance_data

# Function to split the dataset
def splitdataset(balance_data):
    """Separate features from the target and split into train and test sets.

    Args:
        balance_data: DataFrame whose column 0 is the target label and
            whose columns 1-4 are the features.

    Returns:
        ``(X, Y, X_train, X_test, y_train, y_test)`` where 30% of the rows
        are held out for testing using a fixed random seed.
    """
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=100)
    return X, Y, X_train, X_test, y_train, y_test

# Function to perform training with giniIndex.


def train_using_gini(X_train, X_test, y_train):
    """Train a depth-limited decision tree using the Gini impurity criterion.

    Args:
        X_train: training feature matrix.
        X_test: unused; kept so the signature matches existing callers.
        y_train: training target labels.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(
        criterion="gini", random_state=100, max_depth=3, min_samples_leaf=5)

    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini
# Function to perform training with entropy.
def tarin_using_entropy(X_train, X_test, y_train):
    """Train a depth-limited decision tree using the entropy criterion.

    Args:
        X_train: training feature matrix.
        X_test: unused; kept so the signature matches existing callers.
        y_train: training target labels.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy", random_state=100, max_depth=3,
        min_samples_leaf=5)

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy


# Correctly spelled alias; the misspelled name is kept for existing callers.
train_using_entropy = tarin_using_entropy

# Function to make predictions
def prediction(X_test, clf_object):
    """Predict labels for the test set, print them and return them.

    Args:
        X_test: feature matrix to classify.
        clf_object: any fitted object exposing ``predict(X)``.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    # Prediction on the test set with the supplied classifier
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred

# Function to calculate accuracy


def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy (in percent) and class report.

    Args:
        y_test: true labels of the test set.
        y_pred: labels predicted for the test set.
    """
    print("Confusion Matrix: ", confusion_matrix(y_test, y_pred))
    print("Accuracy : ", accuracy_score(y_test, y_pred) * 100)
    print("Report : ", classification_report(y_test, y_pred))
def main():
    """Train Gini- and entropy-based trees and report both on the test set."""
    # Building Phase
    data = importdata()
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    clf_gini = train_using_gini(X_train, X_test, y_train)
    clf_entropy = tarin_using_entropy(X_train, X_test, y_train)

    # Operational Phase
    print("Results Using Gini Index:")

    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)

    print("Results Using Entropy:")

    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)


# Calling main function
if __name__ == "__main__":
    main()

OUTPUT:

Dataset Length: 625


Dataset Shape: (625, 5)
Dataset: 0 1 2 3 4
0 B 1 1 1 1
1 R 1 1 1 2
2 R 1 1 1 3
3 R 1 1 1 4
4 R 1 1 1 5
10. Write a program to implement the Naïve Bayesian classifier for a sample training data set stored as a .CSV file.

SOURCE CODE:

.CSVfile.

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features, based on counts.

    Probabilities are estimated as relative frequencies in the training
    set; no smoothing is applied, so a feature value never seen together
    with a class gives that class a probability of zero.
    """

    def __init__(self, X, y):
        """Store the training set and count classes and attribute values.

        Args:
            X: sequence of feature rows; every row is indexable and has
                the same number of features as the first row.
            y: sequence of target labels, one per row of X.

        Raises:
            ValueError: if X is empty or X and y differ in length.
        """
        if len(X) == 0:
            raise ValueError("X must contain at least one training row")
        if len(X) != len(y):
            raise ValueError("X and y must have the same number of rows")

        self.X, self.y = X, y
        self.N = len(self.X)  # Length of the training set
        self.dim = len(self.X[0])  # Dimension of the vector of features
        # Distinct values seen for each feature column, in first-seen order
        self.attrs = [[] for _ in range(self.dim)]
        # Output classes with the number of occurrences in the training set
        self.output_dom = {}
        # Every training row stored as [Xi, yi]
        self.data = []

        for row, label in zip(self.X, self.y):
            for j in range(self.dim):
                # First time this value is seen for this attribute: record it
                if row[j] not in self.attrs[j]:
                    self.attrs[j].append(row[j])
            # Count one more occurrence of this output class
            self.output_dom[label] = self.output_dom.get(label, 0) + 1
            # store the row
            self.data.append([row, label])

    def classify(self, entry):
        """Return the most probable class for one feature vector.

        Args:
            entry: indexable with ``self.dim`` feature values.

        Returns:
            The class label y maximising P(y) * prod_i P(X_i = x_i | y).
        """
        solve = None  # Final result
        max_arg = -1  # partial maximum
        for y, class_count in self.output_dom.items():
            prob = class_count / self.N  # P(y)
            for i in range(self.dim):
                # number of rows of class y with Xi = xi
                n = sum(
                    1 for features, label in self.data
                    if label == y and features[i] == entry[i]
                )
                # P(Xi = xi | y): divide by the class count, not by N, so
                # the prior P(y) is not multiplied in once per feature.
                prob *= n / class_count
            # if we have a greater prob for this output than the partial
            # maximum, keep it
            if prob > max_arg:
                max_arg = prob
                solve = y
        return solve

OUTPUT:
Array([‘Iris_virgincia’,’Iris_versicolor’,’Iris_setosa’,’ Iris_virgincia’,’ Iris_setosa’,
Iris_virgincia’, ’ Iris_setosa’, Iris_virgincia’, ’Iris_versicolor’, Iris_virgincia’, Iris_virgincia’,
Iris_virgincia’, ([‘Iris_virgincia’,’Iris_versicolor’,’Iris_setosa’,’ Iris_virgincia’,’ Iris_setosa’,
Iris_virgincia’, ’ Iris_setosa’, Iris_virgincia’, ’Iris_versicolor’, Iris_virgincia’, Iris_virgincia’,
Iris_virgincia’],dtype=’<U15’)
11. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set.

SOURCE CODE:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Font used by matplotlib for sans-serif text.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Data generation
train_num = 200
test_num = 100
# Per plant type: [min, max] range for each of the three features.
config = {
    'Corn': [[150, 190], [40, 70], [2, 4]],
    'Potato': [[30, 60], [7, 10], [1, 2]],
    'grass': [[10, 40], [10, 40], [0, 1]],
}

plants = list(config.keys())
dataset = pd.DataFrame(columns=['height(cm)', 'Leaf length(cm)',
                                'Stemdiameter(cm)', 'type'])


def _draw_features(plant):
    """Draw one feature row for `plant`, unique per column within `dataset`."""
    row = []
    for j, (min_val, max_val) in enumerate(config[plant]):
        v = round(np.random.rand() * (max_val - min_val) + min_val, 2)
        # `in` on a Series tests the index labels, so compare against the
        # column's values to actually detect duplicates.
        while v in dataset[dataset.columns[j]].values:
            v = round(np.random.rand() * (max_val - min_val) + min_val, 2)
        row.append(v)
    return row


index = 0

# Natural data: features drawn from a plant's ranges, labelled correctly
for p in config:
    for i in range(int(train_num / 3 - 3)):
        row = _draw_features(p)
        row.append(p)
        dataset.loc[index] = row
        index += 1

# Wrong data: fill up to train_num rows with deliberately mislabelled samples
for _ in range(train_num - index):
    k = np.random.randint(3)
    p = plants[k]
    row = _draw_features(p)
    # label with the next plant type instead of the one that was sampled
    row.append(plants[(k + 1) % 3])
    dataset.loc[index] = row
    index += 1

# dataset = dataset.infer_objects()
# Shuffle the rows, then renumber them from 0
dataset = dataset.reindex(np.random.permutation(len(dataset)))
dataset.reset_index(drop=True, inplace=True)

# Features and labels are written to separate CSV files
dataset.iloc[:int(train_num), :-1].to_csv('potato_train_data.csv', index=False)
dataset.iloc[:int(train_num):, [-1]].to_csv('potato_train_label.csv', index=False)

def visualize(dataset, labels, features, classes, fig_size=(10, 10), layout=None):
    """Scatter-plot every pair of features, coloured by class.

    Args:
        dataset: 2-D numpy array, one column per entry of `features`.
        labels: 1-D numpy array of integer class codes, one per row; code k
            refers to ``classes[k]``.
        features: feature names, used for titles and axis labels.
        classes: class names, used for the legend.
        fig_size: figure size passed to ``plt.figure``.
        layout: ``[rows, cols]`` subplot grid. Defaults to a single column
            with enough rows for every feature pair.
    """
    plt.figure(figsize=fig_size)
    n_features = len(features)
    if layout is None:
        # A grid of len(features) rows overflows once there are more
        # pairs than features (4+ features), so size it by pair count.
        n_pairs = n_features * (n_features - 1) // 2
        layout = [max(n_features, n_pairs), 1]

    index = 1
    for i in range(n_features):
        for j in range(i + 1, n_features):
            p = plt.subplot(layout[0], layout[1], index)
            plt.subplots_adjust(hspace=0.4)
            p.set_title(features[i] + '&' + features[j])
            p.set_xlabel(features[i])
            p.set_ylabel(features[j])
            for k in range(len(classes)):
                mask = labels == k
                p.scatter(dataset[mask, i], dataset[mask, j], label=classes[k])
            p.legend()
            index += 1
    plt.show()
# Load the generated training data and labels
dataset = pd.read_csv('potato_train_data.csv')
labels = pd.read_csv('potato_train_label.csv')
features = list(dataset.keys())
classes = np.array(['Corn', 'Potato', 'grass'])

# Encode each class name as its position in `classes`. A mapping avoids
# writing integers into the string column in place and a hard-coded count.
class_codes = {name: code for code, name in enumerate(classes.tolist())}
labels = labels['type'].map(class_codes).values
dataset = dataset.values

visualize(dataset, labels, features, classes)
OUTPUT:

12. Write a program to implement k-Means clustering algorithm to cluster the set of
data stored in .CSV file.

SOURCE CODE:

from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import pickle

# read the tab-separated input file
input_data = pd.read_csv("input_data.txt", sep="\t")

# Initialize the KMeans object, specifying the number of desired clusters
kmeans = KMeans(n_clusters=4)

# learn the clustering from the input data
kmeans.fit(input_data.values)

# output the labels for the input data
print(kmeans.labels_)

# Predict the cluster for a given data sample
predicted_class = kmeans.predict([[1, 10, 15]])
print(predicted_class)
OUTPUT:

Unnamed=0 unnamed=1 flow report sorted unnamed U by station

U OBS STATION SHIFT EMPLOYEE 0+ NO.OF


ROWS
0 1 Amberst 2 Hyme 1 4
1 2 Goshen 2 Peth 2 4
2 3 Hadley 2 John 3 3
3 4 Holyorce 1 Woxter 4 0
4 5 Holyorce 1 Barb 5 3
5 6 Orange 2 Card 6 5
6 7 Otis 1 Bey 7 0
7 8 Pledom 2 Mike 8 4
8 9 Standard 1 Sam 9 1
9 10 Suttled 2 Lisa 10 1
11 NAN NAN NAN NAN 11 NAN
13. Design a Python script using the Turtle graphics library to construct a turtle bar chart representing the
grades obtained by N students read from a file categorising them into distinction, first class, second class,
third class and failed.

SOURCE CODE:
import turtle
def drawBar(t, height, width=40):
    """Get turtle t to draw one filled bar, labelled with its height.

    The turtle starts at the bar's bottom-left corner and finishes at the
    bottom-right corner with its original heading, ready for the next bar.

    Args:
        t: turtle-like object providing begin_fill, end_fill, left, right,
            forward and write.
        height: bar height, also written as the label at the top.
        width: bar width; defaults to 40.
    """
    t.begin_fill()  # start filling this shape
    t.left(90)
    t.forward(height)
    t.write(str(height))
    t.right(90)
    t.forward(width)
    t.right(90)
    t.forward(height)
    t.left(90)
    t.end_fill()  # stop filling this shape
xs = [48, 117, 200, 240, 160, 260, 220]  # here is the data
maxheight = max(xs)
numbars = len(xs)
border = 10

# Set up the window so that all bars plus a border fit (bars are 40 wide)
wn = turtle.Screen()
wn.setworldcoordinates(0 - border, 0 - border,
                       40 * numbars + border, maxheight + border)
wn.bgcolor("lightgreen")

# create tess and set some attributes
tess = turtle.Turtle()
tess.color("blue")
tess.fillcolor("red")
tess.pensize(3)

# draw one bar per data value, left to right
for a in xs:
    drawBar(tess, a)

wn.exitonclick()

OUTPUT:
14. To implement the program to plot a histogram of the given data.

10-15 15-20 20-25 25-30 30-35

5 6 9 8 2

SOURCE CODE:
import matplotlib.pyplot as plt

# Define the data: class intervals and the frequency of each interval
labels = ['10-15', '15-20', '20-25', '25-30', '30-35']
data = [5, 6, 9, 8, 2]

# Create the histogram; the data is already binned, so one bar per interval
plt.bar(labels, data)

# Add labels and title
plt.xlabel('Range')
plt.ylabel('Frequency')
plt.title('Histogram of Data')

# Show the plot
plt.show()

OUTPUT:
15. To implement the program to draw line plot and bar chart of the given data.

Elapsed time (s) 0 1 2 3 4 5 6

Speed(m/s) 0 3 7 12 20 30 45.6

SOURCE CODE:
import matplotlib.pyplot as plt

# Define the data
time = [0, 1, 2, 3, 4, 5, 6]
speed = [0, 3, 7, 12, 20, 30, 45.6]

# Create the line plot
plt.plot(time, speed)

# Add labels and title to the line plot
plt.xlabel('Elapsed time (s)')
plt.ylabel('Speed (m/s)')
plt.title('Line Plot')

# Show the line plot
plt.show()

# Create the bar chart
plt.bar(time, speed)

# Add labels and title to the bar chart
plt.xlabel('Elapsed time (s)')
plt.ylabel('Speed (m/s)')
plt.title('Bar Chart')

# Show the bar chart
plt.show()

OUTPUT:

You might also like