0% found this document useful (0 votes)
12 views5 pages

Naive

The document contains a Python implementation of a Naive Bayes classifier that loads a dataset from a CSV file, splits it into training and testing sets, and calculates class probabilities to make predictions. It includes functions for calculating mean, standard deviation, and accuracy of the predictions. The main function orchestrates the loading of the dataset, training, and evaluation of the classifier.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views5 pages

Naive

The document contains a Python implementation of a Naive Bayes classifier that loads a dataset from a CSV file, splits it into training and testing sets, and calculates class probabilities to make predictions. It includes functions for calculating mean, standard deviation, and accuracy of the predictions. The main function orchestrates the loading of the dataset, training, and evaluation of the classifier.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 5

import csv

import random

import math

def loadcsv(filename):
    """Load a numeric dataset from a CSV file.

    The first row of the file is treated as the header. Every value in the
    remaining rows is converted to ``float``. Completely blank lines are
    skipped so that a trailing newline does not produce an empty row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(dataset, header)`` tuple: ``dataset`` is a list of rows (each a
        list of floats) and ``header`` is the list of column names.

    Raises:
        IndexError: If the file contains no rows at all.
        ValueError: If a data cell cannot be converted to ``float``.
    """
    # The context manager closes the file even if parsing fails;
    # newline="" is what the csv module expects for files it reads.
    with open(filename, "r", newline="") as handle:
        rows = list(csv.reader(handle))

    header = rows[0]
    # csv.reader yields [] for a blank line; drop those rows.
    dataset = [[float(value) for value in row] for row in rows[1:] if row]
    return dataset, header

def splitdataset(dataset, splitratio):
    """Randomly partition *dataset* into a training part and a testing part.

    ``int(len(dataset) * splitratio)`` rows are drawn one at a time, without
    replacement, from a shallow copy of *dataset*; whatever is left in the
    copy becomes the testing part. The list passed in is not modified.

    Returns:
        A two-element list ``[trainset, testset]``.
    """
    wanted = int(len(dataset) * splitratio)
    remaining = list(dataset)
    chosen = []
    for _ in range(wanted):
        position = random.randrange(len(remaining))
        chosen.append(remaining.pop(position))
    return [chosen, remaining]

def separetebyclass(dataset):
    """Group the rows of *dataset* by their last element (the class value).

    Returns:
        A dict mapping each distinct class value to the list of rows that
        end with it, with rows kept in their original order.
    """
    groups = {}
    for row in dataset:
        # setdefault creates the bucket the first time a class value is seen.
        groups.setdefault(row[-1], []).append(row)
    return groups

def mean(numbers):
    """Return the arithmetic mean of *numbers* as a float.

    Raises:
        ZeroDivisionError: If *numbers* is empty.
    """
    total = sum(numbers)
    count = float(len(numbers))
    return total / count

def stdev(numbers):
    """Return the sample standard deviation (n - 1 denominator) of *numbers*.

    A single value has no spread to estimate, so 0.0 is returned for it
    instead of dividing by zero. Without this, summarizing a class that has
    only one training row would raise ZeroDivisionError.

    Args:
        numbers: A sized sequence of numeric values.

    Returns:
        The sample standard deviation as a float, or 0.0 when *numbers*
        holds exactly one value.

    Raises:
        ZeroDivisionError: If *numbers* is empty.
    """
    count = len(numbers)
    # Arithmetic mean; an empty input still raises ZeroDivisionError here.
    avg = sum(numbers) / float(count)
    if count < 2:
        return 0.0
    variance = sum((x - avg) ** 2 for x in numbers) / float(count - 1)
    return math.sqrt(variance)

def summarize(dataset):
    """Compute a (mean, standard deviation) pair for every attribute column.

    The rows of *dataset* are transposed into columns, each column is
    summarized, and the summary of the last column (the class value) is
    discarded.

    Returns:
        A list of ``(mean, stdev)`` tuples, one per non-class column.
    """
    stats = []
    for column in zip(*dataset):
        stats.append((mean(column), stdev(column)))
    # The final column holds the class label, not a feature.
    del stats[-1]
    return stats

def summarizebyclass(dataset):
    """Build per-class attribute summaries for *dataset*.

    Rows are first grouped by class value with ``separetebyclass``; each
    group is then passed to ``summarize``.

    Returns:
        A dict mapping each class value to the result of ``summarize`` for
        that class's rows.
    """
    grouped = separetebyclass(dataset)
    return {label: summarize(rows) for label, rows in grouped.items()}

def calculateprobability(x, mean, std):
    """Evaluate the Gaussian probability density at *x*.

    Args:
        x: The value to evaluate.
        mean: Mean of the distribution.
        std: Standard deviation of the distribution.

    Returns:
        The normal density at *x*, or 1.0 when *std* is zero (the density is
        undefined there, so a neutral factor is returned instead).
    """
    if std != 0:
        squared_distance = math.pow(x - mean, 2)
        spread = 2 * math.pow(std, 2)
        exponent = math.exp(-(squared_distance / spread))
        scale = 1 / (math.sqrt(2 * math.pi) * std)
        return scale * exponent
    return 1.0  # default when the standard deviation is zero

def calculateclassprobabilities(summaries, inputvector):
    """Score *inputvector* against every class in *summaries*.

    For each class, the score starts at 1 and is multiplied by
    ``calculateprobability`` of each attribute value, using that class's
    (mean, standard deviation) pair for the attribute at the same index.

    Args:
        summaries: Dict mapping class value to a list of (mean, std) pairs.
        inputvector: Sequence indexed by attribute position.

    Returns:
        A dict mapping each class value to its product of densities.
    """
    scores = {}
    for label, attributestats in summaries.items():
        likelihood = 1
        for position, (mu, sigma) in enumerate(attributestats):
            likelihood *= calculateprobability(inputvector[position], mu, sigma)
        scores[label] = likelihood
    return scores

def predict(summaries, inputvector):
    """Return the class value with the highest score for *inputvector*.

    Scores come from ``calculateclassprobabilities``. When several classes
    tie, the one encountered first wins. ``None`` is returned if there are
    no classes to choose from.
    """
    scores = calculateclassprobabilities(summaries, inputvector)
    winner = None
    top = -1
    for label in scores:
        candidate = scores[label]
        # Keep the current winner unless the candidate is strictly better.
        if winner is not None and not candidate > top:
            continue
        winner, top = label, candidate
    return winner

def getpredictions(summaries, testset):
    """Predict a class for every row of *testset*.

    Returns:
        A list with the result of ``predict`` for each row, in row order.
    """
    return [predict(summaries, row) for row in testset]

def getaccuracy(testset, predictions):
    """Return the percentage of rows whose class was predicted correctly.

    A row counts as correct when its last element equals the prediction at
    the same index.

    Args:
        testset: Sequence of rows whose last element is the true class.
        predictions: Sequence of predicted classes, indexed like *testset*.

    Returns:
        Accuracy as a float in the range 0.0 to 100.0. An empty *testset*
        yields 0.0 rather than raising ZeroDivisionError (this can happen
        when the split ratio leaves no rows for testing).

    Raises:
        IndexError: If *predictions* is shorter than *testset*.
    """
    total = len(testset)
    if total == 0:
        return 0.0
    hits = sum(
        1 for index, row in enumerate(testset) if row[-1] == predictions[index]
    )
    return (hits / float(total)) * 100.0

def main(filename='/content/naivedata.csv', splitratio=0.67):
    """Load a dataset, train the classifier, and print its accuracy.

    Args:
        filename: Path of the CSV file to load. Defaults to the path the
            script originally hard-coded.
        splitratio: Fraction of rows used for training. Defaults to 0.67.

    Prints the size of the train/test split followed by the accuracy on the
    test rows.
    """
    # The header row is returned by loadcsv but not needed here.
    dataset, _ = loadcsv(filename)
    trainset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainset), len(testset)))

    summaries = summarizebyclass(trainset)
    predictions = getpredictions(summaries, testset)
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier: {0}%'.format(accuracy))


# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()

DATASET:

Example Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabeticPedigreeFunction
1 6 148 72 35 0 33.6 0.62
2 1 85 66 29 0 26.6 0.35
3 8 183 64 0 0 23.6 0.67
4 1 89 66 23 94 28.1 0.16
5 0 137 40 35 168 43.1 2.28
6 5 116 74 0 0 25.6 0.20
7 3 78 50 32 88 31 0.24
8 10 115 0 0 0 35.3 0.13
9 2 197 70 543 543 30.5 0.15
10 8 125 96 0 0 0 0.23

You might also like