0% found this document useful (0 votes)
43 views3 pages

#Creating A Dataset #Creating Target Variable: Import As Import As

1. The document creates a labelled training dataset of 40 individuals (gender, height, weight, and foot size) and a separate labelled test set of 8 individuals. 2. It then calculates summary statistics, namely the mean and variance of each feature, separated by gender. 3. A Gaussian probability density function is defined to calculate the likelihood of a feature value given the mean and variance of that feature for a particular gender. 4. This function is used to compute, for each individual in the 8-row test set, an unnormalised posterior score for the male and the female class, and to predict the more likely gender. The accuracy of these predictions is reported.

Uploaded by

badeni
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
43 views3 pages

#Creating A Dataset #Creating Target Variable: Import As Import As

1. The document creates a labelled training dataset of 40 individuals (gender, height, weight, and foot size) and a separate labelled test set of 8 individuals. 2. It then calculates summary statistics, namely the mean and variance of each feature, separated by gender. 3. A Gaussian probability density function is defined to calculate the likelihood of a feature value given the mean and variance of that feature for a particular gender. 4. This function is used to compute, for each individual in the 8-row test set, an unnormalised posterior score for the male and the female class, and to predict the more likely gender. The accuracy of these predictions is reported.

Uploaded by

badeni
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

import numpy as np

import pandas as pd
# Labelled evaluation set: 8 people (4 male, 4 female) that the classifier
# further down is scored against.
person = pd.DataFrame({
    'Gender': ['male', 'male', 'male', 'male',
               'female', 'female', 'female', 'female'],
    'Height': [6, 5.92, 5.58, 5.92, 5, 5.5, 5.42, 5.75],
    'Weight': [180, 190, 170, 165, 100, 150, 130, 150],
    'Foot_Size': [12, 11, 12, 10, 6, 8, 7, 9],
})

# Training set: 40 people, the first 20 rows male and the last 20 female.
# The per-class means and variances are estimated from this frame.
data = pd.DataFrame({
    'Gender': ['male'] * 20 + ['female'] * 20,
    'Height': [
        5.82, 5.77, 5.87, 5.99, 6.07, 6.13, 6.06, 5.99, 6.21, 5.81,
        5.57, 5.15, 6.02, 5.93, 5.91, 5.63, 5.86, 5.93, 5.59, 5.77,
        5.60, 5.40, 5, 5.75, 5.7, 5.2, 5.1, 5.73, 5.74, 5,
        5.8, 5.77, 5.82, 5.60, 5.40, 5, 5.75, 5.43, 5.12, 5.55,
    ],
    'Weight': [
        172, 171, 180, 163, 169, 181, 185, 168, 166, 164,
        175, 172, 167, 140, 174, 183, 133, 111, 162, 177,
        154, 134, 137, 150, 155, 136, 132, 140, 154, 146,
        141, 145, 142, 158, 155, 155, 152, 150, 139, 160,
    ],
    'Foot_Size': [
        10, 11, 12, 11, 12, 11, 12, 13, 13, 10,
        11, 13, 12, 12, 6, 7, 12, 13, 8, 9,
        7, 6, 5, 9, 5, 6, 5, 7, 6, 5,
        5, 9, 5, 7, 6, 6, 9, 12, 9, 10,
    ],
})

# Both frames are built before anything is printed: the original printed
# `data` before it had been assigned, which raised NameError.
print("\n Dataset")
print("")
print(data)

print('\n Test Instance: ')
print(" ")
print(person)
# Class priors P(male) and P(female), estimated as the relative frequency of
# each label in the training frame `data`.
gender_labels = data['Gender']
n_male = (gender_labels == 'male').sum()
n_female = (gender_labels == 'female').sum()

# Total number of labelled rows (count() ignores missing labels).
total_ppl = gender_labels.count()

# Prior = rows of that class / all rows.
p_male = n_male / total_ppl
p_female = n_female / total_ppl
# Per-class summary statistics of every feature column in `data`.
# Grouping once and reusing the grouper avoids a second pass over the frame.
by_gender = data.groupby('Gender')

# Mean of each feature within each gender, i.e. sum(x) / n over that class.
data_means = by_gender.mean()
print('\n Dataset Mean')
print(" ")
print(data_means)

# Variance of each feature within each gender.
# NOTE: pandas' var() defaults to ddof=1, so this is the unbiased sample
# variance sum((x - mean) ** 2) / (n - 1), not the population form (/ n).
data_variance = by_gender.var()
print(data_variance)
# Scalar per-feature means for the 'male' class, read from the grouped
# summary tables (indexed by gender label, one column per feature).
male_height_mean = data_means.loc['male', 'Height']
male_weight_mean = data_means.loc['male', 'Weight']
male_footsize_mean = data_means.loc['male', 'Foot_Size']
print("male_height_mean: ", male_height_mean)
print("male_weight_mean: ", male_weight_mean)
print("male_footsize_mean: ", male_footsize_mean)

# Scalar per-feature variances for the 'male' class.
male_height_variance = data_variance.loc['male', 'Height']
male_weight_variance = data_variance.loc['male', 'Weight']
male_footsize_variance = data_variance.loc['male', 'Foot_Size']
print("male_height_variance: ",male_height_variance)
print("male_weight_variance: ",male_weight_variance)
print("male_footsize_variance: ",male_footsize_variance)
# Scalar per-feature means for the 'female' class, read from the grouped
# summary tables (indexed by gender label, one column per feature).
female_height_mean = data_means.loc['female', 'Height']
female_weight_mean = data_means.loc['female', 'Weight']
female_footsize_mean = data_means.loc['female', 'Foot_Size']
print("female_height_mean: ", female_height_mean)
print("female_weight_mean: ", female_weight_mean)
print("female_footsize_mean: ", female_footsize_mean)

# Scalar per-feature variances for the 'female' class.
female_height_variance = data_variance.loc['female', 'Height']
female_weight_variance = data_variance.loc['female', 'Weight']
female_footsize_variance = data_variance.loc['female', 'Foot_Size']
print("female_height_variance: ",female_height_variance)
print("female_weight_variance: ",female_weight_variance)
print("female_footsize_variance: ",female_footsize_variance)
def p_x_given_y(x, mean_y, variance_y):
    """Return the Gaussian likelihood p(x | y) of a feature value.

    Evaluates the normal probability density with the given class mean and
    variance at ``x``:

        1 / sqrt(2 * pi * variance_y) * exp(-(x - mean_y) ** 2 / (2 * variance_y))

    Args:
        x: Observed feature value (scalar or NumPy array).
        mean_y: Mean of the feature within class ``y``.
        variance_y: Variance of the feature within class ``y``.

    Returns:
        The density at ``x``. This is a density, not a probability, so it can
        exceed 1 when the variance is small.
    """
    normaliser = 1 / np.sqrt(2 * np.pi * variance_y)
    exponent = -((x - mean_y) ** 2) / (2 * variance_y)
    return normaliser * np.exp(exponent)
# Classify every row of `person` with Gaussian naive Bayes and tally how many
# predictions agree with the row's recorded gender.
count = 0
samples = zip(
    person['Height'], person['Weight'], person['Foot_Size'], person['Gender']
)
for i, (height, weight, foot_size, gender) in enumerate(samples):
    # Numerator of the posterior for the 'male' class: the class prior times
    # the per-feature likelihoods (features treated as independent). The
    # shared evidence term is omitted because it does not affect which class
    # scores higher.
    print('\n Probability male: ')
    prob_male = (
        p_male
        * p_x_given_y(height, male_height_mean, male_height_variance)
        * p_x_given_y(weight, male_weight_mean, male_weight_variance)
        * p_x_given_y(foot_size, male_footsize_mean, male_footsize_variance)
    )
    print(prob_male)

    # Same numerator for the 'female' class.
    print('\n Probability female: ')
    prob_female = (
        p_female
        * p_x_given_y(height, female_height_mean, female_height_variance)
        * p_x_given_y(weight, female_weight_mean, female_weight_variance)
        * p_x_given_y(foot_size, female_footsize_mean, female_footsize_variance)
    )
    print(prob_female)

    # Predict the class with the larger score; ties fall to 'female'.
    if prob_male > prob_female:
        print(f"target label: male for {i} ")
        if gender == 'male':
            count += 1
    else:
        print(f"target label: Female for {i} ")
        if gender == 'female':
            count += 1

# Accuracy as a percentage of the rows actually evaluated. The original
# divided by a hard-coded 8, which is only right while `person` has 8 rows.
n_samples = len(person)
accuracy = (count / n_samples) * 100 if n_samples else 0.0
print(f"Accuracy {accuracy}")

You might also like