Copy of ML - Assignment
Copy of ML - Assignment
import numpy as np
import pandas as pd
# Widen pandas' console display so that all columns of a wide frame are
# printed on one line, which is easier to see and understand.
pd.options.display.width = 1000
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

# Load the Pokemon dataset (path presumably points at a mounted Google Drive
# folder -- confirm it exists in the runtime before running).
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/pokemon_data.csv')

# Working sample: the first 10 rows whose 'Legendary' flag is True.
sample = df[df['Legendary']==True].head(10)
print(sample)
# Bare expression: in a notebook this displays the 'Attack' column of the sample.
sample['Attack']
156 85
157 90
158 100
162 110
163 190
164 150
262 85
263 115
264 75
269 90
Name: Attack, dtype: int64
# Positive class: sampled rows whose Attack is greater than 100.
p = sample.loc[sample['Attack'].gt(100)]
p
# Negative class: sampled rows whose Attack is 100 or less.
n = sample.loc[sample['Attack'].le(100)]
n
# Entropy of the root node: 10 rows in total, of which 4 are positive and
# 6 are negative. Each class contributes -share * log2(share).
enR = -sum(share * np.log2(share) for share in (4 / 10, 6 / 10))
print(enR)
0.9709505944546686
# Now let's build the left node and the right node by splitting on the
# 'Generation' column. Each node is a (negative rows, positive rows) tuple.
LN = n[n['Generation']==1], p[p['Generation']==1]
print(LN)
# Left node: 6 rows in total, 3 negative and 3 positive.
RN = n[n['Generation']==2], p[p['Generation']==2]
print(RN)
# Right node: 4 rows in total, 3 negative and 1 positive.
# Entropy of the left node: 3 positive and 3 negative rows out of 6.
entropyLN = -sum(share * np.log2(share) for share in (3 / 6, 3 / 6))
print(entropyLN)
1.0
# Entropy of the right node: p = 1, n = 3, total = 4.
entropyRN = -sum(share * np.log2(share) for share in (1 / 4, 3 / 4))
print(entropyRN)
0.8112781244591328
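# Now let us calculate the Information Gain: IG = E(root) - E(root | split),
# where E(root | split) is the size-weighted average of the child entropies:
#     IG = E(root) - [(6/10) * E(left) + (4/10) * E(right)]
# Since the left node entropy is 1.0, the right node entropy is ~= 0.8113 and
# the root node entropy is ~= 0.9710, this gives
#     IG ~= 0.9710 - (0.6 * 1.0 + 0.4 * 0.8113) ~= 0.0464
# NOTE(review): the IG value is only stated here; no code below computes it.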
# Spacer line, then show the sample wrapped in a DataFrame named K.
print('\n\t')
K = pd.DataFrame(sample)
print(K)
# Correlation between the Attack and HP columns of the sample
# (Pearson, which is the default method of Series.corr).
CorrelationSample = K['Attack'].corr(K['HP'], method='pearson')
# One print call emitting the spacer, the caption and the value on separate lines.
print('\n\t', 'Here is the correlation b/w Attack & HP\n', CorrelationSample, sep='\n')
0.4980818208834152