0% found this document useful (0 votes)
14 views

Copy of ML - Assignment

Uploaded by

Decoy Mail
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
14 views

Copy of ML - Assignment

Uploaded by

Decoy Mail
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

import numpy as np

import pandas as pd

# Widen pandas' console display so that all columns of a frame are printed
# on a single line, which is easier to read and understand.
pd.options.display.width = 1000
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

from google.colab import drive


# Mount Google Drive at /content/drive; the CSV read below uses a path under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

# Load the Pokemon dataset from the mounted Google Drive folder.
# The path must be a single string literal (it had been split across lines).
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/pokemon_data.csv')

# Work on a small sample: the first 10 rows whose 'Legendary' flag is True.
sample = df[df['Legendary']==True].head(10)
print(sample)

# Show the 'Attack' column of the sample; it is used below to split the rows
# into a positive (Attack > 100) and a negative (Attack <= 100) class.
sample['Attack']

# Name Type 1 Type 2 HP Attack


Defense Sp. Atk Sp. Def Speed Generation Legendary
156 144 Articuno Ice Flying 90 85
100 95 125 85 1 True
157 145 Zapdos Electric Flying 90 90
85 125 90 100 1 True
158 146 Moltres Fire Flying 90 100
90 125 85 90 1 True
162 150 Mewtwo Psychic NaN 106 110
90 154 90 130 1 True
163 150 MewtwoMega Mewtwo X Psychic Fighting 106 190
100 154 100 130 1 True
164 150 MewtwoMega Mewtwo Y Psychic NaN 106 150
70 194 120 140 1 True
262 243 Raikou Electric NaN 90 85
75 115 100 115 2 True
263 244 Entei Fire NaN 115 115
85 90 75 100 2 True
264 245 Suicune Water NaN 100 75
115 90 115 85 2 True
269 249 Lugia Psychic Flying 106 90
130 90 154 110 2 True

156 85
157 90
158 100
162 110
163 190
164 150
262 85
263 115
264 75
269 90
Name: Attack, dtype: int64

# Positive class: rows of the sample whose Attack is strictly above 100.
p = sample.loc[sample['Attack'].gt(100)]
p

{"summary":"{\n \"name\": \"p\",\n \"rows\": 4,\n \"fields\": [\n


{\n \"column\": \"#\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 47,\n \"min\": 150,\n
\"max\": 244,\n \"num_unique_values\": 2,\n \"samples\":
[\n 244,\n 150\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Name\",\n \"properties\": {\n
\"dtype\": \"string\",\n \"num_unique_values\": 4,\n
\"samples\": [\n \"MewtwoMega Mewtwo X\",\n
\"Entei\"\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"Type 1\",\n \"properties\": {\n \"dtype\": \"string\",\n
\"num_unique_values\": 2,\n \"samples\": [\n
\"Fire\",\n \"Psychic\"\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Type 2\",\n \"properties\":
{\n \"dtype\": \"category\",\n \"num_unique_values\":
1,\n \"samples\": [\n \"Fighting\"\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"HP\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 4,\n \"min\": 106,\n
\"max\": 115,\n \"num_unique_values\": 2,\n \"samples\":
[\n 115\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"Attack\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 37,\n \"min\": 110,\n \"max\": 190,\n
\"num_unique_values\": 4,\n \"samples\": [\n 190\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Defense\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
12,\n \"min\": 70,\n \"max\": 100,\n
\"num_unique_values\": 4,\n \"samples\": [\n 100\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Sp. Atk\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
43,\n \"min\": 90,\n \"max\": 194,\n
\"num_unique_values\": 3,\n \"samples\": [\n 154\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Sp. Def\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
18,\n \"min\": 75,\n \"max\": 120,\n
\"num_unique_values\": 4,\n \"samples\": [\n 100\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Speed\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 17,\n
\"min\": 100,\n \"max\": 140,\n \"num_unique_values\":
3,\n \"samples\": [\n 130\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Generation\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0,\n \"min\": 1,\n \"max\": 2,\n
\"num_unique_values\": 2,\n \"samples\": [\n 2\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Legendary\",\n
\"properties\": {\n \"dtype\": \"boolean\",\n
\"num_unique_values\": 1,\n \"samples\": [\n true\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n }\n ]\n}","type":"dataframe","variable_name":"p"}

# Negative class: rows of the sample whose Attack is at most 100.
# Out of the 10 sampled rows, 6 are negative and 4 are positive.
n = sample.loc[sample['Attack'].le(100)]
n

{"summary":"{\n \"name\": \"n\",\n \"rows\": 6,\n \"fields\": [\n


{\n \"column\": \"#\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 55,\n \"min\": 144,\n
\"max\": 249,\n \"num_unique_values\": 6,\n \"samples\":
[\n 144,\n 145,\n 249\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Name\",\n \"properties\": {\n
\"dtype\": \"string\",\n \"num_unique_values\": 6,\n
\"samples\": [\n \"Articuno\",\n \"Zapdos\",\n
\"Lugia\"\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"Type 1\",\n \"properties\": {\n \"dtype\": \"string\",\n
\"num_unique_values\": 5,\n \"samples\": [\n
\"Electric\",\n \"Psychic\",\n \"Fire\"\n ],\
n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Type 2\",\n \"properties\":
{\n \"dtype\": \"category\",\n \"num_unique_values\":
1,\n \"samples\": [\n \"Flying\"\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"HP\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 6,\n \"min\": 90,\n
\"max\": 106,\n \"num_unique_values\": 3,\n \"samples\":
[\n 90\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"Attack\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 8,\n \"min\": 75,\n \"max\": 100,\n
\"num_unique_values\": 4,\n \"samples\": [\n 90\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Defense\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
20,\n \"min\": 75,\n \"max\": 130,\n
\"num_unique_values\": 6,\n \"samples\": [\n 100\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Sp. Atk\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
16,\n \"min\": 90,\n \"max\": 125,\n
\"num_unique_values\": 4,\n \"samples\": [\n 125\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Sp. Def\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
25,\n \"min\": 85,\n \"max\": 154,\n
\"num_unique_values\": 6,\n \"samples\": [\n 125\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Speed\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 12,\n
\"min\": 85,\n \"max\": 115,\n \"num_unique_values\":
5,\n \"samples\": [\n 100\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Generation\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0,\n \"min\": 1,\n \"max\": 2,\n
\"num_unique_values\": 2,\n \"samples\": [\n 2\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"Legendary\",\n
\"properties\": {\n \"dtype\": \"boolean\",\n
\"num_unique_values\": 1,\n \"samples\": [\n true\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n }\n ]\n}","type":"dataframe","variable_name":"n"}

# Let's calculate the entropy of the root node, which holds
# p = 4 positive and n = 6 negative rows (total = 10).
#
# Entropy of a node:
#     -(p/total)*log2(p/total) - (n/total)*log2(n/total)
enR = -(4/10)*np.log2(4/10)-(6/10)*np.log2(6/10)
print(enR)

0.9709505944546686

# The root node holds 4 positive and 6 negative rows.

# Now let's build the left and right child nodes by splitting on the
# 'Generation' column. The left node takes Generation == 1.
# LN is a (negative rows, positive rows) pair of DataFrames.
LN = n[n['Generation']==1], p[p['Generation']==1]
print(LN)
# Left node: 6 rows in total, 3 negative and 3 positive.

( # Name Type 1 Type 2 HP Attack Defense Sp. Atk


Sp. Def Speed Generation Legendary
156 144 Articuno Ice Flying 90 85 100 95
125 85 1 True
157 145 Zapdos Electric Flying 90 90 85 125
90 100 1 True
158 146 Moltres Fire Flying 90 100 90 125
85 90 1 True, # Name Type
1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation
Legendary
162 150 Mewtwo Psychic NaN 106 110 90
154 90 130 1 True
163 150 MewtwoMega Mewtwo X Psychic Fighting 106 190 100
154 100 130 1 True
164 150 MewtwoMega Mewtwo Y Psychic NaN 106 150 70
194 120 140 1 True)

# Right child node: the Generation == 2 rows, kept as a
# (negative rows, positive rows) pair of DataFrames.
RN = (n.loc[n['Generation'].eq(2)], p.loc[p['Generation'].eq(2)])
print(RN)
# Right node: 4 rows in total, 3 negative and 1 positive.

( # Name Type 1 Type 2 HP Attack Defense Sp. Atk


Sp. Def Speed Generation Legendary
262 243 Raikou Electric NaN 90 85 75 115
100 115 2 True
264 245 Suicune Water NaN 100 75 115 90
115 85 2 True
269 249 Lugia Psychic Flying 106 90 130 90
154 110 2 True, # Name Type 1 Type 2 HP
Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
263 244 Entei Fire NaN 115 115 85 90 75
100 2 True)

# Entropy of the left node (LN): it holds p = 3 positive and n = 3 negative
# rows (total = 6), out of the full sample of 4 positive and 6 negative rows.
entropyLN = -(3/6)*np.log2(3/6)-(3/6)*np.log2(3/6)
print(entropyLN)

1.0

# Entropy of the right node: 1 positive and 3 negative rows, 4 in total.
pos_frac = 1/4
neg_frac = 3/4
entropyRN = -pos_frac * np.log2(pos_frac) - neg_frac * np.log2(neg_frac)
print(entropyRN)

0.8112781244591328
# Information gain (IG) of a split is the parent's entropy minus the
# size-weighted average entropy of its child nodes:
#     IG = E(root) - sum_k (rows_k / rows_root) * E(child_k)
# Here E(root) ~= 0.9710, E(left) = 1.0 with 6 of the 10 rows, and
# E(right) ~= 0.8113 with 4 of the 10 rows.

# Per-child entropy reduction relative to the root. On their own these are
# NOT information gains (the left one is negative); they are kept so the
# earlier per-node figures remain available.
print('for left node')
IG_left = enR - entropyLN
print(IG_left)

print('\n\t')

print('for right node')
IG_right = enR - entropyRN
print(IG_right)

print('\n\t')

# Actual information gain of splitting the root on 'Generation':
# weight each child's entropy by its share of the rows (6/10 and 4/10).
# Expected value is about 0.0464, which is non-negative as IG must be.
print('for the Generation split')
IG_generation = enR - ((6/10)*entropyLN + (4/10)*entropyRN)
print(IG_generation)

for left node


-0.02904940554533142

for right node


0.15967246999553575

# Wrap the sample in a DataFrame and print it for reference.
K = pd.DataFrame(sample)
print(K)

# Correlation between the 'Attack' and 'HP' columns, using Series.corr
# with its default method.
attack_column = K['Attack']
hp_column = K['HP']
CorrelationSample = attack_column.corr(hp_column)

# Emit the spacer line and the caption exactly as two separate prints would.
print('\n\t', 'Here is the correlation b/w Attack & HP\n', sep='\n')
print(CorrelationSample)

# Name Type 1 Type 2 HP Attack


Defense Sp. Atk Sp. Def Speed Generation Legendary
156 144 Articuno Ice Flying 90 85
100 95 125 85 1 True
157 145 Zapdos Electric Flying 90 90
85 125 90 100 1 True
158 146 Moltres Fire Flying 90 100
90 125 85 90 1 True
162 150 Mewtwo Psychic NaN 106 110
90 154 90 130 1 True
163 150 MewtwoMega Mewtwo X Psychic Fighting 106 190
100 154 100 130 1 True
164 150 MewtwoMega Mewtwo Y Psychic NaN 106 150
70 194 120 140 1 True
262 243 Raikou Electric NaN 90 85
75 115 100 115 2 True
263 244 Entei Fire NaN 115 115
85 90 75 100 2 True
264 245 Suicune Water NaN 100 75
115 90 115 85 2 True
269 249 Lugia Psychic Flying 106 90
130 90 154 110 2 True

Here is the correlation b/w Attack & HP

0.4980818208834152

You might also like