A5 A.ipynb - Colaboratory
A5 A.ipynb - Colaboratory
CSV DISPLAY
*Do visit: https://www.mathsisfun.com/data/standard-normal-distribution.html*
for a better understanding.
Mounted at /content/drive
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
Missing Values:
Wine 0
Alcohol 0
Malic.acid 0
dtype: int64
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Min-max normalization: fit the scaler on the two selected columns and
# print the transformed values it returns.
columns_to_scale = ['Alcohol', 'Malic.acid']
scaling = MinMaxScaler()
scaled_data = scaling.fit_transform(df[columns_to_scale])
print(scaled_data)
[0.53557312 0.52941176]
[0.39920949 0.5026738 ]
[0.71541502 0.48128342]
[0.58498024 0.6631016 ]
[0.51976285 0.5026738 ]
[0.36363636 0.71122995]
[0.76284585 0.80213904]
[0.45256917 0.51336898]
[0.32608696 0.49197861]
[0.34980237 0.59893048]
[0.97035573 0.5828877 ]
[0.62648221 0.59893048]
[0.69960474 0.48128342]
[0.36561265 0.54010695]
[0.66403162 0.73796791]]
# Preview the first rows of the DataFrame (head() called with its default row count).
df.head()
Z-SCORE STANDARDIZATION
# Z-score standardization of the same two columns used for min-max scaling.
# StandardScaler is imported in this cell because the earlier cells only
# import MinMaxScaler; without this line the cell fails with a NameError.
from sklearn.preprocessing import StandardScaler

scale = StandardScaler()
# fit_transform learns the per-column statistics and returns the rescaled values.
scale_data = scale.fit_transform(df[['Alcohol', 'Malic.acid']])
print(scale_data)
[[-0.50162433 -0.8244853 ]
[ 0.01802001 1.10768971]
[-0.34931478 0.48793547]
[ 0.22408586 1.83681236]
[-0.5195431 0.3056548 ]
[-0.42098986 0.3056548 ]
[-0.17012708 0.88895292]
[-0.62705572 -0.7151169 ]
[-0.88687789 -0.35055558]
[-0.1611677 -0.24118718]
[-0.77040588 -0.16827492]
[-0.54642125 0.15983027]
[-0.54642125 0.08691801]
[-0.42098986 0.05046188]
[-0.47474617 1.21705811]
[-0.37619294 1.28997038]
[-0.68977141 0.92540905]
[-0.67185264 0.4150232 ]
[ 0.6810145 0.70667226]
[-0.6360151 -0.31409944]
[ 1.30817146 1.03477745]
[-0.42994925 -0.02245039]
[-0.66289326 0.56084773]
[-0.47474617 0.88895292]
[-0.25972093 3.11277699]
[-0.51058371 0.92540905]
[-0.55538064 -0.8244853 ]
[-0.39411171 1.58161943]
[-0.59121818 -0.56929237]
[-0.75248711 1.21705811]
[-0.60913695 -0.02245039]
[-0.4568274 -0.02245039]
[-0.72560895 1.21705811]
[-0.48370556 1.03477745]
[-0.47474617 0.15983027]
[-0.62705572 1.72744396]
[-0.61809633 0.67021613]
[-0.75248711 -0.97030983]
[ 1.47839977 0.5243916 ]
[-0.56434002 -0.20473105]
[ 1.344009 -0.89739756]
[-0.40307109 0.81604066]
[ 1.46944039 -0.27764331]
[-0.51058371 -0.97030983]
[ 1.5231967 0.26919867]
[ 1.12002437 -0.31409944]
[-0.59121818 -0.89739756]
[-0.28659909 0.12337414]
[-0.54642125 -0.35055558]
[-0.54642125 -1.18904662]
[-0.61809633 0.85249679]
[-0.52850248 0.19628641]
[-0.39411171 1.14414585]
[-0.60017756 -0.42346784]
[-0.54642125 0.34211094]
[-0.57329941 -0.24118718]
[-0.33139601 1.14414585]
[-0.8152028 0.48793547]
Label Encoding
Notes: https://www.geeksforgeeks.org/ml-label-encoding-of-datasets-in-python/
# Import libraries
import numpy as np  # "as" binds the imported module to a short alias (np, pd)
import pandas as pd

# Import dataset: read the Iris CSV from the mounted Google Drive folder.
iris_csv_path = '/content/drive/MyDrive/Sem-3/Data Science Lab/A5/CSV Files/Iris.csv'
df = pd.read_csv(iris_csv_path)
df  # bare expression so the notebook displays the loaded frame
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
# List the distinct values present in the 'Species' column.
df['Species'].unique()
array([0, 1, 2])
import pandas as pd
0 1 0 0
1 1 0 0
2 1 0 0
3 1 0 0
4 1 0 0
5 0 0 1
6 0 0 1
7 0 0 1
8 0 0 1
9 0 1 0
10 0 1 0
11 0 1 0
12 0 1 0
# pd.concat joins the two frames; axis=1 places them side by side as columns.
# NOTE(review): `dummies` is assumed to hold the one-hot columns for 'town',
# built in an earlier cell that is not shown here - confirm.
merge = pd.concat([df, dummies], axis=1)  # syntax: pandas.concat([var1, var2], axis=...)
merge  # display the combined frame

# Drop the original 'town' column from the combined frame.
final = merge.drop(columns=['town'])
final  # display the frame without 'town'
area price monroe township robinsville west windsor
0 2600 550000 1 0 0
1 3000 565000 1 0 0
2 3200 610000 1 0 0
3 3600 680000 1 0 0
4 4000 725000 1 0 0
5 2600 585000 0 0 1
6 2800 615000 0 0 1