2.basic Statistics - Jupyter Notebook
2.basic Statistics - Jupyter Notebook
In [4]: mba
...
...
...
In [7]: mba.shape
...
...
...
...
In [12]: # column information dataset structure
mba.info()
...
...
In [14]: mba.describe().transpose()
...
In [15]: mba
# del mba['Datasrno'] removes the column using the del statement
# mba.drop(columns='Datasrno') also removes a column; mba.drop(0) removes the ROW whose index label is 0
...
...
In [17]: #mba$workex
mba['workex']
...
...
In [19]:
# Remove the serial-number column. Using drop(..., errors='ignore') instead of
# `del mba['Datasrno']` keeps this cell safe to re-run: `del` raises KeyError
# once the column has already been removed.
mba = mba.drop(columns=['Datasrno'], errors='ignore')
mba
...
...
In [21]: mba.std()
...
In [22]: mba.describe()
...
In [1]: # in R mean(mba$gmat)
mba['gmat'].mean()
...
In [24]: mba['gmat'].median()
#mba['gmat'].mean
#mba['gmat'].mode()
#mba['gmat'].var()
#mba['gmat'].std()
#mba['gmat'].max()
#mba['gmat'].min()
# Range = mba['gmat'].max() - mba['gmat'].min()
...
In [10]: mba['workex'].mode()
...
In [11]: mba['gmat'].var()
...
In [12]: mba['gmat'].std()
...
In [17]: mba['gmat'].max()  # pandas reduction skips NaN; builtin max() is order-dependent when NaN is present
...
In [18]: mba['gmat'].min()  # pandas reduction skips NaN; builtin min() is order-dependent when NaN is present
...
...
In [13]: # In R skewness and kurtosis - we have installed e1071 package
from scipy.stats import skew

# Keep the result under its own name. Assigning it to `skew` would rebind the
# imported function's name to a float, so any later `skew(...)` call in the
# notebook would fail with "object is not callable".
gmat_skew = skew(mba['gmat'])
gmat_skew
#print("skewness value of gmat:", gmat_skew)
...
...
...
In [ ]:
Categorical Analysis
In [5]: import pandas as pd
In [6]: wbcd = pd.read_csv("D:\\Course\\Python\\Datasets\\wbcd.csv")
wbcd
In [3]: wbcd
# Remove the identifier column. drop(..., errors='ignore') keeps the cell safe
# to re-run, whereas `del wbcd['id']` raises KeyError once the column is gone.
wbcd = wbcd.drop(columns=['id'], errors='ignore')
In [4]: wbcd
Out[27]: B 357
M 212
diagnosis
B 357
M 212
In [29]: freq/freq.sum()
# percentage
# b = 357 / (357 + 212) = 357/569 ≈ 0.627 (about 63%)
# m = 212 / (357 + 212) = 212/569 ≈ 0.373 (about 37%)
diagnosis
B 0.627417
M 0.372583
In [30]: # replace function used to change the label name in the rows
# Assign the relabelled column back to the frame. Calling
# .replace(..., inplace=True) on wbcd['diagnosis'] is chained assignment: it
# acts on an intermediate Series and does not update wbcd under Copy-on-Write.
# The label spelling is also corrected from "Bengign" to "Benign".
wbcd['diagnosis'] = wbcd['diagnosis'].replace({"B": "Benign", "M": "Malignant"})
In [31]: wbcd
...
Out[38]: Mean
diagnosis texture_mean perimeter_mean area_mean smoothness_mean compactnes
Radius
In [ ]:
In [ ]: