0% found this document useful (0 votes)
17 views

Statistical Data Analysis - Ipynb - Colaboratory

Uploaded by

Varad Kulkarni
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views

Statistical Data Analysis - Ipynb - Colaboratory

Uploaded by

Varad Kulkarni
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

5/17/23, 1:23 AM Copy of Statistical data analysis.ipynb - Colaboratory

Name: Shreya Bhangale
Roll No: SE ENTC 18
Experiment No: 04

#Import the required Packages


import pandas as pd
import numpy as np

from google.colab import files


uploaded = files.upload()

Choose Files No file chosen Upload widget is only available when the cell has been executed in
the current browser session. Please rerun this cell to enable.
Saving IBM-313 Marks.xlsx to IBM-313 Marks.xlsx

path = "IBM-313 Marks.xlsx"


table = pd.read_excel(path)
print (table)

S.No. MTE (25) Mini Project (25) Total (50) ETE (50) Total
0 1 5.00 20 25.00 12.0 37.00
1 2 11.05 20 31.05 26.0 57.05
2 3 8.10 20 28.10 14.0 42.10
3 4 6.00 10 16.00 13.0 29.00
4 5 11.35 20 31.35 17.0 48.35
.. ... ... ... ... ... ...
74 75 12.05 10 22.05 20.0 42.05
75 76 12.25 10 22.25 28.0 50.25
76 77 1.75 10 11.75 NaN 0.00
77 78 3.00 10 13.00 NaN 0.00
78 79 5.80 10 15.80 12.0 27.80

[79 rows x 6 columns]

table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 S.No. 79 non-null int64
1 MTE (25) 79 non-null float64
2 Mini Project (25) 79 non-null int64
3 Total (50) 79 non-null float64
4 ETE (50) 77 non-null float64
5 Total 79 non-null float64
dtypes: float64(4), int64(2)
memory usage: 3.8 KB

x = table['Total']
np.mean(x)

46.90632911392405

np.median(x)

45.0

import scipy
from scipy import stats
# Pass keepdims explicitly: keepdims=True keeps the array-shaped ModeResult
# (mode=array([...]), count=array([...])) and avoids SciPy's FutureWarning
# about the default behaviour of `mode` changing.
m1 = stats.mode(x, keepdims=True)
print(m1)

ModeResult(mode=array([0.]), count=array([2]))
<ipython-input-7-625da3d35865>:3: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mo
m1=stats.mode(x)

li = [1, 2, 3, 3, 2, 2, 2, 1, 2]
# keepdims=True keeps the array-shaped ModeResult and avoids SciPy's
# FutureWarning about the default behaviour of `mode` changing.
print(stats.mode(li, keepdims=True))

https://colab.research.google.com/drive/1apFRrvkiXDm2KVY7jq9fPctuxdxi8D-G#scrollTo=epYJ7wb4zlRr&printMode=true 1/6
5/17/23, 1:23 AM Copy of Statistical data analysis.ipynb - Colaboratory

ModeResult(mode=array([2]), count=array([5]))
<ipython-input-8-5c66091584e9>:2: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mo
print (stats.mode(li))

a = np.array([1,2,3,4,5])
p = np.percentile(a,50)
print (p)

3.0

k = ['Ram', 65, 2.5]


print (k)

['Ram', 65, 2.5]

k = ['Ram', 65, 2.5]

# Print each element of the mixed-type list on its own line.
for i in k:
    print(i)

Ram
65
2.5

for i in range(10, 20, 2): print(i)

10
12
14
16
18

for i in range(10, 20, 2): print(i, end=',')

10,12,14,16,18,

FUNCTIONS IN PYTHON

def greet():
    """Print a two-line greeting: "Hi", then "Good Evening".

    Takes no arguments and returns nothing; the text goes to standard output.
    """
    print("Hi")
    print("Good Evening")
greet()

Hi
Good Evening

def add(p, q):
    """Add two numbers and print the result.

    Args:
        p: First addend.
        q: Second addend.

    The sum is written to standard output with ``print``; nothing is
    returned.
    """
    c = p + q
    print(c)

add(10,4)

14

Finding Minimum and Maximum Value in an Array

data = [1,3,4,463,2,3,6,8,9,4,254,6,72]
min(data), max(data)

(1, 463)

data = [1,3,4,463,2,3,6,8,9,4,254,6,72]
def min_and_max(data):
    """Return the minimum and maximum values of ``data``.

    Args:
        data: A non-empty sequence of mutually comparable values.

    Returns:
        A ``(min_val, max_val)`` tuple.

    Raises:
        ValueError: If ``data`` is empty (raised by the built-in ``min``).
    """
    min_val = min(data)
    max_val = max(data)
    return (min_val, max_val)

https://colab.research.google.com/drive/1apFRrvkiXDm2KVY7jq9fPctuxdxi8D-G#scrollTo=epYJ7wb4zlRr&printMode=true 2/6
5/17/23, 1:23 AM Copy of Statistical data analysis.ipynb - Colaboratory
min_and_max(data)

(1, 463)

def rangeef(data):
    """Return the range of ``data``: its maximum minus its minimum.

    Args:
        data: A non-empty sequence of numeric values.

    Returns:
        The difference ``max(data) - min(data)``.

    Raises:
        ValueError: If ``data`` is empty (raised by the built-in ``min``).
    """
    min_val = min(data)
    max_val = max(data)
    return max_val - min_val

rangeef (data)

462

Quartile

a = np.array([1,2,3,4,5])
q1 = np.percentile(a,25)
print (q1)

2.0

a = np.array([1,2,3,4,5])
q2 = np.percentile(a,50)
print (q2)

3.0

a = np.array([1,2,3,4,5])
q3 = np.percentile(a,75)
print (q3)

4.0

Inter Quartile Range

IQ = q3 - q1
IQ

2.0

Variance

np.var(x)

262.7814789296587

Population Standard Deviation

import statistics
statistics.pstdev(x)

16.210536046955966

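Standard Deviation with NumPy (np.std defaults to ddof=0, i.e. the population value; pass ddof=1 for the sample standard deviation)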

np.std(x)

16.210536046955966

Skewness

from scipy.stats import skew


skew(x)

0.10226407464884266

https://colab.research.google.com/drive/1apFRrvkiXDm2KVY7jq9fPctuxdxi8D-G#scrollTo=epYJ7wb4zlRr&printMode=true 3/6
5/17/23, 1:23 AM Copy of Statistical data analysis.ipynb - Colaboratory

Box Plot

from matplotlib import pyplot as plt


plt.boxplot (x, sym = 'o') # the 'o' marker implies a data point that goes beyond the max and min whisker values (an outlier).
plt.show()

table.describe()

S.No. MTE (25) Mini Project (25) Total (50) ETE (50) Total

count 79.000000 79.000000 79.000000 79.000000 77.000000 79.000000

mean 40.000000 10.178481 16.556962 26.735443 21.016234 46.906329

std 22.949219 4.961924 4.900934 8.504976 8.091209 16.314119

min 1.000000 0.700000 10.000000 11.200000 7.000000 0.000000

25% 20.500000 6.500000 11.000000 19.600000 17.000000 38.000000

50% 40.000000 10.300000 15.000000 27.500000 20.000000 45.000000

75% 59.500000 12.975000 22.000000 33.250000 24.000000 55.375000

max 79.000000 23.500000 22.000000 45.500000 50.000000 94.500000

from scipy.stats import skew


import numpy as np
import pylab as p

x1 = np.linspace( -5, 5, 1000 )


y1 = 1./(np.sqrt(2.*np.pi)) * np.exp( -.5*(x1)**2 )

p.plot(x1, y1, '*')

print( '\nSkewness for data : ', skew(y1))

https://colab.research.google.com/drive/1apFRrvkiXDm2KVY7jq9fPctuxdxi8D-G#scrollTo=epYJ7wb4zlRr&printMode=true 4/6
5/17/23, 1:23 AM Copy of Statistical data analysis.ipynb - Colaboratory

Skewness for data : 1.1108237139164436

from scipy.stats import skew


import numpy as np

# random values based on a normal distribution


x = np.random.normal(0, 2, 10000)

print ("X : \n", x)

print('\nSkewness for data : ', skew(x))

X :
[ 1.13126468 -2.2362244 4.36431696 ... -0.65830856 1.24609437
-1.98773246]

Skewness for data : 0.015454783893490075

from scipy.stats import skew


import numpy as np
import pylab as p

x1 = np.linspace( -5, 12, 1000 )


y1 = 1./(np.sqrt(2.*np.pi)) * np.exp( -.5*(x1)**2 )

p.plot(x1, y1, '.')

print( '\nSkewness for data : ', skew(y1))

Skewness for data : 1.917677776148478

https://colab.research.google.com/drive/1apFRrvkiXDm2KVY7jq9fPctuxdxi8D-G#scrollTo=epYJ7wb4zlRr&printMode=true 5/6
5/17/23, 1:23 AM Copy of Statistical data analysis.ipynb - Colaboratory

https://colab.research.google.com/drive/1apFRrvkiXDm2KVY7jq9fPctuxdxi8D-G#scrollTo=epYJ7wb4zlRr&printMode=true 6/6

You might also like