20BIT037 - Data Analytics
20BIT037 - Data Analytics
import numpy as np
print("Matrix b: \n",b)
print("\nMatrix a: \n",a)
c = np.empty([3,3])
print("\nMatrix c: \n",c)
print("\nMatrix d: \n",d)
print("\nMatrix e: \n",e)
f = np.zeros([3,3])
print("\nMatrix f: \n",f)
Page 1 of 30
Anish Sil
# Addition
# Element-wise sum of two arrays, once with the operator and once with the
# equivalent NumPy function; both prints show the same values.
add_ans = g+h
print(add_ans)
add_ans1 = np.add(g,h)
print(add_ans1)
# multiple matrices
i = np.array([1, 2, 3, 4])
add_ans2 = g+h+i
print(add_ans2)
# np.add takes exactly two operands; a third positional argument is the
# `out` buffer, so np.add(g, h, i) would overwrite `i` with g+h instead of
# adding it. Nest the calls to sum all three arrays.
add_ans3 = np.add(np.add(g, h), i)
print(add_ans3)
Page 2 of 30
Anish Sil
a1 = np.array([5, 10, 15, 20])
c1 = np.array([3, 6, 9, 12])
add2 = a1+b1
print(add2)
print(add2_1)
add3 = a1+b1+c1
print(add3)
print(add3_2)
# subtraction
sub_ans = g-h
print(sub_ans)
sub_ans1 = np.subtract(g,h)
Page 3 of 30
Anish Sil
print(sub_ans1)
sub2 = a1-b1
print(sub2)
print(sub2_1)
sub3 = a1-b1-c1
print(sub3)
print(sub3_2)
# multiplication
mul_ans = g*h
print(mul_ans)
Page 4 of 30
Anish Sil
# performing multiplication using numpy function
mul_ans1 = np.multiply(g,h)
print(mul_ans1)
mul2 = a1*b1
print(mul2)
print(mul2_1)
mul3 = a1*b1*c1
print(mul3)
print(mul3_2)
# division
div_ans = g/h
Page 5 of 30
Anish Sil
print(div_ans)
div_ans1 = np.divide(g,h)
print(div_ans1)
div2 = a1/b1
print(div2)
print(div2_1)
div3 = a1/b1/c1
print(div3)
print(div3_2)
-------------------------------------------------------------------------------------
Matrix b:
[ 4 1072693248]
Matrix a:
[[1 0]
[0 1]]
Page 6 of 30
Anish Sil
Matrix c:
[[-3. 2. -6.]
[ 5. 7. -5.]
[ 1. 4. -2.]]
Matrix d:
[0 0]
Matrix e:
[[0 0]
[0 0]]
Matrix f:
[[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
Addition Example:
[ 7 77 23 130]
[ 7 77 23 130]
[ 8 79 26 134]
[ 7 77 23 130]
Ans: 2
[17 34 51 68]
[17 34 51 68]
Ans: 3
[20 40 60 80]
[20 40 60 80]
Subtraction Example:
[ 3 67 3 70]
[ 3 67 3 70]
Ans: 4
[ -7 -14 -21 -28]
[ -7 -14 -21 -28]
Ans: 5
[-10 -20 -30 -40]
[-10 -20 -30 -40]
Multiplication Example:
[ 10 360 130 3000]
[ 10 360 130 3000]
Ans: 6
[ 60 240 540 960]
Page 7 of 30
Anish Sil
[ 60 240 540 960]
Ans: 7
[ 180 1440 4860 11520]
[ 180 1440 4860 11520]
Division Example:
[ 2.5 14.4 1.3 3.33333333]
[ 2.5 14.4 1.3 3.33333333]
Ans: 8
[0.41666667 0.41666667 0.41666667 0.41666667]
[0.41666667 0.41666667 0.41666667 0.41666667]
Ans: 9
[0.13888889 0.06944444 0.0462963 0.03472222]
[0.13888889 0.06944444 0.0462963 0.03472222]
-------------------------------------------------------------------------------------
**Python NumPy Array Indexing**
import numpy as np
# Pseudo Inverse
A = np.array([[1,4,3],[2,5,6]])
#A = np.array([[1,4],[3,2],[5,6]])
print(A)
print("\n")
B = np.transpose(A)
C = np.matmul(B,A)
E = np.linalg.det(C)
if(E==0):
print("\n")
else:
F = np.linalg.inv(C)
Page 8 of 30
Anish Sil
G = np.matmul(C, B)
print(G)
print("\n")
L = np.linalg.pinv(A)
print(L)
print("\n")
H = np.matmul(A,B)
I = np.linalg.det(H)
if(I==0):
else:
J = np.linalg.inv(H)
K = np.matmul(B, J)
print(K)
print("\n")
M = np.linalg.pinv(A)
print(M)
print("\n")
# Condition Number
print("Ans 9:")
#(a)
#A = np.array([[1,0],[0,1]])
#(b)
Page 9 of 30
Anish Sil
#A = np.array([[3,1],[5,2]])
#(c)
# Solve A @ D = Y - B for D.
A = np.array([[1,0],[0,1]])
B = np.array([[1],[2]])
Y = np.array([[3],[4]])
C = np.subtract(Y, B)
# Use the linear solver rather than forming inv(A) explicitly: same result,
# one step fewer, and it matches how the 3x3 system below is solved.
D = np.linalg.solve(A, C)
print(D)
print("\n")
print("Ans 8:")
# Condition number of the 2x2 matrix A defined above.
N = np.linalg.cond(A)
print(N)
print("\n")
print("Ans 10:")
# Solve the 3x3 system A @ x = B (A and B are rebound here).
A = np.array([[-3,2,-6],[5,7,-5],[1,4,-2]])
B = np.array([[6],[6],[8]])
x = np.linalg.solve(A,B)
print(x)
-----------------------------------------------------------------------------------------
[[1 4 3]
[2 5 6]]
[[-0.16666667 0.13333333]
[ 0.66666667 -0.33333333]
[-0.5 0.4 ]]
Page 10 of 30
Anish Sil
Psuedo Inverse:
[[-0.16666667 0.13333333]
[ 0.66666667 -0.33333333]
[-0.5 0.4 ]]
Ans 9:
[[2.]
[2.]]
Ans 8:
1.0
Ans 10:
[[-2.]
[ 3.]
[ 1.]]
-------------------------------------------------------------------------------------
**NumPy Array Slicing**
import numpy as np
# 10 to 1 with a step of -2
a = np.arange(10, 1, -2)
Page 11 of 30
Anish Sil
a = np.arange(20)
# a[start:stop:step]
-----------------------------------------------------------------------------------------
Array is:
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
macros = np.array([
])
# Create a new array filled with zeros, of the same shape as macros.
result = np.zeros_like(macros)
Page 12 of 30
Anish Sil
# Now multiply each row of macros by
for i in range(macros.shape[0]):
result
w = np.array([45, 55])
# then broadcast it against w to yield an output of shape 3x2 which is the outer product
# of v and w
# X has shape 2x3 and v has shape (3,), so they broadcast to 2x3,
print("\n",X + v)
# Add a vector to each column of a matrix. X has shape 2x3 and w has shape (2,). If we
# transpose X
# then it has shape 3x2 and can be broadcast against w to yield a result of shape 3x2.
# Transposing this yields the final result of shape 2x3 which is the matrix.
print("\n",(X.T + w).T)
Page 13 of 30
Anish Sil
# Multiply a matrix by a constant. X has shape 2x3. NumPy treats scalars as arrays of
# shape ();
print("\n",X * 2)
-------------------------------------------------------------------------------------
[[ 540 660]
[1080 1320]
[1620 1980]]
[[ 24 46 69]
[ 57 79 102]]
[[ 57 67 78]
[100 110 121]]
[[ 57 67 78]
[100 110 121]]
[[ 24 44 66]
[ 90 110 132]]
-------------------------------------------------------------------------------------
**Analyzing Data Using Pandas**
import pandas as pd
import numpy as np
ser = pd.Series()
print(ser)
# simple array
ser = pd.Series(data)
print("\n",ser)
df = pd.DataFrame()
print("\n",df)
Page 14 of 30
Anish Sil
# list of strings
df = pd.DataFrame(lst)
df
-------------------------------------------------------------------------------------
0 d
1 a
2 t
3 a
4 1
dtype: object
Empty DataFrame
Columns: []
Index: []
-------------------------------------------------------------------------------------
**Creating Dataframe from CSV**
import pandas as pd
df = pd.read_csv("iris_csv.csv")
Page 15 of 30
Anish Sil
df.head()
-------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
# applying filter function
# Keep only the listed columns and show the first rows of the result.
# NOTE(review): "SepalLengthCm" is listed twice below; presumably the second
# entry was meant to be a different column -- confirm against the CSV header.
df.filter(["Species", "SepalLengthCm",
"SepalLengthCm"]).head()
-------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
**Groups**
data1 = {'Name': ['Jai', 'Anuj', 'Jai', 'Princi', 'Gaurav', 'Anuj', 'Princi', 'Abhi'],
df = pd.DataFrame(data1)
Page 16 of 30
Anish Sil
print("Original Dataframe")
display(df)
gk = df.groupby('Name')
gk.first()
-------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
**Python Pandas Aggregation**
Page 17 of 30
Anish Sil
# Define a dictionary containing employee data
data1 = {'Name': ['Jai', 'Anuj', 'Jai', 'Princi', 'Gaurav', 'Anuj', 'Princi', 'Abhi'],
df = pd.DataFrame(data1)
grp1 = df.groupby('Name')
grp1.aggregate(np.sum)
-------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
**Concatenating DataFrame**
df = pd.DataFrame(data1)
df1 = pd.DataFrame(data2)
display(df, df1)
res
-------------------------------------------------------------------------------------
Page 19 of 30
Anish Sil
-------------------------------------------------------------------------------------
**Merging DataFrame**
df = pd.DataFrame(data1)
df1 = pd.DataFrame(data2)
display(df, df1)
res
-------------------------------------------------------------------------------------
Page 20 of 30
Anish Sil
------------------------------------------------------------------------------------
**Visualization with Matplotlib**
plt.axis([0, 6, 0, 20])
plt.show()
-------------------------------------------------------------------------------------
Page 21 of 30
Anish Sil
-------------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import pandas as pd
# Load the data set from "iris_csv.csv" (path is relative to the working directory).
df = pd.read_csv("iris_csv.csv")
# Bar chart: 'class' values on the x-axis, 'sepallength' values as bar heights.
plt.bar(df['class'], df['sepallength'])
plt.title("Iris Dataset")
# The legend is given one hand-written label, "bar".
plt.legend(["bar"])
plt.show()
-------------------------------------------------------------------------------------
Page 22 of 30
Anish Sil
-------------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import pandas as pd
# Load the data set from "iris_csv.csv" (path is relative to the working directory).
df = pd.read_csv("iris_csv.csv")
# Histogram of the 'sepallength' column using matplotlib's default binning.
plt.hist(df["sepallength"])
plt.title("Histogram")
# Label the single plotted series by its column name.
plt.legend(["sepallength"])
plt.show()
-------------------------------------------------------------------------------------
Page 23 of 30
Anish Sil
-------------------------------------------------------------------------------------
# Scatter plot: 'class' on the x-axis against 'sepallength' on the y-axis.
# Relies on `df` and `plt` already being defined earlier in the session.
plt.scatter(df["class"], df["sepallength"])
plt.title("Scatter Plot")
# Label the single plotted series by its column name.
plt.legend(["sepallength"])
plt.show()
-------------------------------------------------------------------------------------
Page 24 of 30
Anish Sil
-------------------------------------------------------------------------------------
# Box plot of the 'sepallength' column.
# Relies on `df` and `plt` already being defined earlier in the session.
plt.boxplot(df["sepallength"])
plt.title("Box Plot")
# Legend is given the column name as its only label.
plt.legend(["sepallength"])
plt.show()
-------------------------------------------------------------------------------------
Page 25 of 30
Anish Sil
-------------------------------------------------------------------------------------
df.shape
df.info()
# importing packages
plt.show()
df.isnull().sum()
data
df.value_counts("class")
-------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sepallength 150 non-null float64
1 sepalwidth 150 non-null float64
2 petallength 150 non-null float64
3 petalwidth 150 non-null float64
4 class 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
Page 26 of 30
Anish Sil
class
Iris-setosa 50
Iris-versicolor 50
Iris-virginica 50
Name: count, dtype: int64
-------------------------------------------------------------------------------------
**Comparing Petal Length and Petal Width**
plt.show()
-------------------------------------------------------------------------------------
Page 27 of 30
Anish Sil
-------------------------------------------------------------------------------------
sns.pairplot(df.drop([], axis = 1), hue='class', height=2)
**Handling Outliers**
sns.boxplot(x='sepalwidth', data=df)
-------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
**Removing Outliers**
# Importing
import sklearn
# IQR
# First and third quartiles of 'sepalwidth'. 'midpoint' takes the average of
# the two neighbouring samples when a percentile falls between them.
# `method=` replaces the `interpolation=` keyword, which NumPy deprecated in
# version 1.22.
Q1 = np.percentile(df['sepalwidth'], 25,
                   method='midpoint')
Q3 = np.percentile(df['sepalwidth'], 75,
                   method='midpoint')
# Interquartile range: spread of the middle half of the data.
IQR = Q3 - Q1
Page 28 of 30
Anish Sil
print("Old Shape: ", df.shape)
# Upper bound
# Lower bound
-------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------
# Removing the Outliers
sns.boxplot(x='sepalwidth', data=df)
-------------------------------------------------------------------------------------
Page 29 of 30
Anish Sil
-------------------------------------------------------------------------------------
Page 30 of 30