numpy_dataframe
January 10, 2025
1 NumPy ( Numerical Python )
[1]: import numpy as np
roll_no = np.array([1,2,3])
print(roll_no)
[1 2 3]
[5]: a = np.arange(10,100,10)
print(a)
[10 20 30 40 50 60 70 80 90]
[7]: student = np.array([11, 'Amita', 92.5])
print(student)
['11' 'Amita' '92.5']
[13]: student = np.array([11, 92.5])
print(student)
[11. 92.5]
[81]: b = np.random.random(4)
print(b)
[0.45928608 0.60661161 0.85467259 0.39128761]
[91]: b = np.random.randint(10, size=(3,4))
print(b)
[[1 9 3 0]
[6 3 4 0]
[4 9 6 2]]
[111]: print(np.random.randint(10, size=10) + 20)
[22 22 21 20 25 23 27 28 27 25]
[21]: a = np.ones((3,4), dtype = int)
print(a)
1
[[1 1 1 1]
[1 1 1 1]
[1 1 1 1]]
[22]: a = (np.zeros((3,3)))
print(a)
[[0. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]]
[20]: a = np.full((3,4),6)
print(a)
[[6 6 6 6]
[6 6 6 6]
[6 6 6 6]]
[27]: a = np.array([[11, 'Arshia', 93], [15, 'Riddhi', 95]])
print(a)
[['11' 'Arshia' '93']
['15' 'Riddhi' '95']]
[17]: marks1 = np.array([1,2,3,4])
marks2 = np.array([2,1,2,1])
print(marks1 ** marks2)
print("Marks1 to the power of 3 is: ", marks1 ** 3)
[1 2 9 4]
Marks1 to the power of 3 is: [ 1 8 27 64]
[11]: marks1 = np.array([10,20,30,40])
marks2 = np.array([2,4,3,5])
print(marks1 // marks2)
# Label corrected: this line floor-divides marks1 by the scalar 2; it does not
# raise it to a power.
print("Marks1 floor-divided by 2 is: ", marks1 // 2)
[ 5 5 10 8]
Marks1 to the power of 3 is: [ 5 10 15 20]
[18]: np.add(marks1,marks2)
np.subtract(marks1,marks2)
np.multiply(marks1,marks2)
np.divide(marks1,marks2)
np.remainder(marks1,marks2)
[18]: array([1, 0, 1, 0], dtype=int32)
2
1.1 Functions of NumPy Arrays
[3]: import numpy as np
arr = np.array([[1,2,3,4],[3,4,5,6]])
print(type(arr))
<class 'numpy.ndarray'>
[14]: print(arr)
[[1 2 3 4]
[3 4 5 6]]
[9]: print(arr.ndim)
print(arr.shape)
print(arr.size)
print(arr.dtype)
2
(2, 4)
8
int32
1.1.1 max()
[15]: print(arr.max())
print(arr.max(axis=0)) # column wise maximum value
print(arr.max(axis=1)) # row wise maximum value
6
[3 4 5 6]
[4 6]
1.1.2 min()
[16]: print(arr.min())
print(arr.min(axis=0)) # column wise minimum value
print(arr.min(axis=1)) # row wise minimum value
1
[1 2 3 4]
[1 3]
[18]: arr = np.array([[11,2,13,4],[3,4,5,6]])
print(arr)
[[11 2 13 4]
[ 3 4 5 6]]
3
1.1.3 sum()
[23]: print("Overall sum: ", arr.sum())
print("Row wise sum: ", arr.sum(axis=1))
print("Column wise sum: ",arr.sum(axis=0))
Overall sum: 48
Row wise sum: [30 18]
Column wise sum: [14 6 18 10]
1.1.4 sort()
[75]: a = np.array([12, 4, -10, 23, 29, 15, -1, 45, 33, 37, -14])
print(np.sort(a))
[-14 -10 -1 4 12 15 23 29 33 37 45]
[79]: # reverse order
np.flip(np.sort(a))
[79]: array([ 45, 37, 33, 29, 23, 15, 12, 4, -1, -10, -14])
[39]: # reverse order
print(np.sort(a)[::-1])
[ 45 37 33 29 23 15 12 4 -1 -10 -14]
[43]: # reverse order
r = np.sort(-a)
r
[43]: array([-45, -37, -33, -29, -23, -15, -12, -4, 1, 10, 14])
[45]: print(-r)
[ 45 37 33 29 23 15 12 4 -1 -10 -14]
[129]: b = np.array([[-9, 5, 18, 9, 12],[10, 11, 3, -5, -10]])
print(b)
[[ -9 5 18 9 12]
[ 10 11 3 -5 -10]]
[117]: print(np.sort(b))
[[ -9 5 9 12 18]
[-10 -5 3 10 11]]
[119]: # Row wise sorting
print(np.sort(b, axis=1))
4
[[ -9 5 9 12 18]
[-10 -5 3 10 11]]
[55]: # Column wise sorting
print(np.sort(b, axis=0))
[[ -9 5 3 -5 -10]
[ 10 11 18 9 12]]
[57]: print(b.flatten())
[ -9 5 18 9 12 10 11 3 -5 -10]
[59]: print(np.sort(b.flatten()))
[-10 -9 -5 3 5 9 10 11 12 18]
[61]: # Overall Sorting
print(np.sort(b.flatten()).reshape(b.shape))
[[-10 -9 -5 3 5]
[ 9 10 11 12 18]]
[71]: b
[71]: array([[ -9, 5, 18, 9, 12],
[ 10, 11, 3, -5, -10]])
[73]: # reverse order
print(np.flip(np.sort(b.flatten()).reshape(b.shape)))
[[ 18 12 11 10 9]
[ 5 3 -5 -9 -10]]
1.1.5 reverse order
[63]: print(b)
[[ -9 5 18 9 12]
[ 10 11 3 -5 -10]]
[65]: print(np.flip(b))
[[-10 -5 3 11 10]
[ 12 9 18 5 -9]]
[67]: print(np.flip(b, axis=1))
[[ 12 9 18 5 -9]
[-10 -5 3 11 10]]
[69]: print(np.flip(b, axis=0))
5
[[ 10 11 3 -5 -10]
[ -9 5 18 9 12]]
2 Pandas ( PANel Data )
[3]: # Creating an empty series
import pandas as pd
# State the dtype explicitly so the empty Series is always object-typed and
# does not rely on the installed pandas version's default for empty input.
emp = pd.Series(dtype="object")
[143]: emp
[143]: Series([], dtype: object)
[145]: # Creating a series from a NumPy array
data = np.array([10,20,30])
s1 = pd.Series(data)
print(s1)
0 10
1 20
2 30
dtype: int64
[147]: # Creating series using labelled index
friends = pd.Series(['Rohan', 'Susan', 'James'], index=[11,22,33])
print(friends)
11 Rohan
22 Susan
33 James
dtype: object
[149]: # Creating a Series from a Python list
cities = ['Delhi', 'Mumbai','Chennai', 'Kolkata']
s2 = pd.Series(cities)
print(s2)
0 Delhi
1 Mumbai
2 Chennai
3 Kolkata
dtype: object
[157]: print(s2[0:3])
0 Delhi
1 Mumbai
2 Chennai
dtype: object
6
[159]: # Creating series using labelled index
month = ["June", "August", "October", "December"]
s4 = pd.Series(month, index=[6,8,10,12])
print(s4)
6 June
8 August
10 October
12 December
dtype: object
[171]: print(s4[[6,12]])
6 June
12 December
dtype: object
[6]: # Creating a series from a python dictionary
month = {6:"June", 8:"August", 10:"October", 12:"December"}
s5 = pd.Series(month)
print(s5)
6 June
8 August
10 October
12 December
dtype: object
[13]: # File access using pandas
s = pd.read_csv("Student data.csv")
print(s.head(5))
# print(s.head(10))
# print(s.tail(5))
Name Marks
0 Ram 56
1 Rajesh 78
2 Rahul 98
3 Akhil 82
[17]: s1 = s.sort_values(by='Name')
print(s1.head(5))
Name Marks
3 Akhil 82
2 Rahul 98
1 Rajesh 78
0 Ram 56
7
[19]: s1 = s.sort_values(by='Name', ascending=False)
print(s1.head(5))
Name Marks
0 Ram 56
1 Rajesh 78
2 Rahul 98
3 Akhil 82
[175]: # Accessing the elements of the Series
Marks = pd.Series(
    [96, 92, 91, 93],
    index=["Shashi", "Sonali", "Neha", "Rajeev"],
)
print("Using labelled index: ", Marks["Sonali"])
# Positional access must go through .iloc: a plain integer key on a
# label-indexed Series is deprecated as a positional lookup (FutureWarning)
# and will be treated as a label in a future pandas version.
print("Using Positional index: ", Marks.iloc[1])
Using labelled index: 92
Using Positional index: 92
/tmp/ipykernel_3999/1295168995.py:4: FutureWarning: Series.__getitem__ treating
keys as positions is deprecated. In a future version, integer keys will always
be treated as labels (consistent with DataFrame behavior). To access a value by
position, use `ser.iloc[pos]`
print("Using Positional index: ", Marks[1])
[177]: # To print 2 elements
# Select by position with .iloc; integer keys passed to [] on a label-indexed
# Series are deprecated as positions and will be read as labels in the future.
print(Marks.iloc[[2, 3]])
Neha 91
Rajeev 93
dtype: int64
/tmp/ipykernel_3999/91782681.py:2: FutureWarning: Series.__getitem__ treating
keys as positions is deprecated. In a future version, integer keys will always
be treated as labels (consistent with DataFrame behavior). To access a value by
position, use `ser.iloc[pos]`
print(Marks[[2,3]])
[183]: print(Marks.iloc[[2,3]])
Neha 91
Rajeev 93
dtype: int64
[185]: print(Marks[["Sonali", "Rajeev"]])
Sonali 92
Rajeev 93
dtype: int64
[187]: # Elements can be accessed using Slicing as follows:
print(Marks[1:3]) # excludes the value at index position 3
8
Sonali 92
Neha 91
dtype: int64
[189]: print(Marks["Sonali":"Rajeev"]) # includes the value at labelled index
Sonali 92
Neha 91
Rajeev 93
dtype: int64
[12]: # To display the series in the reverse order
print(Marks[::-1])
Rajeev 93
Neha 91
Sonali 92
Shashi 96
dtype: int64
2.1 Attributes of the Series
[15]: data = np.array([10, 30, 50])
s1 = pd.Series(data)
s1.name="Roll no"
print(s1)
0 10
1 30
2 50
Name: Roll no, dtype: int32
[16]: s1.index.name = "Students"
[17]: s1
[17]: Students
0 10
1 30
2 50
Name: Roll no, dtype: int32
[20]: print(s1.values)
[10 30 50]
[21]: print(s1.size)
9
[22]: print(s1.empty)
False
2.2 Built-in Functions in Series
[199]: friends = pd.Series(["Rohan", "Susan", "James", "Riya", "sumit", "Abhinav",␣
↪"Vihaan"], index=[11, 22, 33, 44, 55, 66, 77])
print(friends.head(2))
11 Rohan
22 Susan
dtype: object
[201]: print(friends.tail())
33 James
44 Riya
55 sumit
66 Abhinav
77 Vihaan
dtype: object
[203]: print(friends.count())
3 DataFrames
[27]: # Creating an empty DataFrame
emp = pd.DataFrame()
print(emp)
Empty DataFrame
Columns: []
Index: []
[28]: # Creating a DataFrame from Numpy Array
a1 = np.array([10, 20, 30])
a2 = np.array([11, 22, 33])
f1 = pd.DataFrame(a1)
f2 = pd.DataFrame([a1, a2])
print(f1)
print(f2)
0
0 10
1 20
2 30
0 1 2
10
0 10 20 30
1 11 22 33
[29]: # With column index
a1 = np.array([10, 20, 30])
a2 = np.array([11, 22, 33])
f2 = pd.DataFrame([a1, a2], columns = ['R1', 'R2', 'R3'])
print(f2)
R1 R2 R3
0 10 20 30
1 11 22 33
[30]: # Creating a DataFrame using a Python List
cities = ['Delhi', 'Mumbai', 'Chennai', 'Kolkata']
f1 = pd.DataFrame(cities)
print(f1)
0
0 Delhi
1 Mumbai
2 Chennai
3 Kolkata
[39]: # Creating a DataFrame using a Python dictionary
d1 = {'Name':['Riya', 'Sonali', 'Sneha'], 'Marks':[92, 90, 89]}
f2 = pd.DataFrame(d1, index = [11, 22, 33])
print(f2)
Name Marks
11 Riya 92
22 Sonali 90
33 Sneha 89
3.1 Basic Operations in a DataFrame
[40]: # Adding a new row
f2.loc[44] = ['Nithin', 84]
print(f2)
Name Marks
11 Riya 92
22 Sonali 90
33 Sneha 89
44 Nithin 84
[41]: # Adding a new column
f2['Section'] = ['A', 'E', 'E', 'C']
print(f2)
11
Name Marks Section
11 Riya 92 A
22 Sonali 90 E
33 Sneha 89 E
44 Nithin 84 C
[45]: # Modifying an existing row
f2.loc[33] = ['Sneha', 95, 'C']
print(f2)
Name Marks Section
11 Riya 92 A
22 Sonali 97 E
33 Sneha 95 C
44 Nithin 89 C
[44]: # Modifying an existing column
# Replaces the whole 'Marks' column; the list must supply one value per row.
# NOTE(review): the execution counts show this cell ([44]) ran before the
# "Modifying an existing row" cell ([45]). Run top to bottom, this assignment
# would set Sneha's mark back to 91 — confirm the intended cell order.
f2['Marks'] = [92, 97, 91, 89]
print(f2)
Name Marks Section
11 Riya 92 A
22 Sonali 97 E
33 Sneha 91 C
44 Nithin 89 C
[50]: # Deleting a row
f2 = f2.drop(22, axis = 0)
print(f2)
Name Marks Section
11 Riya 92 A
33 Sneha 95 C
44 Nithin 89 C
[51]: # Deleting a column
f2 = f2.drop('Marks', axis=1)
print(f2)
Name Section
11 Riya A
33 Sneha C
44 Nithin C
[ ]:
12