Python With Pandas
Python With Pandas
ipynb - Colaboratory
#python pandas
#used to work with heterogeneous (tabular) data
#facilitate faster and easier data processing
import pandas as pd
## Series
#A sequence of values (similar to a 1-D array) with an explicit, custom index used to access the values
#like a fixed-length dict: a kind of mapping from index values to data values
# Created using Series function
series1 = pd.Series([22,33,44,55]) # default index: 0 to n-1
series1
0 22
1 33
2 44
3 55
dtype: int64
# 2. index
# it is like range
series1.index
series2
a 22
b 33
c 44
d 55
dtype: int64
True
'e' in series2
False
series2.values
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 1/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
series2[['b','a']]
b 33
a 22
dtype: int64
a 22
b 33
c 44
d 55
dtype: int64
series2[series2>34]
c 44
d 55
dtype: int64
# scalar multiplication
series2*9 # Broadcasting is done here.
a 198
b 297
c 396
d 495
dtype: int64
#math operation
import numpy as np
np.exp(series2) # exponential of series values
a 3.584913e+09
b 2.146436e+14
c 1.285160e+19
d 7.694785e+23
dtype: float64
series3 = pd.Series(PIN_code)
series3
Roorkee 247667
Dehradun 248001
Haridwar 249401
dtype: int64
series3.index
Dehradun 248001.0
Roorkee 247667.0
Haridwar 249401.0
Tehri garhwal NaN
dtype: float64
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 2/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
Dehradun False
Roorkee False
Haridwar False
Tehri garhwal True
dtype: bool
pd.notnull(series4)
Dehradun True
Roorkee True
Haridwar True
Tehri garhwal False
dtype: bool
Dehradun False
Roorkee False
Haridwar False
Tehri garhwal True
dtype: bool
series4.notnull()
Dehradun True
Roorkee True
Haridwar True
Tehri garhwal False
dtype: bool
# alignment feature
series3 + series4 # pincode added.
Dehradun 496002.0
Haridwar 498802.0
Roorkee 495334.0
Tehri garhwal NaN
dtype: float64
series4
city
Dehradun 248001.0
Roorkee 247667.0
Haridwar 249401.0
Tehri garhwal NaN
Name: PIN_code, dtype: float64
0 22
1 33
2 44
3 55
dtype: int64
series1.index = [1,2,3,4]
series1
1 22
2 33
3 44
4 55
dtype: int64
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 3/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
#possible error
# df = pd.dataframe(dict1)
# df = pd.Dataframe(dict1)
df = pd.DataFrame(dict1)
df
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 4/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
6 2013 Mumbai 2.15 NaN
Index(['year', 'metro', 'popcr', 'area'], dtype='object')
1 1.67
2 1.72
3 1.77
4 2.07
5 2.11
6 2.15
Name: popcr, dtype: float64
1 2011
2 2012
3 2013
4 2011
5 2012
6 2013
Name: year, dtype: int64
2 1.72
3 1.77
4 2.07
Name: popcr, dtype: float64
1 1.67
2 1.72
3 1.77
4 2.07
5 2.11
Name: popcr, dtype: float64
df2['popcr'][:-3]
1 1.67
2 1.72
3 1.77
Name: popcr, dtype: float64
1 1.67
2 1.72
3 1.77
4 2.07
5 2.11
6 2.15
Name: popcr, dtype: float64
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 5/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
df2
# populating values
#e.g area column
df2['area'] = np.arange(6000,6600,100) # (start, stop, step)
df2
# assigning values to a non-existent column will also create that column
df2['northern'] = (df2.metro == 'Delhi') # kind of boolean column
df2
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 6/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
df2.columns
# sequences of labels used while creating a Series or DataFrame are internally converted into an Index object
series5 = pd.Series(range(3), index=['a','b','c']) # index labels go inside square brackets
series5.index
#subsetting
ind1 = series5.index
ind1[1:] #mention starting point
ind1[0:2]
#immutability
ind1[1] = 'd'
# this raises a TypeError
# an Index does not support mutable operations
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-126-65fbb217ef82> in <cell line: 2>()
1 #immutability
----> 2 ind1[1] = 'd'
3 # output is error : type error
4 # index does not support mutable operation
/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py in
__setitem__(self, key, value)
5300 @final
5301 def __setitem__(self, key, value):
-> 5302 raise TypeError("Index does not support mutable operations")
5303
5304 def __getitem__(self, key):
ind2
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 7/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
series6
0 1.5
1 -2.5
2 0.0
dtype: float64
series6.index is ind2
True
ind3
series7
beta 1.1
alpha 1.2
beta 1.3
sigma 1.4
dtype: float64
beta 1.1
beta 1.3
dtype: float64
series10
0 blue
2 purple
4 yellow
dtype: object
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
df4
a 0 1 2
c 3 4 5
d 6 7 8
df5
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 8/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
Delhi Mumbai
a 0.0 1.0
b NaN NaN
c 3.0 4.0
d 6.0 7.0
0 blue
1 blue
2 purple
3 purple
4 yellow
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 9/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
5 yellow
dtype: object
series7_1
beta 1.1
alpha 1.3
beta 1.3
sigma 1.3
'purple'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-153-8bdd86d373a9> in <cell line: 1>()
----> 1 series7_1['sigma'] # square braket with single quotes
2 # 1.3
1 frames
/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/range.py in
get_loc(self, key, method, tolerance)
393 raise KeyError(key) from err
394 self._check_indexing_error(key)
--> 395 raise KeyError(key)
396 return super().get_loc(key, method=method, tolerance=tolerance)
397
KeyError: 'sigma'
3 purple
4 yellow
dtype: object
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-156-b23e739c9d06> in <cell line: 2>()
1 # selecting few rows
----> 2 series7_1[['sigma','alpha']] #in index use square brakets always
2 frames
/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py in
_raise_if_missing(self, key, indexer, axis_name)
6128 if use_interval_msg:
6129 key = list(key)
-> 6130 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6131
6132 not_found = list(ensure_index(key)[missing_mask.nonzero()
[0]].unique())
sigma 1.4
alpha 1.2
dtype: float64
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 10/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
# filtering rows based on some logic
series7[series7<1.3]
beta 1.1
alpha 1.2
dtype: float64
# slicing
alpha 1.2
beta 1.3
sigma 1.4
dtype: float64
beta 1.1
alpha 1.3
beta 1.3
sigma 1.3
dtype: float64
series7['beta']
beta 1.1
beta 1.3
dtype: float64
# Ex. reindexing
# using reindex method
series8 = pd.Series([4.5, 7.2, -5.3, 3.6], index = ['d', 'b', 'a', 'c'])
#pd.Series([],index= [])
series8
d 4.5
b 7.2
a -5.3
c 3.6
dtype: float64
series9 = series8.reindex(['a','b','c','d','e'])
series9
a -5.3
b 7.2
c 3.6
d 4.5
e NaN
dtype: float64
series10
0 blue
2 purple
4 yellow
dtype: object
series11
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 11/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
# Error chances
#1. index=['a','b',c','d']  (missing opening quote before c)
# brackets
df4
a 0 1 2
c 3 4 5
d 6 7 8
df5 = df4.reindex(['a','b','c','d'])
df5
df5.reindex(columns = ['Banglore','Delhi','Mumbai'])
A function that takes the passenger name as input, checks which booking class the passenger belongs to, and returns the average delay time.
Enter passenger name: Amber Mcclure Booking class for passenger Amber Mcclure : First Delay minutes for passenger Amber Mcclure : 8
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 12/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
df5
df5.drop('b')
# df5.drop('a','c') raises an error
# multiple labels have to be passed as a list
df5.drop(['a','c'])
df5.drop('Banglore', axis=1)
Delhi Mumbai
a 0.0 1.0
b NaN NaN
c 3.0 4.0
d 6.0 7.0
df5.drop(['Delhi','Mumbai'], axis=1)
Banglore
a 2.0
b NaN
c 5.0
d 8.0
df5
df5
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 13/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
series11
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
series7
beta 1.1
alpha 1.3
beta 1.3
sigma 1.3
dtype: float64
'purple'
1.3
3 purple
4 yellow
dtype: object
sigma 1.3
alpha 1.3
dtype: float64
1 blue
5 yellow
dtype: object
beta 1.1
dtype: float64
# slicing
# Note: with label-based slicing the end point is inclusive, on both Series and DataFrame
series7['alpha':'sigma']
alpha 1.3
beta 1.3
sigma 1.3
dtype: float64
series7['alpha':'sigma'] = 1.3
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 14/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
series7
beta 1.1
alpha 1.3
beta 1.3
sigma 1.3
dtype: float64
# accessing columns
df5['Delhi']
a 0.0
c 3.0
d 6.0
Name: Delhi, dtype: float64
df5[['Delhi','Bangalore']]
Delhi Bangalore
a 0.0 2.0
c 3.0 5.0
d 6.0 8.0
# slicing rows
df5[:2] # starting point not specified
df5[1:2]
# filtering rows
df5[df5['Bangalore']>5]
df5[df5<5] = 0
df5
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 15/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
0.0
Delhi 0.0
Banglore 2.0
Name: a, dtype: float64
# df5.loc['a','b'],['Delhi','Bangalore']
# not allowed now; earlier it showed results.
0.0
Delhi 0.0
Mumbai 0.0
Name: a, dtype: float64
Delhi Mumbai
a 0.0 0.0
c 0.0 0.0
Delhi 0.0
Mumbai 0.0
Bangalore 5.0
Name: c, dtype: float64
a 0.0
c 0.0
Name: Delhi, dtype: float64
df5.loc['c':'d', 'Delhi']
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 16/17
6/25/23, 10:02 PM Copy of Copy of IIT R DIXIT DATAFRAMES.ipynb - Colaboratory
c 0.0
d 6.0
Name: Delhi, dtype: float64
df5.loc['c':'d', 'Delhi':'Bangalore']
df5.iloc[:, 1:2]
# ':' selects all rows
# on the column side, 1:2 selects only the column at position 1 (the end point is exclusive with iloc)
Mumbai
a 0.0
c 0.0
d 7.0
df5.iloc[:,0:2][df5.Mumbai != 0]
# filtering in addition to slicing
# all rows, columns at positions 0 and 1 are selected (the end point 2 is exclusive)
# then only those rows are kept in which the Mumbai value is not equal to 0
Delhi Mumbai
d 6.0 7.0
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
1.3
https://colab.research.google.com/drive/1KY8qW02fPGwZI8S3tRjhoj8Z1alZWqCf#scrollTo=iDltQkrkPOd6&uniqifier=1&printMode=true 17/17