DSMP
DSMP
May 3, 2024
1 1. Python Output
[ ]: # Python is a case sensitive language
print('Hello World')
Hello World
[ ]: print('salman khan')
salman khan
[ ]: print(salman khan)
[ ]: print(7)
[ ]: print(7.7)
7.7
[ ]: print(True)
True
[ ]: print('Hello',1,4.5,True)
[ ]: print('Hello',1,4.5,True,sep='/')
Hello/1/4.5/True
1
[ ]: print('hello')
print('world')
hello
world
[ ]: print('hello',end='-')
print('world')
hello-world
2 2. Data Types
[ ]: # Integer
print(8)
# 1*10^308
print(1e309)
8
inf
[ ]: # Decimal/Float
print(8.55)
print(1.7e309)
8.55
inf
[ ]: # Boolean
print(True)
print(False)
True
False
[ ]: # Text/String
print('Hello World')
Hello World
[ ]: # complex
print(5+6j)
(5+6j)
2
[1, 2, 3, 4, 5]
[ ]: # Tuple
print((1,2,3,4,5))
(1, 2, 3, 4, 5)
[ ]: # Sets
print({1,2,3,4,5})
{1, 2, 3, 4, 5}
[ ]: # Dictionary
print({'name':'Nitish','gender':'Male','weight':70})
[ ]: # type
type([1,2,3])
[ ]: list
3 3. Variables
[ ]: # Static Vs Dynamic Typing
# Static Vs Dynamic Binding
# stylish declaration techniques
[ ]: # C/C++
name = 'nitish'
print(name)
a = 5
b = 6
print(a + b)
nitish
11
[ ]: # Dynamic Typing
a = 5
# Static Typing
int a = 5
[ ]: # Dynamic Binding
a = 5
3
print(a)
a = 'nitish'
print(a)
# Static Binding
int a = 5
5
nitish
[ ]: a = 1
b = 2
c = 3
print(a,b,c)
1 2 3
[ ]: a,b,c = 1,2,3
print(a,b,c)
1 2 3
[ ]: a=b=c= 5
print(a,b,c)
5 5 5
4 Comments
[ ]: # this is a comment
# second line
a = 4
b = 6 # like this
# second comment
print(a+b)
10
[ ]: # Identifiers
# You can't start with a digit
name1 = 'Nitish'
print(name1)
4
# You can use special chars -> _
_ = 'ntiish'
print(_)
# identifiers cannot be keywords
Nitish
ntiish
6 Temp Heading
7 5. User Input
[ ]: # Static Vs Dynamic
input('Enter Email')
Enter [email protected]
[ ]: '[email protected]'
8 6. Type Conversion
[ ]: # Implicit Vs Explicit
print(5+5.6)
print(type(5),type(5.6))
print(4 + '4')
10.6
<class 'int'> <class 'float'>
---------------------------------------------------------------------------
5
TypeError Traceback (most recent call last)
<ipython-input-57-72e5c45cdb6f> in <module>
3 print(type(5),type(5.6))
4
----> 5 print(4 + '4')
[ ]: # Explicit
# str -> int
#int(4+5j)
# int to str
str(5)
# float
float(4)
[ ]: 4.0
9 7. Literals
[ ]: a = 0b1010 #Binary Literals
b = 100 #Decimal Literal
c = 0o310 #Octal Literal
d = 0x12c #Hexadecimal Literal
#Float Literal
float_1 = 10.5
float_2 = 1.5e2 # 1.5 * 10^2
float_3 = 1.5e-3 # 1.5 * 10^-3
#Complex Literal
x = 3.14j
print(a, b, c, d)
print(float_1, float_2,float_3)
print(x, x.imag, x.real)
[ ]: # binary
x = 3.14j
print(x.imag)
3.14
6
[ ]: string = 'This is Python'
strings = "This is Python"
char = "C"
multiline_str = """This is a multiline string with more than one line code."""
unicode = u"\U0001f600\U0001F606\U0001F923"
raw_str = r"raw \n string"
print(string)
print(strings)
print(char)
print(multiline_str)
print(unicode)
print(raw_str)
This is Python
This is Python
C
This is a multiline string with more than one line code.
😀😆🤣
raw \n string
[ ]: a = True + 4
b = False + 10
print("a:", a)
print("b:", b)
a: 5
b: 10
[ ]: k = None
a = 5
b = 6
print('Program exe')
Program exe
10 8. Operators
[ ]: # Arithmetic
# Relational
# Logical
# Bitwise
# Assignment
# Membership
7
11 9. If-Else
[ ]:
8
2-operators-if-else-loops-updated
May 3, 2024
print(5-6)
print(5*6)
print(5/2)
print(5//2)
print(5%2)
print(5**2)
11
-1
30
2.5
2
1
25
[ ]: # Relational Operators
print(4>5)
print(4<5)
print(4>=4)
1
print(4<=4)
print(4==4)
print(4!=4)
False
True
True
True
True
False
[ ]: # Logical Operators
print(1 and 0)
print(1 or 0)
print(not 1)
0
1
False
[ ]: # Bitwise Operators
# bitwise and
print(2 & 3)
# bitwise or
print(2 | 3)
# bitwise xor
print(2 ^ 3)
print(~3)
print(4 >> 2)
print(5 << 2)
2
3
1
-4
1
20
2
[ ]: # Assignment Operators
# =
# a = 2
a = 2
# a = a % 2
a %= 2
# a++ ++a
print(a)
[ ]: # Membership Operators
# in/not in
print(1 in [2,3,4,5,6])
False
False
# 345%10 -> 5
a = number%10
number = number//10
# 34%10 -> 4
b = number % 10
number = number//10
# 3 % 10 -> 3
c = number % 10
print(a + b + c)
3
0.2 If-else in Python
[ ]: # login program and indentation
# email -> [email protected]
# password -> 1234
enter emailsrhreh
enter passworderhetjh
Not correct
[ ]: # if-else examples
# 1. Find the min of 3 given numbers
# 2. Menu Driven Program
[ ]: # min of 3 number
a = int(input('first num'))
b = int(input('second num'))
c = int(input('third num'))
first num4
second num1
third num10
smallest is 1
4
[ ]: # menu driven calculator
menu = input("""
Hi! how can I help you.
1. Enter 1 for pin change
2. Enter 2 for balance check
3. Enter 3 for withdrawl
4. Enter 4 for exit
""")
if menu == '1':
print('pin change')
elif menu == '2':
print('balance')
else:
print('exit')
math.sqrt(196)
[ ]: 14.0
[ ]: # keyword
import keyword
print(keyword.kwlist)
5
[ ]: # random
import random
print(random.randint(1,100))
88
[ ]: # datetime
import datetime
print(datetime.datetime.now())
2022-11-08 15:50:21.228643
[ ]: help('modules')
/usr/local/lib/python3.7/dist-packages/caffe2/proto/__init__.py:17: UserWarning:
Caffe2 support is not enabled in this PyTorch build. Please enable Caffe2 by
building PyTorch from source with `BUILD_CAFFE2=1` flag.
/usr/local/lib/python3.7/dist-packages/caffe2/proto/__init__.py:17: UserWarning:
Caffe2 support is not enabled in this PyTorch build. Please enable Caffe2 by
building PyTorch from source with `BUILD_CAFFE2=1` flag.
/usr/local/lib/python3.7/dist-packages/caffe2/python/__init__.py:9: UserWarning:
Caffe2 support is not enabled in this PyTorch build. Please enable Caffe2 by
building PyTorch from source with `BUILD_CAFFE2=1` flag.
Cython collections kaggle requests_oauthlib
IPython colorcet kanren resampy
OpenGL colorlover kapre resource
PIL colorsys keras rlcompleter
ScreenResolution community keras_preprocessing rmagic
__future__ compileall keyword rpy2
_abc concurrent kiwisolver rsa
_ast confection korean_lunar_calendar runpy
_asyncio configparser langcodes samples
_bisect cons lib2to3 sched
_blake2 contextlib libfuturize scipy
_bootlocale contextlib2 libpasteurize scs
_bz2 contextvars librosa seaborn
_cffi_backend convertdate lightgbm secrets
_codecs copy linecache select
_codecs_cn copyreg llvmlite selectors
_codecs_hk crashtest lmdb send2trash
_codecs_iso2022 crcmod locale setuptools
_codecs_jp crypt locket setuptools_git
_codecs_kr csimdjson logging shapely
_codecs_tw csv lsb_release shelve
6
_collections ctypes lunarcalendar shlex
_collections_abc cufflinks lxml shutil
_compat_pickle curses lzma signal
_compression cv2 macpath simdjson
_contextvars cvxopt mailbox site
_crypt cvxpy mailcap sitecustomize
_csv cycler markdown six
_ctypes cymem markupsafe skimage
_ctypes_test cython marshal sklearn
_curses cythonmagic marshmallow sklearn_pandas
_curses_panel daft math slugify
_cvxcore dask matplotlib smart_open
_datetime dataclasses matplotlib_venn smtpd
_dbm datascience mimetypes smtplib
_decimal datetime missingno sndhdr
_distutils_hack dateutil mistune snowballstemmer
_dlib_pybind11 dbm mizani socket
_dummy_thread dbus mlxtend socketserver
_ecos debugpy mmap socks
_elementtree decimal modulefinder sockshandler
_functools decorator more_itertools softwareproperties
_hashlib defusedxml moviepy sortedcontainers
_heapq descartes mpmath soundfile
_imp difflib msgpack spacy
_io dill multidict spacy_legacy
_json dis multipledispatch spacy_loggers
_locale distributed multiprocessing sphinx
_lsprof distutils multitasking spwd
_lzma dlib murmurhash sql
_markupbase dns music21 sqlalchemy
_md5 docs natsort sqlite3
_multibytecodec doctest nbconvert sqlparse
_multiprocessing docutils nbformat sre_compile
_opcode dopamine netCDF4 sre_constants
_operator dot_parser netrc sre_parse
_osx_support dummy_threading networkx srsly
_pickle easydict nibabel ssl
_plotly_future_ ecos nis stan
_plotly_utils editdistance nisext stat
_posixsubprocess ee nltk statistics
_py_abc email nntplib statsmodels
_pydecimal en_core_web_sm notebook storemagic
_pyio encodings ntpath string
_pyrsistent_version entrypoints nturl2path stringprep
_pytest enum numba struct
_queue ephem numbergen subprocess
_random erfa numbers sunau
_remote_module_non_scriptable errno numexpr symbol
7
_rinterface_cffi_abi et_xmlfile numpy sympy
_rinterface_cffi_api etils oauth2client sympyprinting
_scs_direct etuples oauthlib symtable
_scs_indirect fa2 ogr sys
_sha1 fastai okgrade sysconfig
_sha256 fastcore opcode syslog
_sha3 fastdownload openpyxl tables
_sha512 fastdtw operator tabnanny
_signal fastjsonschema opt_einsum tabulate
_sitebuiltins fastprogress optparse tarfile
_socket fastrlock os tblib
_soundfile faulthandler osgeo telnetlib
_sqlite3 fcntl osqp tempfile
_sre feather osqppurepy tenacity
_ssl filecmp osr tensorboard
_stat fileinput ossaudiodev
tensorboard_data_server
_string filelock packaging
tensorboard_plugin_wit
_strptime firebase_admin palettable tensorflow
_struct fix_yahoo_finance pandas tensorflow_datasets
_symtable flask pandas_datareader tensorflow_estimator
_sysconfigdata_m_linux_x86_64-linux-gnu flatbuffers pandas_gbq
tensorflow_gcs_config
_sysconfigdata_m_x86_64-linux-gnu fnmatch pandas_profiling
tensorflow_hub
_testbuffer folium pandocfilters
tensorflow_io_gcs_filesystem
_testcapi formatter panel tensorflow_metadata
_testimportmultiple fractions param
tensorflow_probability
_testmultiphase frozenlist parser termcolor
_thread fsspec parso terminado
_threading_local ftplib partd termios
_tkinter functools past test
_tracemalloc future pasta testpath
_warnings gast pastel tests
_weakref gc pathlib text_unidecode
_weakrefset gdal pathy textblob
_xxtestfuzz gdalconst patsy textwrap
_yaml gdalnumeric pdb thinc
abc gdown pep517 this
absl genericpath pexpect threading
aeppl gensim pickle threadpoolctl
aesara geographiclib pickleshare tifffile
aifc geopy pickletools time
aiohttp getopt pip timeit
aiosignal getpass pipes tkinter
8
alabaster gettext piptools tlz
albumentations gi pkg_resources token
altair gin pkgutil tokenize
antigravity glob platform toml
apiclient glob2 plistlib tomli
appdirs gnm plotly toolz
apt google_auth_httplib2 plotlywidget torch
apt_inst google_auth_oauthlib plotnine torchaudio
apt_pkg google_drive_downloader pluggy torchgen
aptsources googleapiclient pooch torchsummary
argparse googlesearch poplib torchtext
array graphviz portpicker torchvision
arviz greenlet posix tornado
ast gridfs posixpath tqdm
astor grp pprint trace
astropy grpc prefetch_generator traceback
astunparse gspread preshed tracemalloc
async_timeout gspread_dataframe prettytable traitlets
asynchat gym profile tree
asyncio gym_notices progressbar tty
asyncore gzip promise turtle
asynctest h5py prompt_toolkit tweepy
atari_py hashlib prophet typeguard
atexit heapdict pstats typer
atomicwrites heapq psutil types
attr hijri_converter psycopg2 typing
attrs hmac pty typing_extensions
audioop holidays ptyprocess tzlocal
audioread holoviews pvectorc unicodedata
autograd html pwd unification
autoreload html5lib py unittest
babel http py_compile uritemplate
backcall httpimport pyarrow urllib
base64 httplib2 pyasn1 urllib3
bdb httplib2shim pyasn1_modules uu
bin httpstan pyclbr uuid
binascii humanize pycocotools vega_datasets
binhex hyperopt pycparser venv
bisect idna pyct vis
bleach imageio pydantic warnings
blis imagesize pydata_google_auth wasabi
bokeh imaplib pydoc wave
boost imblearn pydoc_data wcwidth
branca imgaug pydot weakref
bs4 imghdr pydot_ng webargs
bson imp pydotplus webbrowser
builtins importlib pydrive webencodings
bz2 importlib_metadata pyemd werkzeug
9
cProfile importlib_resources pyexpat wheel
cachecontrol imutils pygments widgetsnbextension
cached_property inflect pygtkcompat wordcloud
cachetools inspect pylab wrapt
caffe2 intervaltree pylev wsgiref
calendar io pymc xarray
catalogue ipaddress pymeeus xarray_einstats
certifi ipykernel pymongo xdrlib
cffi ipykernel_launcher pymystem3 xgboost
cftime ipython_genutils pyparsing xkit
cgi ipywidgets pyrsistent xlrd
cgitb isympy pysndfile xlwt
chardet itertools pytest xml
charset_normalizer itsdangerous python_utils xmlrpc
chunk jax pytz xxlimited
clang jaxlib pyviz_comms xxsubtype
click jieba pywt yaml
client jinja2 pyximport yarl
clikit joblib qdldl yellowbrick
cloudpickle jpeg4py qudida zict
cmake json queue zipapp
cmath jsonschema quopri zipfile
cmd jupyter random zipimport
cmdstanpy jupyter_client re zipp
code jupyter_console readline zlib
codecs jupyter_core regex zmq
codeop jupyterlab_plotly reprlib
colab jupyterlab_widgets requests
Enter any module name to get more help. Or, type "modules spam" to search
for modules whose name or summary contain the string "spam".
i = 1
10
while i<11:
print(number,'*',i,'=',number * i)
i += 1
x = 1
while x < 3:
print(x)
x += 1
else:
print('limit crossed')
1
2
limit crossed
[ ]: # Guessing game
11
else:
print('correct guess')
print('attempts',counter)
guess karo7
galat!guess higher
guess karo50
galat!guess lower
guess karo30
galat!guess higher
guess karo40
galat!guess lower
guess karo35
galat!guess lower
guess karo32
galat!guess higher
guess karo33
correct guess
attempts 7
for i in {1,2,3,4,5}:
print(i)
1
2
3
4
5
0.4.1 Program - The current population of a town is 10000. The population of the
town is increasing at the rate of 10% per year. You have to write a program to
find out the population at the end of each of the last 10 years.
# Walk backwards from the current year (10) down to year 1.
# The town grows by 10% per year, so last year's population is this year's
# divided by 1.1; subtracting 10% of the current value would undershoot.
curr_pop = 10000  # current population given in the problem statement
for i in range(10, 0, -1):
    print(i, curr_pop)
    curr_pop = curr_pop / 1.1
10 10000
9 9090.90909090909
12
8 8264.462809917353
7 7513.148009015775
6 6830.134553650703
5 6209.213230591548
4 5644.739300537771
3 5131.5811823070635
2 4665.07380209733
1 4240.976183724845
for i in range(10,0,-1):
print(i,curr_pop)
curr_pop /= 1.1
Explanation : To calculate the population for each year with a 10% increase, you can use a
simpler equation based on the previous year’s population. Let’s assume the population of the
previous year is represented by variable x.
The equation can be written as:
Current Year Population = x * 1.1
In this equation, the current year’s population is equal to the previous year’s population multiplied
by 1.1, representing a 10% increase.
To find the population of the previous year (x), we can rearrange the equation as follows:
x = Current Year Population / 1.1
Using this simplified equation, if you have the current year’s population (e.g., 10,000), you can
divide it by 1.1 to calculate the population of the previous year.
This equation allows you to calculate the population for each year, assuming you know the population
of the current year and want to find the population of the previous year.
[ ]: # code here
13
0.5.1 Pattern 1
*** **** ***
[ ]:
0.5.2 Pattern 2
1 121 12321 1234321
[ ]:
[ ]: # Continue demo
[ ]: # Pass demo
14
session3-strings-lists
May 3, 2024
0.0.1 Program - The current population of a town is 10000. The population of the
town is increasing at the rate of 10% per year. You have to write a program to
find out the population at the end of each of the last 10 years.
[ ]: # Code here
curr_pop = 10000
for i in range(10,0,-1):
print(i,curr_pop)
curr_pop = curr_pop/1.1
10 10000
9 9090.90909090909
8 8264.462809917353
7 7513.148009015775
6 6830.134553650703
5 6209.213230591548
4 5644.739300537771
3 5131.5811823070635
2 4665.07380209733
1 4240.976183724845
[ ]: # Code here
n = int(input('enter n'))
result = 0
fact = 1
for i in range(1,n+1):
fact = fact * i
result = result + i/fact
print(result)
1
enter n2
2.0
1 Nested Loops
[ ]: # Examples -> unique pairs
for i in range(1,5):
for j in range(1,5):
print(i,j)
1 1
1 2
1 3
1 4
2 1
2 2
2 3
2 4
3 1
3 2
3 3
3 4
4 1
4 2
4 3
4 4
1.0.1 Pattern 1
*** **** ***
[ ]: # code here
for i in range(1,rows+1):
for j in range(1,i+1):
print('*',end='')
print()
2
*******
********
*********
**********
1.0.2 Pattern 2
1 121 12321 1234321
[ ]: # Code here
rows = int(input('enter number of rows'))
for i in range(1,rows+1):
for j in range(1,i+1):
print(j,end='')
for k in range(i-1,0,-1):
print(k,end='')
print()
1
2
3
4
for i in range(lower,upper+1):
for j in range(2,i):
if i%j == 0:
3
break
else:
print(i)
[ ]: # Continue
for i in range(1,10):
if i == 5:
continue
print(i)
1
2
3
4
6
7
8
9
[ ]: for i in range(1,10):
pass
4
In Python specifically, strings are a sequence of Unicode Characters
• Creating Strings
• Accessing Strings
• Adding Chars to Strings
• Editing Strings
• Deleting Strings
• Operations on Strings
• String Functions
hello
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-61-633ba99ed6e5> in <module>
1 # Positive Indexing
2 s = 'hello world'
----> 3 print(s[41])
[ ]: # Negative Indexing
s = 'hello world'
print(s[-3])
5
[ ]: # Slicing
s = 'hello world'
print(s[6:0:-2])
wol
[ ]: print(s[::-1])
dlrow olleh
[ ]: s = 'hello world'
print(s[-1:-6:-1])
dlrow
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-80-0c8a824e3b73> in <module>
1 s = 'hello world'
----> 2 s[0] = 'H'
[ ]: s = 'hello world'
del s
print(s)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-81-9ae37fbf1c6c> in <module>
1 s = 'hello world'
2 del s
----> 3 print(s)
[ ]: s = 'hello world'
del s[-1:-5:2]
print(s)
6
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-82-d0d823eafb6b> in <module>
1 s = 'hello world'
----> 2 del s[-1:-5:2]
3 print(s)
delhi mumbai
[ ]: print('delhi'*5)
delhidelhidelhidelhidelhi
[ ]: print("*"*50)
**************************************************
[ ]: 'delhi' != 'delhi'
[ ]: False
[ ]: False
[ ]: False
[ ]: 'world'
[ ]: 'hello' or 'world'
7
[ ]: 'hello'
[ ]: ''
[ ]: '' or 'world'
[ ]: 'world'
[ ]: 'hello' or 'world'
[ ]: 'hello'
[ ]: 'world'
[ ]: not 'hello'
[ ]: False
[ ]: for i in 'hello':
print(i)
h
e
l
l
o
[ ]: for i in 'delhi':
print('pune')
pune
pune
pune
pune
pune
[ ]: 'D' in 'delhi'
[ ]: False
[ ]:
[ ]:
8
[ ]:
[ ]: 11
[ ]: max('hello world')
[ ]: 'w'
[ ]: min('hello world')
[ ]: ' '
[ ]: sorted('hello world',reverse=True)
[ ]: ['w', 'r', 'o', 'o', 'l', 'l', 'l', 'h', 'e', 'd', ' ']
[ ]:
2.6 Capitalize/Title/Upper/Lower/Swapcase
[ ]: s = 'hello world'
print(s.capitalize())
print(s)
Hello world
hello world
[ ]: s.title()
[ ]: 'Hello World'
[ ]: s.upper()
[ ]: 'HELLO WORLD'
[ ]: 'Hello Wolrd'.lower()
[ ]: 'hello wolrd'
9
[ ]: 'HeLlO WorLD'.swapcase()
[ ]: 'hElLo wORld'
2.7 Count/Find/Index
[ ]: 'my name is nitish'.count('i')
[ ]: 3
[ ]: -1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-121-12e2ad5b75e9> in <module>
----> 1 'my name is nitish'.index('x')
[ ]:
2.8 endswith/startswith
[ ]: 'my name is nitish'.endswith('sho')
[ ]: False
[ ]: False
2.9 format
[ ]: name = 'nitish'
gender = 'male'
10
2.10 isalnum/ isalpha/ isdigit/ isidentifier
[ ]: 'nitish1234%'.isalnum()
[ ]: False
[ ]: 'nitish'.isalpha()
[ ]: True
[ ]: '123abc'.isdigit()
[ ]: False
[ ]: 'first-name'.isidentifier()
[ ]: False
[ ]:
2.11 Split/Join
[ ]: 'hi my name is nitish'.split()
2.12 Replace
[ ]: 'hi my name is nitish'.replace('nitisrgewrhgh','campusx')
2.13 Strip
[ ]: 'nitish '.strip()
[ ]: 'nitish'
11
2.14 Example Programs
[ ]: # Find the length of a given string without using the len() function
counter = 0
for i in s:
counter += 1
pos = s.index('@')
print(s[0:pos])
counter = 0
for i in s:
if i == term:
counter += 1
print('frequency',counter)
12
result = ''
for i in s:
if i != term:
result = result + i
print(result)
[ ]: # Write a program that can check whether a given string is palindrome or not.
# abba
# malayalam
if flag:
print('Palindrome')
if i != ' ':
temp = temp + i
else:
L.append(temp)
temp = ''
L.append(temp)
print(L)
13
['hi', 'how', 'are', 'you']
[ ]: # Write a Python program to convert a string to title case without using title()
L = []
for i in s.split():
L.append(i[0].upper() + i[1:].lower())
print(" ".join(L))
digits = '0123456789'
result = ''
while number != 0:
result = digits[number % 10] + result
number = number//10
print(result)
print(type(result))
[ ]:
14
session-on-time-complexity
May 3, 2024
digits = '0123456789'
result = ''
while number != 0:
result = digits[number % 10] + result
number = number//10
print(result)
L = [1, 2, 3, 4]

# First pass: accumulate the sum of the elements. The accumulator is called
# `total` rather than `sum` so the built-in sum() is not shadowed.
total = 0
for i in L:
    total = total + i

# Second, independent pass: accumulate the product of the elements.
product = 1
for i in L:
    product = product * i

print(total, product)
[ ]: A = [1,2,3,4]
B = [5,6,7,8]
for i in A:
for j in B:
print(i,j)
[ ]: A = [1,2,3,4]
B = [5,6,7,8]
for i in A:
for j in B:
for k in range(1000000):
print(i,j)
1
[ ]: L = [1,2,3,4,5]
for i in range(0,len(L)//2):
other = len(L) - i -1
temp = L[i]
L[i] = L[other]
L[other] = temp
print(L)
[5, 4, 3, 2, 1]
[ ]: n = 10
k = 0;
for i in range(n//2,n):
for j in range(2,n,pow(2,j)):
k = k + n / 2;
print(k)
40.0
[ ]: a = 10
b = 3
if b <= 0:
print(-1)
div = a//b
print(a-div-b)
n = 345

# Peel off the last decimal digit on every pass and add it to the running
# total. `digit_sum` is used instead of `sum` so the built-in is not shadowed.
digit_sum = 0
while n > 0:
    digit_sum = digit_sum + n % 10
    n = n // 10

print(digit_sum)
12
def fib(n):
    """Return the n-th Fibonacci number, with fib(0) == fib(1) == 1.

    Plain double recursion: every call with n > 1 makes two further calls.

    Raises:
        ValueError: if n is negative, because the recursion would otherwise
            never reach a base case.
    """
    if n < 0:
        raise ValueError('n must be non-negative')
    if n == 1 or n == 0:
        return 1
    else:
        return fib(n-1) + fib(n-2)
[ ]: # Subset Algo
{3T(n-1) if n>0
T(n) = {1, otherwise
{2T(n-1)-1 if n>0
T(n) = {1, otherwise
[ ]:
3
CampusX Mentorship Program
Python interview questions & answers for lecture 1, lecture 2 and lecture 3:
Question No. Question
8 Disadvantages of Python.
15 Python Docstrings
Python is a high-level, interpreted, general-purpose programming language. Being a general-purpose language, it can be used to build almost
any type of application with the right tools/libraries. Additionally, python supports objects, modules, threads, exception-handling, and automatic
memory management which help in modelling real-world problems and building applications to solve these problems.
Python is a general-purpose programming language that has a simple, easy-to-learn syntax that emphasizes readability and therefore
reduces the cost of program maintenance. Moreover, the language is capable of scripting, is completely open-source, and supports third-
party packages encouraging modularity and code reuse.
Its high-level data structures, combined with dynamic typing and dynamic binding, attract a huge community of developers for Rapid
Application Development and deployment.
Before we understand a dynamically typed language, we should learn about what typing is. Typing refers to type-checking in programming
languages. In a strongly-typed language, such as Python, "1" + 2 will result in a type error since these languages don't allow for "type-coercion"
(implicit conversion of data types). On the other hand, a weakly-typed language, such as Javascript, will simply output "12" as result.
An interpreted language executes its statements line by line. Languages such as Python, Javascript, R, PHP, and Ruby are prime examples of
interpreted languages. Programs written in an interpreted language run directly from the source code, with no intermediary compilation step.
PEP stands for Python Enhancement Proposal. A PEP is an official design document providing information to the Python community, or
describing a new feature for Python or its processes. PEP 8 is especially important since it documents the style guidelines for Python Code.
Apparently contributing to the Python open-source community requires you to follow these style guidelines sincerely and strictly.
Read more - https://realpython.com/python-pep8/#:~:text=PEP%208%2C%20sometimes%20spelled%20PEP8,and%20consistency%20of%20Python%20code.
There are several built-in data types in Python. Although Python doesn't require data types to be defined explicitly during variable declarations,
type errors are likely to occur if knowledge of data types and their compatibility with each other is neglected. Python provides the type() and
isinstance() functions to check the type of these variables. These data types can be grouped into the following categories-
1. None Type: The None keyword represents null values in Python. Boolean equality operations can be performed using these NoneType
objects.
2. Numeric Type: There are three distinct numeric types - integers , floating-point numbers and complex numbers . Additionally, booleans
are a sub-type of integers.
3. Sequence Types: According to Python Docs, there are three basic Sequence Types - lists , tuples , and range objects. Sequence types
have the in and not in operators defined for traversing their elements. These operators share the same priority as the comparison
operations.
4. Mapping Types: A mapping object can map hashable values to arbitrary objects in Python. Mapping objects are mutable and there is
currently only one standard mapping type, the dictionary .
5. Set Types: Currently, Python has two built-in set types - set and frozenset . set type is mutable and supports methods like add() and
remove(). frozenset type is immutable and can't be modified after creation.
6. Callable Types: Callable types are the types to which function call can be applied. They can be user-defined functions, instance methods,
generator functions, and some other built-in functions, methods and classes. Refer to the documentation at docs.python.org for a
detailed view of the callable types.
https://www.programiz.com/python-programming/precedence-associativity
If the expression is True, the statement under [on true] is executed. Else, that under [on false] is executed.
a, b = 2, 3
# Conditional expression: evaluates to a when a < b, otherwise to b. The result
# is stored in `smaller` so the built-in min() is not shadowed.
smaller = a if a < b else b
print(smaller)
Bye
Identity operators
In Python, is and is not are used to check whether two values are located at the same place in memory. Two variables being equal does not imply
that they are identical.
a = 1
id(a)
11126688
a = 2
id(a)
11126720
a = 1
b = 1
a is b
True
id(a)
11126688
id(b)
11126688
a = 257
b = 257
a is b
False
id(a)
140582907232784
a == b
True
id(b)
140582907232464
# -5 to 256
a = -14
b = -14
a is b
False
Q 9: Disadvantages of Python.
https://www.geeksforgeeks.org/disadvantages-of-python
https://stackoverflow.com/questions/19224059/how-strings-are-stored-in-python-memory-model
https://www.quora.com/How-are-strings-stored-internally-in-Python-3
https://betterprogramming.pub/an-interviewers-favorite-question-how-are-python-strings-stored-in-internal-memory-ac0eaef9d9c2
The Zen of Python is a collection of 19 "guiding principles" for writing computer programs that influence the design of the Python programming
language. https://en.wikipedia.org/wiki/Zen_of_Python
Identity operators: The “is” and “is not” keywords are called identity operators that compare objects based on their identity. Equality operator:
The “==” and “!=” are called equality operators that compare the objects based on their values.
# Case 4:
# Here variable s is assigned a list,
# and q assigned a list values same as s but on slicing of list a new list is generated
s=[1,2,3]
p=s
# cloning
q=s[:]
print("id of p", id(p))
print("Id of s", id(s))
print("id of q", id(q))
print("Comapare- s == q", s==q)
print("Identity- s is q", s is q)
print("Identity- s is p", s is p)
print("Comapare- s == p", s==p)
id of p 140582906466864
Id of s 140582906466864
id of q 140582906466944
Comapare- s == q True
Identity- s is q False
Identity- s is p True
Comapare- s == p True
a = [1,2,3]
b = a[:]
a.append(4)
print(a)
print(b)
[1, 2, 3, 4]
[1, 2, 3]
GeeksForGeeks Article
The underscore _ is used for ignoring the specific values. If you don’t need the specific values or the values are not used, just assign the
values to underscore.
print("x-",x)
print("y-", y)
x- 1
y- 3
_ 2
for _ in range(5):
print('hello')
hello
hello
hello
hello
hello
Python uses some terms that you may not be familiar with if you’re coming from a different language. Among these are modules, packages,
and libraries.
A module is a Python file that’s intended to be imported into scripts or other modules. It often defines members like classes, functions,
and variables intended to be used in other files that import it.
A package is a collection of related modules that work together to provide certain functionality. These modules are contained within a
folder and can be imported just like any other modules. This folder will often contain a special __init__ file that tells Python it’s a
package, potentially containing more modules nested within subfolders
A library is an umbrella term that loosely means “a bundle of code.” These can have tens or even hundreds of individual modules that can
provide a wide range of functionality. Matplotlib is a plotting library. The Python Standard Library contains hundreds of modules for
performing common tasks, like sending emails or reading JSON data. What’s special about the Standard Library is that it comes bundled
with your installation of Python, so you can use its modules without having to download them from anywhere.
These are not strict definitions. Many people feel these terms are somewhat open to interpretation. Script and module are terms that you may
hear used interchangeably.
https://stackoverflow.com/questions/19198166/whats-the-difference-between-a-module-and-a-library-in-python
https://www.geeksforgeeks.org/what-is-the-difference-between-pythons-module-package-and-library/
The reason behind it is called “precision”, and it’s due to the fact that computers do not compute in Decimal, but in Binary. Computers do not
use a base 10 system, they use a base 2 system (also called Binary code).
https://www.geeksforgeeks.org/why-0-3-0-2-is-not-equal-to-0-1-in-python/
# code
print(0.3 - 0.2)
print(0.3 - 0.2 == 0.1)
0.09999999999999998
False
https://www.geeksforgeeks.org/python-docstrings
print('hello')
hello
type(3)
int
print(input.__doc__)
Raises
------
StdinNotImplentedError if active frontend doesn't support stdin.
print(type.__doc__)
s = 'have'
print(id(s))
s = s.capitalize()
print(id(s))
140582962705648
140582906473392
a,b = print('hello'),print('world')
hello
world
3 3 3 3
May 3, 2024
L = [12,23,1,4,56,34,22,3]
k=3
L.sort(reverse=True)
print(L[k-1])
23
flag = True
for i in range(0,len(L)-1):
if L[i] > L[i+1]:
flag = False
if flag:
print('sorted')
else:
print('not sorted')
sorted
L = [21,1,34,23,54,11,10]
1
max_val = L[0]
for i in L:
if i > max_val:
max_val = i
print(max_val)
54
L = [1,1,2,3,4,4,5,5]
k = 2
d = {}
for i in L:
if i in d:
d[i] = d[i] + 1
else:
d[i] = 1
for i in d:
if d[i] == k:
print(i)
break
d = {}
for i in L:
if i in d:
d[i] = d[i] + 1
else:
d[i] = 1
for i in d:
if d[i] > 1:
print(i)
1
4
2
5
L = [1, 2, 3, 4, 5]
rotate = 2

# Left-rotate L in place by `rotate` positions. One slice-and-concatenate does
# the whole shift, instead of moving every element once per rotation step.
# The modulo keeps the split point in range when rotate >= len(L).
# An empty list is skipped (there is nothing to rotate and the modulo would
# divide by zero); a non-positive `rotate` leaves L unchanged.
if L and rotate > 0:
    shift = rotate % len(L)
    L[:] = L[shift:] + L[:shift]

print(L)
[3, 4, 5, 1, 2]
a = [1, 2, 3, 4, 5, 8]
b = [3, 6, 7, 8]

# A membership test against a list scans the list every time. Building the set
# once makes each lookup constant-time on average.
b_items = set(b)

# Elements of a that also occur in b, in a's order. Duplicates in a are kept,
# exactly as an element-by-element loop over a would print them.
common = [x for x in a if x in b_items]
for x in common:
    print(x)
3
8
[ ]: a = [1,2,3,4,5,8]
b = [3,6,7,8]
i=j=0
3
8
3
[ ]: # 8. Find continuous subarray with a given sum (given non-negative numbers)
# return the starting and ending index of the subarray
# return 1st subarray in case of multiple
L = [1,22,13,7,9,11,10]
S = 16
for i in range(0,len(L)):
subarray = []
for j in range(i,len(L)):
subarray.append(L[j])
if sum(subarray) == S:
print(subarray)
[11, 10]
def find_subarray_with_sum(nums, target):
    """Return (start, end) of a contiguous run of nums whose sum is target.

    Both indices are inclusive. The list is scanned left to right and the
    first match found (the one with the smallest end index) is returned.
    Returns None when no run sums to target.
    """
    # Maps each prefix sum seen so far to the index at which it ends.
    # The empty prefix (sum 0, "ending" at index -1) is seeded up front;
    # without it a run that starts at index 0 could never be matched.
    prefix_end = {0: -1}
    curr_sum = 0
    for i, value in enumerate(nums):
        curr_sum = curr_sum + value
        # If an earlier prefix sums to curr_sum - target, the elements after
        # that prefix up to i add up to exactly target.
        if (curr_sum - target) in prefix_end:
            return prefix_end[curr_sum - target] + 1, i
        prefix_end[curr_sum] = i
    return None


L = [1, 22, 13, 7, 9, 11, 10]
S = 35
result = find_subarray_with_sum(L, S)
if result is not None:
    print(*result)
1 2
L = [3,1,2,5,8,7,9]
for i in range(1,len(L)-1):
flag = True
for j in range(0,i):
if L[j] > L[i]:
flag = False
for k in range(i+1,len(L)):
if L[k] < L[i]:
4
flag = False
if flag:
print(L[i])
[ ]: L = [3,1,2,5,8,7,9]
for i in range(1,len(L)-1):
if max(L[:i]) < L[i] < min(L[i+1:]):
print(L[i])
L = [3, 1, 2, 5, 8, 7, 9]

# max_arr[i] is the largest value in L[:i + 1] (running maximum from the left).
max_arr = []
max_val = L[0]
for value in L:
    if value > max_val:
        max_val = value
    max_arr.append(max_val)

# min_arr[i] is the smallest value in L[i:] (running minimum from the right).
# It is built by appending during a right-to-left scan and reversing once at
# the end; inserting at the front on every step would shift the whole list
# each time.
min_arr = []
min_val = L[-1]
for value in reversed(L):
    if value < min_val:
        min_val = value
    min_arr.append(min_val)
min_arr.reverse()

# An interior element qualifies when everything to its left is smaller and
# everything to its right is larger.
for i in range(1, len(L) - 1):
    if max_arr[i - 1] < L[i] < min_arr[i + 1]:
        print(L[i])
d = {}
for i in range(0,len(L)):
subarray = []
for j in range(i,len(L)):
5
subarray.append(L[j])
d[sum(subarray)] = subarray[:]
max_val = max(d.keys())
for i in d:
if i == max_val:
print(d[i])
def max_subarray(nums):
    """Return (best_sum, best_seq) for the contiguous run of nums with the largest sum.

    best_seq is a new list holding the elements of that run. When several
    runs tie for the largest sum, the one found first in a left-to-right
    scan is returned.

    Raises:
        ValueError: if nums is empty.
    """
    if not nums:
        raise ValueError('nums must not be empty')

    best_sum = nums[0]
    best_seq = [nums[0]]
    curr_sum = 0
    curr_seq = []
    for value in nums:
        if curr_sum > 0:
            # The running total still helps: extend the current run.
            curr_sum = curr_sum + value
            curr_seq.append(value)
        else:
            # A non-positive running total can only drag the sum down, so
            # start a fresh run at this element.
            curr_sum = value
            curr_seq = [value]
        # Record the best run seen so far. Without this step the result would
        # never move past its initial value.
        if curr_sum > best_sum:
            best_sum = curr_sum
            best_seq = curr_seq[:]  # copy: curr_seq keeps being mutated
    return best_sum, best_seq


L = [-2, 4, 7, -1, 6, -11, 14, 3, -1, -6]
best_sum, best_seq = max_subarray(L)
print(best_sum, best_seq)
[ ]:
[ ]: -9223372036854775808
6
[ ]: # 15. Find Pythagorean triplets in an array
7
session-4-python-lists
May 3, 2024
0.1 1. Lists
• What are Lists?
• Lists Vs Arrays
• Characteristics of a List
• How to create a list
• Access items from a List
• Editing items in a List
• Deleting items from a List
• Operations on Lists
• Functions on Lists
print(id(L))
print(id(L[0]))
print(id(L[1]))
print(id(L[2]))
print(id(1))
print(id(2))
print(id(3))
140163201133376
11126688
1
11126720
11126752
11126688
11126720
11126752
L == L1
[ ]: False
[]
[1, 2, 3, 4, 5]
[1, 2, 3, [4, 5]]
[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
[1, True, 5.6, (5+6j), 'Hello']
['h', 'e', 'l', 'l', 'o']
2
0.6 Accessing Items from a List
[ ]: # Indexing
L = [[[1,2],[3,4]],[[5,6],[7,8]]]
#positive
#print(L[0][0][1])
# Slicing
L = [1,2,3,4,5,6]
print(L[::-1])
[6, 5, 4, 3, 2, 1]
[1, 2, 3, 4, 5, True]
[ ]: # extend
L = [1,2,3,4,5]
L.extend([6,7,8])
print(L)
[1, 2, 3, 4, 5, 6, 7, 8]
[ ]: L = [1,2,3,4,5]
L.append([6,7,8])
print(L)
[ ]: L = [1,2,3,4,5]
L.extend('delhi')
print(L)
[ ]: # insert
L = [1,2,3,4,5]
L.insert(1,100)
print(L)
[1, 100, 2, 3, 4, 5]
3
1 Editing items in a List
[ ]: L = [1,2,3,4,5]
print(L)
# indexing
del L[-1]
# slicing
del L[1:3]
print(L)
[1, 4]
[ ]: # remove
L = [1,2,3,4,5]
L.remove(5)
print(L)
[1, 2, 3, 4]
[ ]: # pop
L = [1,2,3,4,5]
L.pop()
print(L)
[1, 2, 3, 4]
4
[ ]: # clear
L = [1,2,3,4,5]
L.clear()
print(L)
[]
L1 = [1,2,3,4]
L2 = [5,6,7,8]
# Concatenation/Merge
print(L1 + L2)
[1, 2, 3, 4, 5, 6, 7, 8]
[ ]: print(L1*3)
[1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]
[ ]: L1 = [1,2,3,4,5]
L2 = [1,2,3,4,[5,6]]
False
True
[ ]: # Loops
L1 = [1,2,3,4,5]
L2 = [1,2,3,4,[5,6]]
L3 = [[[1,2],[3,4]],[[5,6],[7,8]]]
for i in L3:
print(i)
5
1.3 List Functions
[ ]: # len/min/max/sorted
L = [2,1,5,7,0]
print(len(L))
print(min(L))
print(max(L))
print(sorted(L,reverse=True))
5
0
7
[7, 5, 2, 1, 0]
[ ]: # count
L = [1,2,1,3,4,1,5]
L.count(5)
[ ]: 1
[ ]: # index
L = [1,2,1,3,4,1,5]
L.index(1)
[ ]: 0
[ ]: # reverse
L = [2,1,5,7,0]
# permanently reverses the list
L.reverse()
print(L)
[0, 7, 5, 1, 2]
[2, 1, 5, 7, 0]
[0, 1, 2, 5, 7]
[2, 1, 5, 7, 0]
[0, 1, 2, 5, 7]
6
[ ]: # copy -> shallow
L = [2,1,5,7,0]
print(L)
print(id(L))
L1 = L.copy()
print(L1)
print(id(L1))
[2, 1, 5, 7, 0]
140163201056112
[2, 1, 5, 7, 0]
140163201128800
for i in range(1,11):
L.append(i)
print(L)
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[ ]: L = [i for i in range(1,11)]
print(L)
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[s*i for i in v]
[ ]: # Add squares
L = [1,2,3,4,5]
7
[i**2 for i in L]
[ ]: [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
[ ]: ['python', 'php']
# add new list from my_fruits and items if the fruit exists in basket and also starts with 'a'
[ ]: ['apple']
[ ]: # Print a (3,3) matrix using list comprehension -> Nested List comprehension
[[i*j for i in range(1,4)] for j in range(1,4)]
[ ]: [5, 6, 7, 8, 10, 12, 14, 16, 15, 18, 21, 24, 20, 24, 28, 32]
8
[ ]: # itemwise
L = [1,2,3,4]
for i in L:
print(i)
1
2
3
4
[ ]: # indexwise
L = [1,2,3,4]
for i in range(0,len(L)):
print(L[i])
1
2
3
4
1.6 Zip
The zip() function returns a zip object, which is an iterator of tuples: the first items of all the
passed iterables are paired together, then the second items are paired together, and so on.
If the passed iterables have different lengths, the iterable with the fewest items decides the length
of the result.
# Write a program to add items of 2 lists indexwise
L1 = [1,2,3,4]
L2 = [-1,-2,-3,-4]

# zip pairs the items position by position; each pair is then added up
[i+j for i,j in zip(L1,L2)]
[ ]: [0, 0, 0, 0]
[ ]: L = [1,2,print,type,input]
print(L)
9
1.7 Disadvantages of Python Lists
• Slow
• Risky usage
• eats up more memory
[ ]: a = [1,2,3]
b = a.copy()
print(a)
print(b)
a.append(4)
print(a)
print(b)
[1, 2, 3]
[1, 2, 3]
[1, 2, 3, 4]
[1, 2, 3]
L = [1,2,3,4,5,6]
[ ]: # Write a program to replace an item with a different item if found in the list
L = [1,2,3,4,5,3]
# replace 3 with 300
L = [1,2,1,2,3,4,5,3,4]
10
python-tuple-sets-dictionary
May 3, 2024
1 Tuples
A tuple in Python is similar to a list. The difference between the two is that we cannot change the
elements of a tuple once it is assigned whereas we can change the elements of a list.
In short, a tuple is an immutable list. A tuple can not be changed in any way once it is created.
Characteristics
• Ordered
• Unchangeable
• Allows duplicate
1
print(t5)
# using type conversion
t6 = tuple('hello')
print(t6)
()
('hello',)
<class 'tuple'>
(1, 2, 3, 4)
(1, 2.5, True, [1, 2, 3])
(1, 2, 3, (4, 5))
('h', 'e', 'l', 'l', 'o')
(1, 2, 3, 4)
1
4
[ ]: t5[-1][0]
[ ]: 4
[ ]: print(t3)
t3[0] = 100
# immutable just like strings
(1, 2, 3, 4)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-30-49d9e1416ccf> in <module>
1 print(t3)
----> 2 t3[0] = 100
2
1.0.5 Adding items
[ ]: print(t3)
# not possible
(1, 2, 3, 4)
[ ]: print(t3)
del t3
print(t3)
(1, 2, 3, 4)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-33-0a67b29ad777> in <module>
1 print(t3)
2 del t3
----> 3 print(t3)
[ ]: t = (1,2,3,4,5)
t[-1:-4:-1]
[ ]: (5, 4, 3)
[ ]: print(t5)
del t5[-1]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-35-2b39d140e8ae> in <module>
1 print(t5)
----> 2 del t5[-1]
3
1.0.7 Operations on Tuples
[ ]: # + and *
t1 = (1,2,3,4)
t2 = (5,6,7,8)
print(t1 + t2)
print(t1*3)
# membership
1 in t1
# iteration
for i in t1:
print(i)
(1, 2, 3, 4, 5, 6, 7, 8)
(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4)
1
2
3
4
[ ]: # len/sum/min/max/sorted
t = (1,2,3,4)
len(t)
sum(t)
min(t)
max(t)
sorted(t,reverse=True)
[ ]: [4, 3, 2, 1]
[ ]: # count
t = (1,2,3,4,5)
t.count(50)
[ ]: 0
[ ]: # index
t.index(50)
4
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-51-cae2b6ba49a8> in <module>
1 # index
----> 2 t.index(50)
# Compare how long it takes to loop over a list and over a tuple that hold the
# same 100000000 integers.
# NOTE: time.time() is used below, so `import time` must have been run before
# this cell; the import is not part of the visible code.
L = list(range(100000000))
T = tuple(range(100000000))
# time the loop over the list
start = time.time()
for i in L:
i*5
print('List time',time.time()-start)
# time the same loop over the tuple
start = time.time()
for i in T:
i*5
print('Tuple time',time.time()-start)
[ ]: import sys
L = list(range(1000))
T = tuple(range(1000))
print('List size',sys.getsizeof(L))
print('Tuple size',sys.getsizeof(T))
5
[ ]: a = [1,2,3]
b = a
a.append(4)
print(a)
print(b)
[1, 2, 3, 4]
[1, 2, 3, 4]
[ ]: a = (1,2,3)
b = a
a = a + (4,)
print(a)
print(b)
(1, 2, 3, 4)
(1, 2, 3)
[ ]: # tuple unpacking
a,b,c = (1,2,3)
print(a,b,c)
1 2 3
[ ]: a,b = (1,2,3)
print(a,b)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-55-22f327f11d4b> in <module>
----> 1 a,b = (1,2,3)
2 print(a,b)
[ ]: a = 1
b = 2
a,b = b,a
print(a,b)
2 1
6
[ ]: a,b,*others = (1,2,3,4)
print(a,b)
print(others)
1 2
[3, 4]
[ ]: # zipping tuples
a = (1,2,3,4)
b = (5,6,7,8)
tuple(zip(a,b))
2 Sets
A set is an unordered collection of items. Every set element is unique (no duplicates) and must be
immutable (cannot be changed).
However, a set itself is mutable. We can add or remove items from it.
Sets can also be used to perform mathematical set operations like union, intersection, symmetric
difference, etc.
Characteristics: - Unordered - Mutable - No Duplicates - Can’t contain mutable data types
s4 = set([1,2,3])
print(s4)
# duplicates not allowed
s5 = {1,1,2,2,3,3}
print(s5)
7
# set can't have mutable items
s6 = {1,2,[3,4]}
print(s6)
set()
<class 'set'>
{1, 2, 3}
{1, 4.5, (1, 2, 3), 'hello'}
{1, 2, 3}
{1, 2, 3}
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-71-ab3c7dde6aed> in <module>
19 print(s5)
20 # set can't have mutable items
---> 21 s6 = {1,2,[3,4]}
22 print(s6)
[ ]: s1 = {1,2,3}
s2 = {3,2,1}
print(s1 == s2)
True
[ ]:
[ ]: s1 = {1,2,3,4}
s1[0:3]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-75-4c49b6b6050d> in <module>
1 s1 = {1,2,3,4}
----> 2 s1[0:3]
8
2.0.3 Editing Items
[ ]: s1 = {1,2,3,4}
s1[0] = 100
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-76-bd617ce25076> in <module>
1 s1 = {1,2,3,4}
----> 2 s1[0] = 100
[ ]: S = {1,2,3,4}
# add
# S.add(5)
# print(S)
# update
S.update([5,6,7])
print(S)
{1, 2, 3, 4, 5, 6, 7}
[ ]: # del
s = {1,2,3,4,5}
# print(s)
# del s[0]
# print(s)
# discard
# s.discard(50)
# print(s)
# remove
# s.remove(50)
# print(s)
# pop
# s.pop()
# clear
s.clear()
print(s)
set()
9
2.0.6 Set Operation
[ ]: s1 = {1,2,3,4,5}
s2 = {4,5,6,7,8}
s1 | s2
# Union(|)
# Intersection(&)
s1 & s2
# Difference(-)
s1 - s2
s2 - s1
# Symmetric Difference(^)
s1 ^ s2
# Membership Test
1 not in s1
# Iteration
for i in s1:
print(i)
1
2
3
4
5
[ ]: [7, 5, 4, 3, 2, 1]
# union/update
s1 = {1,2,3,4,5}
s2 = {4,5,6,7,8}
# s1 | s2
# union() returns a new set and leaves s1 untouched
s1.union(s2)

# update() adds the items of s2 to s1 in place
s1.update(s2)
print(s1)
print(s2)
10
{1, 2, 3, 4, 5, 6, 7, 8}
{4, 5, 6, 7, 8}
[ ]: # intersection/intersection_update
s1 = {1,2,3,4,5}
s2 = {4,5,6,7,8}
s1.intersection(s2)
s1.intersection_update(s2)
print(s1)
print(s2)
{4, 5}
{4, 5, 6, 7, 8}
[ ]: # difference/difference_update
s1 = {1,2,3,4,5}
s2 = {4,5,6,7,8}
s1.difference(s2)
s1.difference_update(s2)
print(s1)
print(s2)
{1, 2, 3}
{4, 5, 6, 7, 8}
[ ]: # symmetric_difference/symmetric_difference_update
s1 = {1,2,3,4,5}
s2 = {4,5,6,7,8}
s1.symmetric_difference(s2)
s1.symmetric_difference_update(s2)
print(s1)
print(s2)
{1, 2, 3, 6, 7, 8}
{4, 5, 6, 7, 8}
[ ]: # isdisjoint/issubset/issuperset
s1 = {1,2,3,4}
s2 = {7,8,5,6}
s1.isdisjoint(s2)
11
[ ]: True
[ ]: s1 = {1,2,3,4,5}
s2 = {3,4,5}
s1.issuperset(s2)
[ ]: True
[ ]: # copy
s1 = {1,2,3}
s2 = s1.copy()
print(s1)
print(s2)
{1, 2, 3}
{1, 2, 3}
2.0.8 Frozenset
Frozen set is just an immutable version of a Python set object
[ ]: # create frozenset
fs1 = frozenset([1,2,3])
fs2 = frozenset([3,4,5])
fs1 | fs2
[ ]: frozenset({1, 2, 3, 4, 5})
[ ]: # When to use
# 2D sets
fs = frozenset([1,2,frozenset([3,4])])
fs
[ ]: # examples
12
[ ]: {36, 49, 64, 81, 100}
[ ]:
3 Dictionary
A dictionary in Python is a collection of key-value pairs. It is used to store data like a map:
unlike other data types, which hold only a single value per element, every element of a dictionary
pairs a key with a value.
In some languages it is known as a map or an associative array.
dict = { 'name' : 'nitish' , 'age' : 33 , 'gender' : 'male' }
Characteristics:
• Mutable
• Indexing has no meaning
• keys can’t be duplicated
• keys can’t be mutable items
13
d5
# mutable items as keys
d6 = {'name':'nitish',(1,2,3):2}
print(d6)
s['subjects']['maths']
[ ]: 67
[ ]: d4['gender'] = 'male'
d4
d4['weight'] = 72
d4
s['subjects']['ds'] = 75
s
[ ]: {'name': 'nitish',
'college': 'bit',
'sem': 4,
'subjects': {'dsa': 50, 'maths': 67, 'english': 34, 'ds': 75}}
14
# clear
d.clear()
print(d)
del s['subjects']['maths']
s
{}
[ ]: {'name': 'nitish',
'college': 'bit',
'sem': 4,
'subjects': {'dsa': 50, 'english': 34, 'ds': 75}}
[ ]: s['subjects']['dsa'] = 80
s
[ ]: {'name': 'nitish',
'college': 'bit',
'sem': 5,
'subjects': {'dsa': 80, 'english': 34, 'ds': 75}}
'name' in s
[ ]: True
[ ]: d = {'name':'nitish','gender':'male','age':33}
for i in d:
print(i,d[i])
name nitish
gender male
age 33
15
3.0.7 Dictionary Functions
[ ]: # len/sorted
len(d)
print(d)
sorted(d,reverse=True)
max(d)
[ ]: 'name'
[ ]: # items/keys/values
print(d)
print(d.items())
print(d.keys())
print(d.values())
[ ]: # update
d1 = {1:2,3:4,4:5}
d2 = {4:7,6:8}
d1.update(d2)
print(d1)
{1: 2, 3: 4, 4: 7, 6: 8}
[ ]: distances = {'delhi':1000,'mumbai':2000,'bangalore':3000}
print(distances.items())
16
[ ]: # using existing dict
distances = {'delhi':1000,'mumbai':2000,'bangalore':3000}
{key:value*0.62 for (key,value) in distances.items()}
[ ]: # using zip
days = ["Sunday", "Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"]
temp_C = [30.5,32.6,31.8,33.4,29.8,30.2,29.9]
[ ]: {'Sunday': 30.5,
'Monday': 32.6,
'Tuesday': 31.8,
'Wednesday': 33.4,
'Thursday': 29.8,
'Friday': 30.2,
'Saturday': 29.9}
[ ]: # using if condition
products = {'phone':10,'laptop':0,'charger':32,'tablet':0}
[ ]: # Nested Comprehension
# print tables of number from 2 to 4
{i:{j:i*j for j in range(1,11)} for i in range(2,5)}
[ ]: {
2:{1:2,2:4,3:6,4:8},
3:{1:3,2:6,3:9,4:12},
4:{1:4,2:8,3:12,4:16}
}
17
session6-functions
May 3, 2024
def is_even(num):
    """
    Tell whether a number is odd or even.

    input - any value; only integers are classified
    output - 'even' or 'odd' for an integer, 'pagal hai kya?' for anything else
    created on - 16th Nov 2022
    """
    # bool is a subclass of int, so it is excluded explicitly to keep
    # True/False out of the integer branch
    if not isinstance(num, int) or isinstance(num, bool):
        return 'pagal hai kya?'
    return 'even' if num % 2 == 0 else 'odd'
[ ]: # function
# function_name(input)
for i in range(1,11):
x = is_even(i)
print(x)
odd
even
odd
even
odd
even
odd
even
odd
even
[ ]: print(type.__doc__)
1
type(name, bases, dict) -> a new type
[ ]: power()
[ ]: 1
[ ]: # positional argument
power(2,3)
[ ]: 8
[ ]: # keyword argument
power(b=3,a=2)
[ ]: 8
def multiply(*args):
    """
    Multiply any number of positional arguments together.

    input - zero or more numbers
    output - their product (1 when called without arguments)
    """
    product = 1
    for i in args:
        product = product * i
    # show the tuple into which *args collected the positional arguments
    print(args)
    return product
2
[ ]: multiply(1,2,3,4,5,6,7,8,9,10,12)
[ ]: 43545600
[ ]: # **kwargs
# **kwargs allows us to pass any number of keyword arguments.
# Keyword arguments mean that they contain a key-value pair, like a Python dictionary.
def display(**salman):
[ ]: display(india='delhi',srilanka='colombo',nepal='kathmandu',pakistan='islamabad')
[ ]:
None
[1, 2, 3, 4]
3
0.0.8 Variable Scope
[ ]: def g(y):
# x is never assigned inside g, so both prints read the global x;
# the parameter y is not used
print(x)
print(x+1)
x = 5
g(x)
# g only read x, so the global value is unchanged
print(x)
[ ]: def f(y):
# assigning to x inside the function creates a local x that is unrelated
# to the global x defined below
x = 1
x += 1
print(x)
x = 5
f(x)
# the function only changed its own local x, so the global x is still 5
print(x)
[ ]: def h(y):
# x is assigned inside h, so Python treats it as a local variable there;
# `x += 1` reads that local before it has a value and raises
# UnboundLocalError (the global x = 5 is never consulted)
x += 1
x = 5
h(x)
print(x)
[ ]: def f(x):
x = x + 1
print('in f(x): x =', x)
return x
x = 3
z = f(x)
print('in main program scope: z =', z)
print('in main program scope: x =', x)
[ ]: f()
inside function g
inside function g
inside function g
4
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
5
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
6
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
7
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
8
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
9
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
10
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
11
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
12
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
13
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
inside function g
---------------------------------------------------------------------------
RecursionError Traceback (most recent call last)
<ipython-input-92-c43e34e6d405> in <module>
----> 1 f()
14
<ipython-input-91-374a68ddd49e> in f()
3 print('inside function g')
4 f()
----> 5 g()
6 print('inside function f')
<ipython-input-91-374a68ddd49e> in g()
2 def g():
3 print('inside function g')
----> 4 f()
5 g()
6 print('inside function f')
<ipython-input-91-374a68ddd49e> in f()
3 print('inside function g')
4 f()
----> 5 g()
6 print('inside function f')
[ ]: def g(x):
def h():
x = 'abc'
x = x + 1
print('in g(x): x =', x)
h()
return x
x = 3
z = g(x)
[ ]: def g(x):
def h(x):
x = x+1
print("in h(x): x = ", x)
x = x + 1
print('in g(x): x = ', x)
h(x)
return x
x = 3
z = g(x)
print('in main program scope: x = ', x)
15
print('in main program scope: z = ', z)
[ ]:
type(square)
id(square)
[ ]: 140471717004784
[ ]: # reassign
x = square
id(x)
x(3)
[ ]: 9
[ ]: a = 2
b = a
b
[ ]: 2
[ ]: # deleting a function
del square
[ ]: square(3)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-104-2cfd8bba3a88> in <module>
----> 1 square(3)
[ ]: # storing
L = [1,2,3,4,square]
L[-1](3)
[ ]: 9
16
[ ]: s = {square}
s
[ ]: {<function __main__.square(num)>}
[ ]: # returning a function
[ ]:
[ ]: def f():
def x(a, b):
return a+b
return x
val = f()(3,4)
print(val)
[ ]: # function as argument
def func_a():
    """Print a marker showing that func_a ran."""
    print('inside func_a')


def func_b(z):
    """
    Call the function that was passed in as an argument.

    input - z: a callable that takes no arguments
    output - whatever z() returns
    """
    print('inside func_b')
    return z()


print(func_b(func_a))
[ ]: # x -> x^2
lambda x:x**2
[ ]: <function __main__.<lambda>(x)>
17
[ ]: # x,y -> x+y
a = lambda x,y:x+y
a(5,2)
[ ]: 7
[ ]: False
[ ]: # odd or even
a = lambda x:'even' if x%2 == 0 else 'odd'
a(6)
[ ]: 'even'
18
# Example
def square(x):
    """Return x raised to the power 2."""
    return x**2


def cube(x):
    """Return x raised to the power 3."""
    return x**3


# HOF
def transform(f,L):
    """
    Apply f to every item of L and print the resulting list.

    input - f: a one-argument function, L: an iterable of items
    output - None (the transformed list is only printed)
    """
    print([f(item) for item in L])


L = [1,2,3,4,5]
transform(lambda x:x**3,L)
0.0.14 Map
users = [
{
'name':'Rahul',
'age':45,
'gender':'male'
},
{
'name':'Nitish',
'age':33,
'gender':'male'
19
},
{
'name':'Ankita',
'age':50,
'gender':'female'
}
]
list(map(lambda users:users['gender'],users))
0.0.15 Filter
[ ]: # numbers greater than 5
L = [3,4,5,6,7]
list(filter(lambda x:x>5,L))
[ ]: [6, 7]
list(filter(lambda x:x.startswith('a'),fruits))
[ ]: ['apple']
0.0.16 Reduce
[ ]: # sum of all item
import functools
functools.reduce(lambda x,y:x+y,[1,2,3,4,5])
[ ]: 15
[ ]: # find max -> the lambda keeps the larger value of every pair
functools.reduce(lambda x,y:x if x>y else y,[23,11,45,10,1])
[ ]: 45
[ ]:
20
Question No. Question
1 What is aliasing?
4 What is cloning?
a = 4
id(a)
account_circle 11126784
id(4)
11126784
hex(11126784)
'0xa9c800'
a = 4
b = a
print(id(a))
print(id(b))
11126784
11126784
c = b
print(id(c))
11126784
del a
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-23-3f786850e387> in <module>
----> 1 a
print(b)
del b
print(c)
a = 'DSMP 2022-23'
b = a
c = b
import sys
sys.getrefcount('DSMP 2022-23')
3
L = [1,2,3]
print(id(L))
L.append(4)
print(L)
print(id(L))
139652074757168
[1, 2, 3, 4]
139652074757168
T = (1,2,3)
print(id(T))
T = T + (4,)
print(T)
print(id(T))
139652076050304
(1, 2, 3, 4)
139652075362160
a = [1,2,3]
b = a
b.append(4)
print(b)
[1, 2, 3, 4]
print(a)
[1, 2, 3, 4]
def func(data):
data.append(4)
a = [1,2,3]
func(a[:])
print(a)
[1, 2, 3]
a = [1,2,3]
# cloning
b = a[:]
id(a)
139652074408688
id(b)
139652074409728
b.append(4)
b
[1, 2, 3, 4]
[1, 2, 3]
a = {'name':'nitish','age':33}
# aliasing->cloning
b = a.copy()
b['gender'] = 'male'
print(b)
print(a)
a = [1,2,3]
# shallow
b = a.copy()
b.append(4)
print(a)
print(b)
[1, 2, 3]
[1, 2, 3, 4]
a = [1,2,3,[4,5]]
a
print(id(a[-1]))
print(id(b[-1]))
139652074560480
139652074560480
b = a.copy()
b
print(id(a))
print(id(b))
139652074868240
139652074561280
b[-1][0] = 400
print(b)
import copy
a = [1,2,3,[4,5]]
a
b = a[:]
b
b[-1][0] = 400
b
a
[1, 2, 3, [400, 5]]
print(id(a[-1]))
print(id(b[-1]))
139652075071600
139652074658464
s = 'hello'
id(s)
139652297075120
id(s[0])
139652626357424
id('h')
139652626357424
{12,}
oop-project
May 3, 2024
[ ]: import nlpcloud
client = nlpcloud.Client("finetuned-gpt-neox-20b",␣
↪"2b58d7fb9af09e617ee525e78c7766b6d8c5bb61", gpu=True, lang="en")
↪searched_entity="programming languages")
1
[ ]: import nlpcloud
client = nlpcloud.Client("distilbert-base-uncased-emotion",␣
↪"2b58d7fb9af09e617ee525e78c7766b6d8c5bb61", gpu=False, lang="en")
[ ]: import nlpcloud
client = nlpcloud.Client("distilbert-base-uncased-emotion",␣
↪"2b58d7fb9af09e617ee525e78c7766b6d8c5bb61", gpu=False, lang="en")
[ ]: import nlpcloud
client = nlpcloud.Client("python-langdetect",␣
↪"3126efa8746a8c9a683e757205437143fa015ec5")
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/nlpcloud/__init__.py in␣
↪langdetection(self, text)
294 try:
--> 295 r.raise_for_status()
296 except HTTPError as err:
/usr/local/lib/python3.7/dist-packages/requests/models.py in␣
↪raise_for_status(self)
940 if http_error_msg:
2
--> 941 raise HTTPError(http_error_msg, response=self)
942
/usr/local/lib/python3.7/dist-packages/nlpcloud/__init__.py in␣
↪langdetection(self, text)
[ ]: import nlpcloud
client = nlpcloud.Client("finetuned-gpt-neox-20b",␣
↪"3126efa8746a8c9a683e757205437143fa015ec5", gpu=True, lang="en")
↪searched_entity="programming languages")
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/nlpcloud/__init__.py in entities(self,␣
↪text, searched_entity)
178 try:
--> 179 r.raise_for_status()
180 except HTTPError as err:
/usr/local/lib/python3.7/dist-packages/requests/models.py in␣
↪raise_for_status(self)
940 if http_error_msg:
--> 941 raise HTTPError(http_error_msg, response=self)
3
942
↪searched_entity="programming languages")
/usr/local/lib/python3.7/dist-packages/nlpcloud/__init__.py in entities(self,␣
↪text, searched_entity)
4
[ ]: import nlpcloud
class NLPApp:
    """
    Console application that lets a registered user run NLP tasks through the
    NLP Cloud client.

    Users live in an in-memory dict (email -> [name, password]), so every
    registration is lost when the object goes away. Creating an object
    immediately shows the first menu and waits for console input.
    """

    # NOTE(review): the API token is hard-coded, exactly as in the session
    # notebook. Real code should load it from an environment variable or a
    # config file, and this token should be rotated.
    _API_KEY = "2b58d7fb9af09e617ee525e78c7766b6d8c5bb61"

    def __init__(self):
        self.__database = {}
        self.__first_menu()

    def __first_menu(self):
        """Show the register/login menu and dispatch on the user's choice."""
        first_input = input("""
Hi! how would you like to proceed?
1. Not a member? Register
2. Already a member? Login
3. Galti se aa gaye? Exit
""")
        if first_input == '1':
            self.__register()
        elif first_input == '2':
            self.__login()
        else:
            exit()

    def __second_menu(self):
        """Show the task menu (only reachable after a successful login)."""
        second_input = input("""
Hi! how would you like to proceed?
1. NER
2. Language Detection
3. Sentiment Analysis
4. Logout
""")
        if second_input == '1':
            self.__ner()
        elif second_input == '2':
            self.__language_detection()
        elif second_input == '3':
            self.__sentiment_analysis()
        else:
            exit()

    def __register(self):
        """Ask for name, email and password and store a new user."""
        name = input('enter name')
        email = input('enter email')
        password = input('enter password')

        if email in self.__database:
            print('email already exists')
        else:
            self.__database[email] = [name,password]
            print('registration successful.Now login')

        # debug aid kept from the session; it shows the plain-text passwords
        print(self.__database)
        self.__first_menu()

    def __login(self):
        """Ask for the credentials and open the task menu when they match."""
        # the credentials have to be read here; nothing else provides them
        email = input('enter email')
        password = input('enter password')

        if email in self.__database:
            if self.__database[email][1] == password:
                print('login successful')
                self.__second_menu()
            else:
                print('wrong password.Try again')
                self.__login()
        else:
            print('This email is not registered')
            self.__first_menu()

    def __ner(self):
        """Run named entity recognition on a paragraph typed by the user."""
        para = input('enter the paragraph')
        search_term = input('what would you like to search')

        client = nlpcloud.Client("finetuned-gpt-neox-20b", self._API_KEY, gpu=True, lang="en")
        response = client.entities(para, searched_entity=search_term)
        print(response)
        self.__second_menu()

    def __language_detection(self):
        """Detect the language of a paragraph typed by the user."""
        para = input('enter the paragraph')

        client = nlpcloud.Client("python-langdetect", self._API_KEY)
        response = client.langdetection(para)
        print(response)
        self.__second_menu()

    def __sentiment_analysis(self):
        """Print the strongest emotion found in a paragraph typed by the user."""
        para = input('enter the paragraph')

        client = nlpcloud.Client("distilbert-base-uncased-emotion", self._API_KEY, gpu=False, lang="en")
        response = client.sentiment(para)

        print(self._top_label(response['scored_labels']))
        self.__second_menu()

    @staticmethod
    def _top_label(scored_labels):
        """
        Return the 'label' of the entry that has the highest 'score'.

        scored_labels - list of dicts that each carry a 'label' and a 'score'
        Raises ValueError when the list is empty.
        """
        return max(scored_labels, key=lambda item: item['score'])['label']
obj = NLPApp()
6
Hi! how would you like to proceed?
1. Not a member? Register
2. Already a member? Login
3. Galti se aa gaye? Exit
1
enter namenitish
enter [email protected]
enter password1234
registration successful.Now login
{'[email protected]': ['nitish', '1234']}
7
[ ]: d = {'scored_labels': [{'label': 'sadness', 'score': 0.98093181848526},␣
↪{'label': 'joy', 'score': 0.001407247269526124}, {'label': 'love', 'score':␣
↪'score': 0.00035347335506230593}]}
[ ]: d
[ ]: L = []
for i in d['scored_labels']:
L.append(i['score'])
[ ]: d['scored_labels'][index]['label']
[ ]: 'sadness'
[ ]: import emoji
print(emoji.emojize('Python is :grinning_face:'))
Python is �
8
[ ]:
9
session-7-oop
May 3, 2024
[ ]: L = [1,2,3]
L.upper()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-1-af1f83522ab7> in <module>
1 L = [1,2,3]
2
----> 3 L.upper()
[ ]: s = 'hello'
s.append('x')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-2-2cb7c5babec0> in <module>
1 s = 'hello'
----> 2 s.append('x')
[ ]: L = [1,2,3]
print(type(L))
<class 'list'>
[ ]: s = [1,2,3]
#objectname = classname()
1
[ ]: # object literal
L = [1,2,3]
[ ]: L = list()
L
[ ]: []
[ ]: s = str()
s
[ ]: ''
[ ]: # Pascal Case
HelloWorld
class Atm:
    """
    Toy ATM driven by console prompts: create or change a pin, check the
    balance and withdraw money.
    """

    def __init__(self):
        # shows that self is the very object being created: the number
        # printed here is the same one id(obj) returns afterwards
        print(id(self))
        # every new object starts without a pin and with an empty account, so
        # check_balance/withdraw never hit a missing attribute
        self.pin = ''
        self.balance = 0

    def menu(self):
        """Show the main menu and dispatch on the user's choice."""
        user_input = input("""
Hi how can I help you?
1. Press 1 to create pin
2. Press 2 to change pin
3. Press 3 to check balance
4. Press 4 to withdraw
5. Anything else to exit
""")
        if user_input == '1':
            self.create_pin()
        elif user_input == '2':
            self.change_pin()
        elif user_input == '3':
            self.check_balance()
        elif user_input == '4':
            self.withdraw()
        else:
            exit()

    def create_pin(self):
        """Ask for a pin and store it on the object."""
        user_pin = input('enter your pin')
        self.pin = user_pin

    def change_pin(self):
        """Replace the pin after the old one was entered correctly."""
        old_pin = input('enter old pin')
        if old_pin == self.pin:
            # let him change the pin
            new_pin = input('enter new pin')
            self.pin = new_pin
            print('pin change successful')
            self.menu()
        else:
            print('nai karne de sakta re baba')
            self.menu()

    def check_balance(self):
        """Print the balance when the correct pin is entered."""
        user_pin = input('enter your pin')
        if user_pin == self.pin:
            print('your balance is ',self.balance)
        else:
            print('chal nikal yahan se')

    def withdraw(self):
        """Deduct an amount from the balance when pin and funds allow it."""
        user_pin = input('enter the pin')
        if user_pin == self.pin:
            # allow to withdraw
            amount = int(input('enter the amount'))
            if amount <= self.balance:
                self.balance = self.balance - amount
                print('withdrawl successful.balance is',self.balance)
            else:
                print('abe garib')
        else:
            print('sale chor')
        self.menu()
[ ]: obj1 = Atm()
140289660099024
3
[ ]: id(obj1)
[ ]: 140289660099024
[ ]: obj2 = Atm()
140289660586384
[ ]: id(obj2)
[ ]: 140289660586384
L = [1,2,3]
len(L) # function ->bcos it is outside the list class
# append needs exactly one argument (the item to add)
L.append(4)# method -> bcos it is inside the list class
[ ]: class Temp:
def __init__(self):
print('hello')
obj = Temp()
hello
[ ]: 3/4*1/2
[ ]: 0.375
class Fraction:
    """A numerator/denominator pair; values are never reduced.

    Note that the arithmetic operators return the result as the text
    ``'num/den'``, not as a new ``Fraction``, so results cannot be chained.
    """

    # parameterized constructor
    def __init__(self, x, y):
        self.num, self.den = x, y

    @staticmethod
    def _render(top, bottom):
        """Format a numerator and a denominator as ``'top/bottom'``."""
        return f'{top}/{bottom}'

    def __str__(self):
        return self._render(self.num, self.den)

    def __add__(self, other):
        # a/b + c/d = (a*d + c*b) / (b*d)
        top = self.num * other.den + other.num * self.den
        return self._render(top, self.den * other.den)

    def __sub__(self, other):
        # a/b - c/d = (a*d - c*b) / (b*d)
        top = self.num * other.den - other.num * self.den
        return self._render(top, self.den * other.den)

    def __mul__(self, other):
        # a/b * c/d = (a*c) / (b*d)
        return self._render(self.num * other.num, self.den * other.den)

    def __truediv__(self, other):
        # (a/b) / (c/d) = (a*d) / (b*c)
        return self._render(self.num * other.den, self.den * other.num)

    def convert_to_decimal(self):
        """Return the fraction as a float (true division)."""
        return self.num / self.den
[ ]: fr1 = Fraction(3,4)
fr2 = Fraction(1,2)
[ ]: fr1.convert_to_decimal()
# 3/4
[ ]: 0.75
[ ]: print(fr1 + fr2)
print(fr1 - fr2)
print(fr1 * fr2)
print(fr1 / fr2)
10/8
2/8
3/8
6/4
[ ]: s1={1,2,3}
s2={3,4,5}
s1 + s2
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-32-3a417afc75fb> in <module>
2 s2={3,4,5}
3
5
----> 4 s1 + s2
[ ]: print(fr1 - fr2)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-39-929bcd8b32dc> in <module>
----> 1 print(fr1 - fr2)
[ ]:
6
session8-oop-part2
May 3, 2024
class Point:
    """A point in the plane with coordinates ``x_cod`` and ``y_cod``."""

    def __init__(self, x, y):
        self.x_cod = x
        self.y_cod = y

    def __str__(self):
        return '<{},{}>'.format(self.x_cod, self.y_cod)

    def euclidean_distance(self, other):
        """Return the straight-line distance between this point and ``other``."""
        return ((self.x_cod - other.x_cod)**2 + (self.y_cod - other.y_cod)**2)**0.5

    def distance_from_origin(self):
        """Return the distance from (0, 0).

        Equivalent to ``self.euclidean_distance(Point(0, 0))``.
        """
        return (self.x_cod**2 + self.y_cod**2)**0.5
class Line:
    """A straight line written as ``A*x + B*y + C = 0``."""

    def __init__(self, A, B, C):
        self.A, self.B, self.C = A, B, C

    def __str__(self):
        return '{}x + {}y + {} = 0'.format(self.A, self.B, self.C)

    def point_on_line(line, point):
        """Tell whether ``point`` satisfies the line equation exactly."""
        residual = line.A*point.x_cod + line.B*point.y_cod + line.C
        if residual != 0:
            return "does not lie on the line"
        return "lies on the line"

    def shortest_distance(line, point):
        """Return the perpendicular distance from ``point`` to the line."""
        offset = abs(line.A*point.x_cod + line.B*point.y_cod + line.C)
        normal_length = (line.A**2 + line.B**2)**0.5
        return offset/normal_length
[ ]: l1 = Line(1,1,-2)
p1 = Point(1,10)
print(l1)
print(p1)
l1.shortest_distance(p1)
1x + 1y + -2 = 0
<1,10>
[ ]: 6.363961030678928
class Person:
    """A person with a name and a country who can be greeted."""

    def __init__(self, name_input, country_input):
        self.name = name_input
        self.country = country_input

    def greet(self):
        """Print 'Namaste <name>' for country 'india', otherwise 'Hello <name>'."""
        salutation = 'Namaste' if self.country == 'india' else 'Hello'
        print(salutation, self.name)
[ ]: p.name
[ ]: 'nitish'
Namaste nitish
2
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-49-39388d77d830> in <module>
1 # what if i try to access non-existent attributes
----> 2 p.gender
[ ]: p.gender
[ ]: 'male'
def __init__(self):
self.name = 'nitish'
self.gender = 'male'
p = Person()
q = p
[ ]: # Multiple ref
print(id(p))
print(id(q))
140655538334992
140655538334992
[ ]: print(p.name)
print(q.name)
q.name = 'ankit'
print(q.name)
3
print(p.name)
nitish
nitish
ankit
ankit
[ ]: class Person:
def __init__(self,name,gender):
self.name = name
self.gender = gender
p = Person('nitish','male')
x = greet(p)
print(x.name)
print(x.gender)
[ ]: class Person:
def __init__(self,name,gender):
self.name = name
self.gender = gender
p = Person('nitish','male')
print(id(p))
greet(p)
print(p.name)
140655538334288
4
140655538334288
ankit
ankit
[ ]: class Person:
def __init__(self,name,gender):
self.name = name
self.gender = gender
p = Person('nitish','male')
print(id(p))
p1 = greet(p)
print(id(p1))
140655555218960
140655555218960
0.0.7 Encapsulation
def __init__(self,name_input,country_input):
self.name = name_input
self.country = country_input
p1 = Person('nitish','india')
p2 = Person('steve','australia')
[ ]: p2.name
[ ]: 'steve'
class Atm:
    """Console ATM whose balance is private and reached through a getter/setter.

    Attributes:
        pin: the pin string entered through ``create_pin``/``change_pin``.
        __balance: private (name-mangled) balance; use ``get_balance`` and
            ``set_balance`` from outside the class.
    """

    def __init__(self):
        # A fresh object starts with an empty pin and a zero balance so the
        # other methods never meet a missing attribute.
        self.pin = ''
        self.__balance = 0
        # The session output recorded right after ``Atm()`` is the object's
        # id, so the constructor reports it.
        print(id(self))
        # The menu is deliberately not started from the constructor.

    def get_balance(self):
        """Return the current balance."""
        return self.__balance

    def set_balance(self, new_value):
        """Store ``new_value`` as the balance if it is an integer.

        Anything else is rejected with a printed message and the balance
        is left unchanged. ``bool`` is rejected too, as before.
        """
        if isinstance(new_value, int) and not isinstance(new_value, bool):
            self.__balance = new_value
        else:
            print('beta bahot maarenge')

    def __menu(self):
        """Show the menu, read one choice and run the matching action.

        Any reply other than '1'-'4' ends the program through ``exit()``.
        """
        user_input = input("""
Hi how can I help you?
1. Press 1 to create pin
2. Press 2 to change pin
3. Press 3 to check balance
4. Press 4 to withdraw
5. Anything else to exit
""")
        if user_input == '1':
            self.create_pin()
        elif user_input == '2':
            self.change_pin()
        elif user_input == '3':
            self.check_balance()
        elif user_input == '4':
            self.withdraw()
        else:
            exit()

    def create_pin(self):
        """Ask for a pin and store it on the object."""
        user_pin = input('enter your pin')
        self.pin = user_pin

    def change_pin(self):
        """Replace the pin after the old one is confirmed."""
        old_pin = input('enter old pin')
        if old_pin == self.pin:
            # let him change the pin
            new_pin = input('enter new pin')
            self.pin = new_pin
            print('pin change successful')
        else:
            print('nai karne de sakta re baba')

    def check_balance(self):
        """Print the balance if the entered pin matches."""
        user_pin = input('enter your pin')
        if user_pin == self.pin:
            print('your balance is ',self.__balance)
        else:
            print('chal nikal yahan se')

    def withdraw(self):
        """Deduct an amount from the balance after a pin check.

        Raises:
            ValueError: if the amount typed by the user is not an integer.
        """
        user_pin = input('enter the pin')
        if user_pin == self.pin:
            # allow to withdraw
            amount = int(input('enter the amount'))
            if amount <= self.__balance:
                self.__balance = self.__balance - amount
                print('withdrawl successful.balance is',self.__balance)
            else:
                print('abe garib')
        else:
            print('sale chor')
[ ]: obj = Atm()
140655538526416
[ ]: obj.get_balance()
[ ]: 1000
[ ]: obj.set_balance(1000)
[ ]: obj.withdraw()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-93-826ea677aa70> in <module>
----> 1 obj.withdraw()
<ipython-input-86-f5bffac7e2a0> in withdraw(self)
7
67 # allow to withdraw
68 amount = int(input('enter the amount'))
---> 69 if amount <= self.__balance:
70 self.__balance = self.__balance - amount
71 print('withdrawl successful.balance is',self.__balance)
[ ]:
[ ]: # list of objects
class Person:
def __init__(self,name,gender):
self.name = name
self.gender = gender
p1 = Person('nitish','male')
p2 = Person('ankit','male')
p3 = Person('ankita','female')
L = [p1,p2,p3]
for i in L:
print(i.name,i.gender)
nitish male
ankit male
ankita female
[ ]: # dict of objects
# list of objects
class Person:
def __init__(self,name,gender):
self.name = name
self.gender = gender
p1 = Person('nitish','male')
p2 = Person('ankit','male')
p3 = Person('ankita','female')
d = {'p1':p1,'p2':p2,'p3':p3}
8
for i in d:
print(d[i].gender)
male
male
female
[ ]: class Atm:
__counter = 1
# utility functions
@staticmethod
def get_counter():
return Atm.__counter
def get_balance(self):
return self.__balance
def set_balance(self,new_value):
if type(new_value) == int:
self.__balance = new_value
else:
print('beta bahot maarenge')
def __menu(self):
user_input = input("""
Hi how can I help you?
1. Press 1 to create pin
2. Press 2 to change pin
3. Press 3 to check balance
4. Press 4 to withdraw
5. Anything else to exit
9
""")
if user_input == '1':
self.create_pin()
elif user_input == '2':
self.change_pin()
elif user_input == '3':
self.check_balance()
elif user_input == '4':
self.withdraw()
else:
exit()
def create_pin(self):
user_pin = input('enter your pin')
self.pin = user_pin
def change_pin(self):
old_pin = input('enter old pin')
if old_pin == self.pin:
# let him change the pin
new_pin = input('enter new pin')
self.pin = new_pin
print('pin change successful')
else:
print('nai karne de sakta re baba')
def check_balance(self):
user_pin = input('enter your pin')
if user_pin == self.pin:
print('your balance is ',self.__balance)
else:
print('chal nikal yahan se')
def withdraw(self):
user_pin = input('enter the pin')
if user_pin == self.pin:
# allow to withdraw
amount = int(input('enter the amount'))
if amount <= self.__balance:
self.__balance = self.__balance - amount
10
print('withdrawl successful.balance is',self.__balance)
else:
print('abe garib')
else:
print('sale chor')
[ ]: c1 = Atm()
140655538287248
[ ]: Atm.get_counter()
[ ]: 2
[ ]: c3 = Atm()
140655538226704
[ ]: c3.cid
[ ]: 3
[ ]: Atm.counter
[ ]: 4
def drinks_water(self):
print(self.__name,
11
"drinks water from the",Lion.__water_source)
@staticmethod
def get_water_source():
return Lion.__water_source
simba=Lion("Simba","Male")
simba.drinks_water()
print( "Water source of lions:",Lion.get_water_source())
12
session9-oop-part3
May 3, 2024
[ ]: # example
class Customer:
    """A customer that owns an ``Address`` object (aggregation)."""

    def __init__(self, name, gender, address):
        self.name = name
        self.gender = gender
        self.address = address

    def print_address(self):
        """Print city, pin and state of the customer's address."""
        # The city is private to Address, so go through its getter rather
        # than the name-mangled attribute.
        print(self.address.get_city(), self.address.pin, self.address.state)

    def edit_profile(self, new_name, new_city, new_pin, new_state):
        """Rename the customer and overwrite every field of the address."""
        self.name = new_name
        self.address.edit_address(new_city, new_pin, new_state)


class Address:
    """A postal address; the city is private, pin and state are public."""

    def __init__(self, city, pin, state):
        self.__city = city
        self.pin = pin
        self.state = state

    def get_city(self):
        """Return the private city value."""
        return self.__city

    def edit_address(self, new_city, new_pin, new_state):
        """Overwrite city, pin and state."""
        self.__city = new_city
        self.pin = new_pin
        self.state = new_state


add1 = Address('gurgaon',122011,'haryana')
cust = Customer('nitish','male',add1)
cust.print_address()
cust.edit_profile('ankit','mumbai',111111,'maharastra')
cust.print_address()
# method example
# what about private attribute
0.0.3 Inheritance
• What is inheritance
• Example
• What gets inherited?
[ ]: # Inheritance and it's benefits
[ ]: # Example
# parent
class User:
    """Parent class: every user has a name, a gender and can log in."""

    def __init__(self):
        self.name = 'nitish'
        self.gender = 'male'

    def login(self):
        print('login')


# child
class Student(User):
    """Child class: a user that also has a roll number and can enroll."""

    def __init__(self):
        # Run the parent constructor first; otherwise ``name`` and
        # ``gender`` are never set and ``s.name`` raises AttributeError.
        super().__init__()
        self.rollno = 100

    def enroll(self):
        print('enroll into the course')


u = User()
s = Student()
print(s.name)
s.login()
s.enroll()
nitish
login
enroll into the course
[ ]: # Class diagram
class Phone:
    """Parent class holding price, brand and camera."""

    def __init__(self, price, brand, camera):
        print ("Inside phone constructor")
        self.price, self.brand, self.camera = price, brand, camera

    def buy(self):
        print ("Buying a phone")


class SmartPhone(Phone):
    """Child class that adds nothing: constructor and buy() come from Phone."""
[ ]: # constructor example 2
class Phone:
def __init__(self, price, brand, camera):
print ("Inside phone constructor")
self.__price = price
self.brand = brand
self.camera = camera
class SmartPhone(Phone):
def __init__(self, os, ram):
3
self.os = os
self.ram = ram
print ("Inside SmartPhone constructor")
s=SmartPhone("Android", 2)
s.brand
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-27-fff5c9f9674f> in <module>
15
16 s=SmartPhone("Android", 2)
---> 17 s.brand
class Phone:
def __init__(self, price, brand, camera):
print ("Inside phone constructor")
self.__price = price
self.brand = brand
self.camera = camera
#getter
def show(self):
print (self.__price)
class SmartPhone(Phone):
def check(self):
print(self.__price)
class Parent:
    """Keeps a private number that is readable through ``get_num``."""

    def __init__(self, num):
        self.__num = num

    def get_num(self):
        """Return the private number."""
        return self.__num


class Child(Parent):
    """Has no constructor of its own, so ``Parent.__init__`` runs."""

    def show(self):
        print("This is in child class")


son = Child(100)
inherited_value = son.get_num()
print(inherited_value)
son.show()
100
This is in child class
[ ]: class Parent:
def __init__(self,num):
self.__num=num
def get_num(self):
return self.__num
class Child(Parent):
def __init__(self,val,num):
self.__val=val
def get_val(self):
return self.__val
son=Child(100,10)
print("Parent: Num:",son.get_num())
print("Child: Val:",son.get_val())
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-35-5a17300f6fc7> in <module>
16
17 son=Child(100,10)
---> 18 print("Parent: Num:",son.get_num())
19 print("Child: Val:",son.get_val())
<ipython-input-35-5a17300f6fc7> in get_num(self)
5
6 def get_num(self):
----> 7 return self.__num
5
8
9 class Child(Parent):
[ ]: class A:
def __init__(self):
self.var1=100
def display1(self,var1):
print("class A :", self.var1)
class B(A):
def display2(self,var1):
print("class B :", self.var1)
obj=B()
obj.display1(200)
class A : 100
[ ]: # Method Overriding
class Phone:
    """Parent class with a private price and a buy() method."""

    def __init__(self, price, brand, camera):
        print ("Inside phone constructor")
        self.__price = price
        self.brand = brand
        self.camera = camera

    def buy(self):
        print ("Buying a phone")


class SmartPhone(Phone):
    """Child class whose buy() overrides the parent's version."""

    def buy(self):
        print ("Buying a smartphone")


# Create the object whose buy() is called below; the same construction
# appears in the tracebacks recorded for the following cells.
s=SmartPhone(20000, "Apple", 13)
s.buy()
6
0.0.4 Super Keyword
class Phone:
    """Parent class with a private price and a buy() method."""

    def __init__(self, price, brand, camera):
        print ("Inside phone constructor")
        self.__price = price
        self.brand = brand
        self.camera = camera

    def buy(self):
        print ("Buying a phone")


class SmartPhone(Phone):
    """Child class whose buy() extends the parent's version."""

    def buy(self):
        print ("Buying a smartphone")
        # super() reaches the parent's buy() from inside the override
        super().buy()


# Create the object whose buy() is called below; the same construction
# appears in the traceback recorded for the next cell.
s=SmartPhone(20000, "Apple", 13)
s.buy()
def buy(self):
print ("Buying a phone")
class SmartPhone(Phone):
def buy(self):
print ("Buying a smartphone")
# syntax to call parent ka buy method
super().buy()
s.buy()
7
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-42-b20080504d0e> in <module>
17 s=SmartPhone(20000, "Apple", 13)
18
---> 19 super().buy()
def buy(self):
print ("Buying a phone")
class SmartPhone(Phone):
def buy(self):
print ("Buying a smartphone")
# syntax to call parent ka buy method
print(super().brand)
s.buy()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-43-87cd65570d46> in <module>
19 s=SmartPhone(20000, "Apple", 13)
20
---> 21 s.buy()
<ipython-input-43-87cd65570d46> in buy(self)
15 print ("Buying a smartphone")
16 # syntax to call parent ka buy method
---> 17 print(super().brand)
18
8
19 s=SmartPhone(20000, "Apple", 13)
class SmartPhone(Phone):
def __init__(self, price, brand, camera, os, ram):
print('Inside smartphone constructor')
super().__init__(price, brand, camera)
self.os = os
self.ram = ram
print ("Inside smartphone constructor")
print(s.os)
print(s.brand)
Inheritance in summary
• A class can inherit from another class.
• Inheritance improves code reuse
• Constructor, attributes, methods get inherited to the child class
• The parent has no access to the child class
• Private properties of parent are not accessible directly in child class
• Child class can override the attributes or methods. This is called method overriding
• super() is an inbuilt function which is used to invoke the parent class methods and constructor
[ ]: class Parent:
def __init__(self,num):
9
self.__num=num
def get_num(self):
return self.__num
class Child(Parent):
def __init__(self,num,val):
super().__init__(num)
self.__val=val
def get_val(self):
return self.__val
son=Child(100,200)
print(son.get_num())
print(son.get_val())
100
200
[ ]: class Parent:
def __init__(self):
self.num=100
class Child(Parent):
def __init__(self):
super().__init__()
self.var=200
def show(self):
print(self.num)
print(self.var)
son=Child()
son.show()
100
200
[ ]: class Parent:
def __init__(self):
self.__num=100
def show(self):
print("Parent:",self.__num)
10
class Child(Parent):
def __init__(self):
super().__init__()
self.__var=10
def show(self):
print("Child:",self.__var)
obj=Child()
obj.show()
Child: 10
[ ]: class Parent:
def __init__(self):
self.__num=100
def show(self):
print("Parent:",self.__num)
class Child(Parent):
def __init__(self):
super().__init__()
self.__var=10
def show(self):
print("Child:",self.__var)
obj=Child()
obj.show()
Child: 10
11
self.camera = camera
def buy(self):
print ("Buying a phone")
class SmartPhone(Phone):
pass
SmartPhone(1000,"Apple","13px").buy()
[ ]: # multilevel
class Product:
    """Top of the chain: anything that can be reviewed."""

    def review(self):
        print ("Product customer review")


class Phone(Product):
    """Middle of the chain: a product with price, brand and camera."""

    def __init__(self, price, brand, camera):
        print ("Inside phone constructor")
        self.__price = price
        self.brand = brand
        self.camera = camera

    def buy(self):
        print ("Buying a phone")


class SmartPhone(Phone):
    """Bottom of the chain: inherits buy() from Phone and review() from Product."""


# Create the object used below; the construction matches the one recorded
# in the tracebacks of the earlier SmartPhone cells.
s=SmartPhone(20000, "Apple", 13)
s.buy()
s.review()
[ ]: # Hierarchical
class Phone:
def __init__(self, price, brand, camera):
print ("Inside phone constructor")
self.__price = price
self.brand = brand
self.camera = camera
12
def buy(self):
print ("Buying a phone")
class SmartPhone(Phone):
pass
class FeaturePhone(Phone):
pass
SmartPhone(1000,"Apple","13px").buy()
FeaturePhone(10,"Lava","1px").buy()
[ ]: # Multiple
class Phone:
def __init__(self, price, brand, camera):
print ("Inside phone constructor")
self.__price = price
self.brand = brand
self.camera = camera
def buy(self):
print ("Buying a phone")
class Product:
def review(self):
print ("Customer review")
s.buy()
s.review()
13
# https://stackoverflow.com/questions/56361048/
↪what-is-the-diamond-problem-in-python-and-why-its-not-appear-in-python2
class Phone:
def __init__(self, price, brand, camera):
print ("Inside phone constructor")
self.__price = price
self.brand = brand
self.camera = camera
def buy(self):
print ("Buying a phone")
class Product:
def buy(self):
print ("Product buy method")
s.buy()
class A:
    """Base class: m1 gives 20."""

    def m1(self):
        return 20


class B(A):
    """Overrides m1 (30) and adds m2 (40)."""

    def m1(self):
        return 30

    def m2(self):
        return 40


class C(B):
    """Overrides only m2 (20); m1 is looked up in B."""

    def m2(self):
        return 20


obj1, obj2, obj3 = A(), B(), C()
# A.m1 -> 20, C inherits B.m1 -> 30, C.m2 -> 20
total = obj1.m1() + obj3.m1() + obj3.m2()
print(total)
70
[ ]: class A:
def m1(self):
return 20
class B(A):
def m1(self):
val=super().m1()+30
return val
class C(B):
def m1(self):
val=self.m1()+20
return val
obj=C()
print(obj.m1())
---------------------------------------------------------------------------
RecursionError Traceback (most recent call last)
<ipython-input-56-bb3659d52487> in <module>
16 return val
17 obj=C()
---> 18 print(obj.m1())
<ipython-input-56-bb3659d52487> in m1(self)
13
14 def m1(self):
---> 15 val=self.m1()+20
16 return val
17 obj=C()
<ipython-input-56-bb3659d52487> in m1(self)
13
14 def m1(self):
---> 15 val=self.m1()+20
16 return val
17 obj=C()
15
RecursionError: maximum recursion depth exceeded
0.0.6 Polymorphism
• Method Overriding
• Method Overloading
• Operator Overloading
class Shape:
    """Imitates method overloading with a default argument."""

    def area(self, a, b=0):
        """Return the area of a circle of radius ``a``, or of an ``a`` by ``b`` rectangle.

        With ``b`` left at 0 the circle formula (pi taken as 3.14) is used;
        otherwise the result is ``a*b``.
        """
        if b == 0:
            return 3.14*a*a
        return a*b


s = Shape()
for dimensions in ((2,), (3, 4)):
    print(s.area(*dimensions))
12.56
12
[ ]: 'hello' + 'world'
[ ]: 'helloworld'
[ ]: 4 + 5
[ ]: 9
[ ]: [1,2,3] + [4,5]
[ ]: [1, 2, 3, 4, 5]
0.0.7 Abstraction
[ ]: from abc import ABC,abstractmethod
class BankApp(ABC):
    """Abstract base for banking front ends.

    ``database`` is shared by every front end; ``security`` and ``display``
    must be provided by each concrete subclass before it can be instantiated.
    """

    def database(self):
        print('connected to database')

    @abstractmethod
    def security(self):
        """Front-end specific security step (abstract)."""

    @abstractmethod
    def display(self):
        """Front-end specific display step (abstract)."""
[ ]: class MobileApp(BankApp):
def mobile_login(self):
print('login into mobile')
def security(self):
print('mobile security')
def display(self):
print('display')
[ ]: mob = MobileApp()
[ ]: mob.security()
mobile security
[ ]: obj = BankApp()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-24-0aa75fd04378> in <module>
----> 1 obj = BankApp()
[ ]:
17
week3-interview-questions
May 3, 2024
[ ]: d = {(1,2,3):'nitish'}
d
[ ]: d = {[1,2,3]:'nitish'}
d
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-99-968dc6d378a3> in <module>
----> 1 d = {[1,2,3]:'nitish'}
2 d
[ ]: # enumerate
# The enumerate() method adds a counter to an iterable and returns it (the␣
↪enumerate object).
L = [('nitish',45),('ankit',31),('ankita',40)]
sorted(L,key=lambda x:x[1],reverse=True)
1
[ ]: [('nitish', 45), ('ankita', 40), ('ankit', 31)]
[ ]: L = [15,21,13,13]
sorted(list(enumerate(L)),reverse=True)
[ ]: # destructor
class Example:
def __init__(self):
print('constructor called')
# destructor
def __del__(self):
print('destructor called')
obj = Example()
a = obj
del obj
del a
constructor called
destructor called
[ ]: # dir
class Test:
def __init__(self):
self.foo = 11
self._bar = 23
self.__baz = 23
def greet(self):
print('hello')
t = Test()
print(dir(t)) # This gives us a list with the object’s attributes
2
[ ]: # isinstance
class Example:
def __init__(self):
print('hello')
obj = Example()
isinstance(obj,Example)
hello
[ ]: True
[ ]: # issubclass
class A:
def __init__(self):
pass
class B(A):
pass
issubclass(B,A)
[ ]: True
0.0.1 classmethod
• A class method is a method that is bound to the class and not the object of the class.
• It has access to the state of the class because it takes a class parameter (cls) that points to the
class and not the object instance.
• It can modify class state that applies across all the instances of the class. For example,
it can modify a class variable that will be applicable to all the instances.
0.0.2 staticmethod
A static method does not receive an implicit first argument. A static method is also a method that
is bound to the class and not the object of the class. This method can’t access or modify the class
state. It is present in a class because it makes sense for the method to be present in class.
class A:
    """Shows the three kinds of methods a class can define."""

    @classmethod
    def class_m(cls):
        # receives the class itself as ``cls``
        print('class method')

    @staticmethod
    def static_m():
        # receives no implicit first argument
        print('static method')

    def normal_m(self):
        # receives the instance as ``self``
        print('normal method')
[ ]: a = A()
normal method
class method
static method
static method
class method
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-94-088d25a52aaf> in <module>
4 A.class_m()
5 # normal -> class -> not callable
----> 6 A.normal_m()
[ ]: # Alternate syntax
A.normal_m(a)
4
• A class method can access or modify the class state while a static method can’t access or
modify it.
• In general, static methods know nothing about the class state. They are utility-type methods
that take some parameters and work upon those parameters. On the other hand class methods
must have class as a parameter.
• We use @classmethod decorator in python to create a class method and we use @staticmethod
decorator to create a static method in python.
class Class2(Class1):
def m(self):
print("In Class2")
class Class3(Class1):
def m(self):
print("In Class3")
obj = Class4()
obj.m()
# MRO
In Class3
a = 'hello'
print(str(a))
print(repr(a))
hello
'hello'
5
[ ]: import datetime
a = datetime.datetime.now()
b = str(a)
print(str(a))
print(str(b))
print(repr(a))
print(repr(b))
2022-11-26 15:46:52.007475
2022-11-26 15:46:52.007475
datetime.datetime(2022, 11, 26, 15, 46, 52, 7475)
'2022-11-26 15:46:52.007475'
0.0.5 In summary
• str is for users -> meant to be more readable
• repr is for developers for debugging -> meant to be unambiguous
[ ]: # how objects are stored even though they are mutable
# https://stackoverflow.com/questions/31340756/
↪python-why-can-i-put-mutable-object-in-a-dict-or-set
class A:
def __init__(self):
print('constructor')
def hello(self):
print('hello')
a = A()
a.hello()
s = {a}
print(s)
dir(a)
constructor
hello
{<__main__.A object at 0x7f4f5f3fd510>}
[ ]: ['__class__',
'__delattr__',
'__dict__',
'__dir__',
'__doc__',
'__eq__',
6
'__format__',
'__ge__',
'__getattribute__',
'__gt__',
'__hash__',
'__init__',
'__init_subclass__',
'__le__',
'__lt__',
'__module__',
'__ne__',
'__new__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__sizeof__',
'__str__',
'__subclasshook__',
'__weakref__',
'hello']
class A:
    """Object that defines its own ``__eq__`` and ``__hash__`` and is still usable in a set."""

    def __init__(self):
        print('constructor')

    def __eq__(self, other):
        # ``==`` always passes the right-hand operand, so the method needs
        # the ``other`` parameter. Equality stays identity-based, which is
        # consistent with the constant hash below.
        return self is other

    def __hash__(self):
        # Every instance hashes to the same value; lookups then fall back
        # to __eq__ to tell instances apart.
        return 1

    def hello(self):
        print('hello')


a = A()
a.hello()
s = {a}
print(s)
dir(a)
constructor
hello
{<__main__.A object at 0x7f4f5f369290>}
7
[ ]: ['__class__',
'__delattr__',
'__dict__',
'__dir__',
'__doc__',
'__eq__',
'__format__',
'__ge__',
'__getattribute__',
'__gt__',
'__hash__',
'__init__',
'__init_subclass__',
'__le__',
'__lt__',
'__module__',
'__ne__',
'__new__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__sizeof__',
'__str__',
'__subclasshook__',
'__weakref__',
'hello']
[ ]: class A:
def __init__(self):
self._var = 10
a = A()
a._var
[ ]: 10
[ ]: s = {[1,2]}
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-125-abf442ad56c0> in <module>
----> 1 s = {[1,2]}
8
[ ]: L = [1,2,3]
s = {L}
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-129-fc3c139945bb> in <module>
1 L = [1,2,3]
----> 2 s = {L}
[ ]: print(L.__hash__)
None
[ ]: hash(1)
[ ]: 1
[ ]: hash('hello')
[ ]: 4306082800328210013
[ ]: hash((1,2,3,))
[ ]: 2528502973977326415
[ ]: hash([1,2,3])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-141-35e31e935e9e> in <module>
----> 1 hash([1,2,3])
[ ]:
9
week-3-interview-questions
May 3, 2024
1 Week-3
### 1. What is a decorator in Python?
Python offers a unique feature called decorators.
Let’s start with an analogy before getting to the technical definition of the decorators. When we
mention the word “decorator”, what enters your mind? Well, likely something that adds beauty to
an existing object. An example is when we hang a picture frame to a wall to enhance the room.
Decorators in Python add some feature or functionality to an existing function without altering it.
Let’s say we have the following simple function that takes two numbers as parameters and divides
them.
def divide(first, second):
print ("The result is:", first/second)
Now if we call this function by passing the two values 16 and 4, it will return the following output:
divide(16, 4)
The output is:
The result is: 4.0
[ ]: def divide(first, second):
print ("The result is:", first/second)
divide(16, 4)
1
return func(first, second)
return swipe
Now we have generated a decorator for the divide() function. Let’s see how it works.
divide = swipe_decorator(divide)
divide(4, 16)
The output is:
The result is: 4.0
We have passed the function as a parameter to the decorator. The decorator “swiped our values”
and returned the function with swiped values. After that, we invoked the returned function to
generate the output as expected.
# Function that the decorator below wraps
def divide(first, second):
    print ("The result is:", first/second)


def swipe_decorator(func):
    """Return a wrapper that calls ``func`` with the larger argument first."""
    from functools import wraps

    @wraps(func)  # keep the wrapped function's name and docstring
    def swipe(first, second):
        if first < second:
            first, second = second, first
        return func(first, second)
    return swipe


# Decorated: rebind the name so that calls go through the wrapper
divide = swipe_decorator(divide)
divide(4, 16)
[ ]:
divide(4,10) # Bigger/smaller
2
def swipe_decorator(func):
def swipe(first, second):
if first < second:
first, second = second, first
return func(first, second)
return swipe
divide(4, 16)
class PQR(ABC):
pass
False
True
### 3. What does Python's MRO (Method Resolution Order) mean?
Ans:
Method Resolution Order is referred to as MRO. A class inherits from many classes under multiple
inheritance. If we attempt to access a method by building an object from the child class, the
methods of the child class are first searched for the method. If the method is not found in the child
class, the inheritance classes are searched from left to right.
The show method is present in both the Father and Mother classes in the example presented below.
In MRO, methods and variables are searched from left to right because while conducting inheritance,
Father class is written first and Mother class is written afterwards. So firstly Father class will be
searched for show method if found then will get executed if not, Mother class will be searched.
3
[ ]: # Example code
class Father:
    def __init__(self):
        print('You are in Father Class Constructor')

    def show(self):
        print("Father Class instance Method")


class Mother:
    def __init__(self):
        print("You are in Mother Class Constructor")

    def show(self):
        print("Mother Class instance Method")


class Son(Father, Mother):
    """Child of both classes, with Father listed first as the text describes.

    The MRO therefore searches Father before Mother, so ``show`` (and the
    constructor) resolve to Father's versions.
    """


son = Son()
son.show()
[ ]:
### 4. What’s the meaning of single and double underscores in Python variable
and method names?
• Single Leading Underscore: _var
• Single Trailing Underscore: var_
• Double Leading Underscore: __var
• Double Leading and Trailing Underscore: __var__
• Single Underscore: _
1. Single Leading Underscore: _var are a Python naming convention that indicates a name
is meant for internal use. It is generally not enforced by the Python interpreter and is only
meant as a hint to the programmer.
Adding a single underscore in front of a variable name is more like someone putting up a tiny
underscore warning sign that says:
“Hey, this isn’t really meant to be a part of the public interface of this class. Best to
leave it alone.”
[ ]: class Test:
def __init__(self):
self.foo = 11
self._bar = 23
4
t = Test()
print(t.foo) #Print 11
print(t._bar) # Print 23
11
23
2. Single Trailing Underscore: var_ Sometimes the most fitting name for a variable is already
taken by a keyword in the Python language. Therefore, names like class or def cannot be
used as variable names in Python. In this case, you can append a single underscore to break
the naming conflict:
def make_object(name, class_):
    # ``class`` is a reserved keyword and cannot be a parameter name; the
    # trailing underscore avoids the clash.
    pass
In summary, a single trailing underscore (postfix) is used by convention to avoid naming conflicts
with Python keywords. This convention is defined and explained in PEP 8.
3. Double Leading Underscore: __var
A double underscore prefix causes the Python interpreter to rewrite the attribute name in order to
avoid naming conflicts in subclasses.
This is also called name mangling—the interpreter changes the name of the variable in a way that
makes it harder to create collisions when the class is extended later.
[ ]: class Test:
def __init__(self):
self.foo = 11
self._bar = 23
self.__baz = 23
t = Test()
print(dir(t)) # This gives us a list with the object’s attributes
5
'__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__',
'__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_bar', 'foo']
['_Test__baz', '__class__', '__delattr__', '__dict__',
'__dir__', '__doc__', '__eq__', '__format__', '__ge__',
'__getattribute__', '__gt__', '__hash__', '__init__',
'__le__', '__lt__', '__module__', '__ne__', '__new__',
'__reduce__', '__reduce_ex__', '__repr__',
'__setattr__', '__sizeof__', '__str__',
'__subclasshook__', '__weakref__', '_bar', 'foo']
Let’s take this list and look for our original variable names foo, _bar, and __baz. I promise you’ll
notice some interesting changes.
First of all, the self.foo variable appears unmodified as foo in the attribute list.
Next up, self._bar behaves the same way—it shows up on the class as _bar. Like I explained
before, the leading underscore is just a convention in this case—a hint for the programmer.
However, with self.__baz things look a little different. When you search for __baz in that list,
you’ll see that there is no variable with that exact name.
So what happened to __baz?
If you look closely, you’ll see there’s an attribute called _Test__baz on this object. This
is the name mangling that the Python interpreter applies. It does this to protect the
variable from getting overridden in subclasses.
This type of variables also explained in Private attributes and methods session.
4. Double Leading and Trailing Underscore: __var__
Double underscores __ are often referred to as “dunders” in the Python community. The reason
is that double underscores appear quite often in Python code, and to avoid fatiguing their jaw
muscles, Pythonistas often shorten “double underscore” to “dunder.”
The names that have both leading and trailing double underscores are reserved for special use in
the language. This rule covers things like __init__ for object constructors, or __call__ to make
objects callable.
These dunder methods are often referred to as magic methods.
There are many dunder methods, here are some:-
__str__, __repr__, __call__, __add__, __sub__, __len__ etc.
5. Single Underscore _: Sometimes used as a name for temporary or insignificant variables
(“don’t care”). Also, it represents the result of the last expression in a Python REPL session.
6
| Object Oriented Programming | Structural Programming |
|---|---|
| Bottom-up approach | Top-down approach |
| Provides data hiding | Does not provide data hiding |
| Can solve problems of any complexity | Can solve moderate problems |
| Code can be reused thereby reducing redundancy | Does not support code reusability |
1.0.2 6. Can you call the base class method without creating an instance?
Yes, you can call a base class method without instantiating the class if:
- It is a static method
- The base class is inherited by some other subclass
| range() | xrange() |
|---|---|
| In Python 3, xrange() is not supported; instead, the range() function is used to iterate in for loops. | The xrange() function is used in Python 2 to iterate in for loops. |
| In Python 2 it returns a list (in Python 3 it returns a lazy range object instead). | It returns a lazy, generator-like xrange object as it doesn’t really generate a static list at the run time. |
| In Python 2 it takes more memory as it keeps the entire list of iterating numbers in memory. | It takes less memory as it keeps only one number at a time in memory. |
7
self.last_name = last_name
self.age = age
def __str__(self):
return f"{self.first_name} {self.last_name} ({self.age})"
def __repr__(self):
return f"{self.first_name} {self.last_name} ({self.age})"
1.0.6 10. What is the difference between a class method, a static method and an
instance method?
Let’s begin by writing a (Python 3) class that contains simple examples for all three method types:
class MyClass:
    """Minimal class with one instance, one class and one static method.

    Each method returns a marker string (plus the object it was bound to,
    where there is one) so the caller can see what the method had access to.
    """

    def method(self):
        """Instance method: return the marker string and the instance."""
        return 'instance method called', self

    # NOTE: this name shadows the builtin ``classmethod`` inside the class
    # body from here on; ``staticmethod`` below is still the builtin.
    @classmethod
    def classmethod(cls):
        """Class method: return the marker string and the class."""
        return 'class method called', cls

    @staticmethod
    def staticmethod():
        """Static method: return only the marker string."""
        return 'static method called'
Instance Methods
The first method on MyClass, called method, is a regular instance method. That’s the basic, no-
frills method type you’ll use most of the time. You can see the method takes one parameter, self,
which points to an instance of MyClass when the method is called. But of course, instance methods
can accept more than just one parameter.
Through the self parameter, instance methods can freely access attributes and other methods on
the same object. This gives them a lot of power when it comes to modifying an object’s state.
Not only can they modify object state, instance methods can also access the class itself through
the self.__class__ attribute. This means instance methods can also modify class state. This
makes instance methods powerful in terms of access restrictions—they can freely modify state on
the object instance and on the class itself.
Class Methods
Let’s compare that to the second method, MyClass.classmethod. I marked this method with a
@classmethod decorator to flag it as a class method. Instead of accepting a self parameter, class
8
methods take a cls parameter that points to the class—and not the object instance—when the
method is called.
Since the class method only has access to this cls argument, it can’t modify object instance state.
That would require access to self. However, class methods can still modify class state that applies
across all instances of the class.
Static Methods
The third method, MyClass.staticmethod, was marked with a @staticmethod decorator to flag
it as a static method.
This type of method doesn’t take a self or a cls parameter, although, of course, it can be made
to accept an arbitrary number of other parameters.
As a result, a static method cannot modify object state or class state. Static methods are restricted
in what data they can access—they’re primarily a way to namespace your methods.
Let’s See Them in Action!
Let’s take a look at how these methods behave in action when we call them. We’ll start by creating
an instance of the class and then calling the three different methods on it.
MyClass was set up in such a way that each method’s implementation returns a tuple containing
information we can use to trace what’s going on and which parts of the class or object that method
can access.
[ ]: # Class
class MyClass:
    """Minimal class with one instance, one class and one static method.

    Each method returns a marker string (plus the object it was bound to,
    where there is one) so the caller can trace what the method can access.
    """

    def method(self):
        """Instance method: return the marker string and the instance."""
        return 'instance method called', self

    # NOTE: this name shadows the builtin ``classmethod`` inside the class
    # body from here on; ``staticmethod`` below is still the builtin.
    @classmethod
    def classmethod(cls):
        """Class method: return the marker string and the class."""
        return 'class method called', cls

    @staticmethod
    def staticmethod():
        """Static method: return only the marker string."""
        return 'static method called'
This confirms that, in this case, the instance method called method has access to the object instance
(printed as <MyClass instance>) via the self argument.
When the method is called, Python replaces the self argument with the instance object, obj.
9
We could ignore the syntactic sugar provided by the obj.method() dot-call syntax and pass the
instance object manually to get the same result:
MyClass.method(obj)
[ ]: MyClass.method(obj)
[ ]: type(obj)
[ ]: __main__.MyClass
Calling classmethod() showed us that it doesn’t have access to the <MyClass instance> object,
but only to the <class MyClass> object, representing the class itself (everything in Python is
an object, even classes themselves).
Notice how Python automatically passes the class as the first argument to the function when we
call MyClass.classmethod(). Calling a method in Python through the dot syntax triggers this
behavior. The self parameter on instance methods works the same way.
Please note that naming these parameters self and cls is just a convention. You could just as
easily name them the_object and the_class and get the same result. All that matters is that
they’re positioned first in the parameter list for that particular method.
Time to call the static method now:
[ ]: obj.staticmethod()
Did you see how we called staticmethod() on the object and were able to do so successfully?
Some developers are surprised when they learn that it’s possible to call a static method on an
object instance.
Behind the scenes, Python simply enforces the access restrictions by not passing in the self or the
cls argument when a static method gets called using the dot syntax.
This confirms that static methods can neither access the object instance state nor the class state.
They work like regular functions but belong to the class’ (and every instance’s) namespace.
Now, let’s take a look at what happens when we attempt to call these methods on the class itself,
without creating an object instance beforehand:
[ ]: # Class Method
print(MyClass.classmethod())
10
# Static method
print(MyClass.staticmethod())
#Instance Method
print(MyClass.method())
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-31-b561d87f2a57> in <module>
4 print(MyClass.staticmethod())
5 #Instance Method
----> 6 print(MyClass.method())
We were able to call classmethod() and staticmethod() just fine, but attempting to call the
instance method method() failed with a TypeError.
This is to be expected. This time we didn’t create an object instance and tried calling an instance
function directly on the class blueprint itself. This means there is no way for Python to populate
the self argument and therefore the call fails with a TypeError exception.
This should make the distinction between these three method types a little more clear.
Key Takeaways
* Instance methods need a class instance and can access the instance through self.
* Class methods don’t need a class instance. They can’t access the instance (self) but they have access to the class itself via cls.
* Static methods don’t have access to cls or self. They work like regular functions but belong to the class’ namespace.
* Static and class methods communicate and (to a certain degree) enforce developer intent about class design. This can have definite maintenance benefits.
[ ]:
11
session10-file-handling
May 3, 2024
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-109-c02a4a856526> in <module>
3 f.write('Hello world')
4 f.close()
----> 5 f.write('hello')
1
f.write('\nhow are you?')
f.close()
[ ]: # write lines
# Write several lines in one call; writelines() adds no separators, so each
# entry carries its own '\n' (the last line is intentionally unterminated).
L = ['hello\n','hi\n','how are you\n','I am fine']
# The context manager closes the file even if writelines() raises.
with open('/content/temp/sample.txt','w') as f:
    f.writelines(L)
hello
hi
how are you
I am fine
hello
hi
h
2
print(f.readline(),end='')
print(f.readline(),end='')
f.close()
hello
hi
while True:
data = f.readline()
if data == '':
break
else:
print(data,end='')
f.close()
hello
hi
how are you
I am fine
[ ]: f.write('hello')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-00cba062fa3d> in <module>
----> 1 f.write('hello')
3
print(f.readline())
hello
hello
hi
h
ow are you
I am fine
with open('big.txt','w') as f:
f.writelines(big_L)
[ ]: with open('big.txt','r') as f:
chunk_size = 10
d hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
4
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
5
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***d hello wo***o world he***d
hello wo***o world he***d hello wo***o world he***
[ ]:
print(f.read(10))
print(f.tell())
e you
I am
25
fine
30
6
[ ]: # working with binary file
with open('screenshot1.png','r') as f:
f.read()
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-23-b662b4ad1a91> in <module>
1 # working with binary file
2 with open('screenshot1.png','r') as f:
----> 3 f.read()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-26-a8e7a73b1431> in <module>
1 # working with other data types
2 with open('sample.txt','w') as f:
----> 3 f.write(5)
[ ]: with open('sample.txt','w') as f:
f.write('5')
7
[ ]: with open('sample.txt','r') as f:
print(int(f.read()) + 5)
10
[ ]:
with open('sample.txt','w') as f:
f.write(str(d))
[ ]: with open('sample.txt','r') as f:
print(dict(f.read()))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-34-949b64f1fbe0> in <module>
1 with open('sample.txt','r') as f:
----> 2 print(dict(f.read()))
What is JSON?
[ ]: # serialization using json module
# list
import json
L = [1,2,3,4]
with open('demo.json','w') as f:
json.dump(L,f)
8
[ ]: # dict
d = {
'name':'nitish',
'age':33,
'gender':'male'
}
with open('demo.json','w') as f:
json.dump(d,f,indent=4)
[ ]: # deserialization
import json
with open('demo.json','r') as f:
d = json.load(f)
print(d)
print(type(d))
t = (1,2,3,4,5)
with open('demo.json','w') as f:
json.dump(t,f)
d = {
'student':'nitish',
'marks':[23,14,34,45,56]
}
with open('demo.json','w') as f:
json.dump(d,f)
def __init__(self,fname,lname,age,gender):
self.fname = fname
self.lname = lname
self.age = age
9
self.gender = gender
# format to be printed in
# -> Nitish Singh age -> 33 gender -> male
[ ]: person = Person('Nitish','Singh',33,'male')
[ ]: # As a string
import json
def show_object(person):
    """Return a one-line summary string for a ``Person``.

    Meant to be passed as the ``default=`` hook of ``json.dump``, which calls
    it for objects the encoder cannot serialize on its own.

    Raises:
        TypeError: if ``person`` is not a ``Person``. Falling through and
            returning ``None`` would make ``json.dump`` silently write
            ``null`` for any unsupported object.
    """
    if isinstance(person, Person):
        return "{} {} age -> {} gender -> {}".format(
            person.fname, person.lname, person.age, person.gender
        )
    raise TypeError(
        f"Object of type {type(person).__name__} is not JSON serializable"
    )
with open('demo.json','w') as f:
json.dump(person,f,default=show_object)
[ ]: # As a dict
import json
def show_object(person):
    """Return a JSON-serializable dict describing a ``Person``.

    Meant to be passed as the ``default=`` hook of ``json.dump``, which calls
    it for objects the encoder cannot serialize on its own.

    Raises:
        TypeError: if ``person`` is not a ``Person``. Falling through and
            returning ``None`` would make ``json.dump`` silently write
            ``null`` for any unsupported object.
    """
    if isinstance(person, Person):
        return {
            'name': person.fname + ' ' + person.lname,
            'age': person.age,
            'gender': person.gender,
        }
    raise TypeError(
        f"Object of type {type(person).__name__} is not JSON serializable"
    )
with open('demo.json','w') as f:
json.dump(person,f,default=show_object,indent=4)
[ ]: # indent attribute
# As a dict
[ ]: # deserializing
import json
with open('demo.json','r') as f:
d = json.load(f)
print(d)
print(type(d))
0.0.8 Pickling
Pickling is the process whereby a Python object hierarchy is converted into a byte stream, and
unpickling is the inverse operation, whereby a byte stream (from a binary file or bytes-like object)
10
is converted back into an object hierarchy.
class Person:
    """Simple record with a name and an age, used to demonstrate pickling."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` on the instance."""
        self.name = name
        self.age = age

    def display_info(self):
        """Print a one-line introduction built from ``name`` and ``age``."""
        # A single formatted string avoids the doubled space that print()'s
        # separator produced after the literal 'and I am '.
        print(f'Hi my name is {self.name} and I am {self.age} years old')
[ ]: p = Person('nitish',33)
[ ]: # pickle dump
import pickle
with open('person.pkl','wb') as f:
pickle.dump(p,f)
[ ]: # pickle load
import pickle
with open('person.pkl','rb') as f:
p = pickle.load(f)
p.display_info()
[ ]:
11
Session 11
There are 2 stages where errors may happen in a program
a = 5
if a==3
print('hello')
a = 5
iff a==3:
print('hello')
a = 5
if a==3:
print('hello')
# IndexError
# The IndexError is thrown when trying to access an item at an invalid index.
L = [1,2,3]
L[100]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-71-c90668d2b194> in <module>
2 # The IndexError is thrown when trying to access an item at an invalid index.
3 L = [1,2,3]
----> 4 L[100]
# ModuleNotFoundError
# The ModuleNotFoundError is thrown when a module could not be found.
import mathi
math.floor(5.3)
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-73-cbdaf00191df> in <module>
1 # ModuleNotFoundError
2 # The ModuleNotFoundError is thrown when a module could not be found.
----> 3 import mathi
4 math.floor(5.3)
---------------------------------------------------------------------------
NOTE: If your import is failing due to a missing package, you can
manually install dependencies using either !pip or !apt.
OPEN EXAMPLES
# KeyError
# The KeyError is thrown when a key is not found
d = {'name':'nitish'}
d['age']
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-74-453afa1c9765> in <module>
3
4 d = {'name':'nitish'}
----> 5 d['age']
KeyError: 'age'
# TypeError
# The TypeError is thrown when an operation or function is applied to an object of an inappropriate type.
1 + 'a'
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-78-2a3eb3f5bb0a> in <module>
1 # TypeError
2 # The TypeError is thrown when an operation or function is applied to an object of an inappropriate type.
----> 3 1 + 'a'
# ValueError
# The ValueError is thrown when a function's argument is of the right type but has an inappropriate value.
int('a')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-76-e419d2a084b4> in <module>
1 # ValueError
2 # The ValueError is thrown when a function's argument is of an inappropriate type.
----> 3 int('a')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-79-e3e8aaa4ec45> in <module>
1 # NameError
2 # The NameError is thrown when an object could not be found.
----> 3 print(k)
# AttributeError
L = [1,2,3]
L.upper()
# Stacktrace
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-80-dd5a29625ddc> in <module>
1 # AttributeError
2 L = [1,2,3]
----> 3 L.upper()
Exceptions
Exceptions occur when things go wrong during the execution of the program (runtime). They generally happen when something unforeseen has happened.
Examples
Memory overflow
Divide by 0 -> logical error
Database error
# else
# Only the open() call sits in the try body, so the handlers below cannot
# mask errors raised while reading.
try:
    f = open('sample1.txt','r')
except FileNotFoundError:
    print('file nai mili')
except Exception:
    print('kuch to lafda hai')
else:
    # Runs only when open() succeeded; the with-block closes the handle
    # once its contents have been printed.
    with f:
        print(f.read())
# finally
# else
# Only the open() call sits in the try body, so the handlers below cannot
# mask errors raised while reading.
try:
    f = open('sample1.txt','r')
except FileNotFoundError:
    print('file nai mili')
except Exception:
    print('kuch to lafda hai')
else:
    # Runs only when open() succeeded; the with-block closes the handle
    # once its contents have been printed.
    with f:
        print(f.read())
finally:
    # Runs whether or not an exception occurred.
    print('ye to print hoga hi')
# raise Exception
# In Python programming, exceptions are raised when errors occur at runtime.
# We can also manually raise exceptions using the raise keyword.
# We can optionally pass values to the exception to clarify why that exception was raised
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
<ipython-input-106-5a07d7d89433> in <module>
----> 1 raise ZeroDivisionError('aise hi try kar raha hu')
def __init__(self,balance):
self.balance = balance
def withdraw(self,amount):
if amount < 0:
raise Exception('amount cannot be -ve')
if self.balance < amount:
raise Exception('paise nai hai tere paas')
self.balance = self.balance - amount
obj = Bank(10000)
try:
obj.withdraw(15000)
except Exception as e:
print(e)
else:
print(obj.balance)
class MyException(Exception):
    """Custom exception that echoes its message when it is created."""

    def __init__(self, message):
        """Initialise the exception with ``message`` and print it.

        The print is kept because the handler that follows in this file
        catches the exception and does nothing with it, so this is the only
        place the message becomes visible.
        """
        # Let the base class record the message so str(e) and e.args work
        # through the normal Exception machinery.
        super().__init__(message)
        print(message)
class Bank:
    """Toy bank account holding a single balance."""

    def __init__(self, balance):
        """Open the account with the given starting ``balance``."""
        self.balance = balance

    def withdraw(self, amount):
        """Deduct ``amount`` from the balance.

        Raises:
            MyException: if ``amount`` is negative, or if it exceeds the
                current balance. The balance is left untouched in both cases.
        """
        if amount < 0:
            raise MyException('amount cannot be -ve')
        if self.balance < amount:
            raise MyException('paise nai hai tere paas')
        self.balance = self.balance - amount
obj = Bank(10000)
try:
obj.withdraw(5000)
except MyException as e:
pass
else:
print(obj.balance)
5000
# simple example
class SecurityError(Exception):
    """Custom exception that echoes its message and offers a logout action."""

    def __init__(self, message):
        """Initialise the exception with ``message`` and print it."""
        # Let the base class record the message so str(e) and e.args work
        # through the normal Exception machinery.
        super().__init__(message)
        print(message)

    def logout(self):
        """Print ``logout``; a handler can call this on the caught exception."""
        print('logout')
class Google:
May 3, 2024
0.0.1 Namespaces
A namespace is a space that holds names (identifiers). Programmatically speaking, namespaces are
dictionaries of identifiers (keys) and their objects (values).
There are 4 types of namespaces:
- Builtin Namespace
- Global Namespace
- Enclosing Namespace
- Local Namespace
def temp():
# local var
b = 3
print(b)
temp()
print(a)
3
2
def temp():
# local var
a = 3
print(b)
temp()
1
print(a)
[ ]: # local and global -> local does not have but global has
a = 2
def temp():
# local var
print(a)
temp()
print(a)
2
2
def temp():
# local var
a += 1
print(a)
temp()
print(a)
---------------------------------------------------------------------------
UnboundLocalError Traceback (most recent call last)
<ipython-input-49-0bff4ae6448f> in <module>
7 print(a)
8
----> 9 temp()
10 print(a)
<ipython-input-49-0bff4ae6448f> in temp()
4 def temp():
5 # local var
----> 6 a += 1
7 print(a)
8
[ ]: a = 2
def temp():
2
# local var
global a
a += 1
print(a)
temp()
print(a)
3
3
temp()
print(a)
a = 5
temp(5)
print(a)
print(z)
5
5
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-51-aac3f4d9657f> in <module>
7 temp(5)
8 print(a)
----> 9 print(z)
[ ]: # built-in scope
import builtins
print(dir(builtins))
3
['ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException',
'BlockingIOError', 'BrokenPipeError', 'BufferError', 'BytesWarning',
'ChildProcessError', 'ConnectionAbortedError', 'ConnectionError',
'ConnectionRefusedError', 'ConnectionResetError', 'DeprecationWarning',
'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception', 'False',
'FileExistsError', 'FileNotFoundError', 'FloatingPointError', 'FutureWarning',
'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
'IndexError', 'InterruptedError', 'IsADirectoryError', 'KeyError',
'KeyboardInterrupt', 'LookupError', 'MemoryError', 'ModuleNotFoundError',
'NameError', 'None', 'NotADirectoryError', 'NotImplemented',
'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning',
'PermissionError', 'ProcessLookupError', 'RecursionError', 'ReferenceError',
'ResourceWarning', 'RuntimeError', 'RuntimeWarning', 'StopAsyncIteration',
'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
'TabError', 'TimeoutError', 'True', 'TypeError', 'UnboundLocalError',
'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError',
'Warning', 'ZeroDivisionError', '__IPYTHON__', '__build_class__', '__debug__',
'__doc__', '__import__', '__loader__', '__name__', '__package__', '__spec__',
'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'breakpoint', 'bytearray', 'bytes',
'callable', 'chr', 'classmethod', 'compile', 'complex', 'copyright', 'credits',
'delattr', 'dict', 'dir', 'display', 'divmod', 'enumerate', 'eval', 'exec',
'execfile', 'filter', 'float', 'format', 'frozenset', 'get_ipython', 'getattr',
'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'isinstance',
'issubclass', 'iter', 'len', 'license', 'list', 'locals', 'map', 'max',
'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print',
'property', 'range', 'repr', 'reversed', 'round', 'runfile', 'set', 'setattr',
'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
'vars', 'zip']
[ ]: # renaming built-ins
L = [1,2,3]
print(max(L))
def max():
print('hello')
print(max(L))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-68-c19f3451a38f> in <module>
1 # renaming built-ins
2 L = [1,2,3]
----> 3 print(max(L))
4 def max():
4
5 print('hello')
[ ]: # Enclosing scope
def outer():
    """Define and call a nested function that prints ``a``, then print a marker.

    NOTE(review): ``a`` is not assigned anywhere in this cell as it appears
    here; presumably it lives in an enclosing or global scope. Confirm
    against the original notebook.
    """
    def inner():
        # ``a`` is not local to inner(), so it is looked up in outer scopes.
        print(a)
    inner()
    print('outer function')
outer()
print('main program')
1
outer function
main program
[ ]: # nonlocal keyword
def outer():
    """Show ``nonlocal``: the nested function rebinds ``outer``'s own ``a``.

    Prints ``inner 2`` from the nested function and then ``outer 2``, since
    both names refer to the same variable.
    """
    a = 1

    def inner():
        # Without ``nonlocal`` the augmented assignment would make ``a``
        # local to inner() and fail because it has no value yet.
        nonlocal a
        a += 1
        print('inner',a)

    inner()
    print('outer',a)
outer()
print('main program')
inner 2
outer 2
main program
[ ]: # Summary
0.0.3 Decorators
A decorator in python is a function that receives another function as input, adds some func-
tionality (decoration) to it and returns it.
This can happen only because python functions are 1st class citizens.
There are 2 types of decorators available in python - Built in decorators like @staticmethod,
@classmethod, @abstractmethod and @property etc - User defined decorators that we pro-
5
grammers can create according to our needs
[ ]: # Python functions are 1st class citizens
def modify(func, num):
    """Call ``func`` with ``num`` and return whatever it returns.

    Shows that a function can be passed around like any other value.
    """
    return func(num)
def square(num):
    """Return ``num`` raised to the power of 2."""
    return num**2
modify(square,2)
[ ]: 4
[ ]: # simple example
def my_decorator(func):
    """Return a wrapper that prints a banner line before and after ``func``.

    The wrapper forwards all positional and keyword arguments to ``func`` and
    returns its result, so it can wrap functions of any signature. Zero-
    argument functions behave exactly as before.
    """
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print('***********************')
        result = func(*args, **kwargs)
        print('***********************')
        return result
    return wrapper
def hello():
print('hello')
def display():
print('hello nitish')
a = my_decorator(hello)
a()
b = my_decorator(display)
b()
***********************
hello
***********************
***********************
hello nitish
***********************
[ ]: # more functions
[ ]: # python tutor
6
[ ]: # Better syntax?
# simple example
def my_decorator(func):
    """Return a wrapper that prints a banner line before and after ``func``.

    The wrapper forwards all positional and keyword arguments to ``func`` and
    returns its result, so it can wrap functions of any signature. Zero-
    argument functions behave exactly as before.
    """
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print('***********************')
        result = func(*args, **kwargs)
        print('***********************')
        return result
    return wrapper
@my_decorator
def hello():
print('hello')
hello()
***********************
hello
***********************
[ ]: # anything meaningful?
import time
def timer(func):
    """Return a wrapper that reports how long each call to ``func`` takes.

    After every call the wrapper prints
    ``time taken by <name> <seconds> secs`` and then returns ``func``'s
    result. Positional and keyword arguments are forwarded unchanged.
    """
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring short durations; it is unaffected by system clock changes.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        print('time taken by',func.__name__,time.perf_counter()-start,'secs')
        return result
    return wrapper
@timer
def hello():
print('hello wolrd')
time.sleep(2)
@timer
def square(num):
time.sleep(1)
print(num**2)
@timer
def power(a,b):
print(a**b)
hello()
square(2)
power(2,3)
7
hello wolrd
time taken by hello 2.003671884536743 secs
4
time taken by square 1.0009939670562744 secs
8
time taken by power 2.1696090698242188e-05 secs
[ ]: # A big problem
[ ]: @checkdt(int)
def square(num):
print(num**2)
def sanity_check(data_type):
    """Build a decorator that only lets calls through for ``data_type`` args.

    Usage: ``@sanity_check(int)``. The decorated function is called, and its
    result returned, only when at least one positional argument is given and
    every positional argument is exactly of ``data_type``.

    Raises (from the decorated function):
        TypeError: when any argument has a different type, or when no
            arguments are passed.
    """
    from functools import wraps

    def outer_wrapper(func):
        @wraps(func)  # keep func's __name__/__doc__ on the wrapper
        def inner_wrapper(*args):
            # Check each argument on its own: type(*args) only works for a
            # single argument. The exact-type comparison of the original is
            # kept, so e.g. a bool is not accepted where int is required.
            if args and all(type(arg) is data_type for arg in args):
                return func(*args)
            raise TypeError('Ye datatype nai chalega')
        return inner_wrapper
    return outer_wrapper
@sanity_check(int)
def square(num):
print(num**2)
@sanity_check(str)
def greet(name):
print('hello',name)
square(2)
[ ]:
[ ]:
8
week4-interview-questions
May 3, 2024
display()
print(__name__)
hello
__main__
hello nitish
test.hello('nitish')
print(math.floor(4.3))
hello nitish
4
1
[ ]: # show builtin modules
[ ]: # Normal
import math
import random
[ ]: # clubbing together
import math,random,test
print(factorial(5))
ceil(4.8)
120
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-6-cbe7c40736a8> in <module>
4
5 print(factorial(5))
----> 6 ceil(4.8)
[ ]: # renaming modules
import math as m
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
m.factorial(5)
[ ]: 120
f(5)
[ ]: 120
2
0.0.4 Order of execution of a module
[ ]: import sys
for p in sys.path:
print(p)
/content
/env/python
/usr/lib/python38.zip
/usr/lib/python3.8
/usr/lib/python3.8/lib-dynload
/usr/local/lib/python3.8/dist-packages
/usr/lib/python3/dist-packages
/usr/local/lib/python3.8/dist-packages/IPython/extensions
/root/.ipython
[ ]:
3
session-13-numpy-fundamentals
May 3, 2024
[ ]: # np.array
import numpy as np
a = np.array([1,2,3])
print(a)
[1 2 3]
[ ]: # 2D and 3D
b = np.array([[1,2,3],[4,5,6]])
print(b)
1
[[1 2 3]
[4 5 6]]
[ ]: c = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(c)
[[[1 2]
[3 4]]
[[5 6]
[7 8]]]
[ ]: # dtype
np.array([1,2,3],dtype=float)
[ ]: # np.arange
np.arange(1,11,2)
[ ]: array([1, 3, 5, 7, 9])
[ ]: # with reshape
np.arange(16).reshape(2,2,2,2)
[ ]: array([[[[ 0, 1],
[ 2, 3]],
[[ 4, 5],
[ 6, 7]]],
[[[ 8, 9],
[10, 11]],
[[12, 13],
[14, 15]]]])
[ ]: np.zeros((3,4))
2
[ ]: array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]])
[ ]: # np.random
np.random.random((3,4))
[ ]: # np.linspace
np.linspace(-10,10,10,dtype=int)
[ ]: # np.identity
np.identity(3)
[ ]: a1 = np.arange(10,dtype=np.int32)
a2 = np.arange(12,dtype=float).reshape(3,4)
a3 = np.arange(8).reshape(2,2,2)
a3
[ ]: array([[[0, 1],
[2, 3]],
[[4, 5],
[6, 7]]])
[ ]: # ndim
a3.ndim
[ ]: 3
[ ]: # shape
print(a3.shape)
a3
(2, 2, 2)
3
[ ]: array([[[0, 1],
[2, 3]],
[[4, 5],
[6, 7]]])
[ ]: # size
print(a2.size)
a2
12
[ ]: # itemsize
a3.itemsize
[ ]: 8
[ ]: # dtype
print(a1.dtype)
print(a2.dtype)
print(a3.dtype)
int32
float64
int64
[ ]: # astype
a3.astype(np.int32)
[ ]: array([[[0, 1],
[2, 3]],
[[4, 5],
[6, 7]]], dtype=int32)
[ ]: a1 = np.arange(12).reshape(3,4)
a2 = np.arange(12,24).reshape(3,4)
a2
4
[ ]: array([[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]])
[ ]: # scalar operations
# arithmetic
a1 ** 2
[ ]: array([[ 0, 1, 4, 9],
[ 16, 25, 36, 49],
[ 64, 81, 100, 121]])
[ ]: # relational
a2 == 15
[ ]: # vector operations
# arithmetic
a1 ** a2
[ ]: array([[ 0, 1, 16384,
14348907],
[ 4294967296, 762939453125, 101559956668416,
11398895185373143],
[ 1152921504606846976, -1261475310744950487, 1864712049423024128,
6839173302027254275]])
[ ]: a1 = np.random.random((3,3))
a1 = np.round(a1*100)
a1
[ ]: # max/min/sum/prod
# 0 -> col and 1 -> row
np.prod(a1,axis=0)
5
[ ]: # mean/median/std/var
np.var(a1,axis=1)
[ ]: # trigonometric functions
np.sin(a1)
[ ]: # dot product
a2 = np.arange(12).reshape(3,4)
a3 = np.arange(12,24).reshape(4,3)
np.dot(a2,a3)
[ ]: # round/floor/ceil
np.ceil(np.random.random((2,3))*100)
[ ]: a1 = np.arange(10)
a2 = np.arange(12).reshape(3,4)
a3 = np.arange(8).reshape(2,2,2)
a3
[ ]: array([[[0, 1],
[2, 3]],
6
[[4, 5],
[6, 7]]])
[ ]: a1
[ ]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
[ ]: a2
[ ]: array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
[ ]: a2[1,0]
[ ]: 4
[ ]: a3
[ ]: array([[[0, 1],
[2, 3]],
[[4, 5],
[6, 7]]])
[ ]: a3[1,0,1]
[ ]: 5
[ ]: a3[1,1,0]
[ ]: 6
[ ]: a1
[ ]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
[ ]: a1[2:5:2]
[ ]: array([2, 4])
[ ]: a2
[ ]: array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
7
[ ]: a2[0:2,1::2]
[ ]: array([[1, 3],
[5, 7]])
[ ]: a2[::2,1::2]
[ ]: array([[ 1, 3],
[ 9, 11]])
[ ]: a2[1,::3]
[ ]: array([4, 7])
[ ]: a2[0,:]
[ ]: array([0, 1, 2, 3])
[ ]: a2[:,2]
[ ]: array([ 2, 6, 10])
[ ]: a2[1:,1:3]
[ ]: array([[ 5, 6],
[ 9, 10]])
[ ]: a3 = np.arange(27).reshape(3,3,3)
a3
[ ]: array([[[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8]],
[[ 9, 10, 11],
[12, 13, 14],
[15, 16, 17]],
[ ]: a3[::2,0,::2]
[ ]: array([[ 0, 2],
[18, 20]])
8
[ ]: a3[2,1:,1:]
[ ]: array([[22, 23],
[25, 26]])
[ ]: a3[0,1,:]
[ ]: array([3, 4, 5])
[ ]:
[ ]:
[ ]:
[ ]:
[ ]:
0.0.9 Iterating
[ ]: a1
for i in a1:
print(i)
0
1
2
3
4
5
6
7
8
9
[ ]: a2
[ ]: array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
[ ]: for i in a2:
print(i)
9
[0 1 2 3]
[4 5 6 7]
[ 8 9 10 11]
[ ]: a3
[ ]: array([[[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8]],
[[ 9, 10, 11],
[12, 13, 14],
[15, 16, 17]],
[ ]: for i in a3:
print(i)
[[0 1 2]
[3 4 5]
[6 7 8]]
[[ 9 10 11]
[12 13 14]
[15 16 17]]
[[18 19 20]
[21 22 23]
[24 25 26]]
[ ]: for i in np.nditer(a3):
print(i)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
10
14
15
16
17
18
19
20
21
22
23
24
25
26
0.0.10 Reshaping
[ ]: # reshape
[ ]: # Transpose
np.transpose(a2)
a2.T
[ ]: array([[ 0, 4, 8],
[ 1, 5, 9],
[ 2, 6, 10],
[ 3, 7, 11]])
[ ]: # ravel
a3.ravel()
0.0.11 Stacking
[ ]: # horizontal stacking
a4 = np.arange(12).reshape(3,4)
a5 = np.arange(12,24).reshape(3,4)
a5
[ ]: np.hstack((a4,a5))
11
[ ]: array([[ 0, 1, 2, 3, 12, 13, 14, 15],
[ 4, 5, 6, 7, 16, 17, 18, 19],
[ 8, 9, 10, 11, 20, 21, 22, 23]])
[ ]: # Vertical stacking
np.vstack((a4,a5))
[ ]: array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]])
0.0.12 Splitting
[ ]: # horizontal splitting
a4
[ ]: array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
[ ]: np.hsplit(a4,5)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-227-59485ca7f23c> in <module>
----> 1 np.hsplit(a4,5)
/usr/local/lib/python3.8/dist-packages/numpy/lib/shape_base.py in hsplit(ary,␣
↪indices_or_sections)
/usr/local/lib/python3.8/dist-packages/numpy/lib/shape_base.py in split(ary,␣
↪indices_or_sections, axis)
870 N = ary.shape[axis]
871 if N % sections:
12
--> 872 raise ValueError(
873 'array split does not result in an equal division') from␣
↪ None
874 return array_split(ary, indices_or_sections, axis)
[ ]: # vertical splitting
[ ]: a5
[ ]: np.vsplit(a5,2)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-230-5b73f701499e> in <module>
----> 1 np.vsplit(a5,2)
/usr/local/lib/python3.8/dist-packages/numpy/lib/shape_base.py in vsplit(ary,␣
↪indices_or_sections)
/usr/local/lib/python3.8/dist-packages/numpy/lib/shape_base.py in split(ary,␣
↪indices_or_sections, axis)
870 N = ary.shape[axis]
871 if N % sections:
--> 872 raise ValueError(
873 'array split does not result in an equal division') from␣
↪ None
874 return array_split(ary, indices_or_sections, axis)
13
[ ]:
14
session-14-numpy-advanced
May 3, 2024
[ ]: # speed
# list
a = [i for i in range(10000000)]
b = [i for i in range(10000000,20000000)]
c = []
import time
start = time.time()
for i in range(len(a)):
c.append(a[i] + b[i])
print(time.time()-start)
3.2699835300445557
[ ]: # numpy
import numpy as np
a = np.arange(10000000)
b = np.arange(10000000,20000000)
start = time.time()
c = a + b
print(time.time()-start)
0.06481003761291504
[ ]: 3.26/0.06
[ ]: 54.33333333333333
[ ]: # memory
a = [i for i in range(10000000)]
import sys
sys.getsizeof(a)
1
[ ]: 81528048
[ ]: a = np.arange(10000000,dtype=np.int8)
sys.getsizeof(a)
[ ]: 10000104
[ ]: # convenience
a = np.arange(24).reshape(6,4)
a
[ ]: array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]])
[ ]: a[1,2]
[ ]: 6
[ ]: a[1:3,1:3]
[ ]: array([[ 5, 6],
[ 9, 10]])
[ ]: # Fancy Indexing
a[:,[0,2,3]]
[ ]: array([[ 0, 2, 3],
[ 4, 6, 7],
[ 8, 10, 11],
[12, 14, 15],
[16, 18, 19],
[20, 22, 23]])
[ ]:
[ ]: # Boolean Indexing
a = np.random.randint(1,100,24).reshape(6,4)
2
a
[ ]: array([76, 98, 99, 91, 88, 83, 78, 85, 54, 73, 61, 53, 93, 85, 77])
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-97-0e69559201d8> in <module>
1 # find all numbers greater than 50 and are even
2
----> 3 a[(a > 50) and (a % 2 == 0)]
ValueError: The truth value of an array with more than one element is ambiguous.␣
↪Use a.any() or a.all()
[ ]: array([76, 99, 39, 46, 88, 23, 45, 6, 83, 1, 37, 43, 78, 85, 54, 73, 61,
53, 40, 93, 85])
0.0.3 Broadcasting
The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic
operations.
The smaller array is “broadcast” across the larger array so that they have compatible shapes.
3
[ ]: # same shape
a = np.arange(6).reshape(2,3)
b = np.arange(6,12).reshape(2,3)
print(a)
print(b)
print(a+b)
[[0 1 2]
[3 4 5]]
[[ 6 7 8]
[ 9 10 11]]
[[ 6 8 10]
[12 14 16]]
[ ]: # diff shape
a = np.arange(6).reshape(2,3)
b = np.arange(3).reshape(1,3)
print(a)
print(b)
print(a+b)
[[0 1 2]
[3 4 5]]
[[0 1 2]]
[[0 2 4]
[3 5 7]]
Broadcasting Rules 1. Make the two arrays have the same number of dimensions. -
If the two arrays have different numbers of dimensions, add new dimensions of size 1 to the
head (left side) of the shape of the array with fewer dimensions.
2. Make each dimension of the two arrays the same size. - If the sizes of a dimension
differ between the two arrays, the dimension of size 1 is stretched to the size of the other array.
- If the sizes of a dimension differ and neither of them is 1, the arrays cannot be broadcast,
and an error is raised.
[ ]: # More examples
a = np.arange(12).reshape(4,3)
b = np.arange(3)
print(a)
print(b)
4
print(a+b)
[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]
[ 9 10 11]]
[0 1 2]
[[ 0 2 4]
[ 3 5 7]
[ 6 8 10]
[ 9 11 13]]
[ ]: a = np.arange(12).reshape(3,4)
b = np.arange(3)
print(a)
print(b)
print(a+b)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[0 1 2]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-104-fa6cbb589166> in <module>
5 print(b)
6
----> 7 print(a+b)
ValueError: operands could not be broadcast together with shapes (3,4) (3,)
[ ]: a = np.arange(3).reshape(1,3)
b = np.arange(3).reshape(3,1)
print(a)
print(b)
print(a+b)
[[0 1 2]]
[[0]
[1]
[2]]
[[0 1 2]
5
[1 2 3]
[2 3 4]]
[ ]: a = np.arange(3).reshape(1,3)
b = np.arange(4).reshape(4,1)
print(a)
print(b)
print(a + b)
[[0 1 2]]
[[0]
[1]
[2]
[3]]
[[0 1 2]
[1 2 3]
[2 3 4]
[3 4 5]]
[ ]: a = np.array([1])
# shape -> (1,), treated as (1,1) when broadcast against a 2D array
b = np.arange(4).reshape(2,2)
# shape -> (2,2)
print(a)
print(b)
print(a+b)
[1]
[[0 1]
[2 3]]
[[1 2]
[3 4]]
[ ]: a = np.arange(12).reshape(3,4)
b = np.arange(12).reshape(4,3)
print(a)
print(b)
print(a+b)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
6
[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]
[ 9 10 11]]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-109-c590a65467e5> in <module>
5 print(b)
6
----> 7 print(a+b)
ValueError: operands could not be broadcast together with shapes (3,4) (4,3)
[ ]: a = np.arange(16).reshape(4,4)
b = np.arange(4).reshape(2,2)
print(a)
print(b)
print(a+b)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]]
[[0 1]
[2 3]]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-110-57df50a0058a> in <module>
5 print(b)
6
----> 7 print(a+b)
ValueError: operands could not be broadcast together with shapes (4,4) (2,2)
[ ]: a = np.arange(10)
np.sin(a)
7
[ ]: # sigmoid
def sigmoid(array):
    """Apply the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    Parameters
    ----------
    array : number or NumPy array
        Input value(s); must support unary minus and ``np.exp``.

    Returns
    -------
    NumPy scalar or array of the same shape as ``array`` with every
    value between 0 and 1.
    """
    return 1/(1 + np.exp(-(array)))
a = np.arange(100)
sigmoid(a)
actual = np.random.randint(1,50,25)
predicted = np.random.randint(1,50,25)
def mse(actual, predicted):
    """Return the mean squared error between two arrays.

    Parameters
    ----------
    actual : NumPy array
        Observed values.
    predicted : NumPy array
        Predicted values; must be broadcast-compatible with ``actual``.

    Returns
    -------
    The mean of the element-wise squared differences
    ``(actual - predicted) ** 2``.
    """
    return np.mean((actual - predicted)**2)
mse(actual,predicted)
[ ]: 500.12
[ ]: 500.12
8
[ ]: actual
[ ]: array([ 5, 3, 9, 7, 3, 36, 49, 28, 20, 40, 2, 23, 29, 18, 30, 23, 7,
40, 15, 11, 27, 44, 32, 28, 10])
[ ]: a[~np.isnan(a)]
[ ]: # plotting a 2D plot
# x = y
import matplotlib.pyplot as plt
x = np.linspace(-10,10,100)
y = x
plt.plot(x,y)
[ ]: [<matplotlib.lines.Line2D at 0x7f6f78e18f70>]
9
[ ]: # y = x^2
x = np.linspace(-10,10,100)
y = x**2
plt.plot(x,y)
[ ]: [<matplotlib.lines.Line2D at 0x7f6f87acf100>]
10
[ ]: # y = sin(x)
x = np.linspace(-10,10,100)
y = np.sin(x)
plt.plot(x,y)
[ ]: [<matplotlib.lines.Line2D at 0x7f6f5d1d0100>]
[ ]: # y = xlog(x)
x = np.linspace(-10,10,100)
y = x * np.log(x)
plt.plot(x,y)
[ ]: [<matplotlib.lines.Line2D at 0x7f6f57ab62e0>]
11
[ ]: # sigmoid
x = np.linspace(-10,10,100)
y = 1/(1+np.exp(-x))
plt.plot(x,y)
[ ]: [<matplotlib.lines.Line2D at 0x7f6f5401e100>]
12
0.0.7 Meshgrids
[ ]: # Meshgrids
[ ]:
13
session-15-numpy-tricks
May 3, 2024
0.0.1 np.sort
Return a sorted copy of an array.
https://numpy.org/doc/stable/reference/generated/numpy.sort.html
[ ]: # code
import numpy as np
a = np.random.randint(1,100,15)
a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: b = np.random.randint(1,100,24).reshape(6,4)
b
[ ]: np.sort(a)[::-1]
[ ]: array([94, 92, 78, 68, 53, 50, 38, 37, 30, 28, 21, 11, 9, 5, 2])
[ ]: np.sort(b,axis=0)
0.0.2 np.append
The numpy.append() appends values along the mentioned axis at the end of the array
1
https://numpy.org/doc/stable/reference/generated/numpy.append.html
[ ]: # code
np.append(a,200)
[ ]: array([ 11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78,
2, 21, 200])
[ ]: b
[ ]: np.append(b,np.random.random((b.shape[0],1)),axis=1)
0.0.3 np.concatenate
The numpy.concatenate() function concatenates a sequence of arrays along an existing axis.
https://numpy.org/doc/stable/reference/generated/numpy.concatenate.html
[ ]: # code
c = np.arange(6).reshape(2,3)
d = np.arange(6,12).reshape(2,3)
print(c)
print(d)
[[0 1 2]
[3 4 5]]
[[ 6 7 8]
[ 9 10 11]]
[ ]: np.concatenate((c,d),axis=0)
[ ]: array([[ 0, 1, 2],
[ 3, 4, 5],
2
[ 6, 7, 8],
[ 9, 10, 11]])
[ ]: np.concatenate((c,d),axis=1)
[ ]: array([[ 0, 1, 2, 6, 7, 8],
[ 3, 4, 5, 9, 10, 11]])
0.0.4 np.unique
The np.unique() function returns the sorted unique values of the array passed to it
as a parameter.
https://numpy.org/doc/stable/reference/generated/numpy.unique.html
[ ]: # code
e = np.array([1,1,2,2,3,3,4,4,5,5,6,6])
[ ]: np.unique(e)
[ ]: array([1, 2, 3, 4, 5, 6])
0.0.5 np.expand_dims
The numpy.expand_dims() function expands the dimensions of an array by inserting a new axis of size 1 at the given position.
https://numpy.org/doc/stable/reference/generated/numpy.expand_dims.html
[ ]: # code
a.shape
[ ]: (15,)
[ ]: np.expand_dims(a,axis=0).shape
[ ]: (1, 15)
[ ]: np.expand_dims(a,axis=1)
[ ]: array([[11],
[53],
[28],
[50],
[38],
[37],
[94],
[92],
[ 5],
3
[30],
[68],
[ 9],
[78],
[ 2],
[21]])
0.0.6 np.where
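The numpy.where() function returns the indices of elements in an input array where the given
condition is satisfied. When called as np.where(condition, x, y), it instead returns an array that
takes its values from x where the condition is true and from y elsewhere.
https://numpy.org/doc/stable/reference/generated/numpy.where.html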
[ ]: a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: np.where(a%2 == 0,0,a)
0.0.7 np.argmax
The numpy.argmax() function returns the indices of the maximum element of the array along a particular axis.
https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
[ ]: # code
a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: np.argmax(a)
[ ]: 6
[ ]: b
4
[ ]: array([[12, 52, 42, 6],
[29, 18, 47, 55],
[61, 93, 83, 9],
[38, 63, 44, 85],
[ 8, 87, 31, 72],
[40, 71, 2, 7]])
[ ]: np.argmax(b,axis=0)
[ ]: array([2, 2, 2, 3])
[ ]: np.argmax(b,axis=1)
[ ]: array([1, 3, 1, 3, 1, 1])
[ ]: # np.argmin
np.argmin(a)
[ ]: 13
0.0.8 np.cumsum
numpy.cumsum() function is used when we want to compute the cumulative sum of array elements
over a given axis.
https://numpy.org/doc/stable/reference/generated/numpy.cumsum.html
[ ]: a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: np.cumsum(a)
[ ]: array([ 11, 64, 92, 142, 180, 217, 311, 403, 408, 438, 506, 515, 593,
595, 616])
[ ]: b
[ ]: np.cumsum(b,axis=1)
5
[ ]: array([[ 12, 64, 106, 112],
[ 29, 47, 94, 149],
[ 61, 154, 237, 246],
[ 38, 101, 145, 230],
[ 8, 95, 126, 198],
[ 40, 111, 113, 120]])
[ ]: np.cumsum(b)
[ ]: array([ 12, 64, 106, 112, 141, 159, 206, 261, 322, 415, 498,
507, 545, 608, 652, 737, 745, 832, 863, 935, 975, 1046,
1048, 1055])
[ ]: # np.cumprod
np.cumprod(a)
[ ]: a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
0.0.9 np.percentile
The numpy.percentile() function is used to compute the nth percentile of the given data (array elements)
along the specified axis.
https://numpy.org/doc/stable/reference/generated/numpy.percentile.html
[ ]: a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: np.percentile(a,50)
[ ]: 37.0
[ ]: np.median(a)
[ ]: 37.0
6
0.0.10 np.histogram
NumPy has a built-in numpy.histogram() function which computes the frequency distribution of the
data: it returns the number of values falling in each bin together with the bin edges (it does not draw a plot).
https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
[ ]: # code
a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: np.histogram(a,bins=[0,50,100])
0.0.11 np.corrcoef
Return Pearson product-moment correlation coefficients.
https://numpy.org/doc/stable/reference/generated/numpy.corrcoef.html
[ ]: salary = np.array([20000,40000,25000,35000,60000])
experience = np.array([1,3,2,4,2])
np.corrcoef(salary,experience)
[ ]: array([[1. , 0.25344572],
[0.25344572, 1. ]])
0.0.12 np.isin
The numpy.isin() function checks, element by element, whether each value of one array is present in a
second array (or list) of test values, which may have a different size. It returns a boolean array with the
same shape as the first array.
https://numpy.org/doc/stable/reference/generated/numpy.isin.html
[ ]: # code
a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: items = [10,20,30,40,50,60,70,80,90,100]
a[np.isin(a,items)]
[ ]: array([50, 30])
7
0.0.13 np.flip
The numpy.flip() function reverses the order of array elements along the specified axis, preserving
the shape of the array.
https://numpy.org/doc/stable/reference/generated/numpy.flip.html
[ ]: # code
a
[ ]: array([11, 53, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78, 2, 21])
[ ]: np.flip(a)
[ ]: array([21, 2, 78, 9, 68, 30, 5, 92, 94, 37, 38, 50, 28, 53, 11])
[ ]: b
[ ]: np.flip(b,axis=1)
0.0.14 np.put
The numpy.put() function replaces the elements of an array at the given indices with the given values,
modifying the array in place. The indices refer to positions in the flattened array.
https://numpy.org/doc/stable/reference/generated/numpy.put.html
[ ]: # code
a
[ ]: array([110, 530, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78,
2, 21])
[ ]: np.put(a,[0,1],[110,530])
8
0.0.15 np.delete
The numpy.delete() function returns a new array with the specified sub-arrays deleted along the
given axis.
https://numpy.org/doc/stable/reference/generated/numpy.delete.html
[ ]: # code
a
[ ]: array([110, 530, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78,
2, 21])
[ ]: np.delete(a,[0,2,4])
np.union1d(m,n)
[ ]: array([1, 2, 3, 4, 5, 6, 7])
[ ]: np.intersect1d(m,n)
[ ]: array([3, 4, 5])
[ ]: np.setdiff1d(n,m)
[ ]: array([6, 7])
[ ]: np.setxor1d(m,n)
[ ]: array([1, 2, 6, 7])
[ ]: m[np.in1d(m,1)]
[ ]: array([1])
9
0.0.17 np.clip
The numpy.clip() function clips (limits) the values in an array to a given interval: values below the minimum are set to the minimum and values above the maximum are set to the maximum.
https://numpy.org/doc/stable/reference/generated/numpy.clip.html
[ ]: # code
a
[ ]: array([110, 530, 28, 50, 38, 37, 94, 92, 5, 30, 68, 9, 78,
2, 21])
[ ]: np.clip(a,a_min=25,a_max=75)
[ ]: array([75, 75, 28, 50, 38, 37, 75, 75, 25, 30, 68, 25, 75, 25, 25])
[ ]: # 17. np.swapaxes
[ ]: # 18. np.random.uniform
[ ]: # 19. np.count_nonzero
[ ]: # 21. np.tile
# https://www.kaggle.com/code/abhayparashar31/
↪best-numpy-functions-for-data-science-50?scriptVersionId=98816580
[ ]: # 22. np.repeat
# https://towardsdatascience.com/10-numpy-functions-you-should-know-1dc4863764c5
[ ]:
[ ]:
[ ]:
[ ]:
[ ]:
[ ]:
10
session-16-pandas-series
May 3, 2024
[ ]: import numpy as np
import pandas as pd
pd.Series(country)
[ ]: 0 India
1 Pakistan
2 USA
3 Nepal
4 Srilanka
dtype: object
[ ]: # integers
runs = [13,24,56,78,100]
runs_ser = pd.Series(runs)
[ ]: # custom index
marks = [67,57,89,100]
subjects = ['maths','english','science','hindi']
1
pd.Series(marks,index=subjects)
[ ]: maths 67
english 57
science 89
hindi 100
dtype: int64
[ ]: # setting a name
marks = pd.Series(marks,index=subjects,name='Nitish ke marks')
marks
[ ]: maths 67
english 57
science 89
hindi 100
Name: Nitish ke marks, dtype: int64
[ ]: maths 67
english 57
science 89
hindi 100
Name: nitish ke marks, dtype: int64
[ ]: 4
[ ]: # dtype
marks_series.dtype
2
[ ]: dtype('int64')
[ ]: # name
marks_series.name
[ ]: 'nitish ke marks'
[ ]: # is_unique
marks_series.is_unique
pd.Series([1,1,2,3,4,5]).is_unique
[ ]: False
[ ]: # index
marks_series.index
[ ]: runs_ser.index
[ ]: # values
marks_series.values
[ ]: 0 48
1 57
2 40
3 43
4 44
…
360 231
361 226
362 155
363 144
364 172
Name: Subscribers gained, Length: 365, dtype: int64
3
[ ]: # with 2 cols
# Load the match_no/runs CSV as a Series indexed by match number.
# read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in 2.0;
# squeezing the resulting one-column DataFrame gives the same Series on old and new pandas.
vk = pd.read_csv('/content/kohli_ipl.csv',index_col='match_no').squeeze('columns')
vk
[ ]: match_no
1 1
2 23
3 13
4 12
5 1
..
211 0
212 20
213 73
214 25
215 7
Name: runs, Length: 215, dtype: int64
[ ]: movies = pd.read_csv('/content/bollywood.csv',index_col='movie',squeeze=True)
movies
[ ]: movie
Uri: The Surgical Strike Vicky Kaushal
Battalion 609 Vicky Ahuja
The Accidental Prime Minister (film) Anupam Kher
Why Cheat India Emraan Hashmi
Evening Shadows Mona Ambegaonkar
…
Hum Tumhare Hain Sanam Shah Rukh Khan
Aankhen (2002 film) Amitabh Bachchan
Saathiya (film) Vivek Oberoi
Company (film) Ajay Devgn
Awara Paagal Deewana Akshay Kumar
Name: lead, Length: 1500, dtype: object
[ ]: 0 48
1 57
2 40
3 43
4 44
Name: Subscribers gained, dtype: int64
4
[ ]: vk.head(3)
[ ]: match_no
1 1
2 23
3 13
Name: runs, dtype: int64
[ ]: vk.tail(10)
[ ]: match_no
206 0
207 0
208 9
209 58
210 30
211 0
212 20
213 73
214 25
215 7
Name: runs, dtype: int64
[ ]: # sample
movies.sample(5)
[ ]: movie
Arjun: The Warrior Prince Yudhveer Bakoliya
Viceroy's House (film) Hugh Bonneville
Joggers' Park (film) Victor Banerjee
Tere Mere Phere Vinay Pathak
Mission Mangal Akshay Kumar
Name: lead, dtype: object
[ ]: Akshay Kumar 48
Amitabh Bachchan 45
Ajay Devgn 38
Salman Khan 31
Sanjay Dutt 26
..
Diganth 1
Parveen Kaur 1
Seema Azmi 1
Akanksha Puri 1
5
Edwin Fernandes 1
Name: lead, Length: 566, dtype: int64
[ ]: 113
[ ]: vk.sort_values(ascending=False)
[ ]: match_no
128 113
126 109
123 108
164 100
120 100
…
93 0
211 0
130 0
8 0
135 0
Name: runs, Length: 215, dtype: int64
[ ]: movies
[ ]: movie
Zor Lagaa Ke…Haiya! Meghan Jadhav
Zokkomon Darsheel Safary
Zindagi Tere Naam Mithun Chakraborty
Zindagi Na Milegi Dobara Hrithik Roshan
Zindagi 50-50 Veena Malik
…
2 States (2014 film) Arjun Kapoor
1971 (2007 film) Manoj Bajpayee
1920: The Evil Returns Vicky Ahuja
1920: London Sharman Joshi
1920 (film) Rajniesh Duggall
Name: lead, Length: 1500, dtype: object
[ ]: vk.sort_values(inplace=True)
[ ]: vk
6
[ ]: match_no
87 0
211 0
207 0
206 0
91 0
…
164 100
120 100
123 108
126 109
128 113
Name: runs, Length: 215, dtype: int64
[ ]: 215
[ ]: 49510
24.0
0 Akshay Kumar
dtype: object
62.6750230372527
688.0024777222343
[ ]: # min/max
subs.max()
[ ]: 396
[ ]: # describe
subs.describe()
7
[ ]: count 365.000000
mean 135.643836
std 62.675023
min 33.000000
25% 88.000000
50% 123.000000
75% 177.000000
max 396.000000
Name: Subscribers gained, dtype: float64
[ ]: # integer indexing
x = pd.Series([12,13,14,35,46,57,58,79,9])
x
[ ]: 0 12
1 13
2 14
3 35
4 46
5 57
6 58
7 79
8 9
dtype: int64
[ ]: # negative indexing
x[-1]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/range.py in␣
↪get_loc(self, key, method, tolerance)
384 try:
--> 385 return self._range.index(new_key)
386 except ValueError as err:
The above exception was the direct cause of the following exception:
8
/usr/local/lib/python3.8/dist-packages/pandas/core/series.py in␣
↪__getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
/usr/local/lib/python3.8/dist-packages/pandas/core/series.py in _get_value(self,␣
↪label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to␣
↪positional
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/range.py in␣
↪get_loc(self, key, method, tolerance)
KeyError: -1
[ ]: movies
[ ]: movie
Zor Lagaa Ke…Haiya! Meghan Jadhav
Zokkomon Darsheel Safary
Zindagi Tere Naam Mithun Chakraborty
Zindagi Na Milegi Dobara Hrithik Roshan
Zindagi 50-50 Veena Malik
…
2 States (2014 film) Arjun Kapoor
1971 (2007 film) Manoj Bajpayee
1920: The Evil Returns Vicky Ahuja
1920: London Sharman Joshi
1920 (film) Rajniesh Duggall
Name: lead, Length: 1500, dtype: object
[ ]: vk[-1]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
9
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in␣
↪get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
/usr/local/lib/python3.8/dist-packages/pandas/_libs/index.pyx in pandas._libs.
↪index.IndexEngine.get_loc()
/usr/local/lib/python3.8/dist-packages/pandas/_libs/index.pyx in pandas._libs.
↪index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.
↪get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.
↪get_item()
KeyError: -1
The above exception was the direct cause of the following exception:
/usr/local/lib/python3.8/dist-packages/pandas/core/series.py in␣
↪__getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
/usr/local/lib/python3.8/dist-packages/pandas/core/series.py in _get_value(self,␣
↪label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to␣
↪positional
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in␣
↪get_loc(self, key, method, tolerance)
10
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: -1
[ ]: marks_series[-1]
[ ]: 100
[ ]: # slicing
vk[5:16]
[ ]: match_no
6 9
7 34
8 0
9 21
10 3
11 10
12 38
13 3
14 11
15 50
16 2
Name: runs, dtype: int64
[ ]: # negative slicing
vk[-5:]
[ ]: match_no
211 0
212 20
213 73
214 25
215 7
Name: runs, dtype: int64
[ ]: movies[::2]
[ ]: movie
Zor Lagaa Ke…Haiya! Meghan Jadhav
Zindagi Tere Naam Mithun Chakraborty
Zindagi 50-50 Veena Malik
Zinda (film) Sanjay Dutt
Zid (2014 film) Mannara Chopra
…
3 Storeys Aisha Ahmed
11
3 Deewarein Naseeruddin Shah
22 Yards Barun Sobti
1971 (2007 film) Manoj Bajpayee
1920: London Sharman Joshi
Name: lead, Length: 750, dtype: object
[ ]: # fancy indexing
vk[[1,3,4,5]]
[ ]: match_no
1 1
3 13
4 12
5 1
Name: runs, dtype: int64
[ ]: 'Arjun Kapoor'
[ ]: # using indexing
marks_series[1] = 100
marks_series
[ ]: maths 67
english 100
science 89
hindi 100
Name: nitish ke marks, dtype: int64
[ ]: marks_series
[ ]: maths 67
english 100
science 89
hindi 100
sst 90
evs 100
Name: nitish ke marks, dtype: int64
12
[ ]: # slicing
runs_ser[2:4] = [100,100]
runs_ser
[ ]: 0 13
1 24
2 100
3 100
4 100
dtype: int64
[ ]: # fancy indexing
runs_ser[[0,3,4]] = [0,0,0]
runs_ser
[ ]: 0 0
1 24
2 100
3 0
4 0
dtype: int64
[ ]: movie
Zor Lagaa Ke…Haiya! Meghan Jadhav
Zokkomon Darsheel Safary
Zindagi Tere Naam Mithun Chakraborty
Zindagi Na Milegi Dobara Hrithik Roshan
Zindagi 50-50 Veena Malik
…
2 States (2014 film) Alia Bhatt
1971 (2007 film) Manoj Bajpayee
1920: The Evil Returns Vicky Ahuja
1920: London Sharman Joshi
1920 (film) Rajniesh Duggall
Name: lead, Length: 1500, dtype: object
[ ]:
13
0.0.13 Series with Python Functionalities
[ ]: # len/type/dir/sorted/max/min
print(len(subs))
print(type(subs))
print(dir(subs))
print(sorted(subs))
print(min(subs))
print(max(subs))
365
<class 'pandas.core.series.Series'>
['T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_REVERSED', '_AXIS_TO_AXIS_NUMBER',
'_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__',
'__array__', '__array_priority__', '__array_ufunc__', '__array_wrap__',
'__bool__', '__class__', '__contains__', '__copy__', '__deepcopy__',
'__delattr__', '__delitem__', '__dict__', '__dir__', '__divmod__', '__doc__',
'__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__',
'__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__',
'__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__',
'__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__',
'__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__',
'__long__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__',
'__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pos__', '__pow__',
'__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce_ex__',
'__repr__', '__rfloordiv__', '__rmatmul__', '__rmod__', '__rmul__', '__ror__',
'__round__', '__rpow__', '__rsub__', '__rtruediv__', '__rxor__', '__setattr__',
'__setitem__', '__setstate__', '__sizeof__', '__str__', '__sub__',
'__subclasshook__', '__truediv__', '__weakref__', '__xor__', '_accessors',
'_accum_func', '_add_numeric_operations', '_agg_by_level', '_agg_examples_doc',
'_agg_see_also_doc', '_align_frame', '_align_series', '_arith_method',
'_as_manager', '_attrs', '_binop', '_can_hold_na',
'_check_inplace_and_allows_duplicate_labels', '_check_inplace_setting',
'_check_is_chained_assignment_possible', '_check_label_or_level_ambiguity',
'_check_setitem_copy', '_clear_item_cache', '_clip_with_one_bound',
'_clip_with_scalar', '_cmp_method', '_consolidate', '_consolidate_inplace',
'_construct_axes_dict', '_construct_axes_from_arguments', '_construct_result',
'_constructor', '_constructor_expanddim', '_convert', '_convert_dtypes',
'_data', '_dir_additions', '_dir_deletions', '_drop_axis',
'_drop_labels_or_levels', '_duplicated', '_find_valid_index', '_flags',
'_from_mgr', '_get_axis', '_get_axis_name', '_get_axis_number',
'_get_axis_resolvers', '_get_block_manager_axis', '_get_bool_data',
'_get_cacher', '_get_cleaned_column_resolvers', '_get_index_resolvers',
'_get_label_or_level_values', '_get_numeric_data', '_get_value', '_get_values',
'_get_values_tuple', '_get_with', '_gotitem', '_hidden_attrs', '_index',
'_indexed_same', '_info_axis', '_info_axis_name', '_info_axis_number',
'_init_dict', '_init_mgr', '_inplace_method', '_internal_names',
'_internal_names_set', '_is_cached', '_is_copy', '_is_label_or_level_reference',
14
'_is_label_reference', '_is_level_reference', '_is_mixed_type', '_is_view',
'_item_cache', '_ixs', '_logical_func', '_logical_method', '_map_values',
'_maybe_update_cacher', '_memory_usage', '_metadata', '_mgr',
'_min_count_stat_function', '_name', '_needs_reindex_multi',
'_protect_consolidate', '_reduce', '_reindex_axes', '_reindex_indexer',
'_reindex_multi', '_reindex_with_indexers', '_replace_single',
'_repr_data_resource_', '_repr_latex_', '_reset_cache', '_reset_cacher',
'_set_as_cached', '_set_axis', '_set_axis_name', '_set_axis_nocheck',
'_set_is_copy', '_set_labels', '_set_name', '_set_value', '_set_values',
'_set_with', '_set_with_engine', '_slice', '_stat_axis', '_stat_axis_name',
'_stat_axis_number', '_stat_function', '_stat_function_ddof',
'_take_with_is_copy', '_typ', '_update_inplace', '_validate_dtype', '_values',
'_where', 'abs', 'add', 'add_prefix', 'add_suffix', 'agg', 'aggregate', 'align',
'all', 'any', 'append', 'apply', 'argmax', 'argmin', 'argsort', 'array',
'asfreq', 'asof', 'astype', 'at', 'at_time', 'attrs', 'autocorr', 'axes',
'backfill', 'between', 'between_time', 'bfill', 'bool', 'clip', 'combine',
'combine_first', 'compare', 'convert_dtypes', 'copy', 'corr', 'count', 'cov',
'cummax', 'cummin', 'cumprod', 'cumsum', 'describe', 'diff', 'div', 'divide',
'divmod', 'dot', 'drop', 'drop_duplicates', 'droplevel', 'dropna', 'dtype',
'dtypes', 'duplicated', 'empty', 'eq', 'equals', 'ewm', 'expanding', 'explode',
'factorize', 'ffill', 'fillna', 'filter', 'first', 'first_valid_index', 'flags',
'floordiv', 'ge', 'get', 'groupby', 'gt', 'hasnans', 'head', 'hist', 'iat',
'idxmax', 'idxmin', 'iloc', 'index', 'infer_objects', 'interpolate',
'is_monotonic', 'is_monotonic_decreasing', 'is_monotonic_increasing',
'is_unique', 'isin', 'isna', 'isnull', 'item', 'items', 'iteritems', 'keys',
'kurt', 'kurtosis', 'last', 'last_valid_index', 'le', 'loc', 'lt', 'mad', 'map',
'mask', 'max', 'mean', 'median', 'memory_usage', 'min', 'mod', 'mode', 'mul',
'multiply', 'name', 'nbytes', 'ndim', 'ne', 'nlargest', 'notna', 'notnull',
'nsmallest', 'nunique', 'pad', 'pct_change', 'pipe', 'plot', 'pop', 'pow',
'prod', 'product', 'quantile', 'radd', 'rank', 'ravel', 'rdiv', 'rdivmod',
'reindex', 'reindex_like', 'rename', 'rename_axis', 'reorder_levels', 'repeat',
'replace', 'resample', 'reset_index', 'rfloordiv', 'rmod', 'rmul', 'rolling',
'round', 'rpow', 'rsub', 'rtruediv', 'sample', 'searchsorted', 'sem',
'set_axis', 'set_flags', 'shape', 'shift', 'size', 'skew', 'slice_shift',
'sort_index', 'sort_values', 'squeeze', 'std', 'sub', 'subtract', 'sum',
'swapaxes', 'swaplevel', 'tail', 'take', 'to_clipboard', 'to_csv', 'to_dict',
'to_excel', 'to_frame', 'to_hdf', 'to_json', 'to_latex', 'to_list',
'to_markdown', 'to_numpy', 'to_period', 'to_pickle', 'to_sql', 'to_string',
'to_timestamp', 'to_xarray', 'transform', 'transpose', 'truediv', 'truncate',
'tz_convert', 'tz_localize', 'unique', 'unstack', 'update', 'value_counts',
'values', 'var', 'view', 'where', 'xs']
[33, 33, 35, 37, 39, 40, 40, 40, 40, 42, 42, 43, 44, 44, 44, 45, 46, 46, 48, 49,
49, 49, 49, 50, 50, 50, 51, 54, 56, 56, 56, 56, 57, 61, 62, 64, 65, 65, 66, 66,
66, 66, 67, 68, 70, 70, 70, 71, 71, 72, 72, 72, 72, 72, 73, 74, 74, 75, 76, 76,
76, 76, 77, 77, 78, 78, 78, 79, 79, 80, 80, 80, 81, 81, 82, 82, 83, 83, 83, 84,
84, 84, 85, 86, 86, 86, 87, 87, 87, 87, 88, 88, 88, 88, 88, 89, 89, 89, 90, 90,
90, 90, 91, 92, 92, 92, 93, 93, 93, 93, 95, 95, 96, 96, 96, 96, 97, 97, 98, 98,
99, 99, 100, 100, 100, 101, 101, 101, 102, 102, 103, 103, 104, 104, 104, 105,
15
105, 105, 105, 105, 105, 105, 105, 105, 108, 108, 108, 108, 108, 108, 109, 109,
110, 110, 110, 111, 111, 112, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115,
115, 117, 117, 117, 118, 118, 119, 119, 119, 119, 120, 122, 123, 123, 123, 123,
123, 124, 125, 126, 127, 128, 128, 129, 130, 131, 131, 132, 132, 134, 134, 134,
135, 135, 136, 136, 136, 137, 138, 138, 138, 139, 140, 144, 145, 146, 146, 146,
146, 147, 149, 150, 150, 150, 150, 151, 152, 152, 152, 153, 153, 153, 154, 154,
154, 155, 155, 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 159, 159, 160,
160, 160, 160, 162, 164, 166, 167, 167, 168, 170, 170, 170, 170, 171, 172, 172,
173, 173, 173, 174, 174, 175, 175, 176, 176, 177, 178, 179, 179, 180, 180, 180,
182, 183, 183, 183, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 188, 189,
190, 190, 192, 192, 192, 196, 196, 196, 197, 197, 202, 202, 202, 203, 204, 206,
207, 209, 210, 210, 211, 212, 213, 214, 216, 219, 220, 221, 221, 222, 222, 224,
225, 225, 226, 227, 228, 229, 230, 231, 233, 236, 236, 237, 241, 243, 244, 245,
247, 249, 254, 254, 258, 259, 259, 261, 261, 265, 267, 268, 269, 276, 276, 290,
295, 301, 306, 312, 396]
33
396
[ ]: # type conversion
list(marks_series)
[ ]: dict(marks_series)
[ ]: {'maths': 67,
'english': 100,
'science': 89,
'hindi': 100,
'sst': 90,
'evs': 100}
[ ]: # membership operator
[ ]: True
[ ]: True
[ ]: movies
[ ]: movie
Zor Lagaa Ke…Haiya! Meghan Jadhav
Zokkomon Darsheel Safary
16
Zindagi Tere Naam Mithun Chakraborty
Zindagi Na Milegi Dobara Hrithik Roshan
Zindagi 50-50 Veena Malik
…
2 States (2014 film) Alia Bhatt
1971 (2007 film) Manoj Bajpayee
1920: The Evil Returns Vicky Ahuja
1920: London Sharman Joshi
1920 (film) Rajniesh Duggall
Name: lead, Length: 1500, dtype: object
[ ]: # looping
for i in movies.index:
print(i)
17
Yadvi – The Dignified Princess
Yaaram (2019 film)
Ya Rab
Xcuse Me
Woodstock Villa
Woh Lamhe…
Why Cheat India
What's Your Raashee?
What the Fish
Well Done Abba
Welcome to Sajjanpur
Welcome Back (film)
Welcome 2 Karachi
Welcome (2007 film)
Wedding Pullav
Wedding Anniversary
Waris Shah: Ishq Daa Waaris
War Chhod Na Yaar
Waqt: The Race Against Time
Wanted (2009 film)
Wake Up Sid
Wake Up India
Wajah Tum Ho
Waiting (2015 film)
Waisa Bhi Hota Hai Part II
Wah Taj
Wafa: A Deadly Love Story
Waarrior Savitri
W (2014 film)
Vodka Diaries
Vivah
Vishwaroopam
Viruddh… Family Comes First
Vidyaarthi
Victory (2009 film)
Vicky Donor
Viceroy's House (film)
Via Darjeeling
Veerey Ki Wedding
Veerappan (2016 film)
Veer-Zaara
Veer (2010 film)
Valentine's Night
Vaastu Shastra (film)
Vaah! Life Ho Toh Aisi!
Vaada Raha
Vaada (film)
Uvaa
18
Utthaan
Utt Pataang
Uri: The Surgical Strike
United Six
Union Leader (film)
Ungli
Umrika
Umrao Jaan (2006 film)
Umar (film)
Ujda Chaman
Ugly (film)
Udta Punjab
Udaan (2010 film)
U R My Jaan
U Me Aur Hum
Turning 30
Tumsa Nahin Dekha: A Love Story
Tumhari Sulu
Tumbbad
Tum Milo Toh Sahi
Tum Mile
Tum Bin II
Tulsi (film)
Tujhe Meri Kasam
Tubelight (2017 Hindi film)
Trump Card (film)
Trapped (2016 Hindi film)
Traffic Signal (film)
Traffic (2016 film)
Total Siyapaa
Total Dhamaal
Toonpur Ka Super Hero
Tom Dick and Harry (2006 film)
Toilet: Ek Prem Katha
Toh Baat Pakki!
Titoo MBA
Titli (2014 film)
Tiger Zinda Hai
Thugs of Hindostan
Thodi Life Thoda Magic
Thoda Tum Badlo Thoda Hum
Thoda Pyaar Thoda Magic
Thoda Lutf Thoda Ishq
The Zoya Factor (film)
The Xposé
The Train (2007 film)
The Tashkent Files
The Stoneman Murders
19
The Sky Is Pink
The Silent Heroes
The Shaukeens
The Pink Mirror
The Namesake (film)
The Lunchbox
The Last Lear
The Killer (2006 film)
The Journey of Karma
The Japanese Wife
The Hero: Love Story of a Spy
The Ghazi Attack
The Final Exit
The Film Emotional Atyachar
The Film
The Dirty Picture
The Bypass
The Blueberry Hunt
The Blue Umbrella (2005 film)
The Accidental Prime Minister (film)
Thanks Maa
Thank You (2011 film)
Thackeray (film)
Tezz
Tevar
Teri Meri Kahaani (film)
Tere Naam
Tere Naal Love Ho Gaya
Tere Mere Phere
Tere Bin Laden: Dead or Alive
Tere Bin Laden
Tera Kya Hoga Johnny
Tell Me O Kkhuda
Tehzeeb (2003 film)
Teesri Aankh: The Hidden Camera
Tees Maar Khan (2010 film)
Teen Thay Bhai
Teen Patti (film)
Te3n
Taxi No. 9211
Tathastu
Tashan (film)
Tanu Weds Manu: Returns
Tanu Weds Manu: Returns
Tanu Weds Manu
Tango Charlie
Tamanchey
Talvar (film)
20
Talaash: The Hunt Begins…
Talaash: The Answer Lies Within
Take It Easy (2015 film)
Taj Mahal: An Eternal Love Story
Tahaan
Table No. 21
Taarzan: The Wonder Car
Taare Zameen Par
Ta Ra Rum Pum
Sweetiee Weds NRI
Swami (2007 film)
Swades
Super Nani
Super Model (film)
Super 30 (film)
Suno Sasurjee
Sunglass (film)
Sunday (2008 film)
Summer 2007
Sultan (2016 film)
Sulemani Keeda
Sukhmani: Hope for Life
Sui Dhaaga
Stumped (film)
Student of the Year 2
Student of the Year
Strings of Passion
Striker (2010 film)
Stree (2018 film)
Strangers (2007 Hindi film)
Staying Alive (2012 film)
Station (2014 film)
Stanley Ka Dabba
Ssukh
Sssshhh…
Speed (2007 film)
Special 26
Spark (2014 film)
Souten: The Other Woman
Sorry Daddy
Sorry Bhai!
Sooper Se Ooper
Sonu Ke Titu Ki Sweety
Sons of Ram
Soni (film)
Sonchiriya
Sonali Cable
Son of Sardaar
21
Socha Na Tha
Soch Lo
Sixteen (2013 Indian film)
Sirf (film)
Singham Returns
Singham
Singh Saab the Great
Singh Is Kinng
Singh Is Bliing
Simran (film)
Simmba
Silsiilay
Sikandar (2009 film)
Siddharth (2013 film)
Shukriya: Till Death Do Us Apart
Shuddh Desi Romance
Shubh Mangal Saavdhan
Showbiz (film)
Shortkut
Shortcut Safari
Shortcut Romeo
Shorgul
Shor in the City
Shootout at Lokhandwala
Sholay
Shivaay
Shiva (2006 film)
Shirin Farhad Ki Toh Nikal Padi
Ship of Theseus (film)
Shikhar (film)
Sheesha (2005 film)
Sheen (film)
Shart: The Challenge
Sharafat Gayi Tel Lene
Shanghai (2012 film)
Shamitabh
Shakalaka Boom Boom
Shaitan (film)
Shahid (film)
Shagird (2011 film)
Shabri
Shabnam Mausi
Shabd (film)
Shab (film)
Shaapit
Shaandaar
Shaadi Se Pehle
Shaadi No. 1
22
Shaadi Mein Zaroor Aana
Shaadi Ke Side Effects
Shaadi Karke Phas Gaya Yaar
Shaadi Ka Laddoo
Setters (film)
Sehar
Section 375
Secret Superstar
Second Hand Husband
Say Salaam India
Satyameva Jayate (2018 film)
Satyagraha (film)
Satya 2
Satta (film)
Satrangee Parachute
Satellite Shankar
Sarkar Raj
Sarkar 3
Sarkar (2005 film)
Sarhad Paar
Sarbjit (film)
Santa Banta Pvt Ltd
Sankat City
Sanju
Sandwich (2006 film)
Sanam Re
Samrat & Co.
Samay: When Time Strikes
Sallu Ki Shaadi
Salaam-e-Ishq: A Tribute to Love
Salaam Namaste
Sahi Dhandhe Galat Bande
Saheb Biwi Aur Gangster Returns
Saheb Biwi Aur Gangster 3
Saheb Biwi Aur Gangster
Sadiyaan
Sadda Adda
Sacred Evil – A True Story
Sachin: A Billion Dreams
Sabki Bajegi Band
Saaya (2003 film)
Saawariya
Saawan… The Love Season
Saathiya (film)
Saat Uchakkey
Saas Bahu Aur Sensex
Saare Jahaan Se Mehnga
Saansein
23
Saankal
Saand Ki Aankh
Saaho
Rustom (film)
Rush (2012 film)
Running Shaadi
Run (2004 film)
Rules: Pyaar Ka Superhit Formula
Rukh (film)
Rudraksh (film)
Roy (film)
Rough Book
Rokkk
Rok Sako To Rok Lo
Rog
Rocky Handsome
Rockstar (2011 film)
Rocket Singh: Salesman of the Year
Rock On!!
Rock On 2
Roar: Tigers of the Sundarbans
Roadside Romeo
Road to Sangam
Riyasat (film)
Risknamaa
Risk (2007 film)
Right Yaaa Wrong
Right Here Right Now (film)
Ribbon (film)
Revolver Rani
Revati (film)
Red: The Dark Side
Red Swastik
Red Alert: The War Within
Rebellious Flower
Rascals (2011 film)
Raqeeb
Rann (film)
Rangrezz
Rangoon (2017 Hindi film)
Rang Rasiya
Rang De Basanti
Ranchi Diaries
Ranbanka
Ramprasad Ki Tehrvi
Ramji Londonwaley
Ramayana: The Epic
Raman Raghav 2.0
24
Ramaiya Vastavaiya
Ramaa: The Saviour
Rakhtbeej
Rakht
Rajma Chawal
Rajjo
Raja Natwarlal
Raja Bhaiya (film)
Raincoat (film)
Raid (2018 film)
Rahasya
Ragini MMS 2
Ragini MMS
Raghu Romeo
Raees (film)
Race 3
Race 2
Race (2008 film)
Rab Ne Bana Di Jodi
Raazi
Raaz: The Mystery Continues
Raaz: Reboot
Raaz (2002 film)
Raavan
Raat Gayi Baat Gayi?
Raanjhanaa
Raag Desh (film)
Raabta (film)
Ra.One
Quick Gun Murugun
Queen (2014 film)
Qissa (film)
Qayamat: City Under Threat
Qarib Qarib Singlle
Qaidi Band
Pyare Mohan
Pyaar Mein Twist
Pyaar Ke Side Effects
Pyaar Ka Punchnama 2
Pyaar Ka Punchnama
Pyaar Impossible!
Purani Jeans
Prince (2010 film)
Prem Ratan Dhan Payo
Prem Kaa Game
Prateeksha
Prassthanam
Pranaam
25
Prague (2013 film)
Praan Jaye Par Shaan Na Jaye
Poster Boys
Popcorn Khao! Mast Ho Jao
Policegiri
Police Force: An Inside Story
Players (2012 film)
Plan (film)
Pizza (2014 film)
Pink (2016 film)
Pinjar (film)
Piku
Pihu
Photograph (film)
Phoonk 2
Phobia (2016 film)
Phir Milenge
Phir Kabhi
Phir Hera Pheri
Phillauri (film)
Phhir
Phata Poster Nikhla Hero
Phas Gaye Re Obama
Phantom (2015 film)
Phamous
Pehchaan: The Face of Truth
Peepli Live
Paying Guests
Patiala House (film)
Pati Patni Aur Woh (2019 film)
Patel Ki Punjabi Shaadi
Pataakha
Parwana (2003 film)
Partner (2007 film)
Parmanu: The Story of Pokhran
Parineeta (2005 film)
Parched
Paranthe Wali Gali
Pankh
Panchlait
Paltan (film)
Pal Pal Dil Ke Paas
Paisa Vasool
Paheli
Page 3 (film)
Pagalpanti (2019 film)
Padmashree Laloo Prasad Yadav
Padmaavat
26
Paathshaala
Paap
Paanch Ghantey Mien Paanch Crore
Paan Singh Tomar (film)
Paa (film)
PM Narendra Modi
PK (film)
P Se Pyaar F Se Faraar
P Se PM Tak
Oye Lucky! Lucky Oye!
Out of Control (2003 film)
One by Two (2014 film)
One Two Three
One Day: Justice Delivered
Once Upon ay Time in Mumbai Dobaara!
Once Upon a Time in Mumbaai
Omkara (2006 film)
Omerta (film)
Om-Dar-B-Dar
Om Shanti Om
Om (2003 film)
Ok Jaanu
Oh My God (2008 film)
October (2018 film)
OMG – Oh My God!
O Teri
Nothing but Life
Notebook (2019 film)
Not a Love Story (2011 film)
Noor (film)
No Smoking (2007 film)
No Problem (2010 film)
No One Killed Jessica
No Entry
Nishabd
Nirdosh
Nil Battey Sannata
Newton (film)
New York (2009 film)
Netaji Subhas Chandra Bose: The Forgotten Hero
Nehlle Pe Dehlla
Neerja
Neal 'n' Nikki
Nayee Padosan
Nawabzaade
Nautanki Saala!
Naughty @ 40
Nasha (film)
27
Naqaab
Nanu Ki Jaanu
Nanhe Jaisalmer
Namastey London
Namaste England
Naksha
Naina (2005 film)
Naam Shabana
Naach (2004 film)
Na Ghar Ke Na Ghaat Ke
NH10 (film)
NH-8 Road to Nidhivan
My Wife's Murder
My Name Is Khan
My Friend Pinto
My Brother…Nikhil
My Bollywood Bride
My Birthday Song
Muskaan
Musafir (2004 film)
Murder 3
Murder 2
Murder (2004 film)
Murari the Mad Gentleman
Munna Michael
Munna Bhai M.B.B.S.
Mummy Punjabi
Mumbhai Connection
Mumbai Se Aaya Mera Dost
Mumbai Salsa
Mumbai Meri Jaan
Mumbai Matinee
Mumbai Mast Kallander
Mumbai Delhi Mumbai
Mumbai Can Dance Saala
Mumbai 125 KM
Mulk (film)
Mukkabaaz
Mukhbiir
Mujhse Shaadi Karogi
Mujhse Fraaandship Karoge
Mughal-e-Azam
Mubarakan
Mr. X (2015 film)
Mr. Singh Mrs. Mehta
Mr. Bhatti on Chutti
Mr Prime Minister
Motu Patlu: King of Kings
28
Motichoor Chaknachoor
Morning Raga
Monsoon Shootout
Monica (film)
Money Hai Toh Honey Hai
Mom (film)
Mohenjo Daro (film)
Mohalla Assi
Moh Maya Money
Mittal v/s Mittal
Mitron
Mission Mangal
Mission Istaanbul
Missing (2018 film)
Miss Tanakpur Haazir Ho
Mirzya (film)
Mirch
Miley Naa Miley Hum
Milenge Milenge
Mickey Virus
Meri Pyaari Bindu
Meri Biwi Ka Jawaab Nahin
Mere Pyare Prime Minister
Mere Jeevan Saathi (2006 film)
Mere Genie Uncle
Mere Dost Picture Abhi Baki Hai
Mere Dad Ki Maruti
Mere Brother Ki Dulhan
Mere Baap Pehle Aap
Mercury (film)
Memories in March
Meinu Ek Ladki Chaahiye
Meeruthiya Gangsters
Meerabai Not Out
Meenaxi: A Tale of Three Cities
Maximum (film)
Mausam (2011 film)
Matrubhoomi
Matru Ki Bijlee Ka Mandola
Mastram
Mastizaade
Masti (2004 film)
Masaan
Mary Kom (film)
Married 2 America
Market (2003 film)
Marjaavaan
Marigold (2007 film)
29
Margarita with a Straw
Mardaani 2
Mardaani
Mard Ko Dard Nahi Hota
Maqbool
Mantra (2016 film)
Manto (2018 film)
Manorama Six Feet Under
Manmarziyaan
Manjunath (film)
Manjhi – The Mountain Man
Manikarnika: The Queen of Jhansi
Mangal Pandey: The Rising
Malik Ek
Malamaal Weekly
Maine Pyaar Kyun Kiya?
Maine Gandhi Ko Nahin Mara
Main Tera Hero
Main Prem Ki Diwani Hoon
Main Meri Patni Aur Woh
Main Madhuri Dixit Banna Chahti Hoon
Main Krishna Hoon
Main Hoon Part-Time Killer
Main Hoon Na
Main Aurr Mrs Khanna
Main Aur Mr. Riight
Main Aisa Hi Hoon
Mai (2013 film)
Magic Magic 3D
Madras Cafe
Madhoshi
Made in China (2019 film)
Madaari
Mad About Dance
Machine (2017 film)
Machhli Jal Ki Rani Hai
Maazii
Maatr
Maan Gaye Mughal-e-Azam
MSG: The Warrior Lion Heart
MSG: The Messenger
MSG-2 The Messenger
MP3: Mera Pehla Pehla Pyaar
M.S. Dhoni: The Untold Story
M Cream
Luv U Soniyo
Luv U Alia
Lucky: No Time for Love
30
Lucky Kabootar
Lucknow Central
Luckhnowi Ishq
Luck by Chance
Luck (2009 film)
Loveshhuda
Love per Square Foot
Love in Bombay
Love U…Mr. Kalakaar!
Love Story 2050
Love Sonia
Love Shagun
Love Sex Aur Dhokha
Love Ke Chakkar Mein
Love Games (film)
Love Breakups Zindagi
Love Aaj Kal
Lootera
London Dreams
Loins of Punjab Presents
Login (film)
Little Zizou
Listen… Amaya
Lipstick Under My Burkha
Life in a… Metro
Life Partner
Life Mein Kabhie Kabhiee
Life Ki Toh Lag Gayi
Life Is Beautiful (2014 film)
Life Express (2010 film)
Lekar Hum Deewana Dil
Lamhaa
Lakshya (film)
Lakshmi (2014 film)
Lakeer – Forbidden Lines
Laila Majnu (2018 film)
Lahore (film)
Lage Raho Munna Bhai
Lafangey Parindey
Ladies vs Ricky Bahl
Laal Rang
Laaga Chunari Mein Daag
LOC Kargil
Kyun! Ho Gaya Na…
Kyon Ki
Kyaa Super Kool Hain Hum
Kyaa Kool Hain Hum 3
Kyaa Kool Hai Hum
31
Kya Love Story Hai
Kya Dilli Kya Lahore
Kushti (film)
Kurbaan (2009 film)
Kuku Mathur Ki Jhand Ho Gayi
Kudiyon Ka Hai Zamana
Kuchh Meetha Ho Jaye
Kuchh Bheege Alfaaz
Kuch Naa Kaho
Kuch Kuch Locha Hai
Kucch To Hai
Kucch Luv Jaisaa
Krrish
Krishna Cottage
Krishna Aur Kans
Krazzy 4
Koyelaanchal
Koi… Mil Gaya
Koi Mere Dil Mein Hai
Koi Aap Sa
Knock Out (2010 film)
Kites (film)
Kisse Pyaar Karoon
Kisna: The Warrior Poet
Kismat Love Paisa Dilli
Kismat Konnection
Kismat (2004 film)
Kisaan
Kis Kisko Pyaar Karoon
Kis Kis Ki Kismat
Kill Dil
Kick (2014 film)
Ki & Ka
Khwahish
Khwaabb
Khushi (2003 Hindi film)
Khuda Kasam
Khoya Khoya Chand
Khosla Ka Ghosla
Khoobsurat (2014 film)
Khichdi: The Movie
Khel – No Ordinary Game
Khel Toh Ab Shuru Hoga
Khatta Meetha (2010 film)
Khap (film)
Khamoshiyan
Khamoshi (2019 film)
Khamoshh… Khauff Ki Raat
32
Khamosh Pani
Khakee
Khajoor Pe Atke
Kesari (film)
Keep Safe Distance (film)
Kaun Kitne Paani Mein
Kaun Hai Jo Sapno Mein Aaya
Katti Batti
Kash Aap Hamare Hote
Kasak (2005 film)
Karzzzz
Karwaan
Karthik Calling Karthik
Karma Aur Holi
Karle Pyaar Karle
Karar: The Deal
Karam (film)
Kapoor & Sons
Kaminey
Kalyug (2005 film)
Kalank
Kal Ho Naa Ho
Kaise Kahoon Ke… Pyaar Hai
Kai Po Che!
Kahin Hai Mera Pyar
Kahaani
Kagaar: Life on the Edge
Kadvi Hawa
Kabul Express
Kabir Singh
Kabhi Alvida Naa Kehna
Kaashi in Search of Ganga
Kaante
Kaanchi: The Unbreakable
Kaalo
Kaalakaandi
Kaal (2005 film)
Kaagaz Ke Fools
Kaabil
Just Married (2007 film)
Jurm (2005 film)
Junooniyat
Junglee (2019 film)
Julie 2
Julie (2004 film)
Jugni (2016 film)
Judwaa 2
Judgementall Hai Kya
33
Jolly LLB
Joker (2012 film)
Johnny Gaddaar
John Day (film)
Joggers' Park (film)
Jodi Breakers
Jodhaa Akbar
Jo Hum Chahein
Jo Bole So Nihaal (film)
Jism (2003 film)
Jimmy (2008 film)
Jigyaasa
Jigariyaa
Jia Aur Jia
Jhootha Kahin Ka
Jhootha Hi Sahi
Jhoom Barabar Jhoom
Jhankaar Beats
Jhalki
Jeena Isi Ka Naam Hai (film)
Jeena Hai Toh Thok Daal
Jazbaa
Jayantabhai Ki Luv Story
Jawani Diwani: A Youthful Joyride
Jattu Engineer
Jannat (film)
Janasheen
James (2005 film)
Jalpari: The Desert Mermaid
Jalebi (film)
Jal (film)
Jajantaram Mamantaram
Jail (2009 film)
Jai Veeru
Jai Jawaan Jai Kisaan (film)
Jai Ho (film)
Jai Gangaajal
Jai Chiranjeeva
Jahan Jaaeyega Hamen Paaeyega
Jagga Jasoos
Jackpot (2013 film)
Jack and Dil
Jabariya Jodi
Jab We Met
Jab Tak Hai Jaan
Jab Harry Met Sejal
Jaane Kyun De Yaaron
Jaane Kahan Se Aayi Hai
34
Jaane Hoga Kya
Jaan-E-Mann
Jaal: The Trap
JD (film)
It's a Wonderful Afterlife
Issaq
Island City (2015 film)
Isi Life Mein
Ishqiya
Ishqeria
Ishqedarriyaan
Ishq Vishk
Ishq Ke Parindey
Ishq Hai Tumse
Ishq Forever
Ishq Click
Ishkq in Paris
Ishaqzaade
Irudhi Suttru
Irada (2017 film)
Iqraar by Chance
Iqbal (film)
Inteqam: The Perfect Game
Inteha (2003 film)
Insan
Insaaf: The Justice
Inkaar (2013 film)
Indu Sarkar
Indian Babu
India's Most Wanted (film)
Impatient Vivek
I See You (2006 film)
I Proud to Be an Indian
I Love NY (2015 film)
I Love Desi
I Hate Luv Storys
I Am Kalam
I Am (2010 Indian film)
Hyderabad Blues 2
Hunterrr
Hungama (2003 film)
Humshakals
Humpty Sharma Ki Dulhania
Humne Jeena Seekh Liya
Humko Tumse Pyaar Hai
Humko Deewana Kar Gaye
Hume Tumse Pyaar Kitna
Hum Tumhare Hain Sanam
35
Hum Tum Shabana
Hum Tum Aur Ghost
Hum Tum
Hum Hai Raahi Car Ke
Hum Chaar
Hulchul (2004 film)
Housefull 4
Housefull 2
Housefull (2010 film)
Hotel Salvation
Hostel (2011 film)
Horror Story (film)
Hope Aur Hum
Honour Killing (film)
Honeymoon Travels Pvt. Ltd.
Home Delivery
Holiday: A Soldier Is Never Off Duty
Holiday (2006 film)
Hisss
Hindi Medium
Hind Ka Napak Ko Jawab: MSG Lion Heart 2
Himmatwala (2013 film)
Highway (2014 Hindi film)
High Jack (film)
Hichki
Heyy Babyy
Hey Bro
Heropanti
Heroine (2012 film)
Heroes (2008 film)
Hero (2015 Hindi film)
Help (film)
Hello Darling
Hello (2008 film)
Helicopter Eela
Heartless (2014 film)
Hazaaron Khwaishein Aisi
Hawayein
Hawas (2004 film)
Hawaizaada
Hawaa Hawaai
Hawa (film)
Hava Aney Dey
Haunted – 3D
Hatya (2004 film)
Hattrick (film)
Hate Story 4
Hate Story 2
36
Hate Story
Hastey Hastey
Haseena Parkar
Hasee Toh Phasee
Hari Puttar: A Comedy of Terrors
Haraamkhor
Happy Phirr Bhag Jayegi
Happy New Year (2014 film)
Happy Husbands (2011 film)
Happy Ending (film)
Happy Bhag Jayegi
Hanuman (2005 film)
Hamid (film)
Hamari Adhuri Kahani
Halla Bol
Halkaa
Half Girlfriend (film)
Haider (film)
Hai Apna Dil Toh Awara
Haasil
Haal-e-Dil
Guzaarish (film)
Guru (2007 film)
Gunday
Gumnaam – The Mystery
Gully Boy
Gulabi Gang (film)
Gulaal (film)
Gulaab Gang
Guest iin London
Guddu Rangeela
Guddu Ki Gun
Green Card Fever
Great Grand Masti
Grand Masti
Gour Hari Dastaan
Gori Tere Pyaar Mein
Good Newwz
Good Boy Bad Boy
Gone Kesh
Golmaal: Fun Unlimited
Golmaal Returns
Golmaal Again
Gollu Aur Pappu
Goliyon Ki Raasleela Ram-Leela
Gold (2018 film)
God Tussi Great Ho
Goal (2007 Hindi film)
37
Go Goa Gone
Go (2007 film)
Global Baba
Girlfriend (2004 film)
Gippi
Ghost (2019 film)
Ghost (2012 film)
Ghayal: Once Again
Ghanchakkar (film)
Ghajini (2008 film)
Genius (2018 Hindi film)
Gayab
Gauri: The Unborn
Gattu
Garv: Pride & Honour
Garam Masala (2005 film)
Gangster (2006 film)
Gangs of Wasseypur – Part 2
Gangs of Wasseypur
Gangoobai
Gangaajal
Gang of Ghosts
Gandhi My Father
Game (2011 film)
Gali Guleiyan
Gabbar Is Back
Gabbar Is Back
G Kutta Se
Fun2shh… Dudes in the 10th Century
Fun – Can Be Dangerous Sometimes
Fukrey Returns
Fukrey
Fugly (film)
Fuddu
FryDay
Fruit and Nut (film)
From Sydney with Love
Fredrick (film)
Freaky Ali
Fraud Saiyaan
Fox (film)
Force 2
Force (2011 film)
Footpath (2003 film)
Fool & Final
Flavors (film)
Flat 211
Fitoor
38
Firangi
Firaaq
Finding Fanny
Filmistaan
Fight Club – Members Only
Fida
Fever (2016 film)
Ferrari Ki Sawaari
Fatso!
Fashion (2008 film)
Fareb (2005 film)
Fanaa (2006 film)
Fan (film)
Familywala
Family of Thakurganj
Family (2006 film)
F.A.L.T.U
Evening Shadows
Escape from Taliban
Entertainment (2014 film)
English Vinglish
Enemmy
Elaan (2005 film)
Eklavya: The Royal Guard
Ekkees Toppon Ki Salaami
Ekk Deewana Tha
Ek: The Power of One
Ek Vivaah… Aisa Bhi
Ek Villain
Ek Thi Rani Aisi Bhi
Ek Thi Daayan
Ek Tha Tiger
Ek Second… Jo Zindagi Badal De?
Ek Se Bure Do
Ek Se Badhkar Ek (2004 film)
Ek Paheli Leela
Ek Main Aur Ekk Tu
Ek Khiladi Ek Haseena (film)
Ek Kahani Julie Ki
Ek Hasina Thi (film)
Ek Haseena Thi Ek Deewana Tha
Ek Din 24 Ghante
Ek Chalis Ki Last Local
Ek Aur Ek Gyarah
Ek Alag Mausam
Ek Ajnabee
Eight: The Power of Shani
Dus Kahaniyaan
39
Dus
Dunno Y… Na Jaane Kyon
Dum Maaro Dum (film)
Dum Laga Ke Haisha
Dum (2003 Hindi film)
Dulha Mil Gaya
Dude Where's the Party?
Drona (2008 film)
Drishyam (2015 film)
Dreams (2006 film)
Dream Girl (2019 film)
Double Dhamaal
Double Cross (2005 film)
Dosti: Friends Forever
Dostana (2008 film)
Dor (film)
Dongari Ka Raja
Don't Stop Dreaming
Don Muthu Swami
Don 2
Don (2006 Hindi film)
Dolly Ki Doli
Dobara
Dobaara: See Your Evil
Do Lafzon Ki Kahani (film)
Do Dooni Chaar
Dishoom
Dishkiyaoon
Dirty Politics (film)
Direct Ishq
Dilwale (2015 film)
Dilliwali Zaalim Girlfriend
Dil Toh Deewana Hai
Dil Toh Baccha Hai Ji
Dil Pardesi Ho Gayaa
Dil Ne Jise Apna Kahaa
Dil Maange More
Dil Kabaddi
Dil Ka Rishta
Dil Juunglee
Dil Jo Na Keh Saka
Dil Jo Bhi Kahey…
Dil Dosti Etc
Dil Diya Hai
Dil Dhadakne Do
Dil Bole Hadippa!
Dil Bechara Pyaar Ka Maara
Dil Bechara
40
Dhund (2003 film)
Dhoop
Dhoondte Reh Jaaoge
Dhoom 3
Dhoom 2
Dhoom
Dhol (film)
Dhokha
Dhobi Ghat (film)
Dharti Kahe Pukar Ke (2006 film)
Dharm (film)
Dharam Sankat Mein
Dhanak
Dhamaal
Dhadak
Devi (2016 film)
Devdas (2002 Hindi film)
Devaki (2005 film)
Dev (2004 film)
Detective Byomkesh Bakshy!
Desi Kattey
Desi Boyz
Deshdrohi
Department (film)
Delhi-6
Delhi Safari
Delhi Belly (film)
Dekh Tamasha Dekh
Dehraadun Diary
Deewane Huye Paagal
Deewaar (2004 film)
Dedh Ishqiya
Dear Zindagi
Dear Maya
Dear Friend Hitler
Dear Dad (film)
Deadline: Sirf 24 Ghante
De Taali
De De Pyaar De
De Dana Dan
Days of Tafree
Dasvidaniya
Dassehra
Darwaaza Bandh Rakho
Darr @ the Mall
Darna Zaroori Hai
Darna Mana Hai
Darling (2007 Indian film)
41
Dangerous Ishhq
Dangal (film)
Damadamm!
Daddy Cool (2009 Hindi film)
Daddy (2017 film)
Dabangg 3
Dabangg 2
Dabangg
Daawat-e-Ishq
Daas Dev
D-Day (2013 film)
D (film)
Crook (film)
Creature 3D
Crazy Cukkad Family
Court (film)
Corporate (2006 film)
Contract (2008 film)
Company (film)
Commando: A One Man Army
Commando 3 (film)
Coffee with D
Coffee Bloom
Cocktail (2012 film)
Click (2010 film)
Classic – Dance of Love
CityLights (2014 film)
City of Gold (2010 film)
Cigarette Ki Tarah
Chura Liyaa Hai Tumne
Chup Chup Ke
Chori Chori (2003 film)
Chor Chor Super Chor
Chocolate (2005 film)
Chittagong (film)
Chintu Ji
Chingaari
Chinar Daastaan-E-Ishq
Chillar Party
Children of War (2014 film)
Children of Heaven
Chicken Curry Law
Chhota Bheem and the Throne of Bali
Chhodon Naa Yaar
Chetna: The Excitement
Chennai Express
Chehraa
Chef (2017 film)
42
Cheeni Kum
Chatur Singh Two Star
Chashme Baddoor (2013 film)
Chase (2010 film)
Chargesheet (film)
Charas (2004 film)
Chandni Chowk to China
Chand Sa Roshan Chehra
Chand Ke Paar Chalo (film)
Chance Pe Dance
Chamku
Chameli (film)
Chalte Chalte (2003 film)
Chalo Dilli
Challo Driver
Chalk n Duster
Chal Pichchur Banate Hain
Chal Chala Chal
Chakravyuh (2012 film)
Chak De! India
Chaarfutiya Chhokare
Chaar Din Ki Chandni
Chaalis Chauraasi
Chaahat – Ek Nasha
Cash (2007 film)
Calendar Girls (2015 film)
Calcutta Mail
Calapor (film)
C Kkompany
Bypass Road (film)
Bunty Aur Babli
Bumper Draw
Bumm Bumm Bole
Bumboo
Bullett Raja
Bullet: Ek Dhamaka
Buddha in a Traffic Jam
Buddha Mar Gaya
Bubble Gum (film)
Brothers (2015 film)
Brij Mohan Amar Rahe
Breakaway (2011 film)
Break Ke Baad
Brahman Naman
Boss (2013 Hindi film)
Border (1997 film)
Boom (film)
Bombay to Goa (2007 film)
43
Bombay to Bangkok
Bombay Velvet
Bombay Talkies (film)
Bombairiya
Bollywood Diaries
Bol Bachchan
Bodyguard (2011 Hindi film)
Bobby Jasoos
Bluffmaster!
Blue (2009 film)
Bloody Isshq
Blood Money (2012 film)
Blood Brothers (2007 Indian film)
Blackmail (2005 film)
Black Friday (2007 film)
Black (2005 film)
Bittoo Boss
Bioscopewala
Bin Bulaye Baraati
Billu
Big Brother (2007 film)
Bhram
Bhopal: A Prayer for Rain
Bhoothnath Returns
Bhoothnath
Bhoot Unkle
Bhoot Returns
Bhoot (film)
Bhoomi (film)
Bhool Bhulaiyaa
Bhola in Bollywood
Bheja Fry 2
Bheja Fry (film)
Bhavesh Joshi Superhero
Bhanwarey
Bhaiaji Superhit
Bhagmati (2005 film)
Bhagam Bhag
Bhaag Milkha Bhaag
Bhaag Johnny
Bezubaan Ishq
Beyond the Clouds (2017 film)
Bewakoofiyaan
Bewafaa (2005 film)
Being Cyrus
Beiimaan Love
Behen Hogi Teri
Begum Jaan
44
Befikre
Bbuddah… Hoga Terra Baap
Bazaar E Husn
Batti Gul Meter Chalu
Battalion 609
Batla House
Basti (film)
Bas Ek Pal
Barsaat (2005 film)
Barkhaa
Barfi!
Bareilly Ki Barfi
Bardaasht
Barah Aana
Bank Chor
Banjo (2016 film)
Bangistan
Bang Bang!
Bandook
Band Baaja Baaraat
Banaras (2006 film)
Balwinder Singh Famous Ho Gaya
Bala (2019 film)
Bajrangi Bhaijaan
Bajirao Mastani
Bajatey Raho
Baghban (2003 film)
Badrinath Ki Dulhania
Badmashiyaan
Badlapur Boys
Badlapur (film)
Badla (2019 film)
Badhaai Ho
Bachna Ae Haseeno
Bachke Rehna Re Baba
Bachche Kachche Sachche
Baby (2015 Hindi film)
Babumoshai Bandookbaaz
Babuji Ek Ticket Bambai
Babloo Happy Hai
Baazaar
Baaz: A Bird in Danger
Baat Bann Gayi
Baar Baar Dekho
Baaghi 2
Baaghi (2016 film)
Baabul (2006 film)
Baabarr
45
B.A. Pass
Azhar (film)
Awarapan
Awara Paagal Deewana
Aval (2017 film)
Aurangzeb (film)
Aur Pappu Paas Ho Gaya
Ata Pata Laapata
Asambhav
Aryan: Unbreakable
Article 15 (film)
Armaan (2003 film)
Arjun: The Warrior Prince
Arjun Patiala
Apne
Apna Sapna Money Money
Apna Asmaan
Apartment (film)
Apaharan
Anwar (2007 film)
Anuradha (2014 film)
Anthony Kaun Hai?
Antardwand
Anna (2016 film)
Ankur Arora Murder Case
Ankhon Dekhi
Ankahee (2006 film)
Anjaane (2005 film)
Anjaana Anjaani
Angel (2011 film)
Andhadhun
Andaaz
Andaaz
Anamika (2008 film)
Anaarkali of Aarah
Amit Sahni Ki List
Amavas
Always Kabhi Kabhi
Aloo Chaat (film)
Alone (2015 Hindi film)
Allah Ke Banday
All the Best: Fun Begins
All Is Well (2015 film)
Aligarh (film)
Albert Pinto Ko Gussa Kyun Aata Hai?
Alag
Aladin (film)
Aksar 2
46
Aksar
Akaash Vani
Ajji
Ajab Prem Ki Ghazab Kahani
Ajab Gazabb Love
Aiyyaa
Aiyaary
Aitraaz
Aisa Yeh Jahaan
Aisa Kyon Hota Hai?
Airlift (film)
Ahista Ahista (2006 film)
Agnipankh
Agneepath (2012 film)
Aggar (film)
Agent Vinod (2012 film)
Aetbaar
Ae Dil Hai Mushkil
Adharm (2006 film)
Action Replayy
Action Jackson (2014 film)
Acid Factory
Accident on Hill Road
Ab Tumhare Hawale Watan Saathiyo
Ab Tak Chhappan 2
Aazaan
Aasma: The Sky Is the Limit
Aashiqui.in
Aashiqui 2
Aashiq Banaya Aapne
Aashayein
Aarakshan
Aapko Pehle Bhi Kahin Dekha Hai
Aap Ki Khatir (2006 film)
Aap Kaa Surroor
Aankhen (2002 film)
Aanch
Aan: Men at Work
Aalaap (film)
Aakrosh (2010 film)
Aakhari Decision
Aaja Nachle
Aaj Ka Andha Kanoon
Aagey Se Right
Aag (2007 film)
Aabra Ka Daabra
Aa Gaya Hero
Aa Dekhen Zara
47
ABCD 2
A Gentleman
A Flying Jatt
A Flat (film)
A Decent Arrangement
?: A Question Mark
99.9 FM (film)
99 (2009 film)
88 Antop Hill
7½ Phere
7 Khoon Maaf
7 Hours to Go
68 Pages
5 Weddings
404 (film)
3G (film)
36 China Town
31st October (film)
3 Storeys
3 Idiots
3 Deewarein
3 A.M. (2014 film)
22 Yards
2 States (2014 film)
1971 (2007 film)
1920: The Evil Returns
1920: London
1920 (film)
[ ]: # Arithmetic Operators(Broadcasting)
100 + marks_series
[ ]: maths 167
english 200
science 189
hindi 200
sst 190
evs 200
Name: nitish ke marks, dtype: int64
[ ]: # Relational Operators
vk >= 50
[ ]: match_no
1 False
2 False
48
3 False
4 False
5 False
…
211 False
212 False
213 True
214 False
215 False
Name: runs, Length: 215, dtype: bool
[ ]: 50
[ ]: 9
[ ]: # Count number of day when I had more than 200 subs a day
subs[subs > 200].size
[ ]: 59
[ ]: Akshay Kumar 48
Amitabh Bachchan 45
Ajay Devgn 38
Salman Khan 31
Sanjay Dutt 26
Shah Rukh Khan 22
Emraan Hashmi 21
Name: lead, dtype: int64
[ ]: subs.plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f54e0531a60>
49
[ ]: movies.value_counts().head(20).plot(kind='pie')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f54e04f6850>
50
0.0.16 Some Important Series Methods
[ ]: # astype -> convert the Series to another dtype (e.g. int64 to int16)
# between -> boolean mask of the values lying between two bounds
# clip -> cap values below/above the given lower/upper limits
# drop_duplicates -> remove repeated values (keep= chooses which occurrence stays)
# isnull -> boolean mask of the missing (NaN) entries
# dropna -> drop the missing (NaN) entries
# fillna -> replace the missing (NaN) entries with a given value
# isin -> boolean mask of the values contained in a given list
# apply -> run a function on every element and collect the results
# copy -> take an independent copy so edits do not touch the original Series
[ ]: import numpy as np
import pandas as pd
[ ]: subs = pd.read_csv('/content/subs.csv',squeeze=True)
subs
[ ]: 0 48
1 57
2 40
3 43
4 44
…
360 231
361 226
362 155
363 144
364 172
Name: Subscribers gained, Length: 365, dtype: int64
[ ]: vk = pd.read_csv('/content/kohli_ipl.csv',index_col='match_no',squeeze=True)
vk
[ ]: match_no
1 1
2 23
3 13
4 12
5 1
..
211 0
212 20
213 73
214 25
215 7
51
Name: runs, Length: 215, dtype: int64
[ ]: movies = pd.read_csv('/content/bollywood.csv',index_col='movie',squeeze=True)
movies
[ ]: movie
Uri: The Surgical Strike Vicky Kaushal
Battalion 609 Vicky Ahuja
The Accidental Prime Minister (film) Anupam Kher
Why Cheat India Emraan Hashmi
Evening Shadows Mona Ambegaonkar
…
Hum Tumhare Hain Sanam Shah Rukh Khan
Aankhen (2002 film) Amitabh Bachchan
Saathiya (film) Vivek Oberoi
Company (film) Ajay Devgn
Awara Paagal Deewana Akshay Kumar
Name: lead, Length: 1500, dtype: object
[ ]: # astype
# Baseline: size of the Series object as reported by sys.getsizeof, taken while
# `vk` still has its original int64 dtype so it can be compared with the
# int16 version produced by astype in the next cell.
import sys
sys.getsizeof(vk)
[ ]: 3456
[ ]: sys.getsizeof(vk.astype('int16'))
[ ]: 2166
[ ]: # between
vk[vk.between(51,99)].size
[ ]: 43
[ ]:
[ ]: # clip
subs
[ ]: 0 48
1 57
2 40
3 43
4 44
…
360 231
361 226
52
362 155
363 144
364 172
Name: Subscribers gained, Length: 365, dtype: int64
[ ]: subs.clip(100,200)
[ ]: 0 100
1 100
2 100
3 100
4 100
…
360 200
361 200
362 155
363 144
364 172
Name: Subscribers gained, Length: 365, dtype: int64
[ ]: # drop_duplicates
temp = pd.Series([1,1,2,2,3,3,4,4])
temp
[ ]: 0 1
1 1
2 2
3 2
4 3
5 3
6 4
7 4
dtype: int64
[ ]: temp.drop_duplicates(keep='last')
[ ]: 1 1
3 2
5 3
7 4
dtype: int64
[ ]: temp.duplicated().sum()
[ ]: 4
[ ]: vk.duplicated().sum()
53
[ ]: 137
[ ]: movies.drop_duplicates()
[ ]: movie
Uri: The Surgical Strike Vicky Kaushal
Battalion 609 Vicky Ahuja
The Accidental Prime Minister (film) Anupam Kher
Why Cheat India Emraan Hashmi
Evening Shadows Mona Ambegaonkar
…
Sssshhh… Tanishaa Mukerji
Rules: Pyaar Ka Superhit Formula Tanuja
Right Here Right Now (film) Ankit
Talaash: The Hunt Begins… Rakhee Gulzar
The Pink Mirror Edwin Fernandes
Name: lead, Length: 566, dtype: object
[ ]: temp = pd.Series([1,2,3,np.nan,5,6,np.nan,8,np.nan,10])
temp
[ ]: 0 1.0
1 2.0
2 3.0
3 NaN
4 5.0
5 6.0
6 NaN
7 8.0
8 NaN
9 10.0
dtype: float64
[ ]: temp.size
[ ]: 10
[ ]: temp.count()
[ ]: 7
[ ]: # isnull
temp.isnull().sum()
[ ]: 3
[ ]:
54
[ ]: # dropna
temp.dropna()
[ ]: 0 1.0
1 2.0
2 3.0
4 5.0
5 6.0
7 8.0
9 10.0
dtype: float64
[ ]:
[ ]: # fillna
temp.fillna(temp.mean())
[ ]: 0 1.0
1 2.0
2 3.0
3 5.0
4 5.0
5 6.0
6 5.0
7 8.0
8 5.0
9 10.0
dtype: float64
[ ]:
[ ]: # isin
vk[(vk == 49) | (vk == 99)]
[ ]: match_no
82 99
86 49
Name: runs, dtype: int64
[ ]: vk[vk.isin([49,99])]
[ ]: match_no
82 99
86 49
Name: runs, dtype: int64
[ ]:
55
[ ]: # apply
movies
[ ]: movie
Uri: The Surgical Strike Vicky Kaushal
Battalion 609 Vicky Ahuja
The Accidental Prime Minister (film) Anupam Kher
Why Cheat India Emraan Hashmi
Evening Shadows Mona Ambegaonkar
…
Hum Tumhare Hain Sanam Shah Rukh Khan
Aankhen (2002 film) Amitabh Bachchan
Saathiya (film) Vivek Oberoi
Company (film) Ajay Devgn
Awara Paagal Deewana Akshay Kumar
Name: lead, Length: 1500, dtype: object
[ ]: movies.apply(lambda x:x.split()[0].upper())
[ ]: movie
Uri: The Surgical Strike VICKY
Battalion 609 VICKY
The Accidental Prime Minister (film) ANUPAM
Why Cheat India EMRAAN
Evening Shadows MONA
…
Hum Tumhare Hain Sanam SHAH
Aankhen (2002 film) AMITABH
Saathiya (film) VIVEK
Company (film) AJAY
Awara Paagal Deewana AKSHAY
Name: lead, Length: 1500, dtype: object
[ ]: subs
[ ]: 0 48
1 57
2 40
3 43
4 44
…
360 231
361 226
362 155
363 144
364 172
Name: Subscribers gained, Length: 365, dtype: int64
56
[ ]: subs.apply(lambda x:'good day' if x > subs.mean() else 'bad day')
[ ]: 0 bad day
1 bad day
2 bad day
3 bad day
4 bad day
…
360 good day
361 good day
362 good day
363 good day
364 good day
Name: Subscribers gained, Length: 365, dtype: object
[ ]: subs.mean()
[ ]: 135.64383561643837
[ ]: # copy
[ ]: vk
[ ]: match_no
1 1
2 23
3 13
4 12
5 1
..
211 0
212 20
213 73
214 25
215 7
Name: runs, Length: 215, dtype: int64
[ ]: new = vk.head()
[ ]: new
[ ]: match_no
1 1
2 23
3 13
4 12
5 1
57
Name: runs, dtype: int64
[ ]: new[1] = 1
# NOTE(review): `new` was taken from vk.head() without .copy(), so this
# assignment may propagate to `vk` or trigger a SettingWithCopyWarning,
# depending on the pandas version — confirm. The following cells repeat the
# assignment on vk.head().copy() and show that `vk` keeps its original value.
[ ]: new = vk.head().copy()
[ ]: new[1] = 100
[ ]: new
[ ]: match_no
1 100
2 23
3 13
4 12
5 1
Name: runs, dtype: int64
[ ]: vk
[ ]: match_no
1 1
2 23
3 13
4 12
5 1
..
211 0
212 20
213 73
214 25
215 7
Name: runs, Length: 215, dtype: int64
[ ]:
58
session-17-pandas-dataframes
May 3, 2024
[ ]: import numpy as np
import pandas as pd
[ ]: # using lists
student_data = [
[100,80,10],
[90,70,7],
[120,100,14],
[80,50,2]
]
pd.DataFrame(student_data,columns=['iq','marks','package'])
[ ]: iq marks package
0 100 80 10
1 90 70 7
2 120 100 14
3 80 50 2
[ ]: # using dicts
student_dict = {
'name':['nitish','ankit','rupesh','rishabh','amit','ankita'],
'iq':[100,90,120,80,0,0],
'marks':[80,70,100,50,0,0],
'package':[10,7,14,2,0,0]
}
students = pd.DataFrame(student_dict)
students.set_index('name',inplace=True)
students
1
[ ]: iq marks package
name
nitish 100 80 10
ankit 90 70 7
rupesh 120 100 14
rishabh 80 50 2
amit 0 0 0
ankita 0 0 0
[ ]: # using read_csv
movies = pd.read_csv('movies.csv')
movies
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
1 Battalion 609 tt9472208
2 The Accidental Prime Minister (film) tt6986710
3 Why Cheat India tt8108208
4 Evening Shadows tt6028796
… … …
1624 Tera Mera Saath Rahen tt0301250
1625 Yeh Zindagi Ka Safar tt0298607
1626 Sabse Bada Sukh tt0069204
1627 Daaka tt10833860
1628 Humsafar tt2403201
poster_path \
0 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
2 https://upload.wikimedia.org/wikipedia/en/thum…
3 https://upload.wikimedia.org/wikipedia/en/thum…
4 NaN
… …
1624 https://upload.wikimedia.org/wikipedia/en/2/2b…
1625 https://upload.wikimedia.org/wikipedia/en/thum…
1626 NaN
1627 https://upload.wikimedia.org/wikipedia/en/thum…
1628 https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link \
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
1 https://en.wikipedia.org/wiki/Battalion_609
2 https://en.wikipedia.org/wiki/The_Accidental_P…
3 https://en.wikipedia.org/wiki/Why_Cheat_India
4 https://en.wikipedia.org/wiki/Evening_Shadows
… …
1624 https://en.wikipedia.org/wiki/Tera_Mera_Saath_…
2
1625 https://en.wikipedia.org/wiki/Yeh_Zindagi_Ka_S…
1626 https://en.wikipedia.org/wiki/Sabse_Bada_Sukh
1627 https://en.wikipedia.org/wiki/Daaka
1628 https://en.wikipedia.org/wiki/Humsafar
story \
0 Divided over five chapters the film chronicle…
1 The story revolves around a cricket match betw…
2 Based on the memoir by Indian policy analyst S…
3 The movie focuses on existing malpractices in …
4 While gay rights and marriage equality has bee…
… …
1624 Raj Dixit lives with his younger brother Rahu…
1625 Hindi pop-star Sarina Devan lives a wealthy …
1626 Village born Lalloo re-locates to Bombay and …
1627 Shinda tries robbing a bank so he can be wealt…
1628 Sara and Ashar are childhood friends who share…
summary tagline \
0 Indian army special forces execute a covert op… NaN
1 The story of Battalion 609 revolves around a c… NaN
3
2 Explores Manmohan Singh's tenure as the Prime … NaN
3 The movie focuses on existing malpractices in … NaN
4 Under the 'Evening Shadows' truth often plays… NaN
… … …
1624 A man is torn between his handicapped brother … NaN
1625 A singer finds out she was adopted when the ed… NaN
1626 Village born Lalloo re-locates to Bombay and … NaN
1627 Shinda tries robbing a bank so he can be wealt… NaN
1628 Ashar and Khirad are forced to get married due… NaN
actors \
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga…
1 Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen…
2 Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S…
3 Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep …
4 Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva…
… …
1624 Ajay Devgn|Sonali Bendre|Namrata Shirodkar|Pre…
1625 Ameesha Patel|Jimmy Sheirgill|Nafisa Ali|Gulsh…
1626 Vijay Arora|Asrani|Rajni Bala|Kumud Damle|Utpa…
1627 Gippy Grewal|Zareen Khan|
1628 Fawad Khan|
wins_nominations release_date
0 4 wins 11 January 2019 (USA)
1 NaN 11 January 2019 (India)
2 NaN 11 January 2019 (USA)
3 NaN 18 January 2019 (USA)
4 17 wins & 1 nomination 11 January 2019 (India)
… … …
1624 NaN 7 November 2001 (India)
1625 NaN 16 November 2001 (India)
1626 NaN NaN
1627 NaN 1 November 2019 (USA)
1628 NaN TV Series (2011–2012)
[ ]: ipl = pd.read_csv('ipl-matches.csv')
ipl
4
.. … … … … …
945 335986 Kolkata 2008-04-20 2007/08 4
946 335985 Mumbai 2008-04-20 2007/08 5
947 335984 Delhi 2008-04-19 2007/08 3
948 335983 Chandigarh 2008-04-19 2007/08 2
949 335982 Bangalore 2008-04-18 2007/08 1
Team1 Team2 \
0 Rajasthan Royals Gujarat Titans
1 Royal Challengers Bangalore Rajasthan Royals
2 Royal Challengers Bangalore Lucknow Super Giants
3 Rajasthan Royals Gujarat Titans
4 Sunrisers Hyderabad Punjab Kings
.. … …
945 Kolkata Knight Riders Deccan Chargers
946 Mumbai Indians Royal Challengers Bangalore
947 Delhi Daredevils Rajasthan Royals
948 Kings XI Punjab Chennai Super Kings
949 Royal Challengers Bangalore Kolkata Knight Riders
Venue TossWinner \
0 Narendra Modi Stadium, Ahmedabad Rajasthan Royals
1 Narendra Modi Stadium, Ahmedabad Rajasthan Royals
2 Eden Gardens, Kolkata Lucknow Super Giants
3 Eden Gardens, Kolkata Gujarat Titans
4 Wankhede Stadium, Mumbai Sunrisers Hyderabad
.. … …
945 Eden Gardens Deccan Chargers
946 Wankhede Stadium Mumbai Indians
947 Feroz Shah Kotla Rajasthan Royals
948 Punjab Cricket Association Stadium, Mohali Chennai Super Kings
949 M Chinnaswamy Stadium Royal Challengers Bangalore
5
0 NaN HH Pandya ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D …
1 NaN JC Buttler ['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ …
2 NaN RM Patidar ['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ …
3 NaN DA Miller ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D …
4 NaN Harpreet Brar ['PK Garg', 'Abhishek Sharma', 'RA Tripathi', …
.. … … …
945 NaN DJ Hussey ['WP Saha', 'BB McCullum', 'RT Ponting', 'SC G…
946 NaN MV Boucher ['L Ronchi', 'ST Jayasuriya', 'DJ Thornely', '…
947 NaN MF Maharoof ['G Gambhir', 'V Sehwag', 'S Dhawan', 'MK Tiwa…
948 NaN MEK Hussey ['K Goel', 'JR Hopes', 'KC Sangakkara', 'Yuvra…
949 NaN BB McCullum ['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis…
Team2Players Umpire1 \
0 ['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan… CB Gaffaney
1 ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D … CB Gaffaney
2 ['Q de Kock', 'KL Rahul', 'M Vohra', 'DJ Hooda… J Madanagopal
3 ['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan… BNJ Oxenford
4 ['JM Bairstow', 'S Dhawan', 'M Shahrukh Khan',… AK Chaudhary
.. … …
945 ['AC Gilchrist', 'Y Venugopal Rao', 'VVS Laxma… BF Bowden
946 ['S Chanderpaul', 'R Dravid', 'LRPL Taylor', '… SJ Davis
947 ['T Kohli', 'YK Pathan', 'SR Watson', 'M Kaif'… Aleem Dar
948 ['PA Patel', 'ML Hayden', 'MEK Hussey', 'MS Dh… MR Benson
949 ['SC Ganguly', 'BB McCullum', 'RT Ponting', 'D… Asad Rauf
Umpire2
0 Nitin Menon
1 Nitin Menon
2 MA Gough
3 VK Sharma
4 NA Patwardhan
.. …
945 K Hariharan
946 DJ Harper
947 GA Pratapkumar
948 SL Shastri
949 RE Koertzen
[ ]: (950, 20)
6
[ ]: # dtypes
movies.dtypes
ipl.dtypes
[ ]: ID int64
City object
Date object
Season object
MatchNumber object
Team1 object
Team2 object
Venue object
TossWinner object
TossDecision object
SuperOver object
WinningTeam object
WonBy object
Margin float64
method object
Player_of_Match object
Team1Players object
Team2Players object
Umpire1 object
Umpire2 object
dtype: object
[ ]: # index
movies.index
ipl.index
[ ]: # columns
movies.columns
ipl.columns
students.columns
[ ]: # values
students.values
ipl.values
7
'CB Gaffaney', 'Nitin Menon'],
[1312199, 'Ahmedabad', '2022-05-27', …,
"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D Padikkal', 'SO Hetmyer',
'R Parag', 'R Ashwin', 'TA Boult', 'YS Chahal', 'M Prasidh Krishna', 'OC
McCoy']",
'CB Gaffaney', 'Nitin Menon'],
[1312198, 'Kolkata', '2022-05-25', …,
"['Q de Kock', 'KL Rahul', 'M Vohra', 'DJ Hooda', 'MP Stoinis', 'E
Lewis', 'KH Pandya', 'PVD Chameera', 'Mohsin Khan', 'Avesh Khan', 'Ravi
Bishnoi']",
'J Madanagopal', 'MA Gough'],
…,
[335984, 'Delhi', '2008-04-19', …,
"['T Kohli', 'YK Pathan', 'SR Watson', 'M Kaif', 'DS Lehmann', 'RA
Jadeja', 'M Rawat', 'D Salunkhe', 'SK Warne', 'SK Trivedi', 'MM Patel']",
'Aleem Dar', 'GA Pratapkumar'],
[335983, 'Chandigarh', '2008-04-19', …,
"['PA Patel', 'ML Hayden', 'MEK Hussey', 'MS Dhoni', 'SK Raina', 'JDP
Oram', 'S Badrinath', 'Joginder Sharma', 'P Amarnath', 'MS Gony', 'M
Muralitharan']",
'MR Benson', 'SL Shastri'],
[335982, 'Bangalore', '2008-04-18', …,
"['SC Ganguly', 'BB McCullum', 'RT Ponting', 'DJ Hussey', 'Mohammad
Hafeez', 'LR Shukla', 'WP Saha', 'AB Agarkar', 'AB Dinda', 'M Kartik', 'I
Sharma']",
'Asad Rauf', 'RE Koertzen']], dtype=object)
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
1 Battalion 609 tt9472208
poster_path \
0 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
wiki_link \
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
1 https://en.wikipedia.org/wiki/Battalion_609
8
0 2019 138 Action|Drama|War 8.4 35112
1 2019 131 War 4.1 73
story \
0 Divided over five chapters the film chronicle…
1 The story revolves around a cricket match betw…
summary tagline \
0 Indian army special forces execute a covert op… NaN
1 The story of Battalion 609 revolves around a c… NaN
actors wins_nominations \
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga… 4 wins
1 Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen… NaN
release_date
0 11 January 2019 (USA)
1 11 January 2019 (India)
[ ]: ipl.tail(2)
Team1 Team2 \
948 Kings XI Punjab Chennai Super Kings
949 Royal Challengers Bangalore Kolkata Knight Riders
Venue TossWinner \
948 Punjab Cricket Association Stadium, Mohali Chennai Super Kings
949 M Chinnaswamy Stadium Royal Challengers Bangalore
Player_of_Match Team1Players \
948 MEK Hussey ['K Goel', 'JR Hopes', 'KC Sangakkara', 'Yuvra…
949 BB McCullum ['R Dravid', 'W Jaffer', 'V Kohli', 'JH Kallis…
[ ]: # sample
ipl.sample(5)
9
[ ]: ID City Date Season MatchNumber \
336 1082628 Mumbai 2017-05-01 2017 38
98 1254107 Sharjah 2021-09-25 2021 37
890 392182 Cape Town 2009-04-18 2009 2
157 1216533 Abu Dhabi 2020-10-19 2020/21 37
386 980991 Chandigarh 2016-05-15 2016 46
Team1 Team2 \
336 Mumbai Indians Royal Challengers Bangalore
98 Punjab Kings Sunrisers Hyderabad
890 Royal Challengers Bangalore Rajasthan Royals
157 Chennai Super Kings Rajasthan Royals
386 Kings XI Punjab Sunrisers Hyderabad
Venue \
336 Wankhede Stadium
98 Sharjah Cricket Stadium
890 Newlands
157 Sheikh Zayed Stadium
386 Punjab Cricket Association IS Bindra Stadium, …
Team1Players \
336 ['PA Patel', 'JC Buttler', 'N Rana', 'RG Sharm…
98 ['KL Rahul', 'MA Agarwal', 'CH Gayle', 'AK Mar…
890 ['JD Ryder', 'RV Uthappa', 'LRPL Taylor', 'KP …
157 ['SM Curran', 'F du Plessis', 'SR Watson', 'AT…
386 ['HM Amla', 'M Vijay', 'WP Saha', 'Gurkeerat S…
Team2Players Umpire1 \
336 ['V Kohli', 'Mandeep Singh', 'TM Head', 'AB de… AK Chaudhary
98 ['DA Warner', 'WP Saha', 'KS Williamson', 'MK … RK Illingworth
890 ['GC Smith', 'SA Asnodkar', 'NK Patel', 'T Hen… BR Doctrove
157 ['BA Stokes', 'RV Uthappa', 'SV Samson', 'SPD … CB Gaffaney
10
386 ['DA Warner', 'S Dhawan', 'DJ Hooda', 'Yuvraj … KN Ananthapadmanabhan
Umpire2
336 CB Gaffaney
98 YC Barde
890 RB Tiffin
157 VK Sharma
386 M Erasmus
[ ]: # info
movies.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1629 entries, 0 to 1628
Data columns (total 18 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 title_x 1629 non-null object
1 imdb_id 1629 non-null object
2 poster_path 1526 non-null object
3 wiki_link 1629 non-null object
4 title_y 1629 non-null object
5 original_title 1629 non-null object
6 is_adult 1629 non-null int64
7 year_of_release 1629 non-null int64
8 runtime 1629 non-null object
9 genres 1629 non-null object
10 imdb_rating 1629 non-null float64
11 imdb_votes 1629 non-null int64
12 story 1609 non-null object
13 summary 1629 non-null object
14 tagline 557 non-null object
15 actors 1624 non-null object
16 wins_nominations 707 non-null object
17 release_date 1522 non-null object
dtypes: float64(1), int64(3), object(14)
memory usage: 229.2+ KB
[ ]: ipl.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 950 entries, 0 to 949
Data columns (total 20 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 950 non-null int64
1 City 899 non-null object
2 Date 950 non-null object
11
3 Season 950 non-null object
4 MatchNumber 950 non-null object
5 Team1 950 non-null object
6 Team2 950 non-null object
7 Venue 950 non-null object
8 TossWinner 950 non-null object
9 TossDecision 950 non-null object
10 SuperOver 946 non-null object
11 WinningTeam 946 non-null object
12 WonBy 950 non-null object
13 Margin 932 non-null float64
14 method 19 non-null object
15 Player_of_Match 946 non-null object
16 Team1Players 950 non-null object
17 Team2Players 950 non-null object
18 Umpire1 950 non-null object
19 Umpire2 950 non-null object
dtypes: float64(1), int64(1), object(18)
memory usage: 148.6+ KB
[ ]: # describe
movies.describe()
[ ]: ipl.describe()
[ ]: ID Margin
count 9.500000e+02 932.000000
mean 8.304852e+05 17.056867
std 3.375678e+05 21.633109
min 3.359820e+05 1.000000
25% 5.012612e+05 6.000000
50% 8.297380e+05 8.000000
75% 1.175372e+06 19.000000
max 1.312200e+06 146.000000
[ ]: # isnull
movies.isnull().sum()
12
[ ]: title_x 0
imdb_id 0
poster_path 103
wiki_link 0
title_y 0
original_title 0
is_adult 0
year_of_release 0
runtime 0
genres 0
imdb_rating 0
imdb_votes 0
story 20
summary 0
tagline 1072
actors 5
wins_nominations 922
release_date 107
dtype: int64
[ ]: # duplicated
movies.duplicated().sum()
[ ]: 0
[ ]: students.duplicated().sum()
[ ]: 1
[ ]: # rename
students
[ ]: iq percent lpa
0 100 80 10
1 90 70 7
2 120 100 14
3 80 50 2
4 0 0 0
5 0 0 0
[ ]: students.rename(columns={'marks':'percent','package':'lpa'},inplace=True)
13
[ ]: iq 390
percent 300
lpa 33
dtype: int64
[ ]: students.mean(axis=1)
[ ]: 0 63.333333
1 55.666667
2 78.000000
3 44.000000
4 0.000000
5 0.000000
dtype: float64
[ ]: students.var()
[ ]: iq 2710.0
percent 1760.0
lpa 33.5
dtype: float64
[ ]:
[ ]: # single cols
movies['title_x']
[ ]: ipl['Venue']
14
3 Eden Gardens, Kolkata
4 Wankhede Stadium, Mumbai
…
945 Eden Gardens
946 Wankhede Stadium
947 Feroz Shah Kotla
948 Punjab Cricket Association Stadium, Mohali
949 M Chinnaswamy Stadium
Name: Venue, Length: 950, dtype: object
[ ]: # multiple cols
movies[['year_of_release','actors','title_x']]
[ ]: year_of_release actors \
0 2019 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga…
1 2019 Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen…
2 2019 Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S…
3 2019 Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep …
4 2018 Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva…
… … …
1624 2001 Ajay Devgn|Sonali Bendre|Namrata Shirodkar|Pre…
1625 2001 Ameesha Patel|Jimmy Sheirgill|Nafisa Ali|Gulsh…
1626 2018 Vijay Arora|Asrani|Rajni Bala|Kumud Damle|Utpa…
1627 2019 Gippy Grewal|Zareen Khan|
1628 2011 Fawad Khan|
title_x
0 Uri: The Surgical Strike
1 Battalion 609
2 The Accidental Prime Minister (film)
3 Why Cheat India
4 Evening Shadows
… …
1624 Tera Mera Saath Rahen
1625 Yeh Zindagi Ka Safar
1626 Sabse Bada Sukh
1627 Daaka
1628 Humsafar
[ ]: ipl[['Team1','Team2','WinningTeam']]
[ ]: Team1 Team2 \
0 Rajasthan Royals Gujarat Titans
1 Royal Challengers Bangalore Rajasthan Royals
2 Royal Challengers Bangalore Lucknow Super Giants
15
3 Rajasthan Royals Gujarat Titans
4 Sunrisers Hyderabad Punjab Kings
.. … …
945 Kolkata Knight Riders Deccan Chargers
946 Mumbai Indians Royal Challengers Bangalore
947 Delhi Daredevils Rajasthan Royals
948 Kings XI Punjab Chennai Super Kings
949 Royal Challengers Bangalore Kolkata Knight Riders
WinningTeam
0 Gujarat Titans
1 Rajasthan Royals
2 Royal Challengers Bangalore
3 Gujarat Titans
4 Punjab Kings
.. …
945 Kolkata Knight Riders
946 Royal Challengers Bangalore
947 Delhi Daredevils
948 Chennai Super Kings
949 Kolkata Knight Riders
16
actors Geetika Vidya Ohlyan|Saloni Batra|Vikas Shukla…
wins_nominations 3 wins & 5 nominations
release_date 18 January 2019 (USA)
Name: 5, dtype: object
[ ]: # multiple rows
movies.iloc[:5]
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
1 Battalion 609 tt9472208
2 The Accidental Prime Minister (film) tt6986710
3 Why Cheat India tt8108208
4 Evening Shadows tt6028796
poster_path \
0 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
2 https://upload.wikimedia.org/wikipedia/en/thum…
3 https://upload.wikimedia.org/wikipedia/en/thum…
4 NaN
wiki_link \
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
1 https://en.wikipedia.org/wiki/Battalion_609
2 https://en.wikipedia.org/wiki/The_Accidental_P…
3 https://en.wikipedia.org/wiki/Why_Cheat_India
4 https://en.wikipedia.org/wiki/Evening_Shadows
story \
0 Divided over five chapters the film chronicle…
1 The story revolves around a cricket match betw…
2 Based on the memoir by Indian policy analyst S…
17
3 The movie focuses on existing malpractices in …
4 While gay rights and marriage equality has bee…
summary tagline \
0 Indian army special forces execute a covert op… NaN
1 The story of Battalion 609 revolves around a c… NaN
2 Explores Manmohan Singh's tenure as the Prime … NaN
3 The movie focuses on existing malpractices in … NaN
4 Under the 'Evening Shadows' truth often plays… NaN
actors wins_nominations \
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga… 4 wins
1 Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen… NaN
2 Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S… NaN
3 Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep … NaN
4 Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva… 17 wins & 1 nomination
release_date
0 11 January 2019 (USA)
1 11 January 2019 (India)
2 11 January 2019 (USA)
3 18 January 2019 (USA)
4 11 January 2019 (India)
[ ]: # fancy indexing
movies.iloc[[0,4,5]]
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
4 Evening Shadows tt6028796
5 Soni (film) tt6078866
poster_path \
0 https://upload.wikimedia.org/wikipedia/en/thum…
4 NaN
5 https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link \
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
4 https://en.wikipedia.org/wiki/Evening_Shadows
5 https://en.wikipedia.org/wiki/Soni_(film)
18
year_of_release runtime genres imdb_rating imdb_votes \
0 2019 138 Action|Drama|War 8.4 35112
4 2018 102 Drama 7.3 280
5 2018 97 Drama 7.2 1595
story \
0 Divided over five chapters the film chronicle…
4 While gay rights and marriage equality has bee…
5 Soni a young policewoman in Delhi and her su…
summary tagline \
0 Indian army special forces execute a covert op… NaN
4 Under the 'Evening Shadows' truth often plays… NaN
5 While fighting crimes against women in Delhi … NaN
actors wins_nominations \
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga… 4 wins
4 Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva… 17 wins & 1 nomination
5 Geetika Vidya Ohlyan|Saloni Batra|Vikas Shukla… 3 wins & 5 nominations
release_date
0 11 January 2019 (USA)
4 11 January 2019 (India)
5 18 January 2019 (USA)
[ ]: # loc
students
[ ]: iq marks package
name
nitish 100 80 10
ankit 90 70 7
rupesh 120 100 14
rishabh 80 50 2
amit 0 0 0
ankita 0 0 0
[ ]: students.loc['nitish']
[ ]: iq 100
marks 80
package 10
Name: nitish, dtype: int64
[ ]: students.loc['nitish':'rishabh':2]
19
[ ]: iq marks package
name
nitish 100 80 10
rupesh 120 100 14
[ ]: students.loc[['nitish','ankita','rupesh']]
[ ]: iq marks package
name
nitish 100 80 10
ankita 0 0 0
rupesh 120 100 14
[ ]: students.iloc[[0,3,4]]
[ ]: iq marks package
name
nitish 100 80 10
rishabh 80 50 2
amit 0 0 0
[ ]: movies.iloc[0:3,0:3]
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
1 Battalion 609 tt9472208
2 The Accidental Prime Minister (film) tt6986710
poster_path
0 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
2 https://upload.wikimedia.org/wikipedia/en/thum…
[ ]: movies.loc[0:2,'title_x':'poster_path']
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
1 Battalion 609 tt9472208
2 The Accidental Prime Minister (film) tt6986710
poster_path
0 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
2 https://upload.wikimedia.org/wikipedia/en/thum…
20
0.0.7 Filtering a DataFrame
[ ]: ipl.head(2)
Team1 Team2 \
0 Rajasthan Royals Gujarat Titans
1 Royal Challengers Bangalore Rajasthan Royals
Team1Players \
0 ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D …
1 ['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ …
ipl[ipl['MatchNumber'] == 'Final'][['Season','WinningTeam']]
[ ]: Season WinningTeam
0 2022 Gujarat Titans
74 2021 Chennai Super Kings
134 2020/21 Mumbai Indians
194 2019 Mumbai Indians
254 2018 Chennai Super Kings
314 2017 Mumbai Indians
373 2016 Sunrisers Hyderabad
433 2015 Mumbai Indians
492 2014 Kolkata Knight Riders
552 2013 Mumbai Indians
628 2012 Kolkata Knight Riders
21
702 2011 Chennai Super Kings
775 2009/10 Chennai Super Kings
835 2009 Deccan Chargers
892 2007/08 Rajasthan Royals
[ ]: 14
[ ]: 5
[ ]: 51.473684210526315
[ ]: 0
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
41 Family of Thakurganj tt8897986
84 Mukkabaaz tt7180544
106 Raazi tt7098658
110 Parmanu: The Story of Pokhran tt6826438
112 Bhavesh Joshi Superhero tt6129302
169 The Ghazi Attack tt6299040
219 Raag Desh (film) tt6080746
258 Irudhi Suttru tt5310090
280 Laal Rang tt5600714
297 Udta Punjab tt4434004
354 Dangal (film) tt5074352
362 Bajrangi Bhaijaan tt3863552
22
365 Baby (2015 Hindi film) tt3848892
393 Detective Byomkesh Bakshy! tt3447364
449 Titli (2014 film) tt3019620
536 Haider (film) tt3390572
589 Vishwaroopam tt2199711
625 Madras Cafe tt2855648
668 Paan Singh Tomar (film) tt1620933
693 Gangs of Wasseypur tt1954470
694 Gangs of Wasseypur – Part 2 tt1954470
982 Jodhaa Akbar tt0449994
1039 1971 (2007 film) tt0983990
1058 Black Friday (2007 film) tt0400234
1188 Omkara (2006 film) tt0488414
1293 Sarkar (2005 film) tt0432047
1294 Sehar tt0477857
1361 Lakshya (film) tt0323013
1432 Gangaajal tt0373856
1495 Company (film) tt0296574
1554 The Legend of Bhagat Singh tt0319736
1607 Nayak (2001 Hindi film) tt0291376
poster_path \
0 https://upload.wikimedia.org/wikipedia/en/thum…
41 https://upload.wikimedia.org/wikipedia/en/9/99…
84 https://upload.wikimedia.org/wikipedia/en/thum…
106 https://upload.wikimedia.org/wikipedia/en/thum…
110 https://upload.wikimedia.org/wikipedia/en/thum…
112 https://upload.wikimedia.org/wikipedia/en/thum…
169 https://upload.wikimedia.org/wikipedia/en/thum…
219 https://upload.wikimedia.org/wikipedia/en/thum…
258 https://upload.wikimedia.org/wikipedia/en/f/fe…
280 NaN
297 https://upload.wikimedia.org/wikipedia/en/thum…
354 https://upload.wikimedia.org/wikipedia/en/thum…
362 https://upload.wikimedia.org/wikipedia/en/thum…
365 https://upload.wikimedia.org/wikipedia/en/thum…
393 https://upload.wikimedia.org/wikipedia/en/thum…
449 https://upload.wikimedia.org/wikipedia/en/thum…
536 https://upload.wikimedia.org/wikipedia/en/thum…
589 https://upload.wikimedia.org/wikipedia/en/thum…
625 https://upload.wikimedia.org/wikipedia/en/thum…
668 https://upload.wikimedia.org/wikipedia/en/thum…
693 https://upload.wikimedia.org/wikipedia/en/thum…
694 https://upload.wikimedia.org/wikipedia/en/thum…
982 https://upload.wikimedia.org/wikipedia/en/thum…
1039 https://upload.wikimedia.org/wikipedia/en/thum…
1058 https://upload.wikimedia.org/wikipedia/en/5/58…
23
1188 https://upload.wikimedia.org/wikipedia/en/thum…
1293 https://upload.wikimedia.org/wikipedia/en/thum…
1294 https://upload.wikimedia.org/wikipedia/en/thum…
1361 https://upload.wikimedia.org/wikipedia/en/thum…
1432 https://upload.wikimedia.org/wikipedia/en/thum…
1495 https://upload.wikimedia.org/wikipedia/en/thum…
1554 https://upload.wikimedia.org/wikipedia/en/thum…
1607 https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link \
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
41 https://en.wikipedia.org/wiki/Family_of_Thakur…
84 https://en.wikipedia.org/wiki/Mukkabaaz
106 https://en.wikipedia.org/wiki/Raazi
110 https://en.wikipedia.org/wiki/Parmanu:_The_Sto…
112 https://en.wikipedia.org/wiki/Bhavesh_Joshi_Su…
169 https://en.wikipedia.org/wiki/The_Ghazi_Attack…
219 https://en.wikipedia.org/wiki/Raagdesh
258 https://en.wikipedia.org/wiki/Saala_Khadoos
280 https://en.wikipedia.org/wiki/Laal_Rang
297 https://en.wikipedia.org/wiki/Udta_Punjab
354 https://en.wikipedia.org/wiki/Dangal_(film)
362 https://en.wikipedia.org/wiki/Bajrangi_Bhaijaan
365 https://en.wikipedia.org/wiki/Baby_(2015_Hindi…
393 https://en.wikipedia.org/wiki/Detective_Byomke…
449 https://en.wikipedia.org/wiki/Titli_(2014_film)
536 https://en.wikipedia.org/wiki/Haider_(film)
589 https://en.wikipedia.org/wiki/Vishwaroop_(Hind…
625 https://en.wikipedia.org/wiki/Madras_Cafe
668 https://en.wikipedia.org/wiki/Paan_Singh_Tomar…
693 https://en.wikipedia.org/wiki/Gangs_of_Wasseypur
694 https://en.wikipedia.org/wiki/Gangs_of_Wasseyp…
982 https://en.wikipedia.org/wiki/Jodhaa_Akbar
1039 https://en.wikipedia.org/wiki/1971_(2007_film)
1058 https://en.wikipedia.org/wiki/Black_Friday_(20…
1188 https://en.wikipedia.org/wiki/Omkara_(2006_film)
1293 https://en.wikipedia.org/wiki/Sarkar_(2005_film)
1294 https://en.wikipedia.org/wiki/Sehar
1361 https://en.wikipedia.org/wiki/Lakshya_(film)
1432 https://en.wikipedia.org/wiki/Gangaajal
1495 https://en.wikipedia.org/wiki/Company_(film)
1554 https://en.wikipedia.org/wiki/The_Legend_of_Bh…
1607 https://en.wikipedia.org/wiki/Nayak_(2001_Hind…
24
84 The Brawler Mukkabaaz 0
106 Raazi Raazi 0
110 Parmanu: The Story of Pokhran Parmanu: The Story of Pokhran 0
112 Bhavesh Joshi Superhero Bhavesh Joshi Superhero 0
169 The Ghazi Attack The Ghazi Attack 0
219 Raag Desh Raag Desh 0
258 Saala Khadoos Saala Khadoos 0
280 Laal Rang Laal Rang 0
297 Udta Punjab Udta Punjab 0
354 Dangal Dangal 0
362 Bajrangi Bhaijaan Bajrangi Bhaijaan 0
365 Baby Baby 0
393 Detective Byomkesh Bakshy! Detective Byomkesh Bakshy! 0
449 Titli Titli 0
536 Haider Haider 0
589 Vishwaroopam Vishwaroopam 0
625 Madras Cafe Madras Cafe 0
668 Paan Singh Tomar Paan Singh Tomar 0
693 Gangs of Wasseypur Gangs of Wasseypur 0
694 Gangs of Wasseypur Gangs of Wasseypur 0
982 Jodhaa Akbar Jodhaa Akbar 0
1039 1971 1971 0
1058 Black Friday Black Friday 0
1188 Omkara Omkara 0
1293 Sarkar Sarkar 0
1294 Sehar Sehar 0
1361 Lakshya Lakshya 0
1432 Gangaajal Gangaajal 0
1495 Company Company 0
1554 The Legend of Bhagat Singh The Legend of Bhagat Singh 0
1607 Nayak: The Real Hero Nayak: The Real Hero 0
25
393 2015 139 Action|Mystery|Thriller 7.6
449 2014 116 Action|Drama|Thriller 7.6
536 2014 160 Action|Crime|Drama 8.1
589 2013 148 Action|Thriller 8.2
625 2013 130 Action|Drama|Thriller 7.7
668 2012 135 Action|Biography|Crime 8.2
693 2012 321 Action|Comedy|Crime 8.2
694 2012 321 Action|Comedy|Crime 8.2
982 2008 213 Action|Drama|History 7.6
1039 2007 160 Action|Drama|War 7.9
1058 2004 143 Action|Crime|Drama 8.5
1188 2006 155 Action|Crime|Drama 8.1
1293 2005 124 Action|Crime|Drama 7.6
1294 2005 125 Action|Crime|Drama 7.8
1361 2004 186 Action|Drama|Romance 7.9
1432 2003 157 Action|Crime|Drama 7.8
1495 2002 155 Action|Crime|Drama 8.0
1554 2002 155 Action|Biography|Drama 8.1
1607 2001 187 Action|Drama|Thriller 7.8
imdb_votes story \
0 35112 Divided over five chapters the film chronicle…
41 895 The film is based on small town of North India…
84 5434 A boxer (Shravan) belonging to upper cast tra…
106 20289 Hidayat Khan is the son of an Indian freedom f…
110 18292 Captain Ashwat Raina's efforts to turn India i…
112 4928 Bhavesh Joshi Superhero is an action film abou…
169 10332 In 1971 amid rising tensions between India an…
219 341 A period film based on the historic 1945 India…
258 10507 An under-fire boxing coach Prabhu is transfer…
280 3741 The friendship of two men is tested when thing…
297 23995 What on earth can a rock star a migrant labor…
354 131338 Biopic of Mahavir Singh Phogat who taught wre…
362 65877 A little mute girl from a Pakistani village ge…
365 49426 The country is perpetually under threat from t…
393 14674 CALCUTTA 1943 A WAR - A MYSTERY - and A DETECT…
449 3677 In the badlands of Delhi's dystopic underbelly…
536 46912 Vishal Bhardwaj's adaptation of William Shakes…
589 38016 Vishwanathan a Kathak dance teacher in New Yo…
625 21393 An Indian Intelligence agent (portrayed by Joh…
668 29994 Paan Singh Tomar is a Hindi-language film bas…
693 71636 Shahid Khan is exiled after impersonating the …
694 71636 Shahid Khan is exiled after impersonating the …
982 27541 Jodhaa Akbar is a sixteenth century love story…
1039 1121 Based on true facts the film revolves around …
1058 16761 A dramatic presentation of the bomb blasts tha…
1188 17594 Advocate Raghunath Mishra has arranged the mar…
26
1293 14694 Meet Subhash Nagre - a wealthy and influential…
1294 1861 At the tender age of 8 Ajay Kumar is traumatiz…
1361 18777 Karan is a lazy good-for-nothing who lives on …
1432 14295 An SP Amit Kumar who is given charge of Tezpur…
1495 13474 Mallik is a henchman of Aslam Bhai a Mumbai u…
1554 13455 Bhagat was born in British India during the ye…
1607 12522 Employed as a camera-man at a popular televisi…
summary \
0 Indian army special forces execute a covert op…
41 The film is based on small town of North India…
84 A boxer struggles to make his mark in the boxi…
106 A Kashmiri woman agrees to marry a Pakistani a…
110 Ashwat Raina and his teammates arrive in Pokhr…
112 The origin story of Bhavesh Joshi an Indian s…
169 A Pakistani submarine Ghazi plans to secretly…
219 A period film based on the historic 1945 India…
258 The story of a former boxer who quits boxing f…
280 The friendship of two men is tested when thing…
297 A story that revolves around drug abuse in the…
354 Former wrestler Mahavir Singh Phogat and his t…
362 An Indian man with a magnanimous heart takes a…
365 An elite counter-intelligence unit learns of a…
393 While investigating the disappearance of a che…
449 A Hindi feature film set in the lower depths o…
536 A young man returns to Kashmir after his fathe…
589 When a classical dancer's suspecting wife sets…
625 An Indian intelligence agent journeys to a war…
668 The story of Paan Singh Tomar an Indian athle…
693 A clash between Sultan and Shahid Khan leads t…
694 A clash between Sultan and Shahid Khan leads t…
982 A sixteenth century love story about a marriag…
1039 Based on true facts the film revolves around …
1058 Black Friday is a film about the investigation…
1188 A politically-minded enforcer's misguided trus…
1293 The authority of a man who runs a parallel go…
1294 Ajay Kumar the newly appointed honest SSP of …
1361 An aimless jobless irresponsible grown man j…
1432 An IPS officer motivates and leads a dysfuncti…
1495 A small-time gangster named Chandu teams up wi…
1554 The story of a young revolutionary who raised …
1607 A man accepts a challenge by the chief ministe…
tagline \
0 NaN
41 NaN
84 NaN
27
106 An incredible true story
110 1998| India: one secret operation| six Indians…
112 This year| justice will have a new name.
169 The war you did not know about
219 NaN
258 NaN
280 Every job good or bad| must be done with honesty.
297 NaN
354 You think our girls are any lesser than boys?
362 NaN
365 History Is Made By Those Who Give A Damn!
393 Expect The Unexpected
449 Daring| Desireable| Dangerous
536 NaN
589 NaN
625 NaN
668 NaN
693 NaN
694 NaN
982 NaN
1039 Honor the heroes…
1058 The story of the Bombay bomb blasts
1188 NaN
1293 'There are no Rights and Wrongs. Only Power' -…
1294 NaN
1361 It took him 24 years and 18000 feet to find hi…
1432 NaN
1495 A law & order enterprise
1554 NaN
1607 Fight the power
actors \
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga…
41 Jimmy Sheirgill|Mahie Gill|Nandish Singh|Prana…
84 Viineet Kumar|Jimmy Sheirgill|Zoya Hussain|Rav…
106 Alia Bhatt|Vicky Kaushal|Rajit Kapoor|Shishir …
110 John Abraham|Boman Irani|Diana Penty|Anuja Sat…
112 Harshvardhan Kapoor|Priyanshu Painyuli|Ashish …
169 Rana Daggubati|Kay Kay Menon|Atul Kulkarni|Om …
219 Kunal Kapoor|Amit Sadh|Mohit Marwah|Kenneth De…
258 Madhavan|Ritika Singh|Mumtaz Sorcar|Nassar|Rad…
280 Randeep Hooda|Akshay Oberoi|Rajniesh Duggall|P…
297 Shahid Kapoor|Alia Bhatt|Kareena Kapoor|Diljit…
354 Aamir Khan|Fatima Sana Shaikh|Sanya Malhotra|S…
362 Salman Khan|Harshaali Malhotra|Nawazuddin Sidd…
365 Akshay Kumar|Danny Denzongpa|Rana Daggubati|Ta…
393 Sushant Singh Rajput|Anand Tiwari|Neeraj Kabi|…
28
449 Nawazuddin Siddiqui|Niharika Singh|Anil George…
536 Tabu|Shahid Kapoor|Shraddha Kapoor|Kay Kay Men…
589 Kamal Haasan|Rahul Bose|Shekhar Kapur|Pooja Ku…
625 John Abraham|Nargis Fakhri|Raashi Khanna|Praka…
668 Irrfan Khan|
693 Manoj Bajpayee|Richa Chadha|Nawazuddin Siddiqu…
694 Manoj Bajpayee|Richa Chadha|Nawazuddin Siddiqu…
982 Hrithik Roshan|Aishwarya Rai Bachchan|Sonu Soo…
1039 Manoj Bajpayee|Ravi Kishan|Deepak Dobriyal|
1058 Kay Kay Menon|Pavan Malhotra|Aditya Srivastava…
1188 Ajay Devgn|Saif Ali Khan|Vivek Oberoi|Kareena …
1293 Amitabh Bachchan|Abhishek Bachchan|Kay Kay Men…
1294 Arshad Warsi|Pankaj Kapur|Mahima Chaudhry|Sush…
1361 Hrithik Roshan|Preity Zinta|Amitabh Bachchan|O…
1432 Ajay Devgn|Gracy Singh|Mohan Joshi|Yashpal Sha…
1495 Ajay Devgn|Mohanlal|Manisha Koirala|Seema Bisw…
1554 Ajay Devgn|Sushant Singh|D. Santosh|Akhilendra…
1607 Anil Kapoor|Rani Mukerji|Amrish Puri|Johnny Le…
wins_nominations release_date
0 4 wins 11 January 2019 (USA)
41 NaN 19 July 2019 (India)
84 3 wins & 6 nominations 12 January 2018 (USA)
106 21 wins & 26 nominations 11 May 2018 (USA)
110 NaN 25 May 2018 (USA)
112 2 nominations 1 June 2018 (USA)
169 1 win & 7 nominations 17 February 2017 (USA)
219 NaN 28 July 2017 (India)
258 9 wins & 2 nominations 29 January 2016 (USA)
280 NaN 22 April 2016 (India)
297 11 wins & 19 nominations 17 June 2016 (USA)
354 23 wins & 4 nominations 21 December 2016 (USA)
362 25 wins & 13 nominations 17 July 2015 (USA)
365 1 win 23 January 2015 (India)
393 NaN 3 April 2015 (USA)
449 4 wins & 5 nominations 20 June 2014 (USA)
536 28 wins & 24 nominations 2 October 2014 (USA)
589 5 wins & 11 nominations 25 January 2013 (India)
625 10 wins & 10 nominations 23 August 2013 (India)
668 10 wins & 11 nominations 2 March 2012 (USA)
693 12 wins & 43 nominations 2 August 2012 (Singapore)
694 12 wins & 43 nominations 2 August 2012 (Singapore)
982 32 wins & 21 nominations 15 February 2008 (USA)
1039 1 win 9 March 2007 (India)
1058 3 nominations 9 February 2007 (India)
1188 19 wins & 20 nominations 28 July 2006 (USA)
1293 2 wins & 10 nominations 1 July 2005 (India)
29
1294 NaN 29 July 2005 (India)
1361 4 wins & 10 nominations 18 June 2004 (USA)
1432 4 wins & 29 nominations 29 August 2003 (India)
1495 16 wins & 9 nominations 15 April 2002 (India)
1554 11 wins & 5 nominations 7 June 2002 (India)
1607 2 nominations 7 September 2001 (India)
[ ]: # write a function that can return the track record of 2 teams against each other
[ ]: # completely new
movies['Country'] = 'India'
movies.head()
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
1 Battalion 609 tt9472208
2 The Accidental Prime Minister (film) tt6986710
3 Why Cheat India tt8108208
4 Evening Shadows tt6028796
poster_path \
0 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
2 https://upload.wikimedia.org/wikipedia/en/thum…
3 https://upload.wikimedia.org/wikipedia/en/thum…
4 NaN
wiki_link \
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
1 https://en.wikipedia.org/wiki/Battalion_609
2 https://en.wikipedia.org/wiki/The_Accidental_P…
3 https://en.wikipedia.org/wiki/Why_Cheat_India
4 https://en.wikipedia.org/wiki/Evening_Shadows
30
2 2019 112 Biography|Drama 6.1 5549
3 2019 121 Crime|Drama 6.0 1891
4 2018 102 Drama 7.3 280
story \
0 Divided over five chapters the film chronicle…
1 The story revolves around a cricket match betw…
2 Based on the memoir by Indian policy analyst S…
3 The movie focuses on existing malpractices in …
4 While gay rights and marriage equality has bee…
summary tagline \
0 Indian army special forces execute a covert op… NaN
1 The story of Battalion 609 revolves around a c… NaN
2 Explores Manmohan Singh's tenure as the Prime … NaN
3 The movie focuses on existing malpractices in … NaN
4 Under the 'Evening Shadows' truth often plays… NaN
actors wins_nominations \
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga… 4 wins
1 Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen… NaN
2 Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S… NaN
3 Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep … NaN
4 Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva… 17 wins & 1 nomination
release_date Country
0 11 January 2019 (USA) India
1 11 January 2019 (India) India
2 11 January 2019 (USA) India
3 18 January 2019 (USA) India
4 11 January 2019 (India) India
[ ]: title_x imdb_id \
11 Gully Boy tt2395469
34 Yeh Hai India tt5525846
37 Article 15 (film) tt10324144
87 Aiyaary tt6774212
96 Raid (2018 film) tt7363076
poster_path \
11 https://upload.wikimedia.org/wikipedia/en/thum…
31
34 https://upload.wikimedia.org/wikipedia/en/thum…
37 https://upload.wikimedia.org/wikipedia/en/thum…
87 https://upload.wikimedia.org/wikipedia/en/thum…
96 https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link title_y \
11 https://en.wikipedia.org/wiki/Gully_Boy Gully Boy
34 https://en.wikipedia.org/wiki/Yeh_Hai_India Yeh Hai India
37 https://en.wikipedia.org/wiki/Article_15_(film) Article 15
87 https://en.wikipedia.org/wiki/Aiyaary Aiyaary
96 https://en.wikipedia.org/wiki/Raid_(2018_film) Raid
imdb_rating imdb_votes \
11 8.2 22440
34 5.7 169
37 8.3 13417
87 5.2 3538
96 7.4 13159
story \
11 Gully Boy is a film about a 22-year-old boy "M…
34 Yeh Hai India follows the story of a 25 years…
37 In the rural heartlands of India an upright p…
87 General Gurinder Singh comes with a proposal t…
96 Set in the 80s in Uttar Pradesh India Raid i…
summary \
11 A coming-of-age story based on the lives of st…
34 Yeh Hai India follows the story of a 25 years…
37 In the rural heartlands of India an upright p…
87 After finding out about an illegal arms deal …
96 A fearless income tax officer raids the mansio…
tagline \
11 Apna Time Aayega!
34 A Film for Every Indian
37 Farq Bahut Kar Liya| Ab Farq Laayenge.
87 The Ultimate Trickery
96 Heroes don't always come in uniform
32
actors wins_nominations \
11 Ranveer Singh|Alia Bhatt|Siddhant Chaturvedi|V… 6 wins & 3 nominations
34 Gavie Chahal|Mohan Agashe|Mohan Joshi|Lom Harsh| 2 wins & 1 nomination
37 Ayushmann Khurrana|Nassar|Manoj Pahwa|Kumud Mi… 1 win
87 Sidharth Malhotra|Manoj Bajpayee|Rakul Preet S… 1 nomination
96 Ajay Devgn|Saurabh Shukla|Ileana D'Cruz|Amit S… 2 wins & 3 nominations
[ ]: movies.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 298 entries, 11 to 1623
Data columns (total 19 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 title_x 298 non-null object
1 imdb_id 298 non-null object
2 poster_path 298 non-null object
3 wiki_link 298 non-null object
4 title_y 298 non-null object
5 original_title 298 non-null object
6 is_adult 298 non-null int64
7 year_of_release 298 non-null int64
8 runtime 298 non-null object
9 genres 298 non-null object
10 imdb_rating 298 non-null float64
11 imdb_votes 298 non-null int64
12 story 298 non-null object
13 summary 298 non-null object
14 tagline 298 non-null object
15 actors 298 non-null object
16 wins_nominations 298 non-null object
17 release_date 298 non-null object
18 Country 298 non-null object
dtypes: float64(1), int64(3), object(15)
memory usage: 46.6+ KB
33
0.0.9 Important DataFrame Functions
[ ]: # astype
ipl.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 950 entries, 0 to 949
Data columns (total 20 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 950 non-null int64
1 City 899 non-null object
2 Date 950 non-null object
3 Season 950 non-null object
4 MatchNumber 950 non-null object
5 Team1 950 non-null object
6 Team2 950 non-null object
7 Venue 950 non-null object
8 TossWinner 950 non-null object
9 TossDecision 950 non-null object
10 SuperOver 946 non-null object
11 WinningTeam 946 non-null object
12 WonBy 950 non-null object
13 Margin 932 non-null float64
14 method 19 non-null object
15 Player_of_Match 946 non-null object
16 Team1Players 950 non-null object
17 Team2Players 950 non-null object
18 Umpire1 950 non-null object
19 Umpire2 950 non-null object
dtypes: float64(1), int64(1), object(18)
memory usage: 148.6+ KB
[ ]: ipl['ID'] = ipl['ID'].astype('int32')
[ ]: ipl.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 950 entries, 0 to 949
Data columns (total 20 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 950 non-null int32
1 City 899 non-null object
2 Date 950 non-null object
3 Season 950 non-null object
4 MatchNumber 950 non-null object
5 Team1 950 non-null object
34
6 Team2 950 non-null object
7 Venue 950 non-null object
8 TossWinner 950 non-null object
9 TossDecision 950 non-null object
10 SuperOver 946 non-null object
11 WinningTeam 946 non-null object
12 WonBy 950 non-null object
13 Margin 932 non-null float64
14 method 19 non-null object
15 Player_of_Match 946 non-null object
16 Team1Players 950 non-null object
17 Team2Players 950 non-null object
18 Umpire1 950 non-null object
19 Umpire2 950 non-null object
dtypes: float64(1), int32(1), object(18)
memory usage: 144.9+ KB
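[ ]: # ipl['Season'] = ipl['Season'].astype('category')
# NOTE: Season is listed as category in the info() output below, so the line above appears to have been run before it was commented out.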
ipl['Team1'] = ipl['Team1'].astype('category')
ipl['Team2'] = ipl['Team2'].astype('category')
[ ]: ipl.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 950 entries, 0 to 949
Data columns (total 20 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 950 non-null int32
1 City 899 non-null object
2 Date 950 non-null object
3 Season 950 non-null category
4 MatchNumber 950 non-null object
5 Team1 950 non-null category
6 Team2 950 non-null category
7 Venue 950 non-null object
8 TossWinner 950 non-null object
9 TossDecision 950 non-null object
10 SuperOver 946 non-null object
11 WinningTeam 946 non-null object
12 WonBy 950 non-null object
13 Margin 932 non-null float64
14 method 19 non-null object
15 Player_of_Match 946 non-null object
16 Team1Players 950 non-null object
17 Team2Players 950 non-null object
18 Umpire1 950 non-null object
19 Umpire2 950 non-null object
35
dtypes: category(3), float64(1), int32(1), object(15)
memory usage: 127.4+ KB
[ ]:
[ ]: # value_counts
[ ]: # find which player has won most potm -> in finals and qualifiers
[ ]: # sort_values -> ascending -> na_position -> inplace -> multiple cols
36
session-18-dataframe-methods
May 3, 2024
[ ]: # value_counts
# sort_values
# rank
# sort index
# set index
# rename index -> rename
# reset index
# unique & nunique
# isnull/notnull/hasnans
# dropna
# fillna
# drop_duplicates
# drop
# apply
# isin
# corr
# nlargest -> nsmallest
# insert
# copy
[ ]: import numpy as np
import pandas as pd
[ ]: a = pd.Series([1,1,1,2,2,3])
a.value_counts()
[ ]: 1 3
2 2
3 1
dtype: int64
1
# Toy frame for the value_counts() demo, built column by column.
# Rows 3 and 4 are identical (iq=80, marks=70, package=14).
marks = pd.DataFrame(
    {
        'iq': [100, 90, 120, 80, 80],
        'marks': [80, 70, 100, 70, 70],
        'package': [10, 7, 14, 14, 14],
    }
)
marks
[ ]: iq marks package
0 100 80 10
1 90 70 7
2 120 100 14
3 80 70 14
4 80 70 14
[ ]: marks.value_counts()
[ ]: iq marks package
80 70 14 2
90 70 7 1
100 80 10 1
120 100 14 1
dtype: int64
[ ]: ipl = pd.read_csv('ipl-matches.csv')
ipl[~ipl['MatchNumber'].str.isdigit()]['Player_of_Match'].value_counts()
[ ]: KA Pollard 3
F du Plessis 3
SK Raina 3
A Kumble 2
MK Pandey 2
YK Pathan 2
M Vijay 2
JJ Bumrah 2
AB de Villiers 2
SR Watson 2
HH Pandya 1
Harbhajan Singh 1
A Nehra 1
V Sehwag 1
UT Yadav 1
MS Bisla 1
BJ Hodge 1
MEK Hussey 1
2
MS Dhoni 1
CH Gayle 1
MM Patel 1
DE Bollinger 1
AC Gilchrist 1
RG Sharma 1
DA Warner 1
MC Henriques 1
JC Buttler 1
RM Patidar 1
DA Miller 1
VR Iyer 1
SP Narine 1
RD Gaikwad 1
TA Boult 1
MP Stoinis 1
KS Williamson 1
RR Pant 1
SA Yadav 1
Rashid Khan 1
AD Russell 1
KH Pandya 1
KV Sharma 1
NM Coulter-Nile 1
Washington Sundar 1
BCJ Cutting 1
M Ntini 1
Name: Player_of_Match, dtype: int64
[ ]: # find which player has won most potm -> in finals and qualifiers
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f034efd49d0>
3
[ ]: # how many matches each team has played
# Total matches per team = appearances as Team1 + appearances as Team2.
# Series.add(..., fill_value=0) aligns the two counts by team name and treats
# a team missing from one side as 0; a plain `+` would yield NaN for it.
# The aligned sum is float, so cast back to int before sorting.
(
    ipl['Team2'].value_counts()
    .add(ipl['Team1'].value_counts(), fill_value=0)
    .astype(int)
    .sort_values(ascending=False)
)
[ ]: # sort_values(series and dataframe) -> ascending -> na_position -> inplace -> multiple cols
4
[ ]: x = pd.Series([12,14,1,56,89])
x
[ ]: 0 12
1 14
2 1
3 56
4 89
dtype: int64
[ ]: x.sort_values(ascending=False)
[ ]: 4 89
3 56
1 14
0 12
2 1
dtype: int64
[ ]: movies = pd.read_csv('movies.csv')
movies.head(4)
[ ]: title_x imdb_id \
0 Uri: The Surgical Strike tt8291224
1 Battalion 609 tt9472208
2 The Accidental Prime Minister (film) tt6986710
3 Why Cheat India tt8108208
poster_path \
0 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
2 https://upload.wikimedia.org/wikipedia/en/thum…
3 https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link \
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
1 https://en.wikipedia.org/wiki/Battalion_609
2 https://en.wikipedia.org/wiki/The_Accidental_P…
3 https://en.wikipedia.org/wiki/Why_Cheat_India
5
0 2019 138 Action|Drama|War 8.4 35112
1 2019 131 War 4.1 73
2 2019 112 Biography|Drama 6.1 5549
3 2019 121 Crime|Drama 6.0 1891
story \
0 Divided over five chapters the film chronicle…
1 The story revolves around a cricket match betw…
2 Based on the memoir by Indian policy analyst S…
3 The movie focuses on existing malpractices in …
summary tagline \
0 Indian army special forces execute a covert op… NaN
1 The story of Battalion 609 revolves around a c… NaN
2 Explores Manmohan Singh's tenure as the Prime … NaN
3 The movie focuses on existing malpractices in … NaN
actors wins_nominations \
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga… 4 wins
1 Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen… NaN
2 Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S… NaN
3 Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep … NaN
release_date
0 11 January 2019 (USA)
1 11 January 2019 (India)
2 11 January 2019 (USA)
3 18 January 2019 (USA)
[ ]: movies.sort_values('title_x',ascending=False)
[ ]: title_x imdb_id \
1623 Zubeidaa tt0255713
939 Zor Lagaa Ke…Haiya! tt1479857
756 Zokkomon tt1605790
670 Zindagi Tere Naam tt2164702
778 Zindagi Na Milegi Dobara tt1562872
… … …
1039 1971 (2007 film) tt0983990
723 1920: The Evil Returns tt2222550
287 1920: London tt5638500
1021 1920 (film) tt1301698
1498 16 December (film) tt0313844
poster_path \
1623 https://upload.wikimedia.org/wikipedia/en/thum…
939 https://upload.wikimedia.org/wikipedia/en/thum…
6
756 https://upload.wikimedia.org/wikipedia/en/thum…
670 https://upload.wikimedia.org/wikipedia/en/thum…
778 https://upload.wikimedia.org/wikipedia/en/thum…
… …
1039 https://upload.wikimedia.org/wikipedia/en/thum…
723 https://upload.wikimedia.org/wikipedia/en/e/e7…
287 https://upload.wikimedia.org/wikipedia/en/thum…
1021 https://upload.wikimedia.org/wikipedia/en/thum…
1498 https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link \
1623 https://en.wikipedia.org/wiki/Zubeidaa
939 https://en.wikipedia.org/wiki/Zor_Lagaa_Ke…H…
756 https://en.wikipedia.org/wiki/Zokkomon
670 https://en.wikipedia.org/wiki/Zindagi_Tere_Naam
778 https://en.wikipedia.org/wiki/Zindagi_Na_Mileg…
… …
1039 https://en.wikipedia.org/wiki/1971_(2007_film)
723 https://en.wikipedia.org/wiki/1920:_The_Evil_R…
287 https://en.wikipedia.org/wiki/1920_London
1021 https://en.wikipedia.org/wiki/1920_(film)
1498 https://en.wikipedia.org/wiki/16_December_(film)
7
1498 2002 158 Action|Thriller 6.9
imdb_votes story \
1623 1384 The film begins with Riyaz (Rajat Kapoor) Zub…
939 46 A tree narrates the story of four Mumbai-based…
756 274 After the passing of his parents in an acciden…
670 27 Mr. Singh an elderly gentleman relates to hi…
778 60826 Three friends decide to turn their fantasy vac…
… … …
1039 1121 Based on true facts the film revolves around …
723 1587 This story revolves around a famous poet who m…
287 1373 Shivangi (Meera Chopra) lives in London with h…
1021 2588 A devotee of Bhagwan Shri Hanuman Arjun Singh…
1498 1091 16 December 1971 was the day when India won t…
summary \
1623 Zubeidaa an aspiring Muslim actress marries …
939 Children build a tree-house to spy on a beggar…
756 An orphan is abused and abandoned believed to…
670 Mr. Singh an elderly gentleman relates to hi…
778 Three friends decide to turn their fantasy vac…
… …
1039 Based on true facts the film revolves around …
723 This story revolves around a famous poet who m…
287 After her husband is possessed by an evil spir…
1021 After forsaking his family and religion a hus…
1498 Indian intelligence agents race against time t…
tagline \
1623 The Story of a Princess
939 NaN
756 Betrayal. Friendship. Bravery.
670 NaN
778 NaN
… …
1039 Honor the heroes…
723 Possession is back
287 Fear strikes again
1021 A Love Made in Heaven…A Revenge Born in Hell…
1498 NaN
actors \
1623 Karisma Kapoor|Rekha|Manoj Bajpayee|Rajit Kapo…
939 Meghan Jadhav|Mithun Chakraborty|Riya Sen|Seem…
756 Darsheel Safary|Anupam Kher|Manjari Fadnnis|Ti…
670 Mithun Chakraborty|Ranjeeta Kaur|Priyanka Meht…
778 Hrithik Roshan|Farhan Akhtar|Abhay Deol|Katrin…
8
… …
1039 Manoj Bajpayee|Ravi Kishan|Deepak Dobriyal|
723 Vicky Ahuja|Tia Bajpai|Irma Jämhammar|Sharad K…
287 Sharman Joshi|Meera Chopra|Vishal Karwal|Suren…
1021 Rajniesh Duggall|Adah Sharma|Anjori Alagh|Raj …
1498 Danny Denzongpa|Gulshan Grover|Milind Soman|Di…
wins_nominations release_date
1623 3 wins & 13 nominations 19 January 2001 (India)
939 NaN NaN
756 NaN 22 April 2011 (India)
670 1 win 16 March 2012 (India)
778 30 wins & 22 nominations 15 July 2011 (India)
… … …
1039 1 win 9 March 2007 (India)
723 NaN 2 November 2012 (India)
287 NaN 6 May 2016 (USA)
1021 NaN 12 September 2008 (India)
1498 2 nominations 22 March 2002 (India)
[ ]:
[ ]: students = pd.DataFrame(
{
'name':['nitish','ankit','rupesh',np.nan,'mrityunjay',np.
↪nan,'rishabh',np.nan,'aditya',np.nan],
'college':['bit','iit','vit',np.nan,np.nan,'vlsi','ssit',np.nan,np.
↪nan,'git'],
'branch':['eee','it','cse',np.nan,'me','ce','civ','cse','bio',np.nan],
'cgpa':[6.66,8.25,6.41,np.nan,5.6,9.0,7.4,10,7.4,np.nan],
'package':[4,5,6,np.nan,6,7,8,9,np.nan,np.nan]
}
)
students
9
7 NaN NaN cse 10.00 9.0
8 aditya NaN bio 7.40 NaN
9 NaN git NaN NaN NaN
[ ]: students.sort_values('name',na_position='first',ascending=False,inplace=True)
[ ]: students
[ ]: movies.sort_values(['year_of_release','title_x'],ascending=[True,False])
[ ]: title_x imdb_id \
1623 Zubeidaa tt0255713
1625 Yeh Zindagi Ka Safar tt0298607
1622 Yeh Teraa Ghar Yeh Meraa Ghar tt0298606
1620 Yeh Raaste Hain Pyaar Ke tt0292740
1573 Yaadein (2001 film) tt0248617
… … …
37 Article 15 (film) tt10324144
46 Arjun Patiala tt7881524
10 Amavas tt8396186
26 Albert Pinto Ko Gussa Kyun Aata Hai? tt4355838
21 22 Yards tt9496212
poster_path \
1623 https://upload.wikimedia.org/wikipedia/en/thum…
1625 https://upload.wikimedia.org/wikipedia/en/thum…
1622 https://upload.wikimedia.org/wikipedia/en/thum…
1620 https://upload.wikimedia.org/wikipedia/en/thum…
1573 https://upload.wikimedia.org/wikipedia/en/thum…
… …
37 https://upload.wikimedia.org/wikipedia/en/thum…
46 https://upload.wikimedia.org/wikipedia/en/thum…
10 https://upload.wikimedia.org/wikipedia/en/thum…
26 https://upload.wikimedia.org/wikipedia/en/thum…
21 https://upload.wikimedia.org/wikipedia/en/thum…
10
wiki_link \
1623 https://en.wikipedia.org/wiki/Zubeidaa
1625 https://en.wikipedia.org/wiki/Yeh_Zindagi_Ka_S…
1622 https://en.wikipedia.org/wiki/Yeh_Teraa_Ghar_Y…
1620 https://en.wikipedia.org/wiki/Yeh_Raaste_Hain_…
1573 https://en.wikipedia.org/wiki/Yaadein_(2001_film)
… …
37 https://en.wikipedia.org/wiki/Article_15_(film)
46 https://en.wikipedia.org/wiki/Arjun_Patiala
10 https://en.wikipedia.org/wiki/Amavas
26 https://en.wikipedia.org/wiki/Albert_Pinto_Ko_…
21 https://en.wikipedia.org/wiki/22_Yards
title_y \
1623 Zubeidaa
1625 Yeh Zindagi Ka Safar
1622 Yeh Teraa Ghar Yeh Meraa Ghar
1620 Yeh Raaste Hain Pyaar Ke
1573 Yaadein…
… …
37 Article 15
46 Arjun Patiala
10 Amavas
26 Albert Pinto Ko Gussa Kyun Aata Hai?
21 22 Yards
11
37 Crime|Drama 8.3 13417
46 Action|Comedy 4.1 676
10 Horror|Thriller 2.8 235
26 Drama 4.8 56
21 Sport 5.3 124
story \
1623 The film begins with Riyaz (Rajat Kapoor) Zub…
1625 Hindi pop-star Sarina Devan lives a wealthy …
1622 In debt; Dayashankar Pandey is forced to go to…
1620 Two con artistes and car thieves Vicky (Ajay …
1573 Raj Singh Puri is best friends with L.K. Malho…
… …
37 In the rural heartlands of India an upright p…
46 Arjun Patiala(Diljit Dosanjh)has recently been…
10 Far away from the bustle of the city a young …
26 Albert leaves his house one morning without te…
21 A dramatic portrayal of a victorious tale of a…
summary \
1623 Zubeidaa an aspiring Muslim actress marries …
1625 A singer finds out she was adopted when the ed…
1622 In debt; Dayashankar Pandey is forced to go to…
1620 Two con artistes and car thieves Vicky (Ajay …
1573 Raj Singh Puri is best friends with L.K. Malho…
… …
37 In the rural heartlands of India an upright p…
46 This spoof comedy narrates the story of a cop …
10 The lives of a couple turn into a nightmare a…
26 Albert Pinto goes missing one day and his girl…
21 A dramatic portrayal of a victorious tale of a…
tagline \
1623 The Story of a Princess
1625 NaN
1622 NaN
1620 Love is a journey… not a destination
1573 memories to cherish…
… …
37 Farq Bahut Kar Liya| Ab Farq Laayenge.
46 NaN
10 NaN
26 NaN
21 NaN
actors \
1623 Karisma Kapoor|Rekha|Manoj Bajpayee|Rajit Kapo…
12
1625 Ameesha Patel|Jimmy Sheirgill|Nafisa Ali|Gulsh…
1622 Sunil Shetty|Mahima Chaudhry|Paresh Rawal|Saur…
1620 Ajay Devgn|Madhuri Dixit|Preity Zinta|Vikram G…
1573 Jackie Shroff|Hrithik Roshan|Kareena Kapoor|Am…
… …
37 Ayushmann Khurrana|Nassar|Manoj Pahwa|Kumud Mi…
46 Diljit Dosanjh|Kriti Sanon|Varun Sharma|Ronit …
10 Ali Asgar|Vivan Bhatena|Nargis Fakhri|Sachiin …
26 Manav Kaul|Nandita Das|
21 Barun Sobti|Rajit Kapur|Panchhi Bora|Kartikey …
wins_nominations release_date
1623 3 wins & 13 nominations 19 January 2001 (India)
1625 NaN 16 November 2001 (India)
1622 1 nomination 12 October 2001 (India)
1620 NaN 10 August 2001 (India)
1573 1 nomination 27 June 2001 (India)
… … …
37 1 win 28 June 2019 (USA)
46 NaN 26 July 2019 (USA)
10 NaN 8 February 2019 (India)
26 NaN 12 April 2019 (India)
21 NaN 15 March 2019 (India)
[ ]:
[ ]: # rank(series)
batsman = pd.read_csv('batsman_runs_ipl.csv')
batsman.head()
[ ]: batter batsman_run
0 A Ashish Reddy 280
1 A Badoni 161
2 A Chandila 4
3 A Chopra 53
4 A Choudhary 25
[ ]: batsman['batting_rank'] = batsman['batsman_run'].rank(ascending=False)
batsman.sort_values('batting_rank')
13
493 SK Raina 5536 5.0
.. … … …
512 SS Cottrell 0 594.0
466 S Kaushik 0 594.0
203 IC Pandey 0 594.0
467 S Ladda 0 594.0
468 S Lamichhane 0 594.0
[ ]: marks = {
'maths':67,
'english':57,
'science':89,
'hindi':100
}
marks_series = pd.Series(marks)
marks_series
[ ]: maths 67
english 57
science 89
hindi 100
dtype: int64
[ ]: marks_series.sort_index(ascending=False)
[ ]: science 89
maths 67
hindi 100
english 57
dtype: int64
[ ]: movies.sort_index(ascending=False)
[ ]: title_x imdb_id \
1628 Humsafar tt2403201
1627 Daaka tt10833860
1626 Sabse Bada Sukh tt0069204
1625 Yeh Zindagi Ka Safar tt0298607
1624 Tera Mera Saath Rahen tt0301250
… … …
4 Evening Shadows tt6028796
3 Why Cheat India tt8108208
14
2 The Accidental Prime Minister (film) tt6986710
1 Battalion 609 tt9472208
0 Uri: The Surgical Strike tt8291224
poster_path \
1628 https://upload.wikimedia.org/wikipedia/en/thum…
1627 https://upload.wikimedia.org/wikipedia/en/thum…
1626 NaN
1625 https://upload.wikimedia.org/wikipedia/en/thum…
1624 https://upload.wikimedia.org/wikipedia/en/2/2b…
… …
4 NaN
3 https://upload.wikimedia.org/wikipedia/en/thum…
2 https://upload.wikimedia.org/wikipedia/en/thum…
1 NaN
0 https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link \
1628 https://en.wikipedia.org/wiki/Humsafar
1627 https://en.wikipedia.org/wiki/Daaka
1626 https://en.wikipedia.org/wiki/Sabse_Bada_Sukh
1625 https://en.wikipedia.org/wiki/Yeh_Zindagi_Ka_S…
1624 https://en.wikipedia.org/wiki/Tera_Mera_Saath_…
… …
4 https://en.wikipedia.org/wiki/Evening_Shadows
3 https://en.wikipedia.org/wiki/Why_Cheat_India
2 https://en.wikipedia.org/wiki/The_Accidental_P…
1 https://en.wikipedia.org/wiki/Battalion_609
0 https://en.wikipedia.org/wiki/Uri:_The_Surgica…
15
1625 2001 146 Drama 3.0 133
1624 2001 148 Drama 4.9 278
… … … … … …
4 2018 102 Drama 7.3 280
3 2019 121 Crime|Drama 6.0 1891
2 2019 112 Biography|Drama 6.1 5549
1 2019 131 War 4.1 73
0 2019 138 Action|Drama|War 8.4 35112
story \
1628 Sara and Ashar are childhood friends who share…
1627 Shinda tries robbing a bank so he can be wealt…
1626 Village born Lalloo re-locates to Bombay and …
1625 Hindi pop-star Sarina Devan lives a wealthy …
1624 Raj Dixit lives with his younger brother Rahu…
… …
4 While gay rights and marriage equality has bee…
3 The movie focuses on existing malpractices in …
2 Based on the memoir by Indian policy analyst S…
1 The story revolves around a cricket match betw…
0 Divided over five chapters the film chronicle…
summary tagline \
1628 Ashar and Khirad are forced to get married due… NaN
1627 Shinda tries robbing a bank so he can be wealt… NaN
1626 Village born Lalloo re-locates to Bombay and … NaN
1625 A singer finds out she was adopted when the ed… NaN
1624 A man is torn between his handicapped brother … NaN
… … …
4 Under the 'Evening Shadows' truth often plays… NaN
3 The movie focuses on existing malpractices in … NaN
2 Explores Manmohan Singh's tenure as the Prime … NaN
1 The story of Battalion 609 revolves around a c… NaN
0 Indian army special forces execute a covert op… NaN
actors \
1628 Fawad Khan|
1627 Gippy Grewal|Zareen Khan|
1626 Vijay Arora|Asrani|Rajni Bala|Kumud Damle|Utpa…
1625 Ameesha Patel|Jimmy Sheirgill|Nafisa Ali|Gulsh…
1624 Ajay Devgn|Sonali Bendre|Namrata Shirodkar|Pre…
… …
4 Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva…
3 Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep …
2 Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S…
1 Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen…
0 Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga…
16
wins_nominations release_date
1628 NaN TV Series (2011–2012)
1627 NaN 1 November 2019 (USA)
1626 NaN NaN
1625 NaN 16 November 2001 (India)
1624 NaN 7 November 2001 (India)
… … …
4 17 wins & 1 nomination 11 January 2019 (India)
3 NaN 18 January 2019 (USA)
2 NaN 11 January 2019 (USA)
1 NaN 11 January 2019 (India)
0 4 wins 11 January 2019 (USA)
[ ]: batsman
[ ]: batsman_run batting_rank
batter
A Ashish Reddy 280 166.5
A Badoni 161 226.0
A Chandila 4 535.0
A Chopra 53 329.0
A Choudhary 25 402.5
… … …
Yash Dayal 0 594.0
Yashpal Singh 47 343.0
Younis Khan 3 547.5
Yuvraj Singh 2754 27.0
Z Khan 117 256.0
17
600 Yash Dayal 0 594.0
601 Yashpal Singh 47 343.0
602 Younis Khan 3 547.5
603 Yuvraj Singh 2754 27.0
604 Z Khan 117 256.0
[ ]: batsman
[ ]: batsman_run batting_rank
batter
A Ashish Reddy 280 166.5
A Badoni 161 226.0
A Chandila 4 535.0
A Chopra 53 329.0
A Choudhary 25 402.5
… … …
Yash Dayal 0 594.0
Yashpal Singh 47 343.0
Younis Khan 3 547.5
Yuvraj Singh 2754 27.0
Z Khan 117 256.0
[ ]: batter batsman_run
batting_rank
166.5 A Ashish Reddy 280
226.0 A Badoni 161
535.0 A Chandila 4
329.0 A Chopra 53
402.5 A Choudhary 25
… … …
594.0 Yash Dayal 0
343.0 Yashpal Singh 47
547.5 Younis Khan 3
27.0 Yuvraj Singh 2754
256.0 Z Khan 117
18
[ ]: index 0
0 maths 67
1 english 57
2 science 89
3 hindi 100
[ ]: movies.set_index('title_x',inplace=True)
[ ]: movies.rename(columns={'imdb_id':'imdb','poster_path':'link'},inplace=True)
[ ]: imdb \
title_x
Uri tt8291224
Battalion tt9472208
The Accidental Prime Minister (film) tt6986710
Why Cheat India tt8108208
Evening Shadows tt6028796
… …
Tera Mera Saath Rahen tt0301250
Yeh Zindagi Ka Safar tt0298607
Sabse Bada Sukh tt0069204
Daaka tt10833860
Humsafar tt2403201
link \
title_x
Uri
https://upload.wikimedia.org/wikipedia/en/thum…
Battalion
NaN
The Accidental Prime Minister (film)
https://upload.wikimedia.org/wikipedia/en/thum…
Why Cheat India
https://upload.wikimedia.org/wikipedia/en/thum…
Evening Shadows
NaN
…
…
Tera Mera Saath Rahen
https://upload.wikimedia.org/wikipedia/en/2/2b…
Yeh Zindagi Ka Safar
https://upload.wikimedia.org/wikipedia/en/thum…
19
Sabse Bada Sukh
NaN
Daaka
https://upload.wikimedia.org/wikipedia/en/thum…
Humsafar
https://upload.wikimedia.org/wikipedia/en/thum…
wiki_link \
title_x
Uri
https://en.wikipedia.org/wiki/Uri:_The_Surgica…
Battalion
https://en.wikipedia.org/wiki/Battalion_609
The Accidental Prime Minister (film)
https://en.wikipedia.org/wiki/The_Accidental_P…
Why Cheat India
https://en.wikipedia.org/wiki/Why_Cheat_India
Evening Shadows
https://en.wikipedia.org/wiki/Evening_Shadows
…
…
Tera Mera Saath Rahen
https://en.wikipedia.org/wiki/Tera_Mera_Saath_…
Yeh Zindagi Ka Safar
https://en.wikipedia.org/wiki/Yeh_Zindagi_Ka_S…
Sabse Bada Sukh
https://en.wikipedia.org/wiki/Sabse_Bada_Sukh
Daaka
https://en.wikipedia.org/wiki/Daaka
Humsafar
https://en.wikipedia.org/wiki/Humsafar
title_y \
title_x
Uri Uri: The Surgical Strike
Battalion Battalion 609
The Accidental Prime Minister (film) The Accidental Prime Minister
Why Cheat India Why Cheat India
Evening Shadows Evening Shadows
… …
Tera Mera Saath Rahen Tera Mera Saath Rahen
Yeh Zindagi Ka Safar Yeh Zindagi Ka Safar
Sabse Bada Sukh Sabse Bada Sukh
Daaka Daaka
Humsafar Humsafar
original_title is_adult \
20
title_x
Uri Uri: The Surgical Strike 0
Battalion Battalion 609 0
The Accidental Prime Minister (film) The Accidental Prime Minister 0
Why Cheat India Why Cheat India 0
Evening Shadows Evening Shadows 0
… … …
Tera Mera Saath Rahen Tera Mera Saath Rahen 0
Yeh Zindagi Ka Safar Yeh Zindagi Ka Safar 0
Sabse Bada Sukh Sabse Bada Sukh 0
Daaka Daaka 0
Humsafar Humsafar 0
year_of_release runtime \
title_x
Uri 2019 138
Battalion 2019 131
The Accidental Prime Minister (film) 2019 112
Why Cheat India 2019 121
Evening Shadows 2018 102
… … …
Tera Mera Saath Rahen 2001 148
Yeh Zindagi Ka Safar 2001 146
Sabse Bada Sukh 2018 \N
Daaka 2019 136
Humsafar 2011 35
genres imdb_rating \
title_x
Uri Action|Drama|War 8.4
Battalion War 4.1
The Accidental Prime Minister (film) Biography|Drama 6.1
Why Cheat India Crime|Drama 6.0
Evening Shadows Drama 7.3
… … …
Tera Mera Saath Rahen Drama 4.9
Yeh Zindagi Ka Safar Drama 3.0
Sabse Bada Sukh Comedy|Drama 6.1
Daaka Action 7.4
Humsafar Drama|Romance 9.0
imdb_votes \
title_x
Uri 35112
Battalion 73
The Accidental Prime Minister (film) 5549
Why Cheat India 1891
21
Evening Shadows 280
… …
Tera Mera Saath Rahen 278
Yeh Zindagi Ka Safar 133
Sabse Bada Sukh 13
Daaka 38
Humsafar 2968
story \
title_x
Uri Divided over five chapters the film
chronicle…
Battalion The story revolves around a cricket match
betw…
The Accidental Prime Minister (film) Based on the memoir by Indian policy
analyst S…
Why Cheat India The movie focuses on existing malpractices
in …
Evening Shadows While gay rights and marriage equality has
bee…
…
…
Tera Mera Saath Rahen Raj Dixit lives with his younger brother
Rahu…
Yeh Zindagi Ka Safar Hindi pop-star Sarina Devan lives a
wealthy …
Sabse Bada Sukh Village born Lalloo re-locates to Bombay
and …
Daaka Shinda tries robbing a bank so he can be
wealt…
Humsafar Sara and Ashar are childhood friends who
share…
summary \
title_x
Uri Indian army special forces execute a
covert op…
Battalion The story of Battalion 609 revolves around
a c…
The Accidental Prime Minister (film) Explores Manmohan Singh's tenure as the
Prime …
Why Cheat India The movie focuses on existing malpractices
in …
Evening Shadows Under the 'Evening Shadows' truth often
plays…
…
…
22
Tera Mera Saath Rahen A man is torn between his handicapped
brother …
Yeh Zindagi Ka Safar A singer finds out she was adopted when
the ed…
Sabse Bada Sukh Village born Lalloo re-locates to Bombay
and …
Daaka Shinda tries robbing a bank so he can be
wealt…
Humsafar Ashar and Khirad are forced to get married
due…
tagline \
title_x
Uri NaN
Battalion NaN
The Accidental Prime Minister (film) NaN
Why Cheat India NaN
Evening Shadows NaN
… …
Tera Mera Saath Rahen NaN
Yeh Zindagi Ka Safar NaN
Sabse Bada Sukh NaN
Daaka NaN
Humsafar NaN
actors \
title_x
Uri Vicky Kaushal|Paresh Rawal|Mohit
Raina|Yami Ga…
Battalion Vicky Ahuja|Shoaib Ibrahim|Shrikant
Kamat|Elen…
The Accidental Prime Minister (film) Anupam Kher|Akshaye Khanna|Aahana
Kumra|Atul S…
Why Cheat India Emraan Hashmi|Shreya
Dhanwanthary|Snighdadeep …
Evening Shadows Mona Ambegaonkar|Ananth Narayan
Mahadevan|Deva…
…
…
Tera Mera Saath Rahen Ajay Devgn|Sonali Bendre|Namrata
Shirodkar|Pre…
Yeh Zindagi Ka Safar Ameesha Patel|Jimmy Sheirgill|Nafisa
Ali|Gulsh…
Sabse Bada Sukh Vijay Arora|Asrani|Rajni Bala|Kumud
Damle|Utpa…
Daaka Gippy
Grewal|Zareen Khan|
23
Humsafar
Fawad Khan|
wins_nominations \
title_x
Uri 4 wins
Battalion NaN
The Accidental Prime Minister (film) NaN
Why Cheat India NaN
Evening Shadows 17 wins & 1 nomination
… …
Tera Mera Saath Rahen NaN
Yeh Zindagi Ka Safar NaN
Sabse Bada Sukh NaN
Daaka NaN
Humsafar NaN
release_date
title_x
Uri 11 January 2019 (USA)
Battalion 11 January 2019 (India)
The Accidental Prime Minister (film) 11 January 2019 (USA)
Why Cheat India 18 January 2019 (USA)
Evening Shadows 11 January 2019 (India)
… …
Tera Mera Saath Rahen 7 November 2001 (India)
Yeh Zindagi Ka Safar 16 November 2001 (India)
Sabse Bada Sukh NaN
Daaka 1 November 2019 (USA)
Humsafar TV Series (2011–2012)
[ ]: # unique(series)
temp = pd.Series([1,1,2,2,3,3,4,4,5,5,np.nan,np.nan])
print(temp)
0 1.0
1 1.0
2 2.0
3 2.0
4 3.0
5 3.0
6 4.0
7 4.0
8 5.0
9 5.0
24
10 NaN
11 NaN
dtype: float64
[ ]: len(temp.unique())
[ ]: 6
[ ]: temp.nunique()
[ ]: 5
[ ]: len(ipl['Season'].unique())
[ ]: 15
[ ]: # nunique(series + dataframe) -> does not count nan -> dropna parameter
ipl['Season'].nunique()
[ ]: 15
[ ]: # isnull(series + dataframe)
students['name'][students['name'].isnull()]
[ ]: 3 NaN
5 NaN
7 NaN
9 NaN
Name: name, dtype: object
[ ]: # notnull(series + dataframe)
students['name'][students['name'].notnull()]
[ ]: 0 nitish
1 ankit
2 rupesh
4 mrityunjay
6 rishabh
8 aditya
Name: name, dtype: object
[ ]: # hasnans(series)
students['name'].hasnans
[ ]: True
[ ]: students
25
[ ]: name college branch cgpa package
0 nitish bit eee 6.66 4.0
1 ankit iit it 8.25 5.0
2 rupesh vit cse 6.41 6.0
3 NaN NaN NaN NaN NaN
4 mrityunjay NaN me 5.60 6.0
5 NaN vlsi ce 9.00 7.0
6 rishabh ssit civ 7.40 8.0
7 NaN NaN cse 10.00 9.0
8 aditya NaN bio 7.40 NaN
9 NaN git NaN NaN NaN
[ ]: students.isnull()
[ ]: students.notnull()
[ ]: 0 nitish
1 ankit
2 rupesh
4 mrityunjay
26
6 rishabh
8 aditya
Name: name, dtype: object
[ ]: students
[ ]: students.dropna(how='any')
[ ]: students.dropna(how='all')
[ ]: students.dropna(subset=['name'])
27
8 aditya NaN bio 7.40 NaN
[ ]: students.dropna(subset=['name','college'])
[ ]: students.dropna(inplace=True)
[ ]: # fillna(series + dataframe)
students['name'].fillna('unknown')
[ ]: 0 nitish
1 ankit
2 rupesh
3 unknown
4 mrityunjay
5 unknown
6 rishabh
7 unknown
8 aditya
9 unknown
Name: name, dtype: object
[ ]: students
28
5 NaN vlsi ce 9.00 7.0
6 rishabh ssit civ 7.40 8.0
7 NaN NaN cse 10.00 9.0
8 aditya NaN bio 7.40 NaN
9 NaN git NaN NaN NaN
[ ]: students['package'].fillna(students['package'].mean())
[ ]: 0 4.000000
1 5.000000
2 6.000000
3 6.428571
4 6.000000
5 7.000000
6 8.000000
7 9.000000
8 6.428571
9 6.428571
Name: package, dtype: float64
[ ]: students['name'].fillna(method='bfill')
[ ]: 0 nitish
1 ankit
2 rupesh
3 mrityunjay
4 mrityunjay
5 rishabh
6 rishabh
7 aditya
8 aditya
9 NaN
Name: name, dtype: object
[ ]: temp = pd.Series([1,1,1,2,3,3,4,4])
temp.drop_duplicates()
[ ]: 0 1
3 2
4 3
6 4
dtype: int64
[ ]: marks.drop_duplicates(keep='last')
29
[ ]: iq marks package
0 100 80 10
1 90 70 7
2 120 100 14
4 80 70 14
Team1 Team2 \
0 Rajasthan Royals Gujarat Titans
1 Royal Challengers Bangalore Rajasthan Royals
2 Royal Challengers Bangalore Lucknow Super Giants
3 Rajasthan Royals Gujarat Titans
4 Sunrisers Hyderabad Punjab Kings
Team1Players \
0 ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D …
1 ['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ …
2 ['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ …
3 ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D …
4 ['PK Garg', 'Abhishek Sharma', 'RA Tripathi', …
Team2Players Umpire1 \
0 ['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan… CB Gaffaney
30
1 ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D … CB Gaffaney
2 ['Q de Kock', 'KL Rahul', 'M Vohra', 'DJ Hooda… J Madanagopal
3 ['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan… BNJ Oxenford
4 ['JM Bairstow', 'S Dhawan', 'M Shahrukh Khan',… AK Chaudhary
Umpire2 all_players
0 Nitin Menon ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D …
1 Nitin Menon ['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ …
2 MA Gough ['V Kohli', 'F du Plessis', 'RM Patidar', 'GJ …
3 VK Sharma ['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D …
4 NA Patwardhan ['PK Garg', 'Abhishek Sharma', 'RA Tripathi', …
[5 rows x 21 columns]
def did_kohli_play(players_list):
    """Return True if 'V Kohli' is found in ``players_list``.

    The check uses the ``in`` operator, so a list/tuple argument is tested
    for an element equal to 'V Kohli', while a string argument is tested
    for the substring 'V Kohli'.
    """
    return 'V Kohli' in players_list
[ ]: ipl['did_kohli_play'] = ipl['all_players'].apply(did_kohli_play)
ipl[(ipl['City'] == 'Delhi') & (ipl['did_kohli_play'] == True)].
↪drop_duplicates(subset=['City','did_kohli_play'],keep='first')
Team1Players \
208 ['PP Shaw', 'S Dhawan', 'SS Iyer', 'RR Pant', …
Team2Players Umpire1 \
208 ['PA Patel', 'V Kohli', 'AB de Villiers', 'S D… BNJ Oxenford
Umpire2 all_players \
208 KN Ananthapadmanabhan ['PP Shaw', 'S Dhawan', 'SS Iyer', 'RR Pant', …
did_kohli_play
208 True
[1 rows x 22 columns]
[ ]: students.drop_duplicates()
31
[ ]: # drop(series + dataframe)
temp = pd.Series([10,2,3,16,45,78,10])
temp
[ ]: 0 10
1 2
2 3
3 16
4 45
5 78
6 10
dtype: int64
[ ]: temp.drop(index=[0,6])
[ ]: 1 2
2 3
3 16
4 45
5 78
dtype: int64
[ ]: students
[ ]: students.drop(columns=['branch','cgpa'],inplace=True)
32
8 aditya NaN NaN
9 NaN git NaN
[ ]: students.set_index('name').drop(index=['nitish','aditya'])
[ ]: # apply(series + dataframe)
temp = pd.Series([10,20,30,40,50])
temp
[ ]: 0 10
1 20
2 30
3 40
4 50
dtype: int64
def sigmoid(value):
    """Return the logistic sigmoid of ``value``: 1 / (1 + e^(-value)).

    ``value`` may be a scalar or anything ``np.exp`` accepts; the result
    lies in the open interval (0, 1) and equals 0.5 at ``value == 0``.
    """
    # The parentheses around the denominator are essential: without them
    # ``1/1+np.exp(-value)`` is parsed as ``(1/1) + np.exp(-value)``, which is
    # always greater than 1 (e.g. 1.000045 for value=10) and is not a sigmoid.
    return 1 / (1 + np.exp(-value))
[ ]: temp.apply(sigmoid)
[ ]: 0 1.000045
1 1.000000
2 1.000000
3 1.000000
4 1.000000
dtype: float64
[ ]: points_df = pd.DataFrame(
{
'1st point':[(3,4),(-6,5),(0,0),(-10,1),(4,5)],
'2nd point':[(-3,4),(0,0),(2,2),(10,10),(1,1)]
}
)
33
points_df
def euclidean(row):
    """Return the Euclidean distance between a row's two points.

    ``row`` must support lookup by the keys '1st point' and '2nd point'
    (e.g. a DataFrame row passed by ``apply(..., axis=1)``); each value is
    a sequence of coordinates of the same length.
    """
    pt_A = row['1st point']
    pt_B = row['2nd point']
    # Square root of the summed squared coordinate differences. The original
    # cell stopped after reading the two points and so returned None.
    return sum((a - b) ** 2 for a, b in zip(pt_A, pt_B)) ** 0.5
[ ]: points_df['distance'] = points_df.apply(euclidean,axis=1)
points_df
[ ]: # isin(series)
[ ]:
[ ]: # corr
[ ]:
[ ]:
[ ]: # insert(dataframe)
[ ]:
[ ]: # copy(series + dataframe)
[ ]:
34
week-5-interview-questions-numpy
May 3, 2024
[ ]: # plotting 3D graphs
# meshgrids
a = np.linspace(-10,9,20)
b = np.linspace(-10,9,20)
b
[ ]: array([-10., -9., -8., -7., -6., -5., -4., -3., -2., -1., 0.,
1., 2., 3., 4., 5., 6., 7., 8., 9.])
[ ]: xx,yy = np.meshgrid(a,b)
yy
[ ]: array([[-10., -10., -10., -10., -10., -10., -10., -10., -10., -10., -10.,
-10., -10., -10., -10., -10., -10., -10., -10., -10.],
[ -9., -9., -9., -9., -9., -9., -9., -9., -9., -9., -9.,
-9., -9., -9., -9., -9., -9., -9., -9., -9.],
[ -8., -8., -8., -8., -8., -8., -8., -8., -8., -8., -8.,
-8., -8., -8., -8., -8., -8., -8., -8., -8.],
[ -7., -7., -7., -7., -7., -7., -7., -7., -7., -7., -7.,
-7., -7., -7., -7., -7., -7., -7., -7., -7.],
[ -6., -6., -6., -6., -6., -6., -6., -6., -6., -6., -6.,
-6., -6., -6., -6., -6., -6., -6., -6., -6.],
[ -5., -5., -5., -5., -5., -5., -5., -5., -5., -5., -5.,
-5., -5., -5., -5., -5., -5., -5., -5., -5.],
[ -4., -4., -4., -4., -4., -4., -4., -4., -4., -4., -4.,
-4., -4., -4., -4., -4., -4., -4., -4., -4.],
[ -3., -3., -3., -3., -3., -3., -3., -3., -3., -3., -3.,
-3., -3., -3., -3., -3., -3., -3., -3., -3.],
[ -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2.,
-2., -2., -2., -2., -2., -2., -2., -2., -2.],
[ -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., -1., -1.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
1
2., 2., 2., 2., 2., 2., 2., 2., 2.],
[ 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
3., 3., 3., 3., 3., 3., 3., 3., 3.],
[ 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
4., 4., 4., 4., 4., 4., 4., 4., 4.],
[ 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
5., 5., 5., 5., 5., 5., 5., 5., 5.],
[ 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.,
6., 6., 6., 6., 6., 6., 6., 6., 6.],
[ 7., 7., 7., 7., 7., 7., 7., 7., 7., 7., 7.,
7., 7., 7., 7., 7., 7., 7., 7., 7.],
[ 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.,
8., 8., 8., 8., 8., 8., 8., 8., 8.],
[ 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
9., 9., 9., 9., 9., 9., 9., 9., 9.]])
[ ]: <matplotlib.collections.PathCollection at 0x7f0ba86d3160>
def func(x, y):
    """Evaluate 3*ln(x) + 2*y element-wise.

    ``x`` and ``y`` may be scalars or NumPy arrays of compatible shape.
    Because ``np.log`` is used, negative ``x`` produces nan and ``x == 0``
    produces -inf (NumPy emits a RuntimeWarning in both cases).
    """
    return 3*np.log(x) + 2*y
zz = func(xx,yy)
zz
2
<ipython-input-34-78e1ed4898ab>:2: RuntimeWarning:
<ipython-input-34-78e1ed4898ab>:2: RuntimeWarning:
3
-3.92055846, -2.70416313, -1.84111692, -1.17168626,
-0.62472159, -0.16226955, 0.23832463, 0.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, -4. ,
-1.92055846, -0.70416313, 0.15888308, 0.82831374,
1.37527841, 1.83773045, 2.23832463, 2.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, -2. ,
0.07944154, 1.29583687, 2.15888308, 2.82831374,
3.37527841, 3.83773045, 4.23832463, 4.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 0. ,
2.07944154, 3.29583687, 4.15888308, 4.82831374,
5.37527841, 5.83773045, 6.23832463, 6.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 2. ,
4.07944154, 5.29583687, 6.15888308, 6.82831374,
7.37527841, 7.83773045, 8.23832463, 8.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 4. ,
6.07944154, 7.29583687, 8.15888308, 8.82831374,
9.37527841, 9.83773045, 10.23832463, 10.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 6. ,
8.07944154, 9.29583687, 10.15888308, 10.82831374,
11.37527841, 11.83773045, 12.23832463, 12.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 8. ,
10.07944154, 11.29583687, 12.15888308, 12.82831374,
13.37527841, 13.83773045, 14.23832463, 14.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 10. ,
12.07944154, 13.29583687, 14.15888308, 14.82831374,
15.37527841, 15.83773045, 16.23832463, 16.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 12. ,
14.07944154, 15.29583687, 16.15888308, 16.82831374,
17.37527841, 17.83773045, 18.23832463, 18.59167373],
4
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 14. ,
16.07944154, 17.29583687, 18.15888308, 18.82831374,
19.37527841, 19.83773045, 20.23832463, 20.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 16. ,
18.07944154, 19.29583687, 20.15888308, 20.82831374,
21.37527841, 21.83773045, 22.23832463, 22.59167373],
[ nan, nan, nan, nan,
nan, nan, nan, nan,
nan, nan, -inf, 18. ,
20.07944154, 21.29583687, 22.15888308, 22.82831374,
23.37527841, 23.83773045, 24.23832463, 24.59167373]])
[ ]: import plotly.express as px
import plotly.graph_objects as go
fig = px.scatter_3d()
fig.add_trace(go.Surface(x=xx,y=yy,z=zz))
fig.show()
[ ]:
[ ]: np.random.random((2,3,2))
[ ]: array([[[0.28969975, 0.30904037],
[0.02229412, 0.08411571],
[0.34225695, 0.87044578]],
[[0.3088764 , 0.55506361],
[0.95240073, 0.44318119],
[0.28857773, 0.17184448]]])
[ ]: np.random.seed(0)
np.random.randint(1,100,12).reshape(3,4)
5
[ ]: array([[45, 48, 65, 68],
[68, 10, 84, 22],
[37, 88, 71, 89]])
[ ]: np.random.seed(0)
np.random.randint(1,100,12).reshape(3,4)
[ ]: np.random.seed(0)
np.random.randint(1,100,12).reshape(3,4)
[ ]: a = np.array([12,41,33,67,89,100])
print(a)
[ 12 41 33 67 89 100]
[ ]: np.random.shuffle(a)
[ ]: a
[ ]: np.random.choice(a,3,replace=False)
[ ]: (1080, 1920, 3)
[ ]: # show image
plt.imshow(np.flip(img,axis=1))
6
[ ]: <matplotlib.image.AxesImage at 0x7f0b99c62a90>
[ ]: # flip
a = np.arange(6).reshape(2,3)
a
[ ]: array([[0, 1, 2],
[3, 4, 5]])
[ ]: np.flip(a)
[ ]: array([[5, 4, 3],
[2, 1, 0]])
[ ]: <matplotlib.image.AxesImage at 0x7f0b99c27460>
7
[ ]: # negative
plt.imshow(255 - img)
[ ]: <matplotlib.image.AxesImage at 0x7f0b99bfed30>
[ ]: # trim
plt.imshow(img[100:900,50:900,:])
8
[ ]: <matplotlib.image.AxesImage at 0x7f0b99bd7370>
[ ]: # plot histogram
plt.hist(img.flatten(),bins=255)
9
2993., 3039., 3626., 3502., 3197., 3267., 3431.,
3537., 3430., 3806., 3812., 3885., 3903., 4267.,
3932., 4034., 4504., 4495., 4400., 4588., 48076.,
4579., 4605., 4995., 4924., 4662., 5033., 4601.,
4708., 4690., 4840., 4866., 5068., 5442., 5411.,
6105., 5614., 5859., 6175., 5931., 6187., 6620.,
8808., 7224., 7613., 7644., 8179., 8892., 8996.,
9540., 9238., 10309., 9368., 62780., 9507., 10008.,
9692., 9651., 145488., 8852., 9088., 9845., 9887.,
9995., 41993., 10901., 11676., 12333., 11779., 11582.,
12097., 12558., 12718., 13179., 13679., 14015., 14429.,
15286., 16292., 15925., 45982., 15984., 15175., 16160.,
15732., 15708., 15245., 15241., 15082., 15365., 14679.,
13129., 11545., 10565., 9551., 8079., 6503., 5564.,
4465., 3726., 4960., 18087., 2445., 3282., 2036.,
1357., 1252., 1516., 134183., 133769., 133781., 1547.,
1361., 1394., 86033.]),
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.,
11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21.,
22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
33., 34., 35., 36., 37., 38., 39., 40., 41., 42., 43.,
44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54.,
55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65.,
66., 67., 68., 69., 70., 71., 72., 73., 74., 75., 76.,
77., 78., 79., 80., 81., 82., 83., 84., 85., 86., 87.,
88., 89., 90., 91., 92., 93., 94., 95., 96., 97., 98.,
99., 100., 101., 102., 103., 104., 105., 106., 107., 108., 109.,
110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120.,
121., 122., 123., 124., 125., 126., 127., 128., 129., 130., 131.,
132., 133., 134., 135., 136., 137., 138., 139., 140., 141., 142.,
143., 144., 145., 146., 147., 148., 149., 150., 151., 152., 153.,
154., 155., 156., 157., 158., 159., 160., 161., 162., 163., 164.,
165., 166., 167., 168., 169., 170., 171., 172., 173., 174., 175.,
176., 177., 178., 179., 180., 181., 182., 183., 184., 185., 186.,
187., 188., 189., 190., 191., 192., 193., 194., 195., 196., 197.,
198., 199., 200., 201., 202., 203., 204., 205., 206., 207., 208.,
209., 210., 211., 212., 213., 214., 215., 216., 217., 218., 219.,
220., 221., 222., 223., 224., 225., 226., 227., 228., 229., 230.,
231., 232., 233., 234., 235., 236., 237., 238., 239., 240., 241.,
242., 243., 244., 245., 246., 247., 248., 249., 250., 251., 252.,
253., 254., 255.]),
<a list of 255 Patch objects>)
10
[ ]: # More manipulations
# https://www.analyticsvidhya.com/blog/2021/05/
↪image-processing-using-numpy-with-practical-implementation-and-code/
[ ]: # structured arrays
a = np.array([1,'hello',True,1.5])
a
[ ]: a[0] / 100
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-91-4ea26a035019> in <module>
----> 1 a[0] / 100
[ ]: # name,iq,cgpa,placed
dt = np.dtype(
[
('name','U20'),
('iq',np.int32),
11
('cgpa',np.float64),
('placed','U20')
]
)
[ ]: dt
[ ]: stu = np.array(
[
('nitish',100,6.66,'Yes'),
('ankit',120,8.9,'Yes'),
('rahul',80,7.3,'No')
],dtype=dt
)
stu
[ ]: stu['placed']
[ ]: # remaining functions
# --> np.swapaxes
# --> np.random.uniform
# --> np.count_nonzero
# --> np.tile
# --> np.repeat
# --> np.allclose
12
Return : [ndarray]
[ ]: #Example
x = np.array([[1,2,3],[4,5,6]])
print(x)
print(x.shape)
print("Swapped")
x_swapped = np.swapaxes(x,0,1)
print(x_swapped)
print(x_swapped.shape)
[[1 2 3]
[4 5 6]]
(2, 3)
Swapped
[[1 4]
[2 5]
[3 6]]
(3, 2)
Note: It is not same as reshaping.
[ ]: x_reshaped = x.reshape(3,2)
print(x_reshaped)
[[1 2]
[3 4]
[5 6]]
### numpy.random.uniform(low=0.0, high=1.0, size=None) Draw samples
from a uniform distribution over the half-open range [low, high); high is not included.
https://numpy.org/doc/stable/reference/random/generated/numpy.random.uniform.html
Syntax : numpy.random.uniform(low, high, size=None)
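low -> lower bound of sample; default value is 0
high -> upper bound of sample; default value is 1.0
size -> shape of the desired sample. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn. If size is None (the default), a single value is returned when low and high are both scalars.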
13
[ ]: #Example:
import matplotlib.pyplot as plt
[ ]:
Parameters :
arr : [array_like] The array for which to count non-zeros.
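axis : [int or tuple, optional] Axis or tuple of axes along which to count non-zeros. Default is None, meaning that non-zeros are counted along a flattened version of arr.
keepdims : [bool, optional] If this is set to True, the axes that are counted are left in the result as dimensions with size one.
Return : [int or array of int] Number of non-zero values in the array along a given axis. Otherwise, the total number of non-zero values in the array is returned.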
14
[ ]: #Example
a = np.array([[0, 1, 7, 0],
[3, 0, 2, 19]])
print(np.count_nonzero(a))
print(np.count_nonzero(a, axis=0))
print(np.count_nonzero(a, axis=1))
5
[1 1 2 1]
[2 3]
###np.tile(A, reps) Construct an array by repeating A the number of times given by reps. If
reps has length d, the result will have dimension of max(d, A.ndim).
Parameters:
A: array_like
The input array.
reps: array_like
The number of repetitions of A along each axis.
Returns
c: ndarray
The tiled output array.
https://numpy.org/doc/stable/reference/generated/numpy.tile.html
[ ]: # np.tile - Example
a = np.array([0, 1, 2])
print(a)
print("Tiled")
print(np.tile(a, 2))
# reps is given as 2, so the whole array is repeated 2 times
[0 1 2]
Tiled
[0 1 2 0 1 2]
15
[ ]: array([[0, 1, 2, 0, 1, 2],
[0, 1, 2, 0, 1, 2]])
[ ]: array([[0, 1, 2, 0, 1, 2, 0, 1, 2],
[0, 1, 2, 0, 1, 2, 0, 1, 2]])
Returns:
repeated_array: ndarray
Output array which has the same shape as a, except along the given axis.
https://numpy.org/doc/stable/reference/generated/numpy.repeat.html
[ ]: x = np.array([[1,2],[3,4]])
print(x)
print(np.repeat(x, 2)) # Every element is getting repeted 2 times.
[[1 2]
[3 4]]
[1 1 2 2 3 3 4 4]
[ ]: print(x)
print(np.repeat(x, 3, axis=1)) # Alog axis-1 means rightward inside rows/ along␣
↪columns
[[1 2]
[3 4]]
[[1 1 1 2 2 2]
[3 3 3 4 4 4]]
16
[[1 2]
[1 2]
[1 2]
[3 4]
[3 4]
[3 4]]
0.0.1 np.allclose
Returns True if two arrays are element-wise equal within a tolerance.
The tolerance values are positive, typically very small numbers. The relative difference (rtol *
abs(b)) and the absolute difference atol are added together to compare against the absolute
difference between a and b.
If the following equation is element-wise True, then allclose returns True.
absolute(a - b) <= (atol + rtol * absolute(b))
Syntax : numpy.allclose(arr1, arr2, rtol, atol, equal_nan=False)
Parameters :
arr1 : [array_like] Input 1st array.
arr2 : [array_like] Input 2nd array.
rtol : [float] The relative tolerance parameter.
atol : [float] The absolute tolerance parameter.
equal_nan : [bool] Whether to compare NaN’s as equal.
If True, NaN’s in arr1 will be considered equal to NaN’s in arr2 in the output array.
Return : [bool] Returns True if the two arrays are equal within the given tolerance, otherwise it returns False.
https://numpy.org/doc/stable/reference/generated/numpy.allclose.html
https://www.geeksforgeeks.org/numpy-allclose-in-python/
[ ]: #np.allclose example
#Comparing -
a = np.array([1.1, 1.2, 1.0001])
b = np.array([1., 1.02, 1.001])
print(a)
print(b)
print(np.abs(a-b))
17
[ ]: print(np.allclose([1.0, np.nan], [1.0, np.nan])) # Nan will be taken as␣
↪different
False
True
[ ]:
18
Common Git Commands
Setup
Set the name and email attached to your commits and tags
git config --global user.name "CampusX"
Make a Change
Add a file to staging
git add <file>
Stage all files - . will add all files in the project folder to stage
git add .
Branches
List all local branches.
● Add -r flag to show all remote branches.
● Add -a flag for all branches.
git branch -r
git branch -a
Add a tag to current commit (often used for new version releases)
git tag <tag-name>
Merging
Merge branch a into branch b.
git checkout b
git merge a
IGNORING PATTERNS
Preventing unintentional staging or committing of files
Save a file named .gitignore containing the desired patterns, written either as direct string
matches or as wildcard globs.
Undoing Things
Move (&/or rename) a file & stage move -> mv command
git mv <existing_path> <new_path>
Remove a file from working directory & staging area, then stage the
removal – rm command
git rm <file>
git status
Show changes to unstaged files. For changes to staged files, add --cached option
git diff
Show all commit logs with indication of any paths that moved
git log --stat -M
Synchronizing / Updating
Add a remote repo
git remote add <alias> <url>
Rename a connection
git remote rename <old> <new>
Fetch the remote repo's copy of the current branch, then merge
git pull
Move (rebase) your local changes onto the top of new changes
made to the remote repo (for clean, linear history)
git pull --rebase <alias>
Upload to a branch
git push <alias> <branch>
EXAMPLE: Pushing new project to git repo
Project/
static
.idea
.gitignore
logs
app.py
data.db
requirements.txt
.gitignore contents
logs/
*.db
.idea
Order of commands :
● Enter git init.
● Enter git add . to add all the files
● Type git commit -m "initial commit".
● git branch -M main
● git remote add origin https://github.com/user-name/example.git
● git push -u origin main
All files in the Project directory will be pushed to the example repo, except those
matched by the patterns in the .gitignore file.
Happy Learning!!
session-19-groupby-objects
May 3, 2024
[ ]: import pandas as pd
import numpy as np
• Dataset : https://drive.google.com/drive/folders/1IiMIOGCv-giUV_rtF02sImgkaVuAQxkz?usp=share_link
[ ]: movies = pd.read_csv('/content/imdb-top-1000.csv')
[ ]: movies.head()
[ ]: genres = movies.groupby('Genre')
1
Family 10.606602 0.000000 137008.302816 3.048412e+08 16.970563
Fantasy 12.727922 0.141421 22179.111299 7.606861e+07 NaN
Film-Noir 4.000000 0.152753 54649.083277 7.048472e+07 1.527525
Horror 13.604812 0.311302 234883.508691 9.965017e+07 15.362291
Mystery 14.475423 0.310791 404621.915297 1.567524e+08 18.604435
Thriller NaN NaN NaN NaN NaN
Western 17.153717 0.420317 263489.554280 1.230626e+07 9.032349
[ ]: Genre
Drama 3.540997e+10
Action 3.263226e+10
Comedy 1.566387e+10
Name: Gross, dtype: float64
[ ]: movies.groupby('Genre')['Gross'].sum().sort_values(ascending=False).head(3)
[ ]: Genre
Drama 3.540997e+10
Action 3.263226e+10
Comedy 1.566387e+10
Name: Gross, dtype: float64
[ ]: Genre
Western 8.35
Name: IMDB_Rating, dtype: float64
[ ]: Director
Christopher Nolan 11578345
Name: No_of_Votes, dtype: int64
[ ]: Genre
Action 9.0
Adventure 8.6
2
Animation 8.6
Biography 8.9
Comedy 8.6
Crime 9.2
Drama 9.3
Family 7.8
Fantasy 8.1
Film-Noir 8.1
Horror 8.5
Mystery 8.4
Thriller 7.8
Western 8.8
Name: IMDB_Rating, dtype: float64
movies.groupby('Star1')['Series_Title'].count().sort_values(ascending=False)
[ ]: Star1
Tom Hanks 12
Robert De Niro 11
Clint Eastwood 10
Al Pacino 10
Leonardo DiCaprio 9
..
Glen Hansard 1
Giuseppe Battiston 1
Giulietta Masina 1
Gerardo Taracena 1
Ömer Faruk Sorak 1
Name: Series_Title, Length: 660, dtype: int64
[ ]: len(movies.groupby('Genre'))
[ ]: 14
3
[ ]: movies['Genre'].nunique()
[ ]: 14
[ ]: movies.groupby('Genre').size()
[ ]: Genre
Action 172
Adventure 72
Animation 82
Biography 88
Comedy 155
Crime 107
Drama 289
Family 2
Fantasy 2
Film-Noir 3
Horror 11
Mystery 12
Thriller 1
Western 4
dtype: int64
[ ]: genres = movies.groupby('Genre')
# genres.first()
# genres.last()
genres.nth(6)
[ ]: Series_Title Released_Year \
Genre
Action Star Wars: Episode V - The Empire Strikes Back 1980
Adventure North by Northwest 1959
Animation WALL·E 2008
Biography Braveheart 1995
Comedy The Great Dictator 1940
Crime Se7en 1995
Drama It's a Wonderful Life 1946
Horror Get Out 2017
Mystery Sleuth 1972
4
Crime 127 8.6 David Fincher Morgan Freeman
Drama 130 8.6 Frank Capra James Stewart
Horror 104 7.7 Jordan Peele Daniel Kaluuya
Mystery 138 8.0 Joseph L. Mankiewicz Laurence Olivier
[ ]: movies['Genre'].value_counts()
[ ]: Drama 289
Action 172
Comedy 155
Crime 107
Biography 88
Animation 82
Adventure 72
Mystery 12
Horror 11
Western 4
Film-Noir 3
Fantasy 2
Family 2
Thriller 1
Name: Genre, dtype: int64
[ ]: genres.get_group('Fantasy')
movies[movies['Genre'] == 'Fantasy']
5
Metascore
321 NaN
568 NaN
[ ]: genres.groups
[ ]: {'Action': [2, 5, 8, 10, 13, 14, 16, 29, 30, 31, 39, 42, 44, 55, 57, 59, 60, 63,
68, 72, 106, 109, 129, 130, 134, 140, 142, 144, 152, 155, 160, 161, 166, 168,
171, 172, 177, 181, 194, 201, 202, 216, 217, 223, 224, 236, 241, 262, 275, 294,
308, 320, 325, 326, 331, 337, 339, 340, 343, 345, 348, 351, 353, 356, 357, 362,
368, 369, 375, 376, 390, 410, 431, 436, 473, 477, 479, 482, 488, 493, 496, 502,
507, 511, 532, 535, 540, 543, 564, 569, 570, 573, 577, 582, 583, 602, 605, 608,
615, 623, …], 'Adventure': [21, 47, 93, 110, 114, 116, 118, 137, 178, 179,
191, 193, 209, 226, 231, 247, 267, 273, 281, 300, 301, 304, 306, 323, 329, 361,
366, 377, 402, 406, 415, 426, 458, 470, 497, 498, 506, 513, 514, 537, 549, 552,
553, 566, 576, 604, 609, 618, 638, 647, 675, 681, 686, 692, 711, 713, 739, 755,
781, 797, 798, 851, 873, 884, 912, 919, 947, 957, 964, 966, 984, 991],
'Animation': [23, 43, 46, 56, 58, 61, 66, 70, 101, 135, 146, 151, 158, 170, 197,
205, 211, 213, 219, 229, 230, 242, 245, 246, 270, 330, 332, 358, 367, 378, 386,
389, 394, 395, 399, 401, 405, 409, 469, 499, 510, 516, 518, 522, 578, 586, 592,
595, 596, 599, 633, 640, 643, 651, 665, 672, 694, 728, 740, 741, 744, 756, 758,
761, 771, 783, 796, 799, 822, 828, 843, 875, 891, 892, 902, 906, 920, 956, 971,
976, 986, 992], 'Biography': [7, 15, 18, 35, 38, 54, 102, 107, 131, 139, 147,
157, 159, 173, 176, 212, 215, 218, 228, 235, 243, 263, 276, 282, 290, 298, 317,
328, 338, 342, 346, 359, 360, 365, 372, 373, 385, 411, 416, 418, 424, 429, 484,
525, 536, 542, 545, 575, 579, 587, 600, 606, 614, 622, 632, 635, 644, 649, 650,
657, 671, 673, 684, 729, 748, 753, 757, 759, 766, 770, 779, 809, 810, 815, 820,
831, 849, 858, 877, 882, 897, 910, 915, 923, 940, 949, 952, 987], 'Comedy': [19,
26, 51, 52, 64, 78, 83, 95, 96, 112, 117, 120, 127, 128, 132, 153, 169, 183,
192, 204, 207, 208, 214, 221, 233, 238, 240, 250, 251, 252, 256, 261, 266, 277,
284, 311, 313, 316, 318, 322, 327, 374, 379, 381, 392, 396, 403, 413, 414, 417,
427, 435, 445, 446, 449, 455, 459, 460, 463, 464, 466, 471, 472, 475, 481, 490,
494, 500, 503, 509, 526, 528, 530, 531, 533, 538, 539, 541, 547, 557, 558, 562,
563, 565, 574, 591, 593, 594, 598, 613, 626, 630, 660, 662, 667, 679, 680, 683,
687, 701, …], 'Crime': [1, 3, 4, 6, 22, 25, 27, 28, 33, 37, 41, 71, 77, 79,
86, 87, 103, 108, 111, 113, 123, 125, 133, 136, 162, 163, 164, 165, 180, 186,
187, 189, 198, 222, 232, 239, 255, 257, 287, 288, 299, 305, 335, 363, 364, 380,
384, 397, 437, 438, 441, 442, 444, 450, 451, 465, 474, 480, 485, 487, 505, 512,
519, 520, 523, 527, 546, 556, 560, 584, 597, 603, 607, 611, 621, 639, 653, 664,
669, 676, 695, 708, 723, 762, 763, 767, 775, 791, 795, 802, 811, 823, 827, 833,
885, 895, 921, 922, 926, 938, …], 'Drama': [0, 9, 11, 17, 20, 24, 32, 34, 36,
40, 45, 50, 53, 62, 65, 67, 73, 74, 76, 80, 82, 84, 85, 88, 89, 90, 91, 92, 94,
97, 98, 99, 100, 104, 105, 121, 122, 124, 126, 138, 141, 143, 148, 149, 150,
154, 156, 167, 174, 175, 182, 184, 185, 188, 190, 195, 196, 199, 200, 203, 206,
210, 225, 227, 234, 237, 244, 248, 249, 253, 254, 258, 259, 260, 264, 265, 268,
269, 272, 274, 278, 279, 280, 283, 285, 286, 289, 291, 292, 293, 295, 296, 297,
302, 303, 307, 310, 312, 314, 315, …], 'Family': [688, 698], 'Fantasy': [321,
6
568], 'Film-Noir': [309, 456, 712], 'Horror': [49, 75, 271, 419, 544, 707, 724,
844, 876, 932, 948], 'Mystery': [69, 81, 119, 145, 220, 393, 420, 714, 829, 899,
959, 961], 'Thriller': [700], 'Western': [12, 48, 115, 691]}
[ ]: genres.describe()
[ ]: Runtime \
count mean std min 25% 50% 75% max
Genre
Action 172.0 129.046512 28.500706 45.0 110.75 127.5 143.25 321.0
Adventure 72.0 134.111111 33.317320 88.0 109.00 127.0 149.00 228.0
Animation 82.0 99.585366 14.530471 71.0 90.00 99.5 106.75 137.0
Biography 88.0 136.022727 25.514466 93.0 120.00 129.0 146.25 209.0
Comedy 155.0 112.129032 22.946213 68.0 96.00 106.0 124.50 188.0
Crime 107.0 126.392523 27.689231 80.0 106.50 122.0 141.50 229.0
Drama 289.0 124.737024 27.740490 64.0 105.00 121.0 137.00 242.0
Family 2.0 107.500000 10.606602 100.0 103.75 107.5 111.25 115.0
Fantasy 2.0 85.000000 12.727922 76.0 80.50 85.0 89.50 94.0
Film-Noir 3.0 104.000000 4.000000 100.0 102.00 104.0 106.00 108.0
Horror 11.0 102.090909 13.604812 71.0 98.00 103.0 109.00 122.0
Mystery 12.0 119.083333 14.475423 96.0 110.75 117.5 130.25 138.0
Thriller 1.0 108.000000 NaN 108.0 108.00 108.0 108.00 108.0
Western 4.0 148.250000 17.153717 132.0 134.25 148.0 162.00 165.0
7
Animation 81.093333 8.813646 61.0 75.00 82.0 87.50 96.0
Biography 76.240506 11.028187 48.0 70.50 76.0 84.50 97.0
Comedy 78.720000 11.829160 45.0 72.00 79.0 88.00 99.0
Crime 77.080460 13.099102 47.0 69.50 77.0 87.00 100.0
Drama 79.701245 12.744687 28.0 72.00 82.0 89.00 100.0
Family 79.000000 16.970563 67.0 73.00 79.0 85.00 91.0
Fantasy NaN NaN NaN NaN NaN NaN NaN
Film-Noir 95.666667 1.527525 94.0 95.00 96.0 96.50 97.0
Horror 80.000000 15.362291 46.0 77.50 87.0 88.50 97.0
Mystery 79.125000 18.604435 52.0 65.25 77.0 98.50 100.0
Thriller 81.000000 NaN 81.0 81.00 81.0 81.00 81.0
Western 78.250000 9.032349 69.0 72.75 77.0 82.50 90.0
[ ]: genres.sample(2,replace=True)
8
IMDB_Rating Director Star1 No_of_Votes \
944 7.6 Kinji Fukasaku Tatsuya Fujiwara 169091
625 7.8 Mel Gibson Gerardo Taracena 291018
991 7.6 Brian G. Hutton Clint Eastwood 45338
300 8.1 William Wyler Charlton Heston 219466
891 7.6 Brad Bird Craig T. Nelson 250057
389 8.0 Brad Bird Eli Marienthal 172083
536 7.9 Alan J. Pakula Dustin Hoffman 103031
635 7.8 James Mangold Joaquin Phoenix 234207
826 7.7 Joel Coen Ethan Coen 113240
732 7.7 Alfonso Gomez-Rejon Thomas Mann 123210
438 8.0 Orson Welles Charlton Heston 98431
222 8.1 Denis Villeneuve Hugh Jackman 601149
555 7.9 Fred Zinnemann Gary Cooper 97222
314 8.1 Victor Fleming George Cukor 290074
698 7.8 Mel Stuart Gene Wilder 178731
698 7.8 Mel Stuart Gene Wilder 178731
321 8.1 Robert Wiene Werner Krauss 57428
321 8.1 Robert Wiene Werner Krauss 57428
456 8.0 John Huston Humphrey Bogart 148928
456 8.0 John Huston Humphrey Bogart 148928
544 7.9 George A. Romero Duane Jones 116557
707 7.8 Jack Clayton Deborah Kerr 27007
393 8.0 Terry Gilliam Bruce Willis 578443
829 7.7 George Sluizer Bernard-Pierre Donnadieu 33982
700 7.8 Terence Young Audrey Hepburn 27733
700 7.8 Terence Young Audrey Hepburn 27733
12 8.8 Sergio Leone Clint Eastwood 688390
115 8.3 Sergio Leone Clint Eastwood 232772
Gross Metascore
944 195856489.0 81.0
625 50866635.0 68.0
991 1378435.0 50.0
300 74700000.0 90.0
891 608581744.0 80.0
389 23159305.0 85.0
536 70600000.0 84.0
635 119519402.0 72.0
826 6153939.0 69.0
732 6743776.0 74.0
438 2237659.0 99.0
222 61002302.0 70.0
555 9450000.0 89.0
314 198676459.0 97.0
698 4000000.0 67.0
698 4000000.0 67.0
9
321 337574718.0 NaN
321 337574718.0 NaN
456 2108060.0 96.0
456 2108060.0 96.0
544 89029.0 89.0
707 2616000.0 88.0
393 57141459.0 74.0
829 367916835.0 NaN
700 17550741.0 81.0
700 17550741.0 81.0
12 6100000.0 90.0
115 15000000.0 74.0
[ ]: genres.nunique()
10
Western 4 4 4
[ ]: # agg method
# passing dict
genres.agg(
{
'Runtime':'mean',
'IMDB_Rating':'mean',
'No_of_Votes':'sum',
'Gross':'sum',
'Metascore':'min'
}
)
[ ]: # passing list
genres.agg(['min','max','mean','sum'])
[ ]: Runtime IMDB_Rating \
min max mean sum min max mean sum
Genre
Action 45 321 129.046512 22196 7.6 9.0 7.949419 1367.3
Adventure 88 228 134.111111 9656 7.6 8.6 7.937500 571.5
Animation 71 137 99.585366 8166 7.6 8.6 7.930488 650.3
Biography 93 209 136.022727 11970 7.6 8.9 7.938636 698.6
Comedy 68 188 112.129032 17380 7.6 8.6 7.901290 1224.7
Crime 80 229 126.392523 13524 7.6 9.2 8.016822 857.8
Drama 64 242 124.737024 36049 7.6 9.3 7.957439 2299.7
Family 100 115 107.500000 215 7.8 7.8 7.800000 15.6
Fantasy 76 94 85.000000 170 7.9 8.1 8.000000 16.0
Film-Noir 100 108 104.000000 312 7.8 8.1 7.966667 23.9
11
Horror 71 122 102.090909 1123 7.6 8.5 7.909091 87.0
Mystery 96 138 119.083333 1429 7.6 8.4 7.975000 95.7
Thriller 108 108 108.000000 108 7.8 7.8 7.800000 7.8
Western 132 165 148.250000 593 7.8 8.8 8.350000 33.4
No_of_Votes Gross \
min max mean sum min
Genre
Action 25312 2303232 420246.581395 72282412 3296.0
Adventure 29999 1512360 313557.819444 22576163 61001.0
Animation 25229 999790 268032.073171 21978630 128985.0
Biography 27254 1213505 272805.045455 24006844 21877.0
Comedy 26337 939631 178195.658065 27620327 1305.0
Crime 27712 1826188 313398.271028 33533615 6013.0
Drama 25088 2343110 212343.612457 61367304 3600.0
Family 178731 372490 275610.500000 551221 4000000.0
Fantasy 57428 88794 73111.000000 146222 337574718.0
Film-Noir 59556 158731 122405.000000 367215 449191.0
Horror 27007 787806 340232.363636 3742556 89029.0
Mystery 33982 1129894 350250.333333 4203004 1035953.0
Thriller 27733 27733 27733.000000 27733 17550741.0
Western 65659 688390 322416.250000 1289665 5321508.0
Metascore \
max mean sum min max
Genre
Action 936662225.0 1.897224e+08 3.263226e+10 33.0 98.0
Adventure 874211619.0 1.319017e+08 9.496922e+09 41.0 100.0
Animation 873839108.0 1.784326e+08 1.463147e+10 61.0 96.0
Biography 753585104.0 9.404952e+07 8.276358e+09 48.0 97.0
Comedy 886752933.0 1.010572e+08 1.566387e+10 45.0 99.0
Crime 790482117.0 7.899656e+07 8.452632e+09 47.0 100.0
Drama 924558264.0 1.225259e+08 3.540997e+10 28.0 100.0
Family 435110554.0 2.195553e+08 4.391106e+08 67.0 91.0
Fantasy 445151978.0 3.913633e+08 7.827267e+08 NaN NaN
Film-Noir 123353292.0 4.197018e+07 1.259105e+08 94.0 97.0
Horror 298791505.0 9.405902e+07 1.034649e+09 46.0 97.0
Mystery 474203697.0 1.047014e+08 1.256417e+09 52.0 100.0
Thriller 17550741.0 1.755074e+07 1.755074e+07 81.0 81.0
Western 31800000.0 1.455538e+07 5.822151e+07 69.0 90.0
mean sum
Genre
Action 73.419580 10499.0
Adventure 78.437500 5020.0
Animation 81.093333 6082.0
12
Biography 76.240506 6023.0
Comedy 78.720000 9840.0
Crime 77.080460 6706.0
Drama 79.701245 19208.0
Family 79.000000 158.0
Fantasy NaN 0.0
Film-Noir 95.666667 287.0
Horror 80.000000 880.0
Mystery 79.125000 633.0
Thriller 81.000000 81.0
Western 78.250000 313.0
Metascore
min
Genre
Action 33.0
Adventure 41.0
Animation 61.0
13
Biography 48.0
Comedy 45.0
Crime 47.0
Drama 28.0
Family 67.0
Fantasy NaN
Film-Noir 94.0
Horror 46.0
Mystery 52.0
Thriller 81.0
Western 69.0
[ ]: # looping on groups
# Collect, for every genre, the row(s) holding that genre's highest IMDB rating.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside a loop re-copies the accumulated frame on every pass, so
# gather the per-genre pieces first and concatenate them once.
top_rated = [
    data[data['IMDB_Rating'] == data['IMDB_Rating'].max()]
    for _, data in genres
]
# Fall back to an empty frame with the right columns when there are no groups.
df = pd.concat(top_rated) if top_rated else pd.DataFrame(columns=movies.columns)
df
14
688 7.8 Steven Spielberg Henry Thomas 372490
698 7.8 Mel Stuart Gene Wilder 178731
321 8.1 Robert Wiene Werner Krauss 57428
309 8.1 Carol Reed Orson Welles 158731
49 8.5 Alfred Hitchcock Anthony Perkins 604211
69 8.4 Christopher Nolan Guy Pearce 1125712
81 8.4 Alfred Hitchcock James Stewart 444074
700 7.8 Terence Young Audrey Hepburn 27733
12 8.8 Sergio Leone Clint Eastwood 688390
Gross Metascore
2 534858444.0 84.0
21 188020017.0 74.0
23 10055859.0 96.0
7 96898818.0 94.0
19 53367844.0 96.0
26 57598247.0 59.0
1 134966411.0 100.0
0 28341469.0 80.0
688 435110554.0 91.0
698 4000000.0 67.0
321 337574718.0 NaN
309 449191.0 97.0
49 32000000.0 97.0
69 25544867.0 80.0
81 36764313.0 100.0
700 17550741.0 81.0
12 6100000.0 90.0
genres.apply(min)
15
Mystery Dark City 1938 96 Mystery
Thriller Wait Until Dark 1967 108 Thriller
Western Il buono, il brutto, il cattivo 1965 132 Western
def foo(group):
    """Count the titles in *group* that start with the letter 'A'.

    Intended for use with ``groupby(...).apply(foo)``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group; must contain a ``Series_Title`` string column.

    Returns
    -------
    int
        Number of rows whose ``Series_Title`` starts with an uppercase 'A'
        (the match is case-sensitive).
    """
    # startswith yields a boolean mask; summing it counts the True entries.
    return group['Series_Title'].str.startswith('A').sum()
[ ]: genres.apply(foo)
[ ]: Genre
Action 10
16
Adventure 2
Animation 2
Biography 9
Comedy 14
Crime 4
Drama 21
Family 0
Fantasy 0
Film-Noir 0
Horror 1
Mystery 0
Thriller 0
Western 0
dtype: int64
def rank_movie(group):
    """Return a copy of *group* with a ``genre_rank`` column added.

    The rank is computed on ``IMDB_Rating`` in descending order, so the
    highest-rated row gets rank 1.0. Ties share the average of the ranks they
    span (the pandas default), e.g. two rows tied for 2nd and 3rd both get 2.5.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group; must contain an ``IMDB_Rating`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra ``genre_rank`` column; *group* itself is
        left untouched.
    """
    # Use assign() rather than writing into `group`: mutating the object that
    # groupby.apply hands over is unsupported and can raise
    # SettingWithCopyWarning or give inconsistent results.
    return group.assign(genre_rank=group['IMDB_Rating'].rank(ascending=False))
[ ]: genres.apply(rank_movie)
17
999 Alfred Hitchcock Robert Donat 51853 302787539.0
Metascore genre_rank
0 80.0 1.0
1 100.0 1.0
2 84.0 1.0
3 90.0 2.5
4 96.0 2.5
.. … …
995 76.0 147.0
996 84.0 272.5
997 85.0 272.5
998 78.0 272.5
999 93.0 101.0
def normal(group):
    """Return a copy of *group* with a min-max normalised ``norm_rating`` column.

    ``norm_rating = (rating - min) / (max - min)``, computed over the
    ``IMDB_Rating`` values of the group, so the lowest rating maps to 0.0 and
    the highest to 1.0.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one group; must contain a numeric ``IMDB_Rating`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra ``norm_rating`` column; *group* itself is
        left untouched. When every rating in the group is identical (including
        single-row groups) the range is zero and ``norm_rating`` is NaN.
    """
    rating = group['IMDB_Rating']
    lowest = rating.min()
    spread = rating.max() - lowest
    # assign() avoids mutating the frame handed over by groupby.apply.
    return group.assign(norm_rating=(rating - lowest) / spread)
genres.apply(normal)
18
996 George Stevens Elizabeth Taylor 34075 195217415.0
997 Fred Zinnemann Burt Lancaster 43374 30500000.0
998 Alfred Hitchcock Tallulah Bankhead 26471 852142728.0
999 Alfred Hitchcock Robert Donat 51853 302787539.0
Metascore norm_rating
0 80.0 1.000
1 100.0 1.000
2 84.0 1.000
3 90.0 0.875
4 96.0 0.875
.. … …
995 76.0 0.000
996 84.0 0.000
997 85.0 0.000
998 78.0 0.000
999 93.0 0.000
[ ]: Director Star1
Akira Kurosawa Toshirô Mifune 2.999877e+09
Name: Gross, dtype: float64
19
[ ]: # agg on multiple groupby
duo.agg(['min','max','mean'])
[ ]: Runtime IMDB_Rating \
min max mean min max mean
Director Star1
Aamir Khan Amole Gupte 165 165 165.0 8.4 8.4 8.4
Aaron Sorkin Eddie Redmayne 129 129 129.0 7.8 7.8 7.8
Abdellatif Kechiche Léa Seydoux 180 180 180.0 7.7 7.7 7.7
Abhishek Chaubey Shahid Kapoor 148 148 148.0 7.8 7.8 7.8
Abhishek Kapoor Amit Sadh 130 130 130.0 7.7 7.7 7.7
… … … … … … …
Zaza Urushadze Lembit Ulfsak 87 87 87.0 8.2 8.2 8.2
Zoya Akhtar Hrithik Roshan 155 155 155.0 8.1 8.1 8.1
Vijay Varma 154 154 154.0 8.0 8.0 8.0
Çagan Irmak Çetin Tekindor 112 112 112.0 8.3 8.3 8.3
Ömer Faruk Sorak Cem Yilmaz 127 127 127.0 8.0 8.0 8.0
No_of_Votes Gross \
min max mean min
Director Star1
Aamir Khan Amole Gupte 168895 168895 168895.0 1223869.0
Aaron Sorkin Eddie Redmayne 89896 89896 89896.0 853090410.0
Abdellatif Kechiche Léa Seydoux 138741 138741 138741.0 2199675.0
Abhishek Chaubey Shahid Kapoor 27175 27175 27175.0 218428303.0
Abhishek Kapoor Amit Sadh 32628 32628 32628.0 1122527.0
… … … … …
Zaza Urushadze Lembit Ulfsak 40382 40382 40382.0 144501.0
Zoya Akhtar Hrithik Roshan 67927 67927 67927.0 3108485.0
Vijay Varma 31886 31886 31886.0 5566534.0
Çagan Irmak Çetin Tekindor 78925 78925 78925.0 461855363.0
Ömer Faruk Sorak Cem Yilmaz 56960 56960 56960.0 196206077.0
Metascore \
max mean min max
Director Star1
Aamir Khan Amole Gupte 1223869.0 1223869.0 NaN NaN
Aaron Sorkin Eddie Redmayne 853090410.0 853090410.0 77.0 77.0
Abdellatif Kechiche Léa Seydoux 2199675.0 2199675.0 89.0 89.0
Abhishek Chaubey Shahid Kapoor 218428303.0 218428303.0 NaN NaN
Abhishek Kapoor Amit Sadh 1122527.0 1122527.0 40.0 40.0
… … … … …
Zaza Urushadze Lembit Ulfsak 144501.0 144501.0 73.0 73.0
Zoya Akhtar Hrithik Roshan 3108485.0 3108485.0 NaN NaN
Vijay Varma 5566534.0 5566534.0 65.0 65.0
Çagan Irmak Çetin Tekindor 461855363.0 461855363.0 NaN NaN
Ömer Faruk Sorak Cem Yilmaz 196206077.0 196206077.0 NaN NaN
20
mean
Director Star1
Aamir Khan Amole Gupte NaN
Aaron Sorkin Eddie Redmayne 77.0
Abdellatif Kechiche Léa Seydoux 89.0
Abhishek Chaubey Shahid Kapoor NaN
Abhishek Kapoor Amit Sadh 40.0
… …
Zaza Urushadze Lembit Ulfsak 73.0
Zoya Akhtar Hrithik Roshan NaN
Vijay Varma 65.0
Çagan Irmak Çetin Tekindor NaN
Ömer Faruk Sorak Cem Yilmaz NaN
0.0.1 Exercise
[ ]: ipl = pd.read_csv('/content/deliveries.csv')
ipl.head()
21
2 4 NaN NaN NaN
3 0 NaN NaN NaN
4 2 NaN NaN NaN
[5 rows x 21 columns]
[ ]: ipl.shape
[ ]: (179078, 21)
[ ]: batsman
V Kohli 5434
SK Raina 5415
RG Sharma 4914
DA Warner 4741
S Dhawan 4632
CH Gayle 4560
MS Dhoni 4477
RV Uthappa 4446
AB de Villiers 4428
G Gambhir 4223
Name: batsman_runs, dtype: int64
six.groupby('batsman')['batsman'].count().sort_values(ascending=False).head(1).
↪index[0]
[ ]: 'CH Gayle'
[ ]: # find batsman with most number of 4's and 6's in last 5 overs
temp_df = ipl[ipl['over'] > 15]
temp_df = temp_df[(temp_df['batsman_runs'] == 4) | (temp_df['batsman_runs'] ==␣
↪6)]
temp_df.groupby('batsman')['batsman'].count().sort_values(ascending=False).
↪head(1).index[0]
[ ]: 'MS Dhoni'
22
temp_df.groupby('bowling_team')['batsman_runs'].sum().reset_index()
[ ]: bowling_team batsman_runs
0 Chennai Super Kings 749
1 Deccan Chargers 306
2 Delhi Capitals 66
3 Delhi Daredevils 763
4 Gujarat Lions 283
5 Kings XI Punjab 636
6 Kochi Tuskers Kerala 50
7 Kolkata Knight Riders 675
8 Mumbai Indians 628
9 Pune Warriors 128
10 Rajasthan Royals 370
11 Rising Pune Supergiant 83
12 Rising Pune Supergiants 188
13 Sunrisers Hyderabad 509
[ ]: # Create a function that can return the highest score of any batsman
temp_df = ipl[ipl['batsman'] == 'V Kohli']
temp_df.groupby('match_id')['batsman_runs'].sum().sort_values(ascending=False).
↪head(1).values[0]
[ ]: 113
def highest(batsman, data=None):
    """Return the highest single-match score of *batsman*.

    Parameters
    ----------
    batsman : str
        Name to look up in the ``batsman`` column.
    data : pandas.DataFrame, optional
        Ball-by-ball deliveries with ``batsman``, ``match_id`` and
        ``batsman_runs`` columns. Defaults to the notebook-level ``ipl`` frame.

    Returns
    -------
    int
        The largest per-match total of ``batsman_runs`` for the batsman.

    Raises
    ------
    IndexError
        If the batsman has no deliveries in *data*.
    """
    if data is None:
        data = ipl
    deliveries = data[data['batsman'] == batsman]
    if deliveries.empty:
        # The original chain failed here with an opaque "index 0 is out of
        # bounds" IndexError; keep the exception type but explain the cause.
        raise IndexError(f"no deliveries found for batsman {batsman!r}")
    # Total the runs per match, then take the best match directly instead of
    # sorting the whole series just to read its first element.
    return deliveries.groupby('match_id')['batsman_runs'].sum().max()
[ ]: highest('DA Warner')
[ ]: 126
[ ]:
23
session-20-merging
May 3, 2024
[ ]: import pandas as pd
import numpy as np
• Dataset : https://drive.google.com/drive/folders/1tE0LxbzsVX70y8Br_VxiZas288ODDBup?usp=share_link
[ ]: courses = pd.read_csv('/content/courses.csv')
students = pd.read_csv('/content/students.csv')
nov = pd.read_csv('/content/reg-month1.csv')
dec = pd.read_csv('/content/reg-month2.csv')
matches = pd.read_csv('/content/matches.csv')
delivery = pd.read_csv('/content/deliveries.csv')
[ ]: dec
[ ]: student_id course_id
0 3 5
1 16 7
2 12 10
3 12 1
4 14 9
5 7 7
6 7 2
7 16 3
8 17 10
9 11 8
10 14 6
11 12 5
12 12 7
13 18 8
14 1 10
15 1 9
16 2 5
17 7 6
18 22 5
19 22 6
20 23 9
21 23 5
1
22 14 4
23 14 1
24 11 10
25 42 9
26 50 8
27 38 1
[ ]: # pd.concat
# df.concat
# ignore_index
# df.append
# multiindex -> fetch using loc
# concat dataframes horizontally
[ ]: regs = pd.concat([nov,dec],ignore_index=True)
regs
[ ]: student_id course_id
0 23 1
1 15 5
2 18 6
3 23 4
4 16 9
5 18 1
6 1 1
7 7 8
8 22 3
9 15 1
10 19 4
11 1 6
12 7 10
13 11 7
14 13 3
15 24 4
16 21 1
17 16 5
18 23 3
19 17 7
20 23 6
21 25 1
22 19 2
23 25 10
24 3 3
25 3 5
26 16 7
27 12 10
28 12 1
2
29 14 9
30 7 7
31 7 2
32 16 3
33 17 10
34 11 8
35 14 6
36 12 5
37 12 7
38 18 8
39 1 10
40 1 9
41 2 5
42 7 6
43 22 5
44 22 6
45 23 9
46 23 5
47 14 4
48 14 1
49 11 10
50 42 9
51 50 8
52 38 1
[ ]: nov.append(dec,ignore_index=True)
[ ]: student_id course_id
0 23 1
1 15 5
2 18 6
3 23 4
4 16 9
5 18 1
6 1 1
7 7 8
8 22 3
9 15 1
10 19 4
11 1 6
12 7 10
13 11 7
14 13 3
15 24 4
16 21 1
17 16 5
18 23 3
3
19 17 7
20 23 6
21 25 1
22 19 2
23 25 10
24 3 3
25 3 5
26 16 7
27 12 10
28 12 1
29 14 9
30 7 7
31 7 2
32 16 3
33 17 10
34 11 8
35 14 6
36 12 5
37 12 7
38 18 8
39 1 10
40 1 9
41 2 5
42 7 6
43 22 5
44 22 6
45 23 9
46 23 5
47 14 4
48 14 1
49 11 10
50 42 9
51 50 8
52 38 1
[ ]: multi = pd.concat([nov,dec],keys=['Nov','Dec'])
# Multiindex DataFrame
multi.loc[('Dec',4)]
[ ]: student_id 14
course_id 9
Name: (Dec, 4), dtype: int64
[ ]: pd.concat([nov,dec],axis=1)
4
1 15.0 5.0 16 7
2 18.0 6.0 12 10
3 23.0 4.0 12 1
4 16.0 9.0 14 9
5 18.0 1.0 7 7
6 1.0 1.0 7 2
7 7.0 8.0 16 3
8 22.0 3.0 17 10
9 15.0 1.0 11 8
10 19.0 4.0 14 6
11 1.0 6.0 12 5
12 7.0 10.0 12 7
13 11.0 7.0 18 8
14 13.0 3.0 1 10
15 24.0 4.0 1 9
16 21.0 1.0 2 5
17 16.0 5.0 7 6
18 23.0 3.0 22 5
19 17.0 7.0 22 6
20 23.0 6.0 23 9
21 25.0 1.0 23 5
22 19.0 2.0 14 4
23 25.0 10.0 14 1
24 3.0 3.0 11 10
25 NaN NaN 42 9
26 NaN NaN 50 8
27 NaN NaN 38 1
[ ]: # inner join
students.merge(regs,how='inner',on='student_id')
5
15 12 Radha Dutt 19 10
16 12 Radha Dutt 19 1
17 12 Radha Dutt 19 5
18 12 Radha Dutt 19 7
19 13 Munni Varghese 24 3
20 14 Pranab Natarajan 22 9
21 14 Pranab Natarajan 22 6
22 14 Pranab Natarajan 22 4
23 14 Pranab Natarajan 22 1
24 15 Preet Sha 16 5
25 15 Preet Sha 16 1
26 16 Elias Dodiya 25 9
27 16 Elias Dodiya 25 5
28 16 Elias Dodiya 25 7
29 16 Elias Dodiya 25 3
30 17 Yasmin Palan 7 7
31 17 Yasmin Palan 7 10
32 18 Fardeen Mahabir 13 6
33 18 Fardeen Mahabir 13 1
34 18 Fardeen Mahabir 13 8
35 19 Qabeel Raman 12 4
36 19 Qabeel Raman 12 2
37 21 Seema Kota 15 1
38 22 Yash Sethi 21 3
39 22 Yash Sethi 21 5
40 22 Yash Sethi 21 6
41 23 Chhavi Lachman 18 1
42 23 Chhavi Lachman 18 4
43 23 Chhavi Lachman 18 3
44 23 Chhavi Lachman 18 6
45 23 Chhavi Lachman 18 9
46 23 Chhavi Lachman 18 5
47 24 Radhika Suri 17 4
48 25 Shashank D’Alia 2 1
49 25 Shashank D’Alia 2 10
[ ]: # left join
courses.merge(regs,how='left',on='course_id')
6
7 1 python 2499 14.0
8 1 python 2499 38.0
9 2 sql 3499 19.0
10 2 sql 3499 7.0
11 3 data analysis 4999 22.0
12 3 data analysis 4999 13.0
13 3 data analysis 4999 23.0
14 3 data analysis 4999 3.0
15 3 data analysis 4999 16.0
16 4 machine learning 9999 23.0
17 4 machine learning 9999 19.0
18 4 machine learning 9999 24.0
19 4 machine learning 9999 14.0
20 5 tableau 2499 15.0
21 5 tableau 2499 16.0
22 5 tableau 2499 3.0
23 5 tableau 2499 12.0
24 5 tableau 2499 2.0
25 5 tableau 2499 22.0
26 5 tableau 2499 23.0
27 6 power bi 1899 18.0
28 6 power bi 1899 1.0
29 6 power bi 1899 23.0
30 6 power bi 1899 14.0
31 6 power bi 1899 7.0
32 6 power bi 1899 22.0
33 7 ms sxcel 1599 11.0
34 7 ms sxcel 1599 17.0
35 7 ms sxcel 1599 16.0
36 7 ms sxcel 1599 7.0
37 7 ms sxcel 1599 12.0
38 8 pandas 1099 7.0
39 8 pandas 1099 11.0
40 8 pandas 1099 18.0
41 8 pandas 1099 50.0
42 9 plotly 699 16.0
43 9 plotly 699 14.0
44 9 plotly 699 1.0
45 9 plotly 699 23.0
46 9 plotly 699 42.0
47 10 pyspark 2499 7.0
48 10 pyspark 2499 25.0
49 10 pyspark 2499 12.0
50 10 pyspark 2499 17.0
51 10 pyspark 2499 1.0
52 10 pyspark 2499 11.0
53 11 Numpy 699 NaN
7
54 12 C++ 1299 NaN
[ ]: # right join
temp_df = pd.DataFrame({
'student_id':[26,27,28],
'name':['Nitish','Ankit','Rahul'],
'partner':[28,26,17]
})
students = pd.concat([students,temp_df],ignore_index=True)
[ ]: students.tail()
[ ]: students.merge(regs,how='right',on='student_id')
8
24 3 Parveen Bhalla 3.0 3
25 3 Parveen Bhalla 3.0 5
26 16 Elias Dodiya 25.0 7
27 12 Radha Dutt 19.0 10
28 12 Radha Dutt 19.0 1
29 14 Pranab Natarajan 22.0 9
30 7 Tarun Thaker 9.0 7
31 7 Tarun Thaker 9.0 2
32 16 Elias Dodiya 25.0 3
33 17 Yasmin Palan 7.0 10
34 11 David Mukhopadhyay 20.0 8
35 14 Pranab Natarajan 22.0 6
36 12 Radha Dutt 19.0 5
37 12 Radha Dutt 19.0 7
38 18 Fardeen Mahabir 13.0 8
39 1 Kailash Harjo 23.0 10
40 1 Kailash Harjo 23.0 9
41 2 Esha Butala 1.0 5
42 7 Tarun Thaker 9.0 6
43 22 Yash Sethi 21.0 5
44 22 Yash Sethi 21.0 6
45 23 Chhavi Lachman 18.0 9
46 23 Chhavi Lachman 18.0 5
47 14 Pranab Natarajan 22.0 4
48 14 Pranab Natarajan 22.0 1
49 11 David Mukhopadhyay 20.0 10
50 42 NaN NaN 9
51 50 NaN NaN 8
52 38 NaN NaN 1
[ ]: regs.merge(students,how='left',on='student_id')
9
14 13 3 Munni Varghese 24.0
15 24 4 Radhika Suri 17.0
16 21 1 Seema Kota 15.0
17 16 5 Elias Dodiya 25.0
18 23 3 Chhavi Lachman 18.0
19 17 7 Yasmin Palan 7.0
20 23 6 Chhavi Lachman 18.0
21 25 1 Shashank D’Alia 2.0
22 19 2 Qabeel Raman 12.0
23 25 10 Shashank D’Alia 2.0
24 3 3 Parveen Bhalla 3.0
25 3 5 Parveen Bhalla 3.0
26 16 7 Elias Dodiya 25.0
27 12 10 Radha Dutt 19.0
28 12 1 Radha Dutt 19.0
29 14 9 Pranab Natarajan 22.0
30 7 7 Tarun Thaker 9.0
31 7 2 Tarun Thaker 9.0
32 16 3 Elias Dodiya 25.0
33 17 10 Yasmin Palan 7.0
34 11 8 David Mukhopadhyay 20.0
35 14 6 Pranab Natarajan 22.0
36 12 5 Radha Dutt 19.0
37 12 7 Radha Dutt 19.0
38 18 8 Fardeen Mahabir 13.0
39 1 10 Kailash Harjo 23.0
40 1 9 Kailash Harjo 23.0
41 2 5 Esha Butala 1.0
42 7 6 Tarun Thaker 9.0
43 22 5 Yash Sethi 21.0
44 22 6 Yash Sethi 21.0
45 23 9 Chhavi Lachman 18.0
46 23 5 Chhavi Lachman 18.0
47 14 4 Pranab Natarajan 22.0
48 14 1 Pranab Natarajan 22.0
49 11 10 David Mukhopadhyay 20.0
50 42 9 NaN NaN
51 50 8 NaN NaN
52 38 1 NaN NaN
[ ]: # outer join
students.merge(regs,how='outer',on='student_id').tail(10)
10
56 25 Shashank D’Alia 2.0 10.0
57 26 Nitish 28.0 NaN
58 27 Ankit 26.0 NaN
59 28 Rahul 17.0 NaN
60 42 NaN NaN 9.0
61 50 NaN NaN 8.0
62 38 NaN NaN 1.0
[ ]: 154247
[ ]: level_0
Dec 65072
Nov 89175
Name: price, dtype: int64
11
18 Fardeen Mahabir power bi 1899
19 Kailash Harjo power bi 1899
20 Tarun Thaker power bi 1899
21 Yash Sethi power bi 1899
22 Pranab Natarajan power bi 1899
23 Chhavi Lachman plotly 699
24 Elias Dodiya plotly 699
25 Kailash Harjo plotly 699
26 Pranab Natarajan plotly 699
27 Chhavi Lachman tableau 2499
28 Preet Sha tableau 2499
29 Elias Dodiya tableau 2499
30 Yash Sethi tableau 2499
31 Parveen Bhalla tableau 2499
32 Radha Dutt tableau 2499
33 Esha Butala tableau 2499
34 Fardeen Mahabir pandas 1099
35 Tarun Thaker pandas 1099
36 David Mukhopadhyay pandas 1099
37 Elias Dodiya ms sxcel 1599
38 Tarun Thaker ms sxcel 1599
39 David Mukhopadhyay ms sxcel 1599
40 Yasmin Palan ms sxcel 1599
41 Radha Dutt ms sxcel 1599
42 Kailash Harjo pyspark 2499
43 Tarun Thaker pyspark 2499
44 David Mukhopadhyay pyspark 2499
45 Yasmin Palan pyspark 2499
46 Shashank D’Alia pyspark 2499
47 Radha Dutt pyspark 2499
48 Tarun Thaker sql 3499
49 Qabeel Raman sql 3499
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f56b73cb2e0>
12
[ ]: # 5. find students who enrolled in both the months
common_student_id = np.intersect1d(nov['student_id'],dec['student_id'])
common_student_id
[ ]: students[students['student_id'].isin(common_student_id)]
13
[ ]: # 6. find course that got no enrollment
# courses['course_id']
# regs['course_id']
course_id_list = np.setdiff1d(courses['course_id'],regs['course_id'])
courses[courses['course_id'].isin(course_id_list)]
(10/28)*100
[ ]: 35.714285714285715
[ ]: students
14
25 26 Nitish 28
26 27 Ankit 26
27 28 Rahul 17
[ ]: # 8. Print student name -> partner name for all enrolled students
# self join
students.
↪merge(students,how='inner',left_on='partner',right_on='student_id')[['name_x','name_y']]
[ ]: name_x name_y
0 Kailash Harjo Chhavi Lachman
1 Esha Butala Kailash Harjo
2 Parveen Bhalla Parveen Bhalla
3 Marlo Dugal Pranab Natarajan
4 Kusum Bahri Lakshmi Contractor
5 Lakshmi Contractor Aayushman Sant
6 Tarun Thaker Nitika Chatterjee
7 Radheshyam Dey Kusum Bahri
8 Nitika Chatterjee Marlo Dugal
9 Aayushman Sant Radheshyam Dey
10 David Mukhopadhyay Hanuman Hegde
11 Radha Dutt Qabeel Raman
12 Munni Varghese Radhika Suri
13 Pranab Natarajan Yash Sethi
14 Preet Sha Elias Dodiya
15 Elias Dodiya Shashank D’Alia
16 Yasmin Palan Tarun Thaker
17 Fardeen Mahabir Munni Varghese
18 Qabeel Raman Radha Dutt
19 Hanuman Hegde David Mukhopadhyay
20 Seema Kota Preet Sha
21 Yash Sethi Seema Kota
22 Chhavi Lachman Fardeen Mahabir
23 Radhika Suri Yasmin Palan
24 Rahul Yasmin Palan
25 Shashank D’Alia Esha Butala
26 Nitish Rahul
27 Ankit Nitish
[ ]: student_id name
23 Chhavi Lachman 6
7 Tarun Thaker 5
1 Kailash Harjo 4
15
Name: name, dtype: int64
[ ]: # 10. find top 3 students who spent most amount of money on courses
regs.merge(students,on='student_id').merge(courses,on='course_id').
↪groupby(['student_id','name'])['price'].sum().sort_values(ascending=False).
↪head(3)
[ ]: student_id name
23 Chhavi Lachman 22594
14 Pranab Natarajan 15096
19 Qabeel Raman 13498
Name: price, dtype: int64
pd.merge(students,regs,how='inner',on='student_id')
16
28 16 Elias Dodiya 25 7
29 16 Elias Dodiya 25 3
30 17 Yasmin Palan 7 7
31 17 Yasmin Palan 7 10
32 18 Fardeen Mahabir 13 6
33 18 Fardeen Mahabir 13 1
34 18 Fardeen Mahabir 13 8
35 19 Qabeel Raman 12 4
36 19 Qabeel Raman 12 2
37 21 Seema Kota 15 1
38 22 Yash Sethi 21 3
39 22 Yash Sethi 21 5
40 22 Yash Sethi 21 6
41 23 Chhavi Lachman 18 1
42 23 Chhavi Lachman 18 4
43 23 Chhavi Lachman 18 3
44 23 Chhavi Lachman 18 6
45 23 Chhavi Lachman 18 9
46 23 Chhavi Lachman 18 5
47 24 Radhika Suri 17 4
48 25 Shashank D’Alia 2 1
49 25 Shashank D’Alia 2 10
[ ]: # IPL Problems
[ ]: matches
17
3 Kings XI Punjab Kings XI Punjab field
4 Delhi Daredevils Royal Challengers Bangalore bat
.. … … …
631 Royal Challengers Bangalore Royal Challengers Bangalore field
632 Royal Challengers Bangalore Royal Challengers Bangalore field
633 Kolkata Knight Riders Kolkata Knight Riders field
634 Sunrisers Hyderabad Sunrisers Hyderabad field
635 Royal Challengers Bangalore Sunrisers Hyderabad bat
win_by_wickets player_of_match \
0 0 Yuvraj Singh
1 7 SPD Smith
2 10 CA Lynn
3 6 GJ Maxwell
4 0 KM Jadhav
.. … …
631 6 V Kohli
632 4 AB de Villiers
633 0 MC Henriques
634 4 DA Warner
635 0 BCJ Cutting
venue umpire1 \
0 Rajiv Gandhi International Stadium, Uppal AY Dandekar
1 Maharashtra Cricket Association Stadium A Nand Kishore
2 Saurashtra Cricket Association Stadium Nitin Menon
3 Holkar Cricket Stadium AK Chaudhary
4 M Chinnaswamy Stadium NaN
.. … …
631 Shaheed Veer Narayan Singh International Stadium A Nand Kishore
632 M Chinnaswamy Stadium AK Chaudhary
633 Feroz Shah Kotla M Erasmus
634 Feroz Shah Kotla M Erasmus
635 M Chinnaswamy Stadium HDPK Dharmasena
18
umpire2 umpire3
0 NJ Llong NaN
1 S Ravi NaN
2 CK Nandan NaN
3 C Shamshuddin NaN
4 NaN NaN
.. … …
631 BNJ Oxenford NaN
632 HDPK Dharmasena NaN
633 C Shamshuddin NaN
634 CK Nandan NaN
635 BNJ Oxenford NaN
[ ]: delivery
19
2 TS Mills 0 … 0 0 0
3 TS Mills 0 … 0 0 0
4 TS Mills 0 … 0 0 0
… … … … … … …
150455 B Kumar 0 … 0 0 0
150456 B Kumar 0 … 0 0 0
150457 B Kumar 0 … 0 1 0
150458 B Kumar 0 … 0 0 0
150459 B Kumar 0 … 0 0 0
dismissal_kind fielder
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
… … …
150455 NaN NaN
150456 run out NV Ojha
150457 NaN NaN
150458 NaN NaN
150459 NaN NaN
[ ]: temp_df = delivery.merge(matches,left_on='match_id',right_on='id')
[ ]: six_df = temp_df[temp_df['batsman_runs'] == 6]
[ ]: num_matches = matches['venue'].value_counts()
20
[ ]: (num_sixes/num_matches).sort_values(ascending=False).head(10)
[ ]: matches
21
4 normal 0 Royal Challengers Bangalore 15
.. … … … …
631 normal 0 Royal Challengers Bangalore 0
632 normal 0 Royal Challengers Bangalore 0
633 normal 0 Sunrisers Hyderabad 22
634 normal 0 Sunrisers Hyderabad 0
635 normal 0 Sunrisers Hyderabad 8
win_by_wickets player_of_match \
0 0 Yuvraj Singh
1 7 SPD Smith
2 10 CA Lynn
3 6 GJ Maxwell
4 0 KM Jadhav
.. … …
631 6 V Kohli
632 4 AB de Villiers
633 0 MC Henriques
634 4 DA Warner
635 0 BCJ Cutting
venue umpire1 \
0 Rajiv Gandhi International Stadium, Uppal AY Dandekar
1 Maharashtra Cricket Association Stadium A Nand Kishore
2 Saurashtra Cricket Association Stadium Nitin Menon
3 Holkar Cricket Stadium AK Chaudhary
4 M Chinnaswamy Stadium NaN
.. … …
631 Shaheed Veer Narayan Singh International Stadium A Nand Kishore
632 M Chinnaswamy Stadium AK Chaudhary
633 Feroz Shah Kotla M Erasmus
634 Feroz Shah Kotla M Erasmus
635 M Chinnaswamy Stadium HDPK Dharmasena
umpire2 umpire3
0 NJ Llong NaN
1 S Ravi NaN
2 CK Nandan NaN
3 C Shamshuddin NaN
4 NaN NaN
.. … …
631 BNJ Oxenford NaN
632 HDPK Dharmasena NaN
633 C Shamshuddin NaN
634 CK Nandan NaN
635 BNJ Oxenford NaN
22
[636 rows x 18 columns]
[ ]: temp_df.groupby(['season','batsman'])['batsman_runs'].sum().reset_index().
↪sort_values('batsman_runs',ascending=False).
↪drop_duplicates(subset=['season'],keep='first').sort_values('season')
[ ]: temp_df.groupby(['season','batsman'])['batsman_runs'].sum().reset_index().
↪sort_values('batsman_runs',ascending=False)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-214-5a08c989d1a3> in <module>
----> 1 temp_df.groupby(['season','batsman'])['batsman_runs'].sum().
↪reset_index().sort_values('batsman_runs',ascending=False).first('season')
/usr/local/lib/python3.8/dist-packages/pandas/core/generic.py in first(self,␣
↪offset)
8193 """
8194 if not isinstance(self.index, DatetimeIndex):
-> 8195 raise TypeError("'first' only supports a DatetimeIndex␣
↪index")
8196
8197 if len(self.index) == 0:
[ ]:
23
session-21-multiindex-objects
May 3, 2024
[ ]: import numpy as np
import pandas as pd
• Dataset : https://drive.google.com/drive/folders/1AP_M96SnIe985aQQp9SmDkz69AXHrs5t?usp=share_link
a = pd.Series([1,2,3,4,5,6,7,8],index=index_val)
a
[ ]: (cse, 2019) 1
(cse, 2020) 2
(cse, 2021) 3
(cse, 2022) 4
(ece, 2019) 5
(ece, 2020) 6
(ece, 2021) 7
(ece, 2022) 8
dtype: int64
[ ]: # The problem?
a['cse']
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in␣
↪get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
1
/usr/local/lib/python3.8/dist-packages/pandas/_libs/index.pyx in pandas._libs.
↪index.IndexEngine.get_loc()
/usr/local/lib/python3.8/dist-packages/pandas/_libs/index.pyx in pandas._libs.
↪index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.
↪PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.
↪PyObjectHashTable.get_item()
KeyError: 'cse'
The above exception was the direct cause of the following exception:
/usr/local/lib/python3.8/dist-packages/pandas/core/series.py in␣
↪__getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
/usr/local/lib/python3.8/dist-packages/pandas/core/series.py in _get_value(self,␣
↪label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to␣
↪positional
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in␣
↪get_loc(self, key, method, tolerance)
KeyError: 'cse'
2
[ ]: # The solution -> multiindex series(also known as Hierarchical Indexing)
# multiple index levels within a single index
multiindex = pd.MultiIndex.from_tuples(index_val)
multiindex.levels[1]
# 2. pd.MultiIndex.from_product()
pd.MultiIndex.from_product([['cse','ece'],[2019,2020,2021,2022]])
[ ]: MultiIndex([('cse', 2019),
('cse', 2020),
('cse', 2021),
('cse', 2022),
('ece', 2019),
('ece', 2020),
('ece', 2021),
('ece', 2022)],
)
[ ]: cse 2019 1
2020 2
2021 3
2022 4
ece 2019 5
2020 6
2021 7
2022 8
dtype: int64
[ ]: 2019 1
2020 2
2021 3
2022 4
dtype: int64
3
[ ]: # a logical question to ask
[ ]: # unstack
temp = s.unstack()
temp
[ ]: # stack
temp.stack()
[ ]: cse 2019 1
2020 2
2021 3
2022 4
ece 2019 5
2020 6
2021 7
2022 8
dtype: int64
[ ]: # multiindex dataframe
[ ]: branch_df1 = pd.DataFrame(
[
[1,2],
[3,4],
[5,6],
[7,8],
[9,10],
[11,12],
[13,14],
[15,16],
],
index = multiindex,
columns = ['avg_package','students']
)
branch_df1
[ ]: avg_package students
cse 2019 1 2
2020 3 4
4
2021 5 6
2022 7 8
ece 2019 9 10
2020 11 12
2021 13 14
2022 15 16
[ ]: branch_df1['students']
[ ]: cse 2019 2
2020 4
2021 6
2022 8
ece 2019 10
2020 12
2021 14
2022 16
Name: students, dtype: int64
branch_df2
[ ]: delhi mumbai
avg_package students avg_package students
2019 1 2 0 0
2020 3 4 0 0
2021 5 6 0 0
2022 7 8 0 0
[ ]: branch_df2.loc[2019]
[ ]: delhi avg_package 1
students 2
5
mumbai avg_package 0
students 0
Name: 2019, dtype: int64
branch_df3 = pd.DataFrame(
[
[1,2,0,0],
[3,4,0,0],
[5,6,0,0],
[7,8,0,0],
[9,10,0,0],
[11,12,0,0],
[13,14,0,0],
[15,16,0,0],
],
index = multiindex,
columns = pd.MultiIndex.
↪from_product([['delhi','mumbai'],['avg_package','students']])
branch_df3
[ ]: delhi mumbai
avg_package students avg_package students
cse 2019 1 2 0 0
2020 3 4 0 0
2021 5 6 0 0
2022 7 8 0 0
ece 2019 9 10 0 0
2020 11 12 0 0
2021 13 14 0 0
2022 15 16 0 0
[ ]: branch_df3.stack().stack()
6
2021 avg_package delhi 5
mumbai 0
students delhi 6
mumbai 0
2022 avg_package delhi 7
mumbai 0
students delhi 8
mumbai 0
ece 2019 avg_package delhi 9
mumbai 0
students delhi 10
mumbai 0
2020 avg_package delhi 11
mumbai 0
students delhi 12
mumbai 0
2021 avg_package delhi 13
mumbai 0
students delhi 14
mumbai 0
2022 avg_package delhi 15
mumbai 0
students delhi 16
mumbai 0
dtype: int64
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 8 entries, ('cse', 2019) to ('ece', 2022)
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 (delhi, avg_package) 8 non-null int64
1 (delhi, students) 8 non-null int64
2 (mumbai, avg_package) 8 non-null int64
3 (mumbai, students) 8 non-null int64
7
dtypes: int64(4)
memory usage: 932.0+ bytes
[ ]: delhi mumbai
avg_package students avg_package students
cse 2019 False False False False
2020 False False False False
2021 False False False False
2022 False False False False
ece 2019 False False False False
2020 False False False False
2021 False False False False
2022 False False False False
[ ]: delhi avg_package 7
students 8
mumbai avg_package 0
students 0
Name: (cse, 2022), dtype: int64
[ ]: # multiple
branch_df3.loc[('cse',2019):('ece',2020):2]
[ ]: delhi mumbai
avg_package students avg_package students
cse 2019 1 2 0 0
2021 5 6 0 0
ece 2019 9 10 0 0
[ ]: # using iloc
branch_df3.iloc[0:5:2]
[ ]: delhi mumbai
avg_package students avg_package students
cse 2019 1 2 0 0
2021 5 6 0 0
ece 2019 9 10 0 0
[ ]: # Extracting cols
branch_df3['delhi']['students']
[ ]: cse 2019 2
2020 4
2021 6
8
2022 8
ece 2019 10
2020 12
2021 14
2022 16
Name: students, dtype: int64
[ ]: branch_df3.iloc[:,1:3]
[ ]: delhi mumbai
students avg_package
cse 2019 2 0
2020 4 0
2021 6 0
2022 8 0
ece 2019 10 0
2020 12 0
2021 14 0
2022 16 0
[ ]: # Extracting both
branch_df3.iloc[[0,4],[1,2]]
[ ]: delhi mumbai
students avg_package
cse 2019 2 0
ece 2019 10 0
[ ]: # sort index
# both -> descending -> diff order
# based on one level
branch_df3.sort_index(ascending=False)
branch_df3.sort_index(ascending=[False,True])
branch_df3.sort_index(level=0,ascending=[False])
[ ]: delhi mumbai
avg_package students avg_package students
ece 2019 9 10 0 0
2020 11 12 0 0
2021 13 14 0 0
2022 15 16 0 0
cse 2019 1 2 0 0
2020 3 4 0 0
2021 5 6 0 0
2022 7 8 0 0
9
[ ]: # multiindex dataframe(col) -> transpose
branch_df3.transpose()
[ ]: cse ece
2019 2020 2021 2022 2019 2020 2021 2022
delhi avg_package 1 3 5 7 9 11 13 15
students 2 4 6 8 10 12 14 16
mumbai avg_package 0 0 0 0 0 0 0 0
students 0 0 0 0 0 0 0 0
[ ]: # swaplevel
branch_df3.swaplevel(axis=1)
Wide format is where we have a single row for every data point with multiple columns to hold
the values of various attributes.
Long format is where, for each data point we have as many rows as the number of attributes and
each row contains the value of a particular attribute for a given data point.
[ ]: # melt -> simple example branch
# wide to long
pd.DataFrame({'cse':[120]}).melt()
[ ]: variable value
0 cse 120
[ ]: branch num_students
0 cse 120
10
1 ece 100
2 mech 50
[ ]: pd.DataFrame(
{
'branch':['cse','ece','mech'],
'2020':[100,150,60],
'2021':[120,130,80],
'2022':[150,140,70]
}
).melt(id_vars=['branch'],var_name='year',value_name='students')
[ ]: death.head()
11
3 165 165 165 165 165 165 165
4 1930 1930 1930 1930 1930 1930 1930
[ ]: confirm.head()
[ ]: death = death.melt(id_vars=['Province/State','Country/
↪Region','Lat','Long'],var_name='date',value_name='num_deaths')
confirm = confirm.melt(id_vars=['Province/State','Country/
↪Region','Lat','Long'],var_name='date',value_name='num_cases')
[ ]: death.head()
[ ]: confirm.merge(death,on=['Province/State','Country/
↪Region','Lat','Long','date'])[['Country/
↪Region','date','num_cases','num_deaths']]
12
[ ]: Country/Region date num_cases num_deaths
0 Afghanistan 1/22/20 0 0
1 Albania 1/22/20 0 0
2 Algeria 1/22/20 0 0
3 Andorra 1/22/20 0 0
4 Angola 1/22/20 0 0
… … … … …
311248 West Bank and Gaza 1/2/23 703228 5708
311249 Winter Olympics 2022 1/2/23 535 0
311250 Yemen 1/2/23 11945 2159
311251 Zambia 1/2/23 334661 4024
311252 Zimbabwe 1/2/23 259981 5637
[ ]:
[ ]:
[ ]: df = sns.load_dataset('tips')
df.head()
[ ]: df.groupby('sex')[['total_bill']].mean()
[ ]: total_bill
sex
Male 20.744076
Female 18.056897
[ ]: df.groupby(['sex','smoker'])[['total_bill']].mean().unstack()
13
[ ]: total_bill
smoker Yes No
sex
Male 22.284500 19.791237
Female 17.977879 18.105185
[ ]: df.pivot_table(index='sex',columns='smoker',values='total_bill')
[ ]: smoker Yes No
sex
Male 22.284500 19.791237
Female 17.977879 18.105185
[ ]:
[ ]:
[ ]: # aggfunc
df.pivot_table(index='sex',columns='smoker',values='total_bill',aggfunc='std')
[ ]: smoker Yes No
sex
Male 9.911845 8.726566
Female 9.189751 7.286455
[ ]: smoker Yes No
sex
Male 2.500000 2.711340
Female 2.242424 2.592593
[ ]: # multidimensional
df.pivot_table(index=['sex','smoker'],columns=['day','time'],aggfunc={'size':
↪'mean','tip':'max','total_bill':'sum'},margins=True)
[ ]: size \
day Thur Fri Sat Sun
time Lunch Dinner Lunch Dinner Dinner Dinner
sex smoker
Male Yes 2.300000 NaN 1.666667 2.400000 2.629630 2.600000
No 2.500000 NaN NaN 2.000000 2.656250 2.883721
Female Yes 2.428571 NaN 2.000000 2.000000 2.200000 2.500000
No 2.500000 2.0 3.000000 2.000000 2.307692 3.071429
All 2.459016 2.0 2.000000 2.166667 2.517241 2.842105
14
tip … total_bill \
day All Thur Fri … All Thur
time Lunch Dinner Lunch … Lunch Dinner
sex smoker …
Male Yes 2.500000 5.00 NaN 2.20 … 10.0 191.71 0.00
No 2.711340 6.70 NaN NaN … 9.0 369.73 0.00
Female Yes 2.242424 5.00 NaN 3.48 … 6.5 134.53 0.00
No 2.592593 5.17 3.0 3.00 … 5.2 381.58 18.78
All 2.569672 6.70 3.0 3.48 … 10.0 1077.55 18.78
[5 rows x 23 columns]
[ ]: # margins
df.
↪pivot_table(index='sex',columns='smoker',values='total_bill',aggfunc='sum',margins=True)
[ ]: # plotting graphs
df = pd.read_csv('/content/expense_data.csv')
[ ]: df.head()
15
2 Dinner 78.0 Expense NaN 78.0 INR 78.0
3 Metro 30.0 Expense NaN 30.0 INR 30.0
4 Snacks 67.0 Expense NaN 67.0 INR 67.0
[ ]: df['Category'].value_counts()
[ ]: Food 156
Other 60
Transportation 31
Apparel 7
Household 6
Allowance 6
Social Life 5
Education 1
Salary 1
Self-development 1
Beauty 1
Gift 1
Petty cash 1
Name: Category, dtype: int64
[ ]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 277 non-null object
1 Account 277 non-null object
2 Category 277 non-null object
3 Subcategory 0 non-null float64
4 Note 273 non-null object
5 INR 277 non-null float64
6 Income/Expense 277 non-null object
7 Note.1 0 non-null float64
8 Amount 277 non-null float64
9 Currency 277 non-null object
10 Account.1 277 non-null float64
dtypes: float64(5), object(6)
memory usage: 23.9+ KB
[ ]: df['Date'] = pd.to_datetime(df['Date'])
[ ]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
16
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 277 non-null datetime64[ns]
1 Account 277 non-null object
2 Category 277 non-null object
3 Subcategory 0 non-null float64
4 Note 273 non-null object
5 INR 277 non-null float64
6 Income/Expense 277 non-null object
7 Note.1 0 non-null float64
8 Amount 277 non-null float64
9 Currency 277 non-null object
10 Account.1 277 non-null float64
dtypes: datetime64[ns](1), float64(5), object(5)
memory usage: 23.9+ KB
[ ]: df['month'] = df['Date'].dt.month_name()
[ ]: df.head()
month
0 March
1 March
2 March
3 March
4 March
[ ]: df.
↪pivot_table(index='month',columns='Category',values='INR',aggfunc='sum',fill_value=0).
↪plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8976423df0>
17
[ ]: df.pivot_table(index='month',columns='Income/
↪Expense',values='INR',aggfunc='sum',fill_value=0).plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8976118f70>
18
[ ]: df.
↪pivot_table(index='month',columns='Account',values='INR',aggfunc='sum',fill_value=0).
↪plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f89760dbe50>
[ ]:
19
session-22-pandas-strings
May 3, 2024
[ ]: import pandas as pd
import numpy as np
[i.startswith('c') for i in s]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-19-3fe713c7ebb8> in <module>
2 s = ['cat','mat',None,'rat']
3
----> 4 [i.startswith('c') for i in s]
<ipython-input-19-3fe713c7ebb8> in <listcomp>(.0)
2 s = ['cat','mat',None,'rat']
3
----> 4 [i.startswith('c') for i in s]
s = pd.Series(['cat','mat',None,'rat'])
# string accessor
s.str.startswith('c')
1
[ ]: 0 True
1 False
2 None
3 False
dtype: object
[ ]: # import titanic
df = pd.read_csv('/content/titanic.csv')
df['Name']
[ ]: # Common Functions
# lower/upper/capitalize/title
df['Name'].str.upper()
df['Name'].str.capitalize()
df['Name'].str.title()
# len
df['Name'][df['Name'].str.len() == 82].values[0]
# strip
" nitish ".strip()
df['Name'].str.strip()
2
[ ]: # split -> get
df['lastname'] = df['Name'].str.split(',').str.get(0)
df.head()
[ ]: df[['title','firstname']] = df['Name'].str.split(',').str.get(1).str.strip().
↪str.split(' ', n=1, expand=True)
df.head()
df['title'].value_counts()
[ ]: Mr. 517
Miss. 182
Mrs. 125
Master. 40
Dr. 7
Rev. 6
Mlle. 2
Major. 2
Col. 2
the 1
Capt. 1
Ms. 1
Sir. 1
Lady. 1
Mme. 1
Don. 1
3
Jonkheer. 1
Name: title, dtype: int64
[ ]: # replace
df['title'] = df['title'].str.replace('Ms.','Miss.')
df['title'] = df['title'].str.replace('Mlle.','Miss.')
[ ]: df['title'].value_counts()
[ ]: Mr. 517
Miss. 185
Mrs. 125
Master. 40
Dr. 7
Rev. 6
Major. 2
Col. 2
Don. 1
Mme. 1
Lady. 1
Sir. 1
Capt. 1
the 1
Jonkheer. 1
Name: title, dtype: int64
[ ]: # filtering
# startswith/endswith
df[df['firstname'].str.endswith('A')]
# isdigit/isalpha...
df[df['firstname'].str.isdigit()]
[ ]: Empty DataFrame
Columns: [PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket,
Fare, Cabin, Embarked, lastname, title, firstname]
Index: []
[ ]: # applying regex
# contains
# search john -> both case
4
df[df['firstname'].str.contains('john',case=False)]
# find lastnames whose start and end chars are both non-vowels
df[df['lastname'].str.contains('^[^aeiouAEIOU].+[^aeiouAEIOU]$')]
firstname
0 Owen Harris
1 John Bradley (Florence Briggs Thayer)
2 Laina
5
5 James
6 Timothy J
.. …
884 Henry Jr
887 Margaret Edith
888 Catherine Helen "Carrie"
889 Karl Howell
890 Patrick
[ ]: # slicing
df['Name'].str[::-1]
[ ]:
6
session-22-date-and-time-in-pandas
May 3, 2024
[ ]: import numpy as np
import pandas as pd
[ ]: # creating a timestamp
type(pd.Timestamp('2023/1/5'))
[ ]: pandas._libs.tslibs.timestamps.Timestamp
[ ]: # variations
pd.Timestamp('2023-1-5')
pd.Timestamp('2023, 1, 5')
[ ]: Timestamp('2023-01-05 00:00:00')
[ ]: # only year
pd.Timestamp('2023')
[ ]: Timestamp('2023-01-01 00:00:00')
[ ]: # using text
pd.Timestamp('5th January 2023')
[ ]: Timestamp('2023-01-05 00:00:00')
---------------------------------------------------------------------------
ParserError Traceback (most recent call last)
/usr/local/lib/python3.8/dist-packages/pandas/_libs/tslibs/conversion.pyx in␣
↪pandas._libs.tslibs.conversion._convert_str_to_tsobject()
1
/usr/local/lib/python3.8/dist-packages/pandas/_libs/tslibs/parsing.pyx in pandas.
↪_libs.tslibs.parsing.parse_datetime_string()
/usr/local/lib/python3.8/dist-packages/dateutil/parser/_parser.py in␣
↪parse(timestr, parserinfo, **kwargs)
1367 else:
-> 1368 return DEFAULTPARSER.parse(timestr, **kwargs)
1369
/usr/local/lib/python3.8/dist-packages/dateutil/parser/_parser.py in parse(self,␣
↪timestr, default, ignoretz, tzinfos, **kwargs)
/usr/local/lib/python3.8/dist-packages/pandas/_libs/tslibs/timestamps.pyx in␣
↪pandas._libs.tslibs.timestamps.Timestamp.__new__()
/usr/local/lib/python3.8/dist-packages/pandas/_libs/tslibs/conversion.pyx in␣
↪pandas._libs.tslibs.conversion.convert_to_tsobject()
/usr/local/lib/python3.8/dist-packages/pandas/_libs/tslibs/conversion.pyx in␣
↪pandas._libs.tslibs.conversion._convert_str_to_tsobject()
[ ]: # AM and PM
x = pd.Timestamp(dt.datetime(2023,1,5,9,21,56))
x
[ ]: Timestamp('2023-01-05 09:21:56')
2
[ ]: # fetching attributes
x.year
x.month
x.day
x.hour
x.minute
x.second
[ ]: 56
[ ]: # why separate objects to handle date and time when python already has datetime␣
↪functionality?
[ ]: array('2015-07-04', dtype='datetime64[D]')
[ ]: date + np.arange(12)
• Because of the uniform type in NumPy datetime64 arrays, this type of operation can be
accomplished much more quickly than if we were working directly with Python’s datetime
objects, especially as arrays get large
• Pandas Timestamp object combines the ease-of-use of python datetime with the efficient
storage and vectorized interface of numpy.datetime64
• From a group of these Timestamp objects, Pandas can construct a DatetimeIndex that can
be used to index data in a Series or DataFrame
3
[ ]: pandas.core.indexes.datetimes.DatetimeIndex
[ ]: # using pd.timestamps
dt_index = pd.DatetimeIndex([pd.Timestamp(2023,1,1),pd.Timestamp(2022,1,1),pd.
↪Timestamp(2021,1,1)])
pd.Series([1,2,3],index=dt_index)
[ ]: 2023-01-01 1
2022-01-01 2
2021-01-01 3
dtype: int64
4
'2023-02-14', '2023-02-15', '2023-02-16', '2023-02-17',
'2023-02-20', '2023-02-21', '2023-02-22', '2023-02-23',
'2023-02-24', '2023-02-27', '2023-02-28'],
dtype='datetime64[ns]', freq='B')
5
[ ]: DatetimeIndex(['2023-01-31', '2023-02-28', '2023-03-31', '2023-04-30',
'2023-05-31', '2023-06-30', '2023-07-31', '2023-08-31',
'2023-09-30', '2023-10-31', '2023-11-30', '2023-12-31',
'2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
'2024-05-31', '2024-06-30', '2024-07-31', '2024-08-31',
'2024-09-30', '2024-10-31', '2024-11-30', '2024-12-31',
'2025-01-31'],
dtype='datetime64[ns]', freq='M')
s = pd.Series(['2023/1/1','2022/1/1','2021/1/1'])
pd.to_datetime(s).dt.day_name()
[ ]: 0 Sunday
1 Saturday
2 Friday
dtype: object
[ ]: # with errors
s = pd.Series(['2023/1/1','2022/1/1','2021/130/1'])
pd.to_datetime(s,errors='coerce').dt.month_name()
[ ]: 0 January
1 January
2 NaN
dtype: object
[ ]: df = pd.read_csv('/content/expense_data.csv')
df.shape
[ ]: (277, 11)
[ ]: df.head()
6
0 Brownie 50.0 Expense NaN 50.0 INR 50.0
1 To lended people 300.0 Expense NaN 300.0 INR 300.0
2 Dinner 78.0 Expense NaN 78.0 INR 78.0
3 Metro 30.0 Expense NaN 30.0 INR 30.0
4 Snacks 67.0 Expense NaN 67.0 INR 67.0
[ ]: df['Date'] = pd.to_datetime(df['Date'])
[ ]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 277 non-null datetime64[ns]
1 Account 277 non-null object
2 Category 277 non-null object
3 Subcategory 0 non-null float64
4 Note 273 non-null object
5 INR 277 non-null float64
6 Income/Expense 277 non-null object
7 Note.1 0 non-null float64
8 Amount 277 non-null float64
9 Currency 277 non-null object
10 Account.1 277 non-null float64
dtypes: datetime64[ns](1), float64(5), object(5)
memory usage: 23.9+ KB
0.0.6 dt accessor
Accessor object for datetimelike properties of the Series values.
[ ]: df['Date'].dt.is_quarter_start
[ ]: 0 False
1 False
2 False
3 False
4 False
…
272 False
273 False
274 False
275 False
276 False
Name: Date, Length: 277, dtype: bool
7
[ ]: # plot graph
import matplotlib.pyplot as plt
plt.plot(df['Date'],df['INR'])
[ ]: [<matplotlib.lines.Line2D at 0x7f89b2206880>]
df['day_name'] = df['Date'].dt.day_name()
[ ]: df.head()
8
day_name
0 Wednesday
1 Wednesday
2 Tuesday
3 Tuesday
4 Tuesday
[ ]: df.groupby('day_name')['INR'].mean().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f89b18629a0>
[ ]: df['month_name'] = df['Date'].dt.month_name()
[ ]: df.head()
9
Note INR Income/Expense Note.1 Amount Currency Account.1 \
0 Brownie 50.0 Expense NaN 50.0 INR 50.0
1 To lended people 300.0 Expense NaN 300.0 INR 300.0
2 Dinner 78.0 Expense NaN 78.0 INR 78.0
3 Metro 30.0 Expense NaN 30.0 INR 30.0
4 Snacks 67.0 Expense NaN 67.0 INR 67.0
day_name month_name
0 Wednesday March
1 Wednesday March
2 Tuesday March
3 Tuesday March
4 Tuesday March
[ ]: df.groupby('month_name')['INR'].sum().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f89b1905c40>
[ ]: df[df['Date'].dt.is_month_end]
10
[ ]: Date Account Category Subcategory \
7 2022-02-28 11:56:00 CUB - online payment Food NaN
8 2022-02-28 11:45:00 CUB - online payment Other NaN
61 2022-01-31 08:44:00 CUB - online payment Transportation NaN
62 2022-01-31 08:27:00 CUB - online payment Other NaN
63 2022-01-31 08:26:00 CUB - online payment Transportation NaN
242 2021-11-30 14:24:00 CUB - online payment Gift NaN
243 2021-11-30 14:17:00 CUB - online payment Food NaN
244 2021-11-30 10:11:00 CUB - online payment Food NaN
[ ]:
11
time-series-analysis
May 3, 2024
[ ]: import pandas as pd
import numpy as np
• Dataset : drive.google.com/drive/folders/15WZn-YqSRbEAMM3ErlDdm3zDu6m47tUb?usp=share_link
[ ]: # till now
# Timestamp
pd.Timestamp('6th jan 2023 8:10')
# DatetimeIndex -> df and series index
pd.DatetimeIndex([pd.Timestamp('6th jan 2023 8:10'),pd.Timestamp('7th jan 2023␣
↪8:10'),pd.Timestamp('8th jan 2023 8:10')])[0]
# date_range()
pd.date_range(start='2023-1-6',end='2023-1-31',freq='D')
# to_datetime()
s = pd.Series(['2023/1/6','2023/1/7','2023/1/7'])
pd.to_datetime(s).dt.day_name()
[ ]: 0 Friday
1 Saturday
2 Saturday
dtype: object
t2 - t1
[ ]: # standalone creation
pd.Timedelta(days=2,hours=10,minutes=35)
1
[ ]: # Arithmetic
pd.Timestamp('6th jan 2023') + pd.Timedelta(days=2,hours=10,minutes=35)
[ ]: Timestamp('2023-01-08 10:35:00')
[ ]: pd.date_range(start='2023-1-6',end='2023-1-31',freq='D') - pd.
↪Timedelta(days=2,hours=10,minutes=35)
[ ]: order_date delivery_date
0 5/24/98 2/5/99
1 4/22/92 3/6/98
2 2/10/91 8/26/92
3 7/21/92 11/20/97
4 9/2/93 6/10/98
[ ]: df['order_date'] = pd.to_datetime(df['order_date'])
df['delivery_date'] = pd.to_datetime(df['delivery_date'])
df['delivery_time_period'].mean()
2
useful to see how a given asset, security, or economic variable changes over time.
Examples
• Financial Data (Company stocks)
• Natural Data (Rainfall measurement)
• Event Data (Covid)
• Medical Data (Heart rate monitoring)
Types of Operations done on Time Series
• Time Series Analysis
• Time Series Forecasting
[ ]: google = pd.read_csv('google.csv')
google.head()
[ ]: google.tail()
[ ]: subs = pd.read_csv('subscribers.csv')
views = pd.read_csv('views.csv')
wt = pd.read_csv('watch-time.csv')
comments = pd.read_csv('comments.csv')
[ ]: comments.tail()
3
1355 2023-01-01 13
1356 2023-01-02 66
1357 2023-01-03 22
1358 2023-01-04 21
[ ]: yt = subs.merge(views,on='Date').merge(wt,on='Date').merge(comments,on='Date')
[ ]: yt.head()
[ ]: google['Date'] = pd.to_datetime(google['Date'])
yt['Date'] = pd.to_datetime(yt['Date'])
[ ]: yt.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1359 entries, 0 to 1358
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 1359 non-null datetime64[ns]
1 Subscribers 1359 non-null int64
2 Views 1359 non-null int64
3 Watch time (hours) 1359 non-null float64
4 Comments added 1359 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(3)
memory usage: 63.7 KB
[ ]: google.set_index('Date',inplace=True)
yt.set_index('Date',inplace=True)
[ ]: yt.head()
4
[ ]: # fetch a specific date
google.loc['2021-12-30']
[ ]: Open 2929.000000
High 2941.250000
Low 2915.169922
Close 2920.050049
Adj Close 2920.050049
Volume 648900.000000
Name: 2021-12-30 00:00:00, dtype: float64
[ ]: yt.head()
5
Date
2019-04-17 April 2 Wednesday
2019-04-18 April 2 Thursday
2019-04-19 April 2 Friday
2019-04-20 April 2 Saturday
2019-04-21 April 2 Sunday
[ ]: google['month_name'] = google.index.month_name()
google['weekday_name'] = google.index.day_name()
google['quarter'] = google.index.quarter
google.head()
[ ]: # slicing
yt.loc['2022-12-15':'2023-1-1':2]
6
2022-12-17 December 4 Saturday
2022-12-19 December 4 Monday
2022-12-21 December 4 Wednesday
2022-12-23 December 4 Friday
2022-12-25 December 4 Sunday
2022-12-27 December 4 Tuesday
2022-12-29 December 4 Thursday
2022-12-31 December 4 Saturday
[ ]: # challenge -> fetch info for a particular date every year -> limitation of␣
↪timedelta
google.head()
google[google.index.isin(pd.date_range(start='2005-1-6',end='2022-1-6',freq=pd.
↪DateOffset(years=1)))]
7
2022-01-06 1452500 January Thursday 1
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a760e42e0>
[ ]: yt['Subscribers'].plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a77a8c100>
8
[ ]: # viz all cols together
subset_yt = yt[['Subscribers', 'Views', 'Watch time (hours)', 'Comments added']]
ax = subset_yt.plot(subplots=True,
layout=(2, 2),
sharex=False,
sharey=False,
linewidth=0.7,
fontsize=10,
legend=False,
figsize=(20,10),
title=['Subscribers', 'Views', 'Watch time (hours)',␣
↪'Comments'])
9
[ ]: # plot for a particular year/month/week
yt.loc['2022-12']['Subscribers'].plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a77d25eb0>
10
[ ]: google.loc['2021-12']['Close'].plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a77d25700>
[ ]: # monthly trend
yt.groupby('month_name')['Subscribers'].mean().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a75581730>
11
[ ]: # weekly trend (average per day of the week)
yt.groupby('weekday_name')['Subscribers'].mean().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a75504fa0>
12
[ ]: google.groupby('month_name')['Close'].mean().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a75441a90>
13
[ ]: # quarterly trend
google.groupby('quarter')['Close'].mean().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a7552d6a0>
14
[ ]: # frequency
google.index
[ ]: # asfreq
google.asfreq('6H',method='bfill')
15
2022-05-19 06:00:00 2241.709961 2251.000000 2127.459961 2186.260010
2022-05-19 12:00:00 2241.709961 2251.000000 2127.459961 2186.260010
2022-05-19 18:00:00 2241.709961 2251.000000 2127.459961 2186.260010
2022-05-20 00:00:00 2241.709961 2251.000000 2127.459961 2186.260010
0.0.3 Resampling
Resampling involves changing the frequency of your time series observations.
Two types of resampling are:
Upsampling: Where you increase the frequency of the samples, such as from minutes to seconds.
Downsampling: Where you decrease the frequency of the samples, such as from days to months.
[ ]: # Downsampling
yt['Subscribers'].plot()
yt['Subscribers'].resample('Y').mean().plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a74e2f280>
16
[ ]: # Upsampling
google['Close'].resample('12H').interpolate(method='spline',order=2).plot()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a751e70d0>
17
0.0.4 Rolling Window(Smoothing)
Time series data in original format can be quite volatile, especially on smaller aggregation levels.
The concept of rolling, or moving, averages is a useful technique for smoothing time series data.
[ ]: # Rolling window
yt['Subscribers'].rolling(30).mean().plot(title='rolling')
yt['Subscribers'].ewm(30).mean().plot(title='ewm')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f2a6d4ae9d0>
0.0.5 Shifting
The shift() function in Pandas is used to, well, shift the entire series up or down by the desired
number of periods.
[ ]: # shift
yt['Subscribers']
18
[ ]: Date
2019-04-17 0
2019-04-18 0
2019-04-19 0
2019-04-20 0
2019-04-21 0
…
2022-12-31 116
2023-01-01 142
2023-01-02 171
2023-01-03 162
2023-01-04 147
Name: Subscribers, Length: 1359, dtype: int64
[ ]: yt['Subscribers'].shift(-1)
[ ]: Date
2019-04-17 0.0
2019-04-18 0.0
2019-04-19 0.0
2019-04-20 0.0
2019-04-21 0.0
…
2022-12-31 142.0
2023-01-01 171.0
2023-01-02 162.0
2023-01-03 147.0
2023-01-04 NaN
Name: Subscribers, Length: 1359, dtype: float64
[ ]: # shift example
df = pd.read_csv('/content/login.csv',header=None)
df = df[[1,2]]
df.head()
df.rename(columns={1:'user_id',2:'login_time'},inplace=True)
df.head()
[ ]: user_id login_time
0 466 2017-01-07 18:24:07
1 466 2017-01-07 18:24:55
2 458 2017-01-07 18:25:18
3 458 2017-01-07 18:26:21
4 592 2017-01-07 19:09:59
19
[ ]: user_id login_time
2 458 2017-01-07 18:25:18
3 458 2017-01-07 18:26:21
9 458 2017-01-09 11:13:12
10 458 2017-01-09 11:34:02
25 458 2017-01-10 12:14:11
[ ]: user_df['login_time'] = pd.to_datetime(user_df['login_time'])
user_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 208 entries, 2 to 1018
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 user_id 208 non-null int64
1 login_time 208 non-null datetime64[ns]
dtypes: datetime64[ns](1), int64(1)
memory usage: 4.9 KB
<ipython-input-269-fd41e73e6ce6>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
[ ]: user_df['shifted'] = user_df['login_time'].shift(1)
(user_df['login_time'] - user_df['shifted']).mean()
<ipython-input-270-091b95be4a6b>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
[ ]: ax = df.plot(subplots=True,
layout=(3, 2),
sharex=False,
sharey=False,
linewidth=0.7,
fontsize=10,
legend=False,
20
figsize=(20,15),
title=['Open', 'High', 'Low', 'Close','Adjusted Close',␣
'Volume'])
↪
[ ]:
21
session-23-matplotlib
May 3, 2024
plt.style.use('default')
• Bivariate Analysis
• categorical -> numerical and numerical -> numerical
• Use case - Time series data
[ ]: # plotting a simple function
price = [48000,54000,57000,49000,47000,45000]
year = [2015,2016,2017,2018,2019,2020]
plt.plot(year,price)
[ ]: [<matplotlib.lines.Line2D at 0x7fb5d79e54f0>]
1
[ ]: # from a pandas dataframe
batsman = pd.read_csv('/content/sharma-kohli.csv')
batsman
plt.plot(batsman['index'],batsman['V Kohli'])
[ ]: [<matplotlib.lines.Line2D at 0x7fb5d682a220>]
2
[ ]: # plotting multiple plots
plt.plot(batsman['index'],batsman['V Kohli'])
plt.plot(batsman['index'],batsman['RG Sharma'])
[ ]: [<matplotlib.lines.Line2D at 0x7fb5d66f6fa0>]
3
[ ]: # labels title
plt.plot(batsman['index'],batsman['V Kohli'])
plt.plot(batsman['index'],batsman['RG Sharma'])
4
[ ]: # colors(hex) and line(width and style) and marker(size)
plt.plot(batsman['index'],batsman['V Kohli'],color='#D9F10F')
plt.plot(batsman['index'],batsman['RG Sharma'],color='#FC00D6')
5
[ ]: plt.plot(batsman['index'],batsman['V␣
↪Kohli'],color='#D9F10F',linestyle='solid',linewidth=3)
plt.plot(batsman['index'],batsman['RG␣
↪Sharma'],color='#FC00D6',linestyle='dashdot',linewidth=2)
6
[ ]: plt.plot(batsman['index'],batsman['V␣
↪Kohli'],color='#D9F10F',linestyle='solid',linewidth=3,marker='D',markersize=10)
plt.plot(batsman['index'],batsman['RG␣
↪Sharma'],color='#FC00D6',linestyle='dashdot',linewidth=2,marker='o')
7
[ ]: # legend -> location
plt.plot(batsman['index'],batsman['V␣
↪Kohli'],color='#D9F10F',linestyle='solid',linewidth=3,marker='D',markersize=10,label='Virat'
plt.plot(batsman['index'],batsman['RG␣
↪Sharma'],color='#FC00D6',linestyle='dashdot',linewidth=2,marker='o',label='Rohit')
plt.legend(loc='upper right')
[ ]: <matplotlib.legend.Legend at 0x7fb5d60124f0>
8
[ ]: # limiting axes
price = [48000,54000,57000,49000,47000,45000,4500000]
year = [2015,2016,2017,2018,2019,2020,2021]
plt.plot(year,price)
plt.ylim(0,75000)
plt.xlim(2017,2019)
[ ]: (2017.0, 2019.0)
9
[ ]: # grid
plt.plot(batsman['index'],batsman['V␣
↪Kohli'],color='#D9F10F',linestyle='solid',linewidth=3,marker='D',markersize=10)
plt.plot(batsman['index'],batsman['RG␣
↪Sharma'],color='#FC00D6',linestyle='dashdot',linewidth=2,marker='o')
plt.grid()
10
[ ]: # show
plt.plot(batsman['index'],batsman['V␣
↪Kohli'],color='#D9F10F',linestyle='solid',linewidth=3,marker='D',markersize=10)
plt.plot(batsman['index'],batsman['RG␣
↪Sharma'],color='#FC00D6',linestyle='dashdot',linewidth=2,marker='o')
plt.grid()
plt.show()
11
1 Scatter Plots
• Bivariate Analysis
• numerical vs numerical
• Use case - Finding correlation
[ ]: # plt.scatter simple function
x = np.linspace(-10,10,50)
y = 10*x + 3 + np.random.randint(0,300,50)
y
12
203.63265306, 182.71428571, 139.79591837, 164.87755102,
67.95918367, 57.04081633, 190.12244898, 51.20408163,
101.28571429, 84.36734694, 31.44897959, 47.53061224,
223.6122449 , 145.69387755, 278.7755102 , 122.85714286,
258.93877551, 174.02040816, 315.10204082, 338.18367347,
363.26530612, 242.34693878, 342.42857143, 376.51020408,
98.59183673, 376.67346939, 95.75510204, 268.83673469,
309.91836735, 324. ])
[ ]: plt.scatter(x,y)
[ ]: <matplotlib.collections.PathCollection at 0x7fb5d5da8850>
13
3 RG Sharma 5881 30.314433 126.964594
4 SK Raina 5536 32.374269 132.535312
5 AB de Villiers 5181 39.853846 148.580442
6 CH Gayle 4997 39.658730 142.121729
7 MS Dhoni 4978 39.196850 130.931089
8 RV Uthappa 4954 27.522222 126.152279
9 KD Karthik 4377 26.852761 129.267572
10 G Gambhir 4217 31.007353 119.665153
11 AT Rayudu 4190 28.896552 124.148148
12 AM Rahane 4074 30.863636 117.575758
13 KL Rahul 3895 46.927711 132.799182
14 SR Watson 3880 30.793651 134.163209
15 MK Pandey 3657 29.731707 117.739858
16 SV Samson 3526 29.140496 132.407060
17 KA Pollard 3437 28.404959 140.457703
18 F du Plessis 3403 34.373737 127.167414
19 YK Pathan 3222 29.290909 138.046272
20 BB McCullum 2882 27.711538 126.848592
21 RR Pant 2851 34.768293 142.550000
22 PA Patel 2848 22.603175 116.625717
23 JC Buttler 2832 39.333333 144.859335
24 SS Iyer 2780 31.235955 121.132898
25 Q de Kock 2767 31.804598 130.951254
26 Yuvraj Singh 2754 24.810811 124.784776
27 V Sehwag 2728 27.555556 148.827059
28 SA Yadav 2644 29.707865 134.009123
29 M Vijay 2619 25.930693 118.614130
30 RA Jadeja 2502 26.617021 122.108346
31 SPD Smith 2495 34.652778 124.812406
32 SE Marsh 2489 39.507937 130.109775
33 DA Miller 2455 36.102941 133.569097
34 JH Kallis 2427 28.552941 105.936272
35 WP Saha 2427 25.281250 124.397745
36 DR Smith 2385 28.392857 132.279534
37 MA Agarwal 2335 22.669903 129.506378
38 SR Tendulkar 2334 33.826087 114.187867
39 GJ Maxwell 2320 25.494505 147.676639
40 N Rana 2181 27.961538 130.053667
41 R Dravid 2174 28.233766 113.347237
42 KS Williamson 2105 36.293103 123.315759
43 AJ Finch 2092 24.904762 123.349057
44 AC Gilchrist 2069 27.223684 133.054662
45 AD Russell 2039 29.985294 168.234323
46 JP Duminy 2029 39.784314 120.773810
47 MEK Hussey 1977 38.764706 119.963592
48 HH Pandya 1972 29.878788 140.256046
49 Shubman Gill 1900 32.203390 122.186495
14
[ ]: plt.scatter(df['avg'],df['strike_rate'],color='red',marker='+')
plt.title('Avg and SR analysis of Top 50 Batsman')
plt.xlabel('Average')
plt.ylabel('SR')
[ ]: # marker
[ ]: # size
tips = sns.load_dataset('tips')
# slower
plt.scatter(tips['total_bill'],tips['tip'],s=tips['size']*20)
[ ]: <matplotlib.collections.PathCollection at 0x7fb5d597f550>
15
[ ]: # scatterplot using plt.plot
# faster
plt.plot(tips['total_bill'],tips['tip'],'o')
[ ]: [<matplotlib.lines.Line2D at 0x7fb5d591ac10>]
16
[ ]: # plt.plot vs plt.scatter
• Bivariate Analysis
• Numerical vs Categorical
• Use case - Aggregate analysis of groups
[ ]: # simple bar chart
children = [10,20,40,10,30]
colors = ['red','blue','green','yellow','pink']
plt.bar(colors,children,color='black')
17
[ ]: # bar chart using data
18
[ ]: # color and label
df = pd.read_csv('/content/batsman_season_record.csv')
df
[ ]: plt.bar(np.arange(df.shape[0]) - 0.2,df['2015'],width=0.2,color='yellow')
plt.bar(np.arange(df.shape[0]),df['2016'],width=0.2,color='red')
plt.bar(np.arange(df.shape[0]) + 0.2,df['2017'],width=0.2,color='blue')
plt.xticks(np.arange(df.shape[0]), df['batsman'])
plt.show()
19
[ ]: np.arange(df.shape[0])
[ ]: array([0, 1, 2, 3, 4])
[ ]: # xticks
[ ]: # a problem
children = [10,20,40,10,30]
colors = ['red red red red red red','blue blue blue blue','green green green␣
↪green green','yellow yellow yellow yellow ','pink pinkpinkpink']
plt.bar(colors,children,color='black')
plt.xticks(rotation='vertical')
20
[ ]: # Stacked Bar chart
plt.bar(df['batsman'],df['2017'],label='2017')
plt.bar(df['batsman'],df['2016'],bottom=df['2017'],label='2016')
plt.bar(df['batsman'],df['2015'],bottom=(df['2016'] + df['2017']),label='2015')
plt.legend()
plt.show()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
21
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in␣
↪get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
/usr/local/lib/python3.8/dist-packages/pandas/_libs/index.pyx in pandas._libs.
↪index.IndexEngine.get_loc()
/usr/local/lib/python3.8/dist-packages/pandas/_libs/index.pyx in pandas._libs.
↪index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.
↪PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.
↪PyObjectHashTable.get_item()
KeyError: '2017'
The above exception was the direct cause of the following exception:
/usr/local/lib/python3.8/dist-packages/pandas/core/frame.py in __getitem__(self,␣
↪key)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in␣
↪get_loc(self, key, method, tolerance)
22
KeyError: '2017'
1.0.2 Histogram
• Univariate Analysis
• Numerical col
• Use case - Frequency Count
[ ]: # simple data
data = [32,45,56,10,15,27,61]
plt.hist(data,bins=[10,25,40,55,70])
[ ]: # on some data
df = pd.read_csv('/content/vk.csv')
23
df
[ ]: match_id batsman_runs
0 12 62
1 17 28
2 20 64
3 27 0
4 30 10
.. … …
136 624 75
137 626 113
138 632 54
139 633 0
140 636 54
[ ]: plt.hist(df['batsman_runs'],bins=[0,10,20,30,40,50,60,70,80,90,100,110,120])
plt.show()
[ ]: # handling bins
24
[ ]: # logarithmic scale
arr = np.load('/content/big-array.npy')
plt.hist(arr,bins=[10,20,30,40,50,60,70],log=True)
plt.show()
• Univariate/Bivariate Analysis
• Categorical vs numerical
• Use case - To find contribution on a standard scale
[ ]: # simple data
data = [23,45,100,20,49]
subjects = ['eng','science','maths','sst','hindi']
plt.pie(data,labels=subjects)
plt.show()
25
[ ]: # dataset
df = pd.read_csv('/content/gayle-175.csv')
df
[ ]: batsman batsman_runs
0 AB de Villiers 31
1 CH Gayle 175
2 R Rampaul 0
3 SS Tiwary 2
4 TM Dilshan 33
5 V Kohli 11
[ ]: plt.pie(df['batsman_runs'],labels=df['batsman'],autopct='%0.1f%%')
plt.show()
26
[ ]: # percentage and colors
plt.pie(df['batsman_runs'],labels=df['batsman'],autopct='%0.
↪1f%%',colors=['blue','green','yellow','pink','cyan','brown'])
plt.show()
27
[ ]: # explode shadow
plt.pie(df['batsman_runs'],labels=df['batsman'],autopct='%0.1f%%',explode=[0.
↪3,0,0,0,0,0.1],shadow=True)
plt.show()
28
1.0.4 Changing styles
[ ]: plt.style.available
[ ]: ['Solarize_Light2',
'_classic_test_patch',
'bmh',
'classic',
'dark_background',
'fast',
'fivethirtyeight',
'ggplot',
'grayscale',
'seaborn',
'seaborn-bright',
'seaborn-colorblind',
'seaborn-dark',
'seaborn-dark-palette',
'seaborn-darkgrid',
'seaborn-deep',
'seaborn-muted',
'seaborn-notebook',
29
'seaborn-paper',
'seaborn-pastel',
'seaborn-poster',
'seaborn-talk',
'seaborn-ticks',
'seaborn-white',
'seaborn-whitegrid',
'tableau-colorblind10']
[ ]: plt.style.use('dark_background')
[ ]: arr = np.load('/content/big-array.npy')
plt.hist(arr,bins=[10,20,30,40,50,60,70],log=True)
plt.show()
[ ]: arr = np.load('/content/big-array.npy')
plt.hist(arr,bins=[10,20,30,40,50,60,70],log=True)
plt.savefig('sample.png')
30
1.0.6 Checkout Doc on website
[ ]:
31
session-24-advanced-matplotlib
May 3, 2024
[ ]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
• Dataset : drive.google.com/drive/folders/17q7WRLJ7hdkA7nk8J_GUTEcZ7WHgPKUg?usp=share_link
[ ]: iris = pd.read_csv('iris.csv')
iris.sample(5)
Species
9 Iris-setosa
73 Iris-versicolor
44 Iris-setosa
51 Iris-versicolor
104 Iris-virginica
[ ]: iris['Species'] = iris['Species'].replace({'Iris-setosa':0,'Iris-versicolor':
↪1,'Iris-virginica':2})
iris.sample(5)
1
[ ]: plt.
↪scatter(iris['SepalLengthCm'],iris['PetalLengthCm'],c=iris['Species'],cmap='jet',alpha=0.
↪7)
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.colorbar()
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e17170bb0>
plt.
↪scatter(iris['SepalLengthCm'],iris['PetalLengthCm'],c=iris['Species'],cmap='jet',alpha=0.
↪7)
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')
plt.colorbar()
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e16ee4430>
2
0.0.3 Annotations
[ ]: batters = pd.read_csv('batter.csv')
[ ]: batters.shape
[ ]: (605, 4)
[ ]: sample_df = df.head(100).sample(25,random_state=5)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-137-839dfd0bcf32> in <module>
----> 1 sample_df = df.head(100).sample(25,random_state=5)
5363 )
5364
-> 5365 locs = rs.choice(axis_length, size=n, replace=replace, p=weights)
5366 result = self.take(locs, axis=axis)
5367 if ignore_index:
mtrand.pyx in numpy.random.mtrand.RandomState.choice()
3
[ ]: sample_df
[ ]: plt.figure(figsize=(18,10))
plt.scatter(sample_df['avg'],sample_df['strike_rate'],s=sample_df['runs'])
for i in range(sample_df.shape[0]):
plt.text(sample_df['avg'].values[i],sample_df['strike_rate'].
↪values[i],sample_df['batter'].values[i])
4
[ ]: x = [1,2,3,4]
y = [5,6,7,8]
plt.scatter(x,y)
plt.text(1,5,'Point 1')
plt.text(2,6,'Point 2')
plt.text(3,7,'Point 3')
plt.text(4,8,'Point 4',fontdict={'size':12,'color':'brown'})
5
0.0.4 Horizontal and Vertical lines
[ ]: plt.figure(figsize=(18,10))
plt.scatter(sample_df['avg'],sample_df['strike_rate'],s=sample_df['runs'])
plt.axhline(130,color='red')
plt.axhline(140,color='green')
plt.axvline(30,color='red')
for i in range(sample_df.shape[0]):
plt.text(sample_df['avg'].values[i],sample_df['strike_rate'].
↪values[i],sample_df['batter'].values[i])
6
0.0.5 Subplots
[ ]: plt.figure(figsize=(15,6))
plt.scatter(batters['avg'],batters['strike_rate'])
plt.title('Something')
plt.xlabel('Avg')
plt.ylabel('Strike Rate')
plt.show()
7
[ ]: fig,ax = plt.subplots(figsize=(15,6))
ax.scatter(batters['avg'],batters['strike_rate'],color='red',marker='+')
ax.set_title('Something')
ax.set_xlabel('Avg')
ax.set_ylabel('Strike Rate')
fig.show()
[ ]: # batter dataset
[ ]: fig, ax = plt.subplots(nrows=2,ncols=1,sharex=True,figsize=(10,6))
ax[0].scatter(batters['avg'],batters['strike_rate'],color='red')
8
ax[1].scatter(batters['avg'],batters['runs'])
ax[1].set_title('Avg Vs Runs')
ax[1].set_ylabel('Runs')
ax[1].set_xlabel('Avg')
[ ]: Text(0.5, 0, 'Avg')
[ ]: fig, ax = plt.subplots(nrows=2,ncols=2,figsize=(10,10))
ax[0,0].
ax[0,1].scatter(batters['avg'],batters['runs'])
ax[1,0].hist(batters['avg'])
ax[1,1].hist(batters['runs'])
[ ]: (array([499., 40., 19., 19., 9., 6., 4., 4., 3., 2.]),
array([ 0. , 663.4, 1326.8, 1990.2, 2653.6, 3317. , 3980.4, 4643.8,
5307.2, 5970.6, 6634. ]),
<a list of 10 Patch objects>)
9
[ ]: fig = plt.figure()
ax1 = fig.add_subplot(2,2,1)
ax1.scatter(batters['avg'],batters['strike_rate'],color='red')
ax2 = fig.add_subplot(2,2,2)
ax2.hist(batters['runs'])
ax3 = fig.add_subplot(2,2,3)
ax3.hist(batters['avg'])
[ ]: (array([102., 125., 103., 82., 78., 43., 22., 14., 2., 1.]),
array([ 0. , 5.56666667, 11.13333333, 16.7 , 22.26666667,
27.83333333, 33.4 , 38.96666667, 44.53333333, 50.1 ,
10
55.66666667]),
<a list of 10 Patch objects>)
[ ]: fig, ax = plt.subplots(nrows=2,ncols=2,sharex=True,figsize=(10,10))
ax[1,1]
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e15913c10>
11
0.0.6 3D Scatter Plots
[ ]: batters
fig = plt.figure()
ax = plt.subplot(projection='3d')
ax.scatter3D(batters['runs'],batters['avg'],batters['strike_rate'],marker='+')
ax.set_title('IPL batsman analysis')
ax.set_xlabel('Runs')
ax.set_ylabel('Avg')
ax.set_zlabel('SR')
12
[ ]: Text(0.5, 0, 'SR')
fig = plt.figure()
ax = plt.subplot(projection='3d')
ax.scatter3D(x,y,z,s=[100,100,100,100])
ax.plot3D(x,y,z,color='red')
[ ]: [<mpl_toolkits.mplot3d.art3d.Line3D at 0x7f5e14d13f10>]
13
0.0.8 3D Surface Plots
[ ]: x = np.linspace(-10,10,100)
y = np.linspace(-10,10,100)
[ ]: xx, yy = np.meshgrid(x,y)
[ ]: (100, 100)
[ ]: z = xx**2 + yy**2
z.shape
[ ]: (100, 100)
[ ]: fig = plt.figure(figsize=(12,8))
ax = plt.subplot(projection='3d')
p = ax.plot_surface(xx,yy,z,cmap='viridis')
fig.colorbar(p)
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e141ac970>
14
[ ]: z = np.sin(xx) + np.cos(yy)
fig = plt.figure(figsize=(12,8))
ax = plt.subplot(projection='3d')
p = ax.plot_surface(xx,yy,z,cmap='viridis')
fig.colorbar(p)
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e14076be0>
15
[ ]: z = np.sin(xx) + np.log(xx)
fig = plt.figure(figsize=(12,8))
ax = plt.subplot(projection='3d')
p = ax.plot_surface(xx,yy,z,cmap='viridis')
fig.colorbar(p)
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e139a4a00>
16
[ ]: fig = plt.figure(figsize=(12,8))
ax = plt.subplot(projection='3d')
p = ax.plot_surface(xx,yy,z,cmap='viridis')
fig.colorbar(p)
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e136f8970>
17
0.0.9 Contour Plots
[ ]: fig = plt.figure(figsize=(12,8))
ax = plt.subplot()
p = ax.contour(xx,yy,z,cmap='viridis')
fig.colorbar(p)
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e13580a30>
18
[ ]: fig = plt.figure(figsize=(12,8))
ax = plt.subplot()
p = ax.contourf(xx,yy,z,cmap='viridis')
fig.colorbar(p)
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e14f202b0>
19
[ ]: z = np.sin(xx) + np.cos(yy)
fig = plt.figure(figsize=(12,8))
ax = plt.subplot()
p = ax.contourf(xx,yy,z,cmap='viridis')
fig.colorbar(p)
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e14d5a2e0>
20
0.0.10 Heatmap
[ ]: delivery = pd.read_csv('/content/IPL_Ball_by_Ball_2008_2022.csv')
delivery.head()
21
2 0 NaN NaN NaN Rajasthan Royals
3 0 NaN NaN NaN Rajasthan Royals
4 0 NaN NaN NaN Rajasthan Royals
[ ]: grid = temp_df.
↪pivot_table(index='overs',columns='ballnumber',values='batsman_run',aggfunc='count')
[ ]: plt.figure(figsize=(20,10))
plt.imshow(grid)
plt.yticks(delivery['overs'].unique(), list(range(1,21)))
plt.xticks(np.arange(0,6), list(range(1,7)))
plt.colorbar()
[ ]: <matplotlib.colorbar.Colorbar at 0x7f5e12f98cd0>
22
[ ]:
23
0.0.11 Pandas Plot()
[ ]: # on a series
s = pd.Series([1,2,3,4,5,6,7])
s.plot(kind='pie')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e12f0a070>
[ ]:
[ ]: tips.head()
24
[ ]: # Scatter plot -> labels -> markers -> figsize -> color -> cmap
tips.plot(kind='scatter',x='total_bill',y='tip',title='Cost␣
↪Analysis',marker='+',figsize=(10,6),s='size',c='sex',cmap='viridis')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e12b4d760>
[ ]: # 2d plot
# dataset = 'https://raw.githubusercontent.com/m-mehdi/pandas_tutorials/main/
↪weekly_stocks.csv'
stocks = pd.read_csv('https://raw.githubusercontent.com/m-mehdi/
↪pandas_tutorials/main/weekly_stocks.csv')
stocks.head()
[ ]: # line plot
stocks['MSFT'].plot(kind='line')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e12a55730>
25
[ ]: stocks.plot(kind='line',x='Date')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e129f15e0>
26
[ ]: stocks[['Date','AAPL','FB']].plot(kind='line',x='Date')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e12950fa0>
[ ]: tips.groupby('sex')['total_bill'].mean().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e12350550>
27
[ ]: temp['2015'].plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e123ceaf0>
28
[ ]: temp.plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e1228fac0>
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e12216e50>
29
[ ]: # histogram
# using stocks
stocks[['MSFT','FB']].plot(kind='hist',bins=40)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e150247f0>
30
[ ]: # pie -> single and multiple
df = pd.DataFrame(
{
'batsman':['Dhawan','Rohit','Kohli','SKY','Pandya','Pant'],
'match1':[120,90,35,45,12,10],
'match2':[0,1,123,130,34,45],
'match3':[50,24,145,45,10,90]
}
)
df.head()
[ ]: df['match1'].plot(kind='pie',labels=df['batsman'].values,autopct='%0.1f%%')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f5e11e50790>
31
[ ]: # multiple pie charts
df[['match1','match2','match3']].plot(kind='pie',subplots=True,figsize=(15,8))
stocks.plot(kind='line',subplots=True)
32
[ ]: # on multiindex dataframes
# using tips
[ ]: tips.
↪pivot_table(index=['day','time'],columns=['sex','smoker'],values='total_bill',aggfunc='mean'
↪plot(kind='pie',subplots=True,figsize=(20,10))
[ ]: tips
33
[ ]: total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 200
1 10.34 1.66 Male No Sun Dinner 300
2 21.01 3.50 Male No Sun Dinner 300
3 23.68 3.31 Male No Sun Dinner 200
4 24.59 3.61 Female No Sun Dinner 400
.. … … … … … … …
239 29.03 5.92 Male No Sat Dinner 300
240 27.18 2.00 Female Yes Sat Dinner 200
241 22.67 2.00 Male Yes Sat Dinner 200
242 17.82 1.75 Male No Sat Dinner 200
243 18.78 3.00 Female No Thur Dinner 200
[ ]: stocks.plot(kind='scatter3D')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-321-4e91fa40f850> in <module>
----> 1 stocks.plot(kind='scatter3D')
/usr/local/lib/python3.8/dist-packages/pandas/plotting/_core.py in␣
↪__call__(self, *args, **kwargs)
903
904 if kind not in self._all_kinds:
--> 905 raise ValueError(f"{kind} is not a valid plot kind")
906
907 # The original data structured can be transformed before passed␣
↪to the
[ ]:
34
session-on-plotly
May 3, 2024
Advantages
• Multi language support
• Lots of graphs
• Interactive plots
• Beautiful plots
Does not work with live data streams. Dash can be explored for that.
[ ]: # import datasets
tips = px.data.tips()
iris = px.data.iris()
gap = px.data.gapminder()
[ ]: gap.head()
1
2 Afghanistan Asia 1962 31.997 10267083 853.100710 AFG
3 Afghanistan Asia 1967 34.020 11537966 836.197138 AFG
4 Afghanistan Asia 1972 36.088 13079460 739.981106 AFG
iso_num
0 4
1 4
2 4
3 4
4 4
iso_alpha iso_num
11 AFG 4
23 ALB 8
35 DZA 12
47 AGO 24
59 ARG 32
… … …
1655 VNM 704
1667 PSE 275
1679 YEM 887
1691 ZMB 894
1703 ZWE 716
[ ]: trace1 = go.Scatter(x=temp_df['lifeExp'],y=temp_df['gdpPercap'],mode='markers')
trace2 = go.Scatter(x=[0,1,2],y=[0,90,30000],mode='lines')
2
data = [trace1,trace2]
fig = go.Figure(data,layout)
fig.show()
[ ]: # plot life exp and gdp scatter plot -> continent as color -> pop as size ->␣
↪hover name -> range_x/range_y -> log_x/log_y
px.scatter(temp_df, x='lifeExp',␣
↪y='gdpPercap',color='continent',size='pop',size_max=100,␣
↪hover_name='country')
[ ]: # line plot
# plot india pop line plot
temp_df = gap[gap['country'] == 'India']
temp_df
3
[ ]: px.line(temp_df, x=temp_df.index, y=temp_df.columns)
[ ]: # bar chart
# india's pop over the years
temp_df = gap[gap['country'] == 'India']
px.bar(temp_df,x='year',y='pop')
temp_df
[ ]: # histogram
# plot histogram of life expectancy of all countries in 2007 -> nbins -> text_auto
temp_df = gap[gap['year'] == 2007]
px.histogram(temp_df, x='lifeExp',nbins=10,text_auto=True)
4
[ ]: # plot histogram of sepal length of all iris species
px.histogram(iris,x='sepal_length',color='species',nbins=30,text_auto=True)
[ ]: # plot pie chart of world pop in 1952 continent wise -> -> explode(pull)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-136-91f06935b01d> in <module>
2
3 temp_df = gap[gap['year'] == 1952].groupby('continent')['pop'].sum().
↪reset_index()
[ ]: # Sunburst plot -> Sunburst plots visualize hierarchical data spanning outwards␣
↪radially from root to leaves. -> color
px.sunburst(temp_df, path=['continent','country'],values='pop',color='lifeExp')
[ ]: px.
↪sunburst(tips,path=['sex','smoker','day','time'],values='total_bill',color='size')
[ ]: # Treemap
temp_df = gap[gap['year'] == 2007]
px.treemap(temp_df, path=[px.
↪Constant('World'),'continent','country'],values='pop',color='lifeExp')
[ ]: # Heatmap -> find heatmap of all continents with year on avg life exp
#temp_df = tips.
↪pivot_table(index='day',columns='sex',values='total_bill',aggfunc='sum')
5
temp_df = gap.
↪pivot_table(index='year',columns='continent',values='lifeExp',aggfunc='mean')
px.imshow(temp_df)
[ ]: # 3d scatterplot
# plot a 3d scatter plot of all country data for 2007
temp_df = gap[gap['year'] == 2007]
px.scatter_3d(temp_df,␣
↪x='lifeExp',y='pop',z='gdpPercap',log_y=True,color='continent',hover_name='country')
[ ]: px.
↪scatter_3d(iris,x='sepal_length',y='sepal_width',z='petal_length',color='species')
[ ]: import plotly.graph_objects as go
import plotly.express as px
import numpy as np
[ ]: # facet plot
tips = px.data.tips()
gap = px.data.gapminder()
[ ]: px.histogram(tips,x='total_bill',facet_row='sex')
[ ]: # 3d Surface plot
# can not be created using Plotly express
# we will use plotly graph object -> go
x = np.linspace(-10,10,100)
y = np.linspace(-10,10,100)
xx, yy = np.meshgrid(x,y)
z = xx**2 + yy**2
# z = np.sin(xx) + np.tan(yy)
# z = np.sqrt(xx**2 + yy**2)
6
trace = go.Surface(x=x,y=y,z=z)
data = [trace]
fig = go.Figure(data,layout)
fig.show()
[ ]: # Contour plot
x = np.linspace(-10,10,100)
y = np.linspace(-10,10,100)
xx, yy = np.meshgrid(x,y)
# z = xx**2 + yy**2
z = np.sin(xx) + np.cos(yy)
# z = np.sqrt(xx**2 + yy**2)
trace = go.Contour(x=x,y=y,z=z)
data = [trace]
fig = go.Figure(data,layout)
fig.show()
[ ]: # Subplots
from plotly.subplots import make_subplots
[ ]: fig = make_subplots(rows=2,cols=2)
[ ]: fig.add_trace(
go.Scatter(x=[1,9,5],y=[2,10,1]),
row = 1,
col = 1
)
fig.add_trace(
go.Histogram(x=[1,9,5,22,109,134,56,78,12,34,89]),
row = 1,
col = 2
)
7
fig.add_trace(
px.Scatter(x=[1,9,5],y=[2,10,1]),
row = 2,
col = 1
)
fig.add_trace(
go.Histogram(x=[1,9,5,22,109,134,56,78,12,34,89]),
row = 2,
col = 2
)
fig.update_layout(title='Subplot Demo')
fig.show()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-50-0e27d6ad3108> in <module>
12
13 fig.add_trace(
---> 14 px.Scatter(x=[1,9,5],y=[2,10,1]),
15 row = 2,
16 col = 1
[ ]:
8
seaborn-themeing-and-color-palettes
May 3, 2024
tips = sns.load_dataset('tips')
1 Theming
• set_theme
Set aspects of the visual theme for all matplotlib and seaborn plots.
• axes_style
Get the parameters that control the general style of the plots.
• set_style
Set the parameters that control the general style of the plots.
• plotting_context
Get the parameters that control the scaling of plot elements.
• set_context
Set the parameters that control the scaling of plot elements.
• set_color_codes
Change how matplotlib color shorthands are interpreted.
• reset_defaults
Restore all RC params to default settings.
• reset_orig
Restore all RC params to original settings (respects custom rc).
1
Example:
[ ]: tips.head()
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df79f8d60>
[ ]: # Using whitegrid
sns.set_theme(style='whitegrid')
sns.barplot(x=["A", "B", "C"], y=[1, 3, 2])
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df79c6a00>
2
[ ]: # Using dark background
sns.set_theme(style='dark')
sns.barplot(x=["A", "B", "C"], y=[1, 3, 2])
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df799a730>
3
1.2 axes_style function :
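This function returns the parameters that control the style of the axes of your plots; calling it on
its own does not change the current style. It accepts a style name such as ‘white’, ‘dark’ or ‘ticks’,
or a dictionary with key-value pairs of valid style options, and it can also be used as a context
manager to apply a style temporarily.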
[ ]: # Example:
sns.axes_style(style = 'white')
sns.barplot(x=["A", "B", "C"], y=[1, 3, 2])
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df70b56d0>
4
[ ]: # Use the function as a context manager to temporarily change the style of your␣
↪plots:
with sns.axes_style("white"):
sns.barplot(x=[1, 2, 3], y=[2, 5, 3])
5
[ ]: # Load the iris data
iris = sns.load_dataset("iris")
6
1.3 Scaling Figure Styles - sns.set_context()
Matplotlib allows you to generate powerful plots, but styling those plots for different presentation
purposes is difficult. Seaborn makes it easy to produce the same plots in a variety of different visual
formats so you can customize the presentation of your data for the appropriate context, whether
it be a research paper or a conference poster.
You can set the visual format, or context, using sns.set_context()
Within the usage of sns.set_context(), there are three levels of complexity:
• Pass in one parameter that adjusts the scale of the plot
• Pass in two parameters - one for the scale and the other for the font size
• Pass in three parameters - including the previous two, as well as the rc with the style parameter
that you want to override
7
default.
[ ]: sns.set_style("ticks")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df8265700>
[ ]: sns.set_style("ticks")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df82730d0>
8
1.3.2 Scaling Fonts and Line Widths
You are also able to change the size of the text using the font_scale parameter for
sns.set_context()
You may want to also change the line width so it matches. We do this with the rc parameter, which
we’ll explain in detail below.
[ ]: # Set font scale and reduce grid line width to match
sns.set_style("darkgrid")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df7dbe7c0>
9
While you’re able to change these parameters, you should keep in mind that it’s not
always useful to make certain changes. Notice in this example that we’ve changed the
line width, but because of its relative size to the plot, it distracts from the actual
plotted data.
[ ]: # Set font scale and increase grid line width to match
sns.set_context("poster", font_scale = .8, rc={"grid.linewidth": 5})
sns.stripplot(x="day", y="total_bill", data=tips)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df7e57400>
10
1.3.3 The RC Parameter
As we mentioned above, if you want to override any of these standards, you can use
sns.set_context and pass in the parameter rc to target and reset the value of an individual
parameter in a dictionary. rc stands for the phrase ‘run command’ - essentially, configurations
which will execute when you run your code.
[ ]: # Plotting context function
sns.plotting_context()
# These are the properties you can tweak via the rc parameter
[ ]: {'font.size': 19.200000000000003,
'axes.labelsize': 19.200000000000003,
'axes.titlesize': 19.200000000000003,
'xtick.labelsize': 17.6,
'ytick.labelsize': 17.6,
'legend.fontsize': 17.6,
'axes.linewidth': 2.5,
'grid.linewidth': 5.0,
11
'lines.linewidth': 3.0,
'lines.markersize': 12.0,
'patch.linewidth': 2.0,
'xtick.major.width': 2.5,
'ytick.major.width': 2.5,
'xtick.minor.width': 2.0,
'ytick.minor.width': 2.0,
'xtick.major.size': 12.0,
'ytick.major.size': 12.0,
'xtick.minor.size': 8.0,
'ytick.minor.size': 8.0,
'legend.title_fontsize': 19.200000000000003}
1.4 seaborn.set_color_codes(palette=’deep’)
Change how matplotlib color shorthands are interpreted.
Calling this will change how shorthand codes like “b” or “g” are interpreted by matplotlib in
subsequent plots.
Parameters: > palette : {deep, muted, pastel, dark, bright, colorblind} Named seaborn
palette to use as the source of colors.
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f8df7cf7e20>
12
[ ]: # 'b' color code with pastel palette
sns.set_color_codes(palette='pastel')
sns.relplot(data=tips, x='total_bill', y='tip', color='b')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f8df7f5d280>
13
2 Color palettes
• set_palette
Set the matplotlib color cycle using a seaborn palette.
• color_palette
Return a list of colors or continuous colormap defining a palette.
• husl_palette
Return hues with constant lightness and saturation in the HUSL system.
• hls_palette
Return hues with constant lightness and saturation in the HLS system.
• cubehelix_palette
Make a sequential palette from the cubehelix system.
• dark_palette
14
Make a sequential palette that blends from dark to color.
• light_palette
Make a sequential palette that blends from light to color.
• diverging_palette
Make a diverging palette between two HUSL colors.
• blend_palette
Make a palette that blends between a list of colors.
• xkcd_palette
Make a palette with color names from the xkcd color survey.
• crayon_palette
Make a palette with color names from Crayola crayons.
• mpl_palette
Return a palette or colormap from the matplotlib registry.
2.0.1 color_palette
https://seaborn.pydata.org/generated/seaborn.color_palette.html#seaborn.color_palette
In Seaborn, the color_palette() function allows you to easily specify the colors for your plots. You
can use pre-defined palettes, such as “deep”, “muted”, “pastel”, “bright”, “dark”, and “colorblind”,
or you can create your own custom palette.
When using a pre-defined palette, you can specify the number of colors you want to use by passing
in the desired number as the argument.
For example, using the “deep” palette and specifying 6 colors will return an array of 6 RGB color
codes that can be used in your plot.
[ ]: deep_colors = sns.color_palette("deep", 6)
You can also create your own custom color palette by passing in a list of RGB color codes.
15
[ ]: colors = ["#00CD00", "#00FFAA", "#FCCC00", "#FF0000"]
colors = sns.color_palette(colors)
sns.palplot(colors)
plt.show()
The as_cmap parameter in seaborn’s color_palette function is a boolean flag that, when set to
True, returns a colormap object instead of a list of RGB values. This can be useful when plotting
data that needs to be colored based on a continuous variable, such as a heatmap or a 2D histogram.
The colormap can then be passed to other plotting functions, such as heatmap or imshow, to color
the plotted data. An example of using color_palette with as_cmap is:
[ ]: data = tips.corr()
16
[ ]: data = tips.corr()
17
2.1 set_palette
The set_palette() function in seaborn allows you to specify a color palette for your plots. This can
be done by passing in one of the pre-defined seaborn palettes (such as “deep”, “muted”, “bright”,
etc.) or by passing in your own custom list of colors or color_palette.
Here is an example of using set_palette() to specify the “deep” palette:
18
[ ]: # Loading different data
import plotly.express as px
gap = px.data.gapminder()
gap.head()
iso_num
0 4
1 4
2 4
3 4
4 4
19
You can also pass in a custom list of colors. For example, the following code would set the palette
to the colors red, blue, and green:
[ ]: temp_df = gap[gap['country'].isin(['India','Brazil','Germany'])]
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f8df770bf40>
You can also pass in a number of different arguments to set_palette. For example, the following
code sets the palette to the "husl" color system with 8 colors, each desaturated to 0.7 of its original saturation:
[ ]: sns.set_palette("husl", 8, .7)
sns.relplot(data=temp_df, kind='scatter', x='year', y='lifeExp', hue='country')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f8df7fcd5b0>
20
Now say we have set a palette with only three colors, as we did above, and want to draw line plots
for 4 or more countries.
What color will be assigned to the 4th country? Let’s see.
[ ]: temp_df = gap[gap['country'].isin(['India','Brazil','Germany','Afghanistan'])]
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f8df76fcc10>
21
Notice that the plot uses husl-style colors, like the palette we set earlier with sns.set_palette(“husl”, 8, .7),
even though we specified the palette as ['red', 'blue', 'green']: three colors are not enough for four countries.
[ ]: temp_df = gap[gap['country'].isin(['India','Brazil','Germany','Afghanistan'])]
# This will give right expected result as it has enough colors in the palette␣
↪to show.
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f8df76d9d00>
22
2.2 seaborn.husl_palette
seaborn.husl_palette(n_colors=6, h=0.01, s=0.9, l=0.65, as_cmap=False) > Return
hues with constant lightness and saturation in the HUSL system.
The hues are evenly sampled along a circular path. The resulting palette will be appro-
priate for categorical or cyclical data.
The h, l, and s values should be between 0 and 1.
Parameters: > n_colors : int Number of colors in the palette.
h : float (0-1) The value of the first hue.
l : float (0-1) The lightness value.
s: float (0-1) The saturation intensity.
as_cmap : bool If True, return a matplotlib colormap object.
sns.hls_palette() > This function is similar to husl_palette(), but it returns hues with constant
lightness and saturation in the HLS system, a color space that is less perceptually uniform than HUSL.
We can also pass the ‘husl’ or ‘hls’ palette name to the set_palette function for the same effect, as we
did in the example above.
23
[ ]: # Iris data loading
iris = sns.load_dataset('iris')
2.3 cubehelix_palette
The seaborn.cubehelix_palette function is used to generate a colormap based on the cubehelix
color scheme, which is a sequential color map with a linear increase in brightness and a smooth
progression through the hues of the spectrum. This function takes several optional parameters such
as start, rot, gamma, light, dark, reverse and as_cmap to control the properties of the color
palette.
For example, the following code generates a cubehelix color palette with 8 colors, with the helix starting at
hue 0.5, a rotation of -0.75, and (because reverse=True) brightness increasing from dark to light:
[ ]: colors = sns.cubehelix_palette(8, start=.5, rot=-.75, gamma=.3, light=.9, dark=.
↪1, reverse=True)
sns.palplot(colors)
This palette can be used to color various plotting elements such as bars, lines, and points in a
graph.
[ ]: sns.barplot(x='species', y='petal_length', data=iris, palette=colors)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df7503280>
24
Alternatively, it can also be passed as a colormap to a heatmap or a 2D histogram.
[ ]: sns.heatmap(iris.corr(), cmap=sns.cubehelix_palette(8, start=.5, rot=-.75,␣
↪gamma=.3, light=.9, dark=.1, as_cmap=True))
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f8df7529ca0>
25
All the functions mentioned in this coloring section have similar implementations.
Follow the link below to read about a specific palette - https://seaborn.pydata.org/api.html#color-palettes
26
session-25-seaborn
May 3, 2024
[ ]: tips = sns.load_dataset('tips')
tips
1
[ ]: total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
.. … … … … … … …
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f9585634670>
2
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9585625820>
[ ]: # style semantics
[ ]: # line plot
gap = px.data.gapminder()
temp_df = gap[gap['country'] == 'India']
temp_df
3
702 India Asia 1982 56.596 708000000 855.723538 IND
703 India Asia 1987 58.553 788000000 976.512676 IND
704 India Asia 1992 60.223 872000000 1164.406809 IND
705 India Asia 1997 61.765 959000000 1458.817442 IND
706 India Asia 2002 62.879 1034172547 1746.769454 IND
707 India Asia 2007 64.698 1110396331 2452.210407 IND
iso_num
696 356
697 356
698 356
699 356
700 356
701 356
702 356
703 356
704 356
705 356
706 356
707 356
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f95854b7c70>
4
[ ]: # using relplot
sns.relplot(data=temp_df, x='year', y='lifeExp', kind='line')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9585427a60>
5
175 Brazil Americas 1987 65.205 142938076 7807.095818 BRA
176 Brazil Americas 1992 67.057 155975974 6950.283021 BRA
177 Brazil Americas 1997 69.388 168546719 7957.980824 BRA
178 Brazil Americas 2002 71.006 179914212 8131.212843 BRA
179 Brazil Americas 2007 72.390 190010647 9065.800825 BRA
564 Germany Europe 1952 67.500 69145952 7144.114393 DEU
565 Germany Europe 1957 69.100 71019069 10187.826650 DEU
566 Germany Europe 1962 70.300 73739117 12902.462910 DEU
567 Germany Europe 1967 70.800 76368453 14745.625610 DEU
568 Germany Europe 1972 71.000 78717088 18016.180270 DEU
569 Germany Europe 1977 72.500 78160773 20512.921230 DEU
570 Germany Europe 1982 73.800 78335266 22031.532740 DEU
571 Germany Europe 1987 74.847 77718298 24639.185660 DEU
572 Germany Europe 1992 76.070 80597764 26505.303170 DEU
573 Germany Europe 1997 77.340 82011073 27788.884160 DEU
574 Germany Europe 2002 78.670 82350671 30035.801980 DEU
575 Germany Europe 2007 79.406 82400996 32170.374420 DEU
696 India Asia 1952 37.373 372000000 546.565749 IND
697 India Asia 1957 40.249 409000000 590.061996 IND
698 India Asia 1962 43.605 454000000 658.347151 IND
699 India Asia 1967 47.193 506000000 700.770611 IND
700 India Asia 1972 50.651 567000000 724.032527 IND
701 India Asia 1977 54.208 634000000 813.337323 IND
702 India Asia 1982 56.596 708000000 855.723538 IND
703 India Asia 1987 58.553 788000000 976.512676 IND
704 India Asia 1992 60.223 872000000 1164.406809 IND
705 India Asia 1997 61.765 959000000 1458.817442 IND
706 India Asia 2002 62.879 1034172547 1746.769454 IND
707 India Asia 2007 64.698 1110396331 2452.210407 IND
iso_num
168 76
169 76
170 76
171 76
172 76
173 76
174 76
175 76
176 76
177 76
178 76
179 76
564 276
565 276
566 276
567 276
6
568 276
569 276
570 276
571 276
572 276
573 276
574 276
575 276
696 356
697 356
698 356
699 356
700 356
701 356
702 356
703 356
704 356
705 356
706 356
707 356
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9585258c70>
7
[ ]: sns.lineplot(data=temp_df, x='year', y='lifeExp', hue='country')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f95853837c0>
8
[ ]: # facet plot -> figure level function -> work with relplot
# it will not work with scatterplot and lineplot
sns.relplot(data=tips, x='total_bill', y='tip', kind='line', col='sex',␣
↪row='day')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9584b8f8b0>
9
10
[ ]: # col wrap
sns.relplot(data=gap, x='lifeExp', y='gdpPercap', kind='scatter', col='year',␣
↪col_wrap=3)
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f95844efb80>
11
[ ]: sns.scatterplot(data=tips, x='total_bill', y='tip', col='sex', row='day')
12
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-199-13fa0b1b528e> in <module>
----> 1 sns.scatterplot(data=tips, x='total_bill', y='tip', col='sex', row='day')
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py in inner_f(*args,␣
↪**kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
/usr/local/lib/python3.8/dist-packages/seaborn/relational.py in scatterplot(x,␣
↪y, hue, style, size, data, palette, hue_order, hue_norm, sizes, size_order,␣
↪size_norm, markers, style_order, x_bins, y_bins, units, estimator, ci, n_boot,␣
825 p._attach(ax)
826
--> 827 p.plot(ax, kwargs)
828
829 return ax
606 )
607 scout_x = scout_y = np.full(scout_size, np.nan)
--> 608 scout = ax.scatter(scout_x, scout_y, **kws)
609 s = kws.pop("s", scout.get_sizes())
610 c = kws.pop("c", scout.get_facecolors())
/usr/local/lib/python3.8/dist-packages/matplotlib/cbook/deprecation.py in␣
↪wrapper(*args, **kwargs)
13
/usr/local/lib/python3.8/dist-packages/matplotlib/axes/_axes.py in scatter(self,␣
↪x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts,␣
4441 )
4442 collection.set_transform(mtransforms.IdentityTransform())
-> 4443 collection.update(kwargs)
4444
4445 if colors is None:
/usr/local/lib/python3.8/dist-packages/matplotlib/artist.py in update(self,␣
↪props)
1004
1005 with cbook._setattr_cm(self, eventson=False):
-> 1006 ret = [_update_property(self, k, v) for k, v in props.
↪items()]
1007
1008 if len(ret):
/usr/local/lib/python3.8/dist-packages/matplotlib/artist.py in <listcomp>(.0)
1004
1005 with cbook._setattr_cm(self, eventson=False):
-> 1006 ret = [_update_property(self, k, v) for k, v in props.
↪items()]
1007
1008 if len(ret):
/usr/local/lib/python3.8/dist-packages/matplotlib/artist.py in␣
↪_update_property(self, k, v)
14
0.0.4 2. Distribution Plots
• used for univariate analysis
• used to find out the distribution
• Range of the observation
• Central Tendency
• is the data bimodal?
• Are there outliers?
Plots under distribution plot
• histplot
• kdeplot
• rugplot
[ ]: # figure level -> displot
# axes level -> histplot -> kdeplot -> rugplot
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f9583d7dbb0>
15
[ ]: sns.displot(data=tips, x='total_bill', kind='hist')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9583ebc7f0>
16
[ ]: # bins parameter
sns.displot(data=tips, x='total_bill', kind='hist',bins=2)
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9583c93280>
17
[ ]: # It’s also possible to visualize the distribution of a categorical variable␣
↪using the logic of a histogram.
# countplot
sns.displot(data=tips, x='day', kind='hist')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f958517d9d0>
18
[ ]: # hue parameter
sns.displot(data=tips, x='tip', kind='hist',hue='sex')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9583d05280>
19
[ ]: # element -> step
sns.displot(data=tips, x='tip', kind='hist',hue='sex',element='step')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9583c2cfa0>
20
[ ]: titanic = sns.load_dataset('titanic')
titanic
21
1 woman False C Cherbourg yes False
2 woman False NaN Southampton yes True
3 woman False C Southampton yes False
4 man True NaN Southampton no True
.. … … … … … …
886 man True NaN Southampton no True
887 woman False B Southampton yes True
888 woman False NaN Southampton no False
889 man True C Cherbourg yes True
890 man True NaN Queenstown no True
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9583c1b9d0>
22
[ ]: # faceting using col and row -> not work on histplot function
sns.displot(data=tips, x='tip', kind='hist',col='sex',element='step')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f9583904850>
[ ]: # kdeplot
# Rather than using discrete bins, a KDE plot smooths the observations with a␣
↪Gaussian kernel, producing a continuous density estimate
sns.kdeplot(data=tips,x='total_bill')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f958384a160>
23
[ ]: sns.displot(data=tips,x='total_bill',kind='kde')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f95838c2790>
24
[ ]: # hue -> fill
sns.
↪displot(data=tips,x='total_bill',kind='kde',hue='sex',fill=True,height=10,aspect=2)
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f95821a35b0>
25
[ ]: # Rugplot
sns.kdeplot(data=tips,x='total_bill')
sns.rugplot(data=tips,x='total_bill')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f95836ff0d0>
26
[ ]: # Bivariate histogram
# A bivariate histogram bins the data within rectangles that tile the plot
# and then shows the count of observations within each rectangle with the fill␣
↪color
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f958362fc10>
27
[ ]: # Bivariate Kdeplot
# a bivariate KDE plot smoothes the (x, y) observations with a 2D Gaussian
sns.kdeplot(data=tips, x='total_bill', y='tip')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f95835b60d0>
28
0.0.5 3. Matrix Plot
• Heatmap
• Clustermap
[ ]: # Heatmap
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f958387d910>
29
[ ]: # annot
temp_df = gap[gap['continent'] == 'Europe'].
↪pivot(index='country',columns='year',values='lifeExp')
plt.figure(figsize=(15,15))
sns.heatmap(temp_df,annot=True,linewidth=0.5, cmap='summer')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f9584de7af0>
30
[ ]: # linewidth
[ ]: # cmap
[ ]: # Clustermap
iris = px.data.iris()
iris
31
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
.. … … … … …
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica
species_id
0 1
1 1
2 1
3 1
4 1
.. …
145 3
146 3
147 3
148 3
149 3
[ ]: sns.clustermap(iris.iloc[:,[0,1,2,3]])
[ ]: <seaborn.matrix.ClusterGrid at 0x7f958226c580>
32
[ ]:
33
session-26-seaborn-continued
May 3, 2024
[ ]: # import datasets
tips = sns.load_dataset('tips')
iris = sns.load_dataset('iris')
[ ]: sns.scatterplot(data=tips, x='total_bill',y='tip')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc63bca430>
1
[ ]: # strip plot
# axes level function
sns.stripplot(data=tips,x='day',y='total_bill')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc63ae0790>
2
[ ]: # using catplot
# figure level function
sns.catplot(data=tips, x='day',y='total_bill',kind='strip')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc63ab2610>
[ ]: # jitter
sns.catplot(data=tips, x='day',y='total_bill',kind='strip',jitter=0.2,hue='sex')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc63f700a0>
3
[ ]: # swarmplot
sns.catplot(data=tips, x='day',y='total_bill',kind='swarm')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc63808670>
4
[ ]: sns.swarmplot(data=tips, x='day',y='total_bill')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc637d6520>
5
[ ]: # hue
sns.swarmplot(data=tips, x='day',y='total_bill',hue='sex')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc63748dc0>
6
0.1.5 Boxplot
A boxplot is a standardized way of displaying the distribution of data based on a five number
summary (“minimum”, first quartile [Q1], median, third quartile [Q3] and “maximum”). It can
tell you about your outliers and what their values are. Boxplots can also tell you if your data is
symmetrical, how tightly your data is grouped and if and how your data is skewed.
[ ]: # Box plot
sns.boxplot(data=tips,x='day',y='total_bill')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc6369a1c0>
[ ]: # Using catplot
sns.catplot(data=tips,x='day',y='total_bill',kind='box')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc638849a0>
7
[ ]: # hue
sns.boxplot(data=tips,x='day',y='total_bill',hue='sex')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc64d03490>
8
[ ]: # single boxplot -> numerical col
sns.boxplot(data=tips,y='total_bill')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc63588280>
9
0.1.6 Violinplot = (Boxplot + KDEplot)
[ ]: # violinplot
sns.violinplot(data=tips,x='day',y='total_bill')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc63576f40>
[ ]: sns.catplot(data=tips,x='day',y='total_bill',kind='violin')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc635084f0>
10
[ ]: # hue
sns.catplot(data=tips,x='day',y='total_bill',kind='violin',hue='sex',split=True)
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc635085e0>
11
[ ]: # barplot
# some issue with errorbar
import numpy as np
sns.barplot(data=tips, x='sex', y='total_bill',hue='smoker',estimator=np.min)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc62f23a00>
12
[ ]: sns.barplot(data=tips, x='sex', y='total_bill',ci=None)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc634468e0>
13
[ ]: # point plot
sns.pointplot(data=tips, x='sex', y='total_bill',hue='smoker',ci=None)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc634a3100>
When there are multiple observations in each category, it also uses bootstrapping to compute a
confidence interval around the estimate, which is plotted using error bars
[ ]: # countplot
sns.countplot(data=tips,x='sex',hue='day')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc62c552b0>
14
A special case for the bar plot is when you want to show the number of observations in each
category rather than computing a statistic for a second variable. This is similar to a histogram
over a categorical, rather than quantitative, variable
[ ]: # pointplot
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc62b8ee80>
15
0.1.7 Regression Plots
• regplot
• lmplot
In the simplest invocation, both functions draw a scatterplot of two variables, x and y, and then
fit the regression model y ~ x and plot the resulting regression line and a 95% confidence interval
for that regression.
[ ]: # axes level
# hue parameter is not available
sns.regplot(data=tips,x='total_bill',y='tip')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc62829550>
16
[ ]: sns.lmplot(data=tips,x='total_bill',y='tip',hue='sex')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc627f1250>
17
[ ]: # residplot
sns.residplot(data=tips,x='total_bill',y='tip')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fcc62755d30>
18
[ ]:
[ ]:
[ ]: # figure level -> relplot -> displot -> catplot -> lmplot
sns.catplot(data=tips,x='sex',y='total_bill',kind='violin',col='day',row='time')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fcc62538970>
19
[ ]: g = sns.FacetGrid(data=tips,col='day',row='time',hue='smoker')
g.map(sns.boxplot,'sex','total_bill')
g.add_legend()
/usr/local/lib/python3.8/dist-packages/seaborn/axisgrid.py:670: UserWarning:
Using the boxplot function without specifying `order` is likely to produce an
incorrect plot.
warnings.warn(warning)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-134-983b773fc0d8> in <module>
1 g = sns.FacetGrid(data=tips,col='day',row='time',hue='smoker')
----> 2 g.map(sns.boxplot,'sex','total_bill')
3 g.add_legend()
708
709 # Draw the plot
--> 710 self._facet_plot(func, ax, plot_args, kwargs)
711
712 # Finalize the annotations and layout
/usr/local/lib/python3.8/dist-packages/seaborn/axisgrid.py in _facet_plot(self,␣
↪func, ax, plot_args, plot_kwargs)
804 plot_args = []
805 plot_kwargs["ax"] = ax
--> 806 func(*plot_args, **plot_kwargs)
20
807
808 # Sort out the supporting information
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py in inner_f(*args,␣
↪**kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
2249 kwargs.update(dict(whis=whis))
2250
-> 2251 plotter.plot(ax, kwargs)
2252 return ax
2253
/usr/local/lib/python3.8/dist-packages/seaborn/categorical.py in␣
↪draw_boxplot(self, ax, kws)
439 continue
440
--> 441 artist_dict = ax.boxplot(box_data,
442 vert=vert,
443 patch_artist=True,
/usr/local/lib/python3.8/dist-packages/matplotlib/cbook/deprecation.py in␣
↪wrapper(*args, **kwargs)
21
1564 if data is None:
-> 1565 return func(ax, *map(sanitize_sequence, args), **kwargs)
1566
1567 bound = new_sig.bind(ax, *args, **kwargs)
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f8b2a2f3070>
[ ]:
[ ]:
[ ]: sns.pairplot(iris,hue='species')
[ ]: <seaborn.axisgrid.PairGrid at 0x7fcc60bdcac0>
22
[ ]:
[ ]: # pair grid
g = sns.PairGrid(data=iris,hue='species')
# g.map
g.map(sns.scatterplot)
[ ]: <seaborn.axisgrid.PairGrid at 0x7fcc5fd0d700>
23
[ ]: # map_diag -> map_offdiag
g = sns.PairGrid(data=iris,hue='species')
g.map_diag(sns.boxplot)
g.map_offdiag(sns.kdeplot)
[ ]: <seaborn.axisgrid.PairGrid at 0x7fcc5e28da60>
24
[ ]: # map_diag -> map_upper -> map_lower
g = sns.PairGrid(data=iris,hue='species')
g.map_diag(sns.histplot)
g.map_upper(sns.kdeplot)
g.map_lower(sns.scatterplot)
[ ]: <seaborn.axisgrid.PairGrid at 0x7fcc5daaa880>
25
[ ]: # vars
g = sns.PairGrid(data=iris,hue='species',vars=['sepal_width','petal_width'])
g.map_diag(sns.histplot)
g.map_upper(sns.kdeplot)
g.map_lower(sns.scatterplot)
[ ]: <seaborn.axisgrid.PairGrid at 0x7fcc5ea01790>
26
0.1.10 JointGrid Vs Jointplot
[ ]: sns.jointplot(data=tips,x='total_bill',y='tip',kind='hist',hue='sex')
[ ]: <seaborn.axisgrid.JointGrid at 0x7fcc5c8c6070>
27
[ ]: g = sns.JointGrid(data=tips,x='total_bill',y='tip')
g.plot(sns.kdeplot,sns.violinplot)
[ ]: <seaborn.axisgrid.JointGrid at 0x7fcc5bf817f0>
28
[ ]:
[ ]:
[ ]:
[ ]:
[ ]:
[ ]:
29
0.1.11 Utility Functions
[ ]: ['anagrams',
'anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'dowjones',
'exercise',
'flights',
'fmri',
'geyser',
'glue',
'healthexp',
'iris',
'mpg',
'penguins',
'planets',
'seaice',
'taxis',
'tips',
'titanic']
[ ]: # load dataset
sns.load_dataset('planets')
[ ]:
30
ion-28-data-accessing-and-cleaning
May 3, 2024
• Dirty Data (Data with Quality issues): Dirty data, also known as low quality data.
Low quality data has content issues.
1
– Duplicated data
– Missing Data
– Corrupt Data
– Inaccurate Data
• Messy Data (Data with tidiness issues): Messy data, also known as untidy data.
Untidy data has structural issues. Tidy data has the following properties:
– Each variable forms a column
– Each observation forms a row
– Each observational unit forms a table
[ ]: import pandas as pd
import numpy as np
[ ]: patients = pd.read_csv('patients.csv')
treatments = pd.read_csv('treatments.csv')
adverse_reactions = pd.read_csv('adverse_reactions.csv')
treatments_cut = pd.read_csv('treatments_cut.csv')
[ ]: # view datasets
patients.head()
[ ]: treatments.head()
2
[ ]: given_name surname auralin novodra hba1c_start hba1c_end \
0 veronika jindrová 41u - 48u - 7.63 7.20
1 elliot richardson - 40u - 45u 7.56 7.09
2 yukitaka takenaka - 39u - 36u 7.68 7.25
3 skye gormanston 33u - 36u - 7.97 7.62
4 alissa montez - 33u - 29u 7.78 7.46
hba1c_change
0 NaN
1 0.97
2 NaN
3 0.35
4 0.32
[ ]: treatments_cut.shape
[ ]: (70, 7)
[ ]: adverse_reactions
3
26 clinton miller throat irritation
27 idalia moore hypoglycemia
28 xiuxiu chang hypoglycemia
29 alex crawford hypoglycemia
30 monika lončar hypoglycemia
31 steven roy headache
32 cecilie nilsen hypoglycemia
33 krisztina magyar hypoglycemia
4
Table -> treatments and treatments_cut:
• given_name: the given name of each patient in the Master Patient Index that took part in
the clinical trial
• surname: the surname of each patient in the Master Patient Index that took part in the
clinical trial
• auralin: the baseline median daily dose of insulin from the week prior to switching to Auralin
(the number before the dash) and the ending median daily dose of insulin at the end of the 24
weeks of treatment measured over the 24th week of treatment (the number after the dash).
Both are measured in units (shortform ‘u’), which is the international unit of measurement
and the standard measurement for insulin.
• novodra: same as above, except for patients that continued treatment with Novodra
• hba1c_start: the patient’s HbA1c level at the beginning of the first week of treatment.
HbA1c stands for Hemoglobin A1c. The HbA1c test measures what the average blood sugar
has been over the past three months. It is thus a powerful way to get an overall sense of how
well diabetes has been controlled. Everyone with diabetes should have this test 2 to 4 times
per year. Measured in %.
• hba1c_end: the patient’s HbA1c level at the end of the last week of treatment
• hba1c_change: the change in the patient’s HbA1c level from the start of treatment to the end,
i.e., hba1c_start - hba1c_end. For Auralin to be deemed effective, it must be “noninferior” to
Novodra, the current standard for insulin. This “noninferiority” is statistically defined as the
upper bound of the 95% confidence interval being less than 0.4% for the difference between
the mean HbA1c changes for Novodra and Auralin (i.e. Novodra minus Auralin).
5
0.1.9 Steps in Assessment
There are 2 steps involved in Assessment
• Discover
• Document
[ ]: # export data for manual assessment
6
- This table should not exist independently
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 given_name 34 non-null object
1 surname 34 non-null object
2 adverse_reaction 34 non-null object
dtypes: object(3)
memory usage: 944.0+ bytes
[ ]: patients[patients['address'].isnull()]
7
264 NaN NaN NaN 11/3/1989 158.6 63 28.1
269 NaN NaN NaN 10/9/1937 175.2 61 33.1
278 NaN NaN NaN 12/16/1962 124.3 69 18.4
286 NaN NaN NaN 4/1/1979 155.3 68 23.6
296 NaN NaN NaN 5/14/1990 181.1 63 32.1
[ ]: treatments[treatments.duplicated()]
hba1c_change
136 NaN
[ ]: treatments[treatments.duplicated(subset=['given_name' ,'surname'])]
hba1c_change
136 NaN
[ ]: treatments_cut[treatments_cut.
↪duplicated(subset=['given_name' ,'surname'])]
[ ]: Empty DataFrame
Columns: [given_name, surname, auralin, novodra, hba1c_start, hba1c_end,
hba1c_change]
Index: []
[ ]: adverse_reactions.duplicated().sum()
[ ]: 0
[ ]: patients.describe()
[ ]: patients[patients['height'] == 27]
8
[ ]: patient_id assigned_sex given_name surname address city \
4 5 male Tim Neudorf 1428 Turkey Pen Lane Dothan
[ ]: treatments_cut.describe()
[ ]: treatments.sort_values('hba1c_change',na_position='first')
hba1c_change
0 NaN
2 NaN
8 NaN
9 NaN
10 NaN
.. …
49 0.98
17 0.98
32 0.99
245 0.99
9
138 0.99
[ ]: patients_df = patients.copy()
treatments_df = treatments.copy()
treatments_cut_df = treatments_cut.copy()
adverse_reactions_df = adverse_reactions.copy()
0.1.16 Define
• replace all missing values in the patients df with 'No data'
• subtract hba1c_end from hba1c_start to get all the change values
• in the patients table, use regex to separate email and phone from the contact column
[ ]: # code
patients_df.fillna('No data',inplace=True)
[ ]: # test
patients_df.info()
10
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503 entries, 0 to 502
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 patient_id 503 non-null int64
1 assigned_sex 503 non-null object
2 given_name 503 non-null object
3 surname 503 non-null object
4 address 503 non-null object
5 city 503 non-null object
6 state 503 non-null object
7 zip_code 503 non-null object
8 country 503 non-null object
9 contact 503 non-null object
10 birthdate 503 non-null object
11 weight 503 non-null float64
12 height 503 non-null int64
13 bmi 503 non-null float64
dtypes: float64(2), int64(2), object(10)
memory usage: 55.1+ KB
[ ]: treatments.head()
hba1c_change
0 NaN
1 0.97
2 NaN
3 0.35
4 0.32
[ ]: # code
treatments_df['hba1c_change'] = treatments_df['hba1c_start'] -␣
↪treatments_df['hba1c_end']
treatments_cut_df['hba1c_change'] = treatments_cut_df['hba1c_start'] -␣
↪treatments_cut_df['hba1c_end']
[ ]: # test
treatments_cut_df.info()
<class 'pandas.core.frame.DataFrame'>
11
RangeIndex: 70 entries, 0 to 69
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 given_name 70 non-null object
1 surname 70 non-null object
2 auralin 70 non-null object
3 novodra 70 non-null object
4 hba1c_start 70 non-null float64
5 hba1c_end 70 non-null float64
6 hba1c_change 70 non-null float64
dtypes: float64(3), object(4)
memory usage: 4.0+ KB
[ ]: patients.head()
# find the phone number from the value/text, as a result we will get a list
phone_number = re.findall(phone_number_pattern, text)
12
# if length is 0, then the regex can't find any ph number, then define with␣
↪NaN
if len(phone_number) <= 0:
phone_number = np.nan
# if the country code is attached with the ph number, for that case, the␣
↪first
# element will be the country code and the 2nd element will be the actual ph
# number. So, get that ph number
elif len(phone_number) >= 2:
phone_number = phone_number[1]
# else, we will get the ph number. Grab it.
else:
phone_number = phone_number[0]
[ ]: patients_df.drop(columns='contact',inplace=True)
[ ]:
[ ]:
[ ]: import re
df = pd.DataFrame(columns=['phone', 'email'])
email_match = re.search(r'([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.
↪]+)', item)
13
phone = phone_match.group(0) if phone_match else None
email = email_match.group(1) if email_match else None
df = df.append({'phone': phone, 'email': email}, ignore_index=True)
print(df)
phone email
0 951-719-9170 [email protected]
1 (217) 569-3204 [email protected]
2 402-363-6804 [email protected]
3 (732) 636-8246 [email protected]
4 334-515-7487 [email protected]
.. … …
498 207-477-0579 [email protected]
499 928-284-4492 [email protected]
500 816-223-6007 [email protected]
501 360 443 2060 [email protected]
502 402-848-4923 [email protected]
[ ]: treatments_df = pd.concat([treatments_df,treatments_cut_df])
↪'hba1c_end','hba1c_change'],var_name='type',value_name='dosage_range')
[ ]: treatments_df['dosage_start'] = treatments_df['dosage_range'].str.split('-').
↪str.get(0)
treatments_df['dosage_end'] = treatments_df['dosage_range'].str.split('-').str.
↪get(1)
<ipython-input-171-19cff1a047dc>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
14
docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
treatments_df['dosage_end'] =
treatments_df['dosage_range'].str.split('-').str.get(1)
[ ]: treatments_df.drop(columns='dosage_range',inplace=True)
/usr/local/lib/python3.8/dist-packages/pandas/core/frame.py:4906:
SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
[ ]: treatments_df['dosage_start'] = treatments_df['dosage_start'].str.
↪replace('u','')
treatments_df['dosage_end'] = treatments_df['dosage_end'].str.replace('u','')
<ipython-input-176-71418162ebe6>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
[ ]: treatments_df['dosage_start'] = treatments_df['dosage_start'].astype('int')
treatments_df['dosage_end'] = treatments_df['dosage_end'].astype('int')
<ipython-input-179-c144854612b4>:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
15
See the caveats in the documentation: https://pandas.pydata.org/pandas-
docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
treatments_df['dosage_end'] = treatments_df['dosage_end'].astype('int')
[ ]: treatments_df
dosage_start dosage_end
0 41 48
3 33 36
6 37 42
7 31 38
9 30 36
.. … …
688 55 51
690 26 23
694 22 23
696 28 26
698 42 44
[ ]: treatments_df
16
346 maret sultygov 7.67 7.30 0.37 novodra
347 lixue hsueh 9.21 8.80 0.41 novodra
348 jakob jakobsen 7.96 7.51 0.45 novodra
349 berta napolitani 7.68 7.21 0.47 novodra
[ ]:
17
etl-using-aws-rdsextract-transform
May 3, 2024
[ ]: conn = mysql.connector.connect(host='database-1.codzmntflx6t.ap-northeast-1.rds.
↪amazonaws.com',user='admin',password='911Pentagon')
[ ]: import pandas as pd
[ ]: temp_df = player.
↪merge(player_captain,on='Player_Id')[['Player_Name','Match_Id','Is_Captain']]
[ ]: temp_df.head(1)
1
[ ]: delivery = delivery.merge(temp_df,␣
↪left_on=['ID','batter'],right_on=['Match_Id','Player_Name'],how='left').
↪fillna(0)
[ ]: delivery
2
1 Rajasthan Royals 0 0.0 0.0
2 Rajasthan Royals 0 0.0 0.0
3 Rajasthan Royals 0 0.0 0.0
4 Rajasthan Royals 0 0.0 0.0
… … … … …
225949 Royal Challengers Bangalore 0 0.0 0.0
225950 Royal Challengers Bangalore 0 0.0 0.0
225951 Royal Challengers Bangalore 0 0.0 0.0
225952 Royal Challengers Bangalore 0 0.0 0.0
225953 Royal Challengers Bangalore 0 0.0 0.0
[ ]: runs = delivery.groupby(['ID','batter'])['batsman_run'].sum().reset_index()
balls = delivery.groupby(['ID','batter'])['batsman_run'].count().reset_index()
[ ]: runs
[ ]: ID batter batsman_run
0 335982 AA Noffke 9
1 335982 B Akhil 0
2 335982 BB McCullum 158
3 335982 CL White 6
4 335982 DJ Hussey 12
… … … …
14224 1312200 SV Samson 14
14225 1312200 Shubman Gill 45
14226 1312200 TA Boult 11
14227 1312200 WP Saha 5
14228 1312200 YBK Jaiswal 22
[ ]: final_df = runs.merge(balls,on=['ID','batter'],suffixes=('_runs','_balls')).
↪merge(fours,on=['ID','batter'],how='left').
↪merge(sixes,on=['ID','batter'],how='left')
[ ]: final_df.fillna(0,inplace=True)
[ ]: final_df.rename(columns={
'batsman_run_runs':'runs',
'batsman_run_balls':'balls',
3
'batsman_run_x':'fours',
'batsman_run_y':'sixes'
},inplace=True)
[ ]: final_df['sr'] = round((final_df['runs']/final_df['balls'])*100,2)
[ ]: final_df.drop(columns=['balls'],inplace=True)
[ ]: final_df = final_df.
↪merge(temp_df,left_on=['ID','batter'],right_on=['Match_Id','Player_Name'],how='left').
↪drop(columns=['Player_Name','Match_Id']).fillna(0)
[ ]: final_df = final_df.merge(balls,on=['ID','batter']).
↪rename(columns={'batsman_run':'balls'})
[ ]: final_df
def dream11(row):
    """Return the strike-rate based score for one batter-innings row.

    Parameters
    ----------
    row : mapping
        Must provide ``row['sr']`` (compared against numeric thresholds) and
        ``row['Is_Captain']`` (compared against ``1``).

    Returns
    -------
    The strike-rate bonus or penalty, doubled when ``row['Is_Captain'] == 1``.
    """
    # NOTE(review): the scores printed further down in this notebook
    # (e.g. 332.0) are larger than anything this logic can produce, and a
    # page break falls right after the ``score = 0`` line in the extracted
    # text -- some scoring terms may be missing here. Confirm against the
    # original notebook before relying on this function.
    sr = row['sr']

    # Branches are tested from the highest band down, so each ``elif`` only
    # needs its lower bound; the upper bound is implied by the earlier tests.
    if sr > 170:
        score = 6
    elif sr > 150:
        score = 4
    elif sr > 130:
        score = 2
    elif 60 < sr <= 70:
        score = -2
    elif 50 < sr <= 60:
        score = -4
    elif sr <= 50:
        score = -6
    else:
        # 70 < sr <= 130, or a value for which every comparison is false
        # (such as NaN): no bonus and no penalty.
        score = 0

    if row['Is_Captain'] == 1:
        score = score * 2
    return score
[ ]: final_df['score'] = final_df.apply(dream11,axis=1)
[ ]: export_df = final_df.
↪sort_values('score',ascending=False)[['ID','batter','score']]
[ ]: export_df
[ ]: ID batter score
3325 501243 V Sehwag 332.0
2844 501210 SR Tendulkar 272.0
5302 598027 CH Gayle 244.0
2 335982 BB McCullum 216.0
4254 548342 V Sehwag 202.0
… … … …
3546 501258 ND Doshi -8.0
3648 501266 S Badrinath -8.0
4048 548325 DJ Jacobs -8.0
10432 1175356 SR Watson -8.0
13132 1304051 N Pooran -8.0
[ ]: conn = mysql.connector.connect(host='database-1.codzmntflx6t.ap-northeast-1.rds.
↪amazonaws.com',user='admin',password='911Pentagon')
5
[ ]: import pymysql
import pandas as pd
from sqlalchemy import create_engine
# Load the cleaned flights data that will be pushed to MySQL.
df = pd.read_csv('/content/flights_cleaned.csv')

# Cursor on the raw connector connection; not used by the upload below.
mycursor = conn.cursor()

# SQLAlchemy URL layout: {root}:{password}@{url}/{database}
# NOTE(review): the password is embedded in the URL - move it out of the
# notebook (environment variable / secrets store) and rotate it.
engine = create_engine(
    "mysql+pymysql://admin:911Pentagon@database-1.codzmntflx6t.ap-northeast-1.rds.amazonaws.com/flights"
)

# Write the frame to the `flights` table. With pandas' defaults this raises
# if the table already exists and also stores the DataFrame index as a column.
df.to_sql('flights', con=engine)
[ ]: import pandas as pd
[ ]:
6
Week 11: Data Cleaning on Smartphone Round - 1
import numpy as np
import pandas as pd
Dataset - https://docs.google.com/spreadsheets/d/1oBG0ZtYiWzehWa1K6pV8huMtVEJxCY4C9vPaGCt1_gU/edit?usp=sharing
df = pd.read_csv('smartphones.csv')
df.head()
account_circle model price rating sim processor ram battery display camera card os
Dual Sim, 3G, Snapdragon 8 Gen2, 12 GB 5000 mAh Battery 6.7 inches, 1440 x 50 MP + 48 MP +
OnePlus 11 Memory Card Android
0 ₹54,999 89.0 4G, 5G, VoLTE, Octa Core, 3.2 GHz RAM, 256 with 100W Fast 3216 px, 120 Hz 32 MP Triple Rear
5G Not Supported v13
Wi-Fi, NFC Processor GB inbuilt Charging Display wit... & 16 MP Fron...
OnePlus Dual Sim, 3G, Snapdragon 695, 6 GB RAM, 5000 mAh Battery 6.59 inches, 1080 64 MP + 2 MP + 2 Memory Card
Android
1 Nord CE 2 ₹19,989 81.0 4G, 5G, VoLTE, Octa Core, 2.2 GHz 128 GB with 33W Fast x 2412 px, 120 Hz MP Triple Rear & (Hybrid), upto 1
v12
Lite 5G Wi-Fi Processor inbuilt Charging Display wi... 16 MP Front ... TB
Samsung Dual Sim, 3G, Exynos 1330, Octa 4 GB RAM, 5000 mAh Battery 6.6 inches, 1080 x 50 MP + 2 MP + 2 Memory Card
Android
2 Galaxy A14 ₹16,499 75.0 4G, 5G, VoLTE, Core, 2.4 GHz 64 GB with 15W Fast 2408 px, 90 Hz MP Triple Rear & Supported, upto
v13
5G Wi-Fi Processor inbuilt Charging Display with... 13 MP Front ... 1 TB
Dual Sim, 3G, Snapdragon 695, 6 GB RAM, 5000 mAh Battery 6.55 inches, 1080 50 MP + 8 MP + 2 Memory Card
Motorola Android
3 ₹14,999 81.0 4G, 5G, VoLTE, Octa Core, 2.2 GHz 128 GB with Fast x 2400 px, 120 Hz MP Triple Rear & (Hybrid), upto 1
Moto G62 5G v12
Wi-Fi Processor inbuilt Charging Display wi... 16 MP Front ... TB
Dual Sim, 3G, Dimensity 1080, Octa 6 GB RAM, 5000 mAh Battery 6.7 inches, 1080 x 108 MP + 8 MP +
Realme 10 Memory Card Android
4 ₹24,999 82.0 4G, 5G, VoLTE, Core, 2.6 GHz 128 GB with 67W Fast 2412 px, 120 Hz 2 MP Triple Rear
Pro Plus Not Supported v13
Wi-Fi Processor inbuilt Charging Display wit... & 16 MP Front...
Tidiness Issues
1. sim - can be split into 3 cols has_5g, has_NFC, has_IR_Blaster
2. ram - can be split into 2 cols RAM and ROM
3. processor - can be split into processor name, cores and cpu speed.
4. battery - can be split into battery capacity, fast_charging_available
5. display - can be split into size, resolution_width, resolution_height and frequency
6. camera - can be split into front and rear camera
7. card - can be split into supported, extended_upto
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1020 entries, 0 to 1019
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 model 1020 non-null object
1 price 1020 non-null object
2 rating 879 non-null float64
3 sim 1020 non-null object
4 processor 1020 non-null object
5 ram 1020 non-null object
6 battery 1020 non-null object
7 display 1020 non-null object
8 camera 1019 non-null object
9 card 1013 non-null object
10 os 1003 non-null object
dtypes: float64(1), object(10)
memory usage: 87.8+ KB
df.describe()
rating
count 879.000000
mean 78.258248
std 7.402854
min 60.000000
25% 74.000000
50% 80.000000
75% 84.000000
max 89.000000
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

# Clean on a copy so the raw frame `df` is left untouched.
df1 = df.copy()

# Prices arrive as text such as '₹54,999': remove the currency sign and the
# thousands separators, then store the column as integers.
price_text = df1['price'].str.replace('₹', '')
price_text = price_text.str.replace(',', '')
df1['price'] = price_text.astype('int')

df1
model price rating sim processor ram battery display camera card os
50 MP + 48
Dual Sim, Snapdragon 8 12 GB 5000 mAh 6.7 inches,
MP + 32 MP Memory Card
OnePlus 11 3G, 4G, 5G, Gen2, Octa RAM, Battery with 1440 x 3216 Android
0 54999 89.0 Triple Rear Not
5G VoLTE, Wi- Core, 3.2 GHz 256 GB 100W Fast px, 120 Hz v13
& 16 MP Supported
Fi, NFC Processor inbuilt Charging Display wit...
Fron...
64 MP + 2
Dual Sim, Snapdragon 6 GB 5000 mAh 6.59 inches,
OnePlus MP + 2 MP Memory Card
3G, 4G, 5G, 695, Octa Core, RAM, Battery with 1080 x 2412 Android
1 Nord CE 2 19989 81.0 Triple Rear (Hybrid), upto
VoLTE, Wi- 2.2 GHz 128 GB 33W Fast px, 120 Hz v12
Lite 5G & 16 MP 1 TB
Fi Processor inbuilt Charging Display wi...
Front ...
6.6 inches, 50 MP + 2
Dual Sim, 4 GB 5000 mAh
Samsung Exynos 1330, 1080 x 2408 MP + 2 MP Memory Card
3G, 4G, 5G, RAM, Battery with Android
2 Galaxy A14 16499 75.0 Octa Core, 2.4 px, 90 Hz Triple Rear Supported,
VoLTE, Wi- 64 GB 15W Fast v13
5G GHz Processor Display & 13 MP upto 1 TB
Fi inbuilt Charging
with... Front ...
50 MP + 8
Dual Sim, Snapdragon 6 GB 5000 mAh 6.55 inches,
Motorola MP + 2 MP Memory Card
3G, 4G, 5G, 695, Octa Core, RAM, Battery with 1080 x 2400 Android
3 Moto G62 14999 81.0 Triple Rear (Hybrid), upto
VoLTE, Wi- 2.2 GHz 128 GB Fast px, 120 Hz v12
5G & 16 MP 1 TB
Fi Processor inbuilt Charging Display wi...
Front ...
108 MP + 8
Dual Sim, 6 GB 5000 mAh 6.7 inches,
Dimensity 1080, MP + 2 MP Memory Card
Realme 10 3G, 4G, 5G, RAM, Battery with 1080 x 2412 Android
4 24999 82.0 Octa Core, 2.6 Triple Rear Not
Pro Plus VoLTE, Wi- 128 GB 67W Fast px, 120 Hz v13
GHz Processor & 16 MP Supported
Fi inbuilt Charging Display wit...
Front...
... ... ... ... ... ... ... ... ... ... ... ...
64 MP + 8
Dual Sim, Snapdragon 8 8 GB 5000 mAh 6.67 inches,
Motorola MP + 2 MP
3G, 4G, 5G, Gen1, Octa RAM, Battery with 1080 x 2460 No FM
1015 Moto Edge 34990 83.0 Triple Rear Android v12
VoLTE, Wi- Core, 3 GHz 128 GB 68.2W Fast px, 120 Hz Radio
S30 Pro & 16 MP
Fi Processor inbuilt Charging Display wi...
Front ...
6.5 inches, 48 MP + 2
Dual Sim, Snapdragon 6 GB 5000 mAh
720 x 1600 MP + Depth Memory Card
Honor X8 3G, 4G, 5G, 480+, Octa RAM, Battery with Android
1016 14990 75.0 px Display Sensor Supported,
5G VoLTE, Wi- Core, 2.2 GHz 128 GB 22.5W Fast v11
with Water Triple Rear upto 1 TB
Fi Processor inbuilt Charging
D... & 8 MP...
D l Si 64 MP + 8
# Turn the current row labels into a regular `index` column.
df1 = df1.reset_index()

# Offset that column by 2; presumably so it lines up with the row numbers of
# the source spreadsheet (header row + 1-based rows) - TODO confirm.
df1['index'] = df1['index'].add(2)

df1
index model price rating sim processor ram battery display camera card os
6.59
Dual Sim, 64 MP + 2
Snapdragon 6 GB 5000 mAh inches, Memory
OnePlus 3G, 4G, MP + 2 MP
695, Octa RAM, Battery with 1080 x Card Android
1 3 Nord CE 2 19989 81.0 5G, Triple Rear
Core, 2.2 GHz 128 GB 33W Fast 2412 px, (Hybrid), v12
Lite 5G VoLTE, & 16 MP
Processor inbuilt Charging 120 Hz upto 1 TB
Wi-Fi Front ...
Display wi...
6.55
Dual Sim, 50 MP + 8
Snapdragon 6 GB 5000 mAh inches, Memory
Motorola 3G, 4G, MP + 2 MP
695, Octa RAM, Battery with 1080 x Card Android
3 5 Moto G62 14999 81.0 5G, Triple Rear
Core, 2.2 GHz 128 GB Fast 2400 px, (Hybrid), v12
5G VoLTE, & 16 MP
Processor inbuilt Charging 120 Hz upto 1 TB
Wi-Fi Front ...
Display wi...
6.7 inches,
Dual Sim, 108 MP +
Dimensity 6 GB 5000 mAh 1080 x
3G, 4G, 8 MP + 2 Memory
Realme 10 1080, Octa RAM, Battery with 2412 px, Android
4 6 24999 82.0 5G, MP Triple Card Not
Pro Plus Core, 2.6 GHz 128 GB 67W Fast 120 Hz v13
VoLTE, Rear & 16 Supported
Processor inbuilt Charging Display
Wi-Fi MP Front...
wit...
... ... ... ... ... ... ... ... ... ... ... ... ...
6.67
Dual Sim, 64 MP + 8
Snapdragon 8 8 GB 5000 mAh inches,
Motorola 3G, 4G, MP + 2 MP
Gen1, Octa RAM, Battery with 1080 x No FM
1015 1017 Moto Edge 34990 83.0 5G, Triple Rear Android v12
Core, 3 GHz 128 GB 68.2W Fast 2460 px, Radio
S30 Pro VoLTE, & 16 MP
Processor inbuilt Charging 120 Hz
Wi-Fi Front ...
Display wi...
48 MP + 2
Dual Sim, 6.5 inches,
Snapdragon 6 GB 5000 mAh MP + Memory
3G, 4G, 720 x 1600
Honor X8 480+, Octa RAM, Battery with Depth Card Android
1016 1018 14990 75.0 5G, px Display
5G Core, 2.2 GHz 128 GB 22.5W Fast Sensor Supported, v11
VoLTE, with Water
Processor inbuilt Charging Triple Rear upto 1 TB
Wi Fi D
g g p p
Wi-Fi D...
& 8 MP...
# Hand-collected values of df1['index'] whose `processor` entry needs review;
# used further down with df1['index'].isin(...).
processor_rows = {
    642, 647, 649, 659, 667, 701, 750, 759, 819, 859,
    883, 884, 919, 927, 929, 932, 1002,
}

# Same idea for rows whose `ram` entry needs review.
ram_rows = {
    441, 485, 534, 553, 584, 610, 613, 642, 647, 649, 659, 667,
    701, 750, 759, 819, 859, 884, 919, 927, 929, 932, 990, 1002,
}
battery_rows = set((113,151,309,365,378,441,450,553,584,610,613,630,642,647,649,659,667,701,750,756,759,764,819,855,859,884,915,916,927,929,932,990
# Hand-collected values of df1['index'] whose `display` entry needs review;
# used further down with df1['index'].isin(...).
display_rows = {
    378, 441, 450, 553, 584, 610, 613, 630, 642, 647, 649, 659, 667, 701,
    750, 759, 764, 819, 859, 884, 915, 916, 927, 929, 932, 990, 1002,
}
camera_rows = set((100,113,151,157,161,238,273,308,309,323,324,365,367,378,394,441,450,484,506,534,553,571,572,575,584,610,613,615,630,642,647,649,
Snapdragon 4 GB
2.7 inches, Memory Card
Nokia 2780 Dual Sim, 3G, QM215, Quad RAM, 1450 mAh 5 MP Rear
155 157 4990 NaN 240 x 320 px Dual Display Supported,
Flip 4G, Wi-Fi Core, 1.3 GHz 512 MB Battery Camera
Display upto 32 GB
Processor inbuilt
... ... ... ... ... ... ... ... ... ... ... ... ...
1.77 inches,
XTouch F40 32 MB RAM, 800 mAh 1.3 MP Rear
1000 1002 1999 NaN Dual Sim No 3G No Wifi 240 x 320 px Dual Display
Flip 32 MB inbuilt Battery Camera
Display
68 rows × 12 columns
0.49 inches,
32 MB RAM, 200 mAh No Rear
657 659 Zanco Tiny T1 2799 NaN Single Sim 64 x 32 px No FM Radio Bluetooth NaN
32 MB inbuilt Battery Camera
Display
Dual Sim,
1.3 MP Memory Card
3G, 4G, 48 MB RAM, 1200 mAh 2.4 inches, 240
817 819 itel Magic X 2239 NaN No 3G T117 Rear Supported, upto
VoLTE, Wi- 128 MB inbuilt Battery x 320 px Display
Camera 64 GB
Fi
Dual Sim,
2.4 inches, Memory Card
3G, 4G, 256 MB RAM, 1200 mAh 2 MP Rear
925 927 Nokia 3310 4G 3999 NaN 240 x 320 px Supported, upto Bluetooth Browser
VoLTE, Wi- 512 MB inbuilt Battery Camera
Display 32 GB
Fi
1.5 inches,
Samsung Guru 800 mAh No Rear
927 929 1685 NaN Single Sim No Wifi 128 x 128 px No FM Radio NaN NaN
E1200 Battery Camera
Display
df1
index model price rating sim processor ram battery display camera card os
6 GB 6.55 inches, 50 MP + 8 MP +
Dual Sim, 3G, Snapdragon 695, 5000 mAh Memory Card
Motorola Moto RAM, 1080 x 2400 px, 2 MP Triple Android
3 5 14999 81.0 4G, 5G, Octa Core, 2.2 GHz Battery with (Hybrid), upto
G62 5G 128 GB 120 Hz Display Rear & 16 MP v12
VoLTE, Wi-Fi Processor Fast Charging 1 TB
inbuilt wi... Front ...
... ... ... ... ... ... ... ... ... ... ... ... ...
8 GB 6.5 inches, 64 MP + 8 MP +
Samsung Dual Sim, 3G, 5000 mAh Memory Card
Octa Core RAM 1080 x 2400 px 5 MP Triple Android
Octa Core RAM, 1080 x 2400 px 5 MP Triple Android
1019 1021 Galaxy M52s 24990 74.0 4G, 5G, Battery with Supported,
Processor 128 GB Display with Rear & 32 MP v12
G V LTE Wi Fi F Ch i 1 TB
df1[df1['index'].isin(processor_rows)]
index model price rating sim processor ram battery display camera card os
df1.drop([645,857,882,925],inplace=True)
df1[df1['index'].isin(ram_rows)]
index model price rating sim processor ram battery display camera card os
Memory Card
Nokia 8210 Dual Sim, 3G, Unisoc 48 MB RAM, 1450 mAh 2.8 inches, 240 x 0.3 MP Rear
582 584 3749 NaN No Wifi Supported,
4G 4G T107 128 MB inbuilt Battery 320 px Display Camera
upto 32 GB
df1.drop(582,inplace=True)
df1[df1['index'].isin(battery_rows)]
index model price rating sim processor ram battery display camera card os
Dual Sim 3G
Dual Sim, 3G,
Apple iPhone Bionic A14, Hexa 6 GB RAM, 6.1 inches, 1170 12 MP + 12 MP +
4G, 5G, Memory Card No FM
853 855 12 Pro 119900 80.0 Core, 3.1 GHz 256 GB x 2532 px Display 12 MP Triple Rear iOS v14.0
VoLTE, Wi-Fi, Not Supported Radio
(256GB) Processor inbuilt with Large ... & 12 MP Fron...
NFC
# Remove the rows labelled 376 and 754 (row labels, not values of the `index` column).
df1.drop([376,754],inplace=True)
# Remaining rows whose `index` value was flagged in battery_rows.
temp_df = df1[df1['index'].isin(battery_rows)]
# Columns from position 7 onward are battery, display, camera, card, os.
# shift(1, axis=1) moves every value one column to the right: the old
# battery text lands in display, ..., card lands in os; battery becomes NaN
# and the previous os value is discarded.
x = temp_df.iloc[:,7:].shift(1,axis=1).values
# Write the shifted values back into df1 for exactly those rows and columns.
df1.loc[temp_df.index,temp_df.columns[7:]] = x
# Inspect the rows flagged for display problems after the shift.
df1[df1['index'].isin(display_rows)]
index model price rating sim processor ram battery display camera card os
Apple iPhone Dual Sim, 3G, Bionic A14, Hexa 6 GB RAM, 6.1 inches, 1170 x 12 MP + 12 MP +
Memory Card iOS
628 630 12 Pro 139900 80.0 4G, 5G, VoLTE, Core, 3.1 GHz 512 GB NaN 2532 px Display 12 MP Triple Rear &
Not Supported v14.0
(512GB) Wi-Fi, NFC Processor inbuilt with Large ... 12 MP Fron...
Dual Sim, 3G, Bionic A14, Hexa 4 GB RAM, 6.1 inches, 1170 x 12 MP + 12 MP
Apple iPhone Memory Card iOS
914 916 67999 76.0 4G, 5G, VoLTE, Core, 3.1 GHz 256 GB NaN 2532 px Display Dual Rear & 12 MP
12 (256GB) Not Supported v14
Wi-Fi, NFC Processor inbuilt with Large ... Front Camera
len(display_rows)
27
len(camera_rows)
64
df1[df1['index'].isin(camera_rows)]
# 155 271
index model price rating sim processor ram battery display camera card os
Dual Sim, 4 GB 12 MP + 12 MP
Bionic A14, Hexa 5.4 inches,
Apple iPhone 3G, 4G, 5G, RAM, Dual Rear & Memory Card
149 151 40999 74.0 Core, 3.1 GHz NaN 1080 x 2340 iOS v14
12 Mini VoLTE, Wi-Fi, 64 GB 12 MP Front Not Supported
Processor px Display
NFC inbuilt Camera
Snapdragon 4 GB
2.7 inches, Memory Card
Nokia 2780 Dual Sim, QM215, Quad RAM, 1450 mAh 5 MP Rear
155 157 4990 NaN 240 x 320 px Dual Display Supported,
Flip 3G, 4G, Wi-Fi Core, 1.3 GHz 512 MB Battery Camera
Display upto 32 GB
Processor inbuilt
512 MB
Dual Sim, Snapdragon 205 , 2.8 inches,
Nokia 2720 V RAM, 4 1500 mAh 2 MP Rear Memory Card
271 273 6199 NaN 3G, 4G, Dual Core, 1.1 240 x 320 px Dual Display
Flip GB Battery Camera Supported
VoLTE, Wi-Fi GHz Processor Display
inbuilt
8 GB
Dual Sim, Snapdragon 865, 7.8 inches, Foldable 64 MP + 16 MP + Memory Card
Royole FlexPai RAM, 4450 mAh
322 324 109999 87.0 3G, 4G, 5G, Octa Core, 2.84 1440 x 1920 Display, Dual 8 MP Triple Rear Supported,
2 128 GB Battery
VoLTE, Wi-Fi GHz Processor px Display Display & 32 MP Front... upto 256 GB
inbuilt
Dual Sim, 4 GB 12 MP + 12 MP
Apple iPhone Bionic A14, Hexa 5.4 inches,
3G, 4G, 5G, RAM, Dual Rear & Memory Card
363 365 12 Mini 45999 75.0 Core, 3.1 GHz NaN 1080 x 2340 iOS v14
VoLTE, Wi-Fi, 128 GB 12 MP Front Not Supported
(128GB) Processor px Display
NFC inbuilt Camera
Dual Sim,
Bionic A15, Hexa 4.7 inches, 12 MP Rear &
Apple iPhone 3G, 4G, 5G, 64 GB Memory Card
439 441 43900 NaN Core, 3.22 GHz NaN 750 x 1334 px 7 MP Front iOS v15
SE 3 2022 VoLTE, Wi-Fi, inbuilt Not Supported
Processor Display Camera
NFC
Dual Sim, 8 GB 64 MP + 13 MP +
Snapdragon 765G 4000 mAh 6.8 inches, Memory Card
3G, 4G, 5G, RAM, 12 MP Triple
570 572 LG Wing 5G 54999 89.0 , Octa Core, 2.4 Battery with 1080 x 2460 Dual Display (Hybrid), upto
VoLTE, Wi-Fi, 128 GB Rear & 32 MP
GHz Processor Fast Charging px Display 2 TB
NFC inbuilt Fron...
12 GB 9800 mAh 6.78 inches,
Dual Sim, Helio G99, Octa 64 MP + 20 MP +
RAM, Battery with 1080 x 2400 Memory Card
613 615 Oukitel WP21 22990 82.0 3G, 4G, Core, 2.2 GHz Dual Display 2 MP Triple Rear
256 GB 66W Fast px, 120 Hz (Hybrid)
VoLTE, Wi-Fi Processor & Main Front ...
inbuilt Charging Display
8 GB 4000 mAh 64 MP + 16 MP +
Single Sim, Snapdragon 865, 7.4 inches,
RAM, Battery with 12 MP Triple
709 711 OPPO X 2021 134999 86.0 3G, 4G, Octa Core, 2.84 1440 x 3200 Dual Display Android v11
256 GB 45W Fast Rear & 16 MP
VoLTE, Wi-Fi GHz Processor px Display
inbuilt Charging Fron...
Dual Sim,
8 GB 4600 mAh 7.8 inches,
3G, 4G, Snapdragon 888, 50 MP + 13 MP + Memory Card
Huawei Mate RAM, Battery with 2200 x 2480 Foldable
726 728 162990 89.0 VoLTE, Wi-Fi, Octa Core, 2.84 8 MP Triple Rear (Hybrid), upto
Xs 2 512 GB 66W Fast px, 120 Hz Display
NFC, IR GHz Processor & 10.7 MP Fro... 256 GB
inbuilt Charging Display wit...
Blaster
Dual Sim,
Bionic A15, Hexa 6.1 inches, 12 MP Rear &
Apple iPhone 3G, 4G, 5G, 64 GB Memory Card
762 764 49990 60.0 Core, 3.22 GHz NaN 750 x 1580 px 10.8 MP Front iOS v16
SE 4 VoLTE, Wi-Fi, inbuilt Not Supported
Processor Display Camera
NFC
2 GB
Single Sim, Qualcomm 215, 4 inches, 480 5 MP Rear & 2 Memory Card
RAM, 2000 mAh
844 846 CAT S22 Flip 14999 NaN 3G, 4G, Quad Core, 1.3 x 640 px Dual Display MP Front Supported,
16 GB Battery
VoLTE, Wi-Fi GHz Processor Display Camera upto 128 GB
inbuilt
Dual Sim, 6 GB 6.1 inches, 12 MP + 12 MP
Apple iPhone Bionic A14, Hexa
3G, 4G, 5G, RAM, 1170 x 2532 px + 12 MP Triple Memory Card
853 855 12 Pro 119900 80.0 Core, 3.1 GHz NaN iOS v14.0
VoLTE, Wi-Fi, 256 GB Display with Rear & 12 MP Not Supported
(256GB) Processor
NFC inbuilt Large ... Fron...
1 GB
Samsung Dual Sim, (28 nm), Quad 5.3 inches, 8 MP Rear & 5
RAM, 3000 mAh Memory Card
856 858 Galaxy A01 4999 NaN 3G, 4G, Core, 1.5 GHz 720 x 1480 px MP Front Android v10
16 GB Battery Supported
Core VoLTE, Wi-Fi Processor Display Camera
inbuilt
8 GB
Dual Sim, Snapdragon 888, 7.2 inches, Foldable 64 MP + 16 MP + Memory Card
Royole FlexPai RAM, 3360 mAh
894 896 149999 87.0 3G, 4G, 5G, Octa Core, 2.84 1440 x 1920 Display, Dual 8 MP Triple Rear Supported,
3 5G 128 GB Battery
VoLTE, Wi-Fi GHz Processor px Display Display & 32 MP Front... upto 256 GB
inbuilt
Dual Sim, 4 GB 12 MP + 12 MP
Apple iPhone Bionic A14, Hexa 5.4 inches,
3G, 4G, 5G, RAM, Dual Rear & Memory Card
913 915 12 Mini 55999 75.0 Core, 3.1 GHz NaN 1080 x 2340 iOS v14
VoLTE, Wi-Fi, 256 GB 12 MP Front Not Supported
(256GB) Processor px Display
NFC inbuilt Camera
# Remove the rows labelled 155 and 271 (row labels, not `index` column values).
df1.drop([155, 271],inplace=True)
# Rows flagged in camera_rows ...
temp_df = df1[df1['index'].isin(camera_rows)]
# ... whose `camera` text does not mention 'MP'.
temp_df = temp_df[~temp_df['camera'].str.contains('MP')]
# For those rows, copy the `card` text into `camera`.
df1.loc[temp_df.index, 'camera'] = temp_df['card'].values
df1['card'].value_counts()
# Rows whose `card` text contains 'MP'. This selection is not used before
# temp_df is reassigned below.
temp_df = df1[df1['card'].str.contains('MP')]
df1['card'].value_counts()
# Show every row when displaying frames / value counts.
pd.set_option('display.max_rows', None)
# Rows whose `card` text does not contain 'Memory Card'.
temp_df = df1[~df1['card'].str.contains('Memory Card')]
# For those rows, copy the `card` text into `os`.
df1.loc[temp_df.index,'os'] = temp_df['card'].values
df1['card'].value_counts()
df1['os'].value_counts()
# NOTE(review): this overwrites the `os` values written two statements above
# with NaN for the same rows. Possibly `card` was the intended target -
# confirm against the original notebook.
df1.loc[temp_df.index,'os'] = np.nan
df1.head()
index model price rating sim processor ram battery display camera card os
6.59 inches,
OnePlus Dual Sim, 3G, Snapdragon 695, 6 GB 5000 mAh 64 MP + 2 MP + Memory Card
1080 x 2412 px, Android
1 3 Nord CE 2 19989 81.0 4G, 5G, Octa Core, 2.2 GHz RAM, 128 Battery with 33W 2 MP Triple Rear (Hybrid), upto 1
120 Hz Display v12
Lite 5G VoLTE, Wi-Fi Processor GB inbuilt Fast Charging & 16 MP Front ... TB
wi...
Samsung Dual Sim, 3G, Exynos 1330, Octa 4 GB 5000 mAh 6.6 inches, 1080 50 MP + 2 MP + Memory Card
Android
2 4 Galaxy A14 16499 75.0 4G, 5G, Core, 2.4 GHz RAM, 64 Battery with 15W x 2408 px, 90 Hz 2 MP Triple Rear Supported, upto
v13
5G VoLTE, Wi-Fi Processor GB inbuilt Fast Charging Display with... & 13 MP Front ... 1 TB
6.55 inches,
Dual Sim, 3G, Snapdragon 695, 6 GB 5000 mAh 50 MP + 8 MP + Memory Card
Motorola 1080 x 2400 px, Android
3 5 14999 81.0 4G, 5G, Octa Core, 2.2 GHz RAM, 128 Battery with Fast 2 MP Triple Rear (Hybrid), upto 1
Moto G62 5G 120 Hz Display v12
VoLTE, Wi-Fi Processor GB inbuilt Charging & 16 MP Front ... TB
wi...
Dual Sim, 3G, Dimensity 1080, 6 GB 5000 mAh 6.7 inches, 1080 108 MP + 8 MP +
Realme 10 Memory Card Android
df1['display'].value_counts()
6.67 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 54
6.5 inches, 720 x 1600 px Display with Water Drop Notch 36
6.7 inches, 1080 x 2412 px, 120 Hz Display with Punch Hole 25
6.52 inches, 720 x 1600 px Display with Water Drop Notch 23
6.5 inches, 1080 x 2400 px, 90 Hz Display with Punch Hole 22
6.51 inches, 720 x 1600 px Display with Water Drop Notch 21
6.43 inches, 1080 x 2400 px, 90 Hz Display with Punch Hole 19
6.43 inches, 1080 x 2400 px Display with Punch Hole 16
6.62 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 15
6.53 inches, 720 x 1600 px Display with Water Drop Notch 14
6.4 inches, 1080 x 2400 px, 90 Hz Display with Punch Hole 14
6.55 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 11
6.7 inches, 1440 x 3216 px, 120 Hz Display with Punch Hole 10
6.58 inches, 1080 x 2408 px, 120 Hz Display with Water Drop Notch 10
6.5 inches, 720 x 1600 px, 90 Hz Display with Water Drop Notch 10
6.6 inches, 1080 x 2400 px, 90 Hz Display with Punch Hole 10
6.7 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 10
6.5 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 10
6.67 inches, 1080 x 2400 px Display with Punch Hole 9
6.6 inches, 1080 x 2408 px Display with Water Drop Notch 9
6.55 inches, 1080 x 2400 px, 90 Hz Display with Punch Hole 9
6.6 inches, 1080 x 2408 px, 90 Hz Display with Water Drop Notch 9
6.78 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 9
6.7 inches, 1080 x 2400 px Display with Punch Hole 8
6.58 inches, 1080 x 2408 px, 90 Hz Display with Water Drop Notch 8
6.5 inches, 720 x 1600 px, 90 Hz Display with Punch Hole 7
6.59 inches, 1080 x 2412 px, 120 Hz Display with Punch Hole 7
6.6 inches, 1080 x 2408 px, 120 Hz Display with Water Drop Notch 7
6.53 inches, 1080 x 2340 px Display with Water Drop Notch 7
6.6 inches, 1080 x 2412 px, 120 Hz Display with Punch Hole 7
6.58 inches, 1080 x 2400 px, 90 Hz Display with Water Drop Notch 7
6.58 inches, 1080 x 2408 px Display with Water Drop Notch 7
6.67 inches, 1080 x 2400 px, 144 Hz Display with Punch Hole 7
6.1 inches, 1170 x 2532 px Display with Small Notch 6
6.73 inches, 1440 x 3200 px, 120 Hz Display with Punch Hole 6
6.4 inches, 1080 x 2400 px, 90 Hz Display with Water Drop Notch 6
6.44 inches, 1080 x 2400 px Display with Water Drop Notch 6
6.1 inches, 1170 x 2532 px Display with Large Notch 5
6.6 inches, 1080 x 2460 px, 144 Hz Display with Punch Hole 5
6.5 inches, 720 x 1560 px Display with Water Drop Notch 5
6.4 inches, 1080 x 2340 px Display with Water Drop Notch 5
6.4 inches, 1080 x 2400 px Display with Punch Hole 5
6.67 inches, 1440 x 3200 px, 120 Hz Display with Punch Hole 5
6.8 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 5
6.44 inches, 1080 x 2404 px, 90 Hz Display with Water Drop Notch 5
6.56 inches, 720 x 1612 px, 90 Hz Display with Water Drop Notch 5
6.67 inches, 1080 x 2460 px, 120 Hz Display with Punch Hole 5
6.56 inches, 720 x 1612 px Display with Water Drop Notch 5
6.6 inches, 720 x 1612 px, 90 Hz Display with Water Drop Notch 4
6.6 inches, 1080 x 2400 px, 120 Hz Display with Punch Hole 4
6.7 inches, 720 x 1600 px Display with Water Drop Notch 4
6.7 inches, 1290 x 2796 px, 120 Hz Display 4
6.56 inches, 1080 x 2376 px, 120 Hz Display with Punch Hole 4
6.67 inches, 2400 x 1080 px, 120 Hz Display with Punch Hole 4
6.5 inches, 1600 x 720 px Display with Water Drop Notch 4
6.56 inches, 720 x 1600 px Display with Water Drop Notch 4
6.7 inches, 1080 x 2400 px Display with Water Drop Notch 4
6.35 inches, 720 x 1544 px Display with Water Drop Notch 4
(982/1020)*100
96.27450980392157
df1
index model price rating sim processor ram battery display camera card os
has_5g = df1['sim'].str.contains('5G') Dual Sim, Dimensity 8100 8 GB 5000 mAh 6.7 inches, 50 MP + 8 MP
has_nfc18= df1['sim'].str.contains('NFC') 3G, 4G, 5G, Max, Octa Core, RAM, Battery with 1080 x 2412 + 2 MP Triple Memory Card
20 OnePlus 10R 5G 32999 86.0 Android v12
VoLTE, Wi- 2.85 GHz 128 GB 80W Fast px, 120 Hz Rear & 16 MP Not Supported
has_ir_blaster = df1['sim'].str.contains('IR Blaster')
Fi, NFC Processor inbuilt Charging Display wit... Front ...
Dual Sim,
Snapdragon 8 8 GB 4700 mAh 6.6 inches, 50 MP + 12
3G, 4G, 5G,
Samsung Galaxy Gen2, Octa Core, RAM, Battery with 1080 x 2340 MP + 10 MP Memory Card
58 60 84990 89.0 VoLTE, Android v13
S23 Plus 3.2 GHz 256 GB 45W Fast px, 120 Hz Triple Rear & Not Supported
Vo5G, Wi-Fi,
Processor inbuilt Charging Display wit... 12 MP Fron...
NFC
Dual
Dual Sim, 8 GB 5000 mAh 6.7 inches, 50 MP + 48
Sim, Snapdragon 8 Snapdragon
OnePlus 10 Pro 3G, 4G, 5G, RAM, Battery with 1440 x 3216 MP + 8 MP Memory Card
66 68 Motorola 60999 89.0 3G, Wi- Gen1, Octa Core, 695, Octa Android v12
5G VoLTE, 128 GB 80W Fast px, 120 Hz Triple Rear & Not Supported
3 GHz Processor
3 5 motorola Moto 14999 81.0 4G,
Fi, NFCTrue False False Charging
inbuilt Core, 2.2 Snapdragon
Display wit... 695 snapdragon Octa Core
32 MP Front...
G62 5G 5G, GHz
Xiaomi Redmi Dual Sim,
VoLTE, 12 GB 4980 mAh
Processor 6.67 inches, 200 MP + 8
Dimensity 1080,
Note 12 Pro Plus 3G, 4G, 5G,
Wi-Fi RAM, Battery with 1080 x 2400 MP + 2 MP Memory Card
67 69 32999 87.0 Octa Core, 2.6 Android v12
(12GB RAM + VoLTE, Wi- 256 GB 120W Fast px, 120 Hz Triple Rear & Not Supported
GHz Processor
256GB) Fi, IR Blaster inbuilt Charging Display wi... 16 MP Front...
Dual
Dual Sim,
Sim, Snapdragon 4 GB 5000 mAh
Dimensity 6.6 inches, 50 MP + 8 MP
Memory Card
Samsung Galaxy
Realme 3G,3G,
4G, 5G, 750G, Octa Core, RAM, Battery
1080,with
Octa 1080 x 2408 + 2 MP Triple
68 70 15999 78.0 Supported, Android v12
4 6 realme F23
105GPro 24999 82.0 VoLTE,
4G, Wi-True 2.2 GHz
False 128 GB
False 25W Fast
Core, 2.6 px, 120 Hz 1080Rear & 8 MPdimensity Octa Core
Dimensity upto 1 TB
Plus Fi, NFC
5G, Processor inbuilt ChargingGHz Display wit... Front C...
VoLTE, Processor
Dual Sim,
Wi-Fi Snapdragon 8+ 8 GB 5000 mAh 7.1 inches,
Foldable
3G, 4G, 5G, Gen1, Octa Core, RAM, Battery with 1792 x 1920 Memory Card
69 71 Oppo Find N Fold 99990 NaN Display, Dual Android v12
VoLTE, Wi- 3.2 GHz 256 GB 67W Fast px, 120 Hz Not Supported
Display
Fi, NFC Processor inbuilt Charging Display wit...
D l Si 3 GB 6 56 i h
Dual Sim, 3 GB 6.56 inches,
Helio G35, Octa 8 MP Rear & Memory Card
3G, 4G, RAM, 5000 mAh 720 x 1612 px
70 72 OPPO A17K 9499 62.0 Core, 2.3 GHz 5 MP Front Supported, Android v12
VoLTE, Wi- 64 GB Battery Display with
Processor Camera upto 1 TB
# The `ram` text holds two comma-separated parts; take the part after the
# first comma (e.g. '256 GB inbuilt') with surrounding whitespace removed.
_rom_text = df1['ram'].str.strip().str.split(',').str.get(1).str.strip()
# Keep the first standalone run of digits as a string. The unit is dropped,
# so '512 MB inbuilt' and '512 GB inbuilt' both become '512'.
internal_memory = _rom_text.str.findall(r'\b(\d+)\b').str.get(0)
Fi inbuilt Water ...
screen_size = df1['display'].str.strip().str.split(',').str.get(0).str.strip().str.split('
Dual Sim, Dimensity 810 8 GB ').str.get(0).astype(float)
5000 mAh 6.56 inches, 48 MP + 2 MP
Memory Card
3G, 4G, 5G, 5G, Octa Core, RAM, Battery with 720 x 1612 Dual Rear & 8
87 89 OPPO K10 5G 16999 79.0 Supported, Android v12
VoLTE, Wi- 2.4 GHz 128 GB 33W Fast px, 90 Hz MP Front
df1.insert(21,'screen_size',screen_size) upto 1 TB
Fi Processor inbuilt Charging Display with... Camera
Dual Sim,
Snapdragon 8 12 GB 4700 mAh 6.78 inches,
3G, 4G, 5G, 50.3 MP Quad
Vivo X90 Pro Plus Gen2, Octa Core, RAM, Battery with 1440 x 3200 Memory Card
93 95 73999 NaN VoLTE, Wi- Rear & 32 MP Android v13
5G 3.2 GHz 256 GB 80W Fast px, 120 Hz Not Supported
Fi, NFC, IR Front Camera
Processor inbuilt Charging Display wi...
Bl...
Dual Sim,
Snapdragon 8 8 GB 3900 mAh 6.1 inches, 50 MP + 12
3G, 4G, 5G,
Samsung Galaxy Gen2, Octa Core, RAM, Battery with 1080 x 2340 MP + 10 MP Memory Card
119 121 70990 88.0 VoLTE, Android v13
S23 3.2 GHz 128 GB 25W Fast px, 120 Hz Triple Rear & Not Supported
Vo5G, Wi-Fi,
Processor inbuilt Charging Display wit... 12 MP Fron...
NFC
Dual
Dual Sim, 12 GB 4500 mAh 6.43 inches, 50 MP + 8 MP
OnePlus Nord 2T Sim, Dimensity 1300,
3G, 4G, 5G, RAM, Battery with
Snapdragon 1080 x 2400 + 2 MP Triple Memory Card
121 123 (12GB RAM + 33900 85.0 3G, Octa Core, 3 GHz Android v12
VoLTE, Wi- 256 GB 80W8 Fast
Gen2, px, 90 Hz Rear & 32 MP Not Supported
256GB)
OnePlus 4G, Processor Snapdragon 8
0 2 oneplus 54999 89.0 Fi, NFCTrue True inbuilt
False Charging
Octa Core, Display wit... Frontsnapdragon
... Octa Core
11 5G 5G, Gen2
3.2 GHz
VoLTE,
Dual Sim, 6 GB 4410 mAh 6.14 inches, 12.2 MP + 12
Google Tensor, Processor
3G, 4G, 5G,
Wi-Fi, RAM, Battery with 1080 x 2400 MP Dual Rear Memory Card
122 124 Google Pixel 6A 29999 71.0 Octa Core Android v12
NFC Wi-
VoLTE, 128 GB Fast px Display & 8 MP Front Not Supported
Processor
Fi, NFC inbuilt Charging with Punch... Camera
Dual
Dual Sim, Dimensity 810 8 GB 5000 mAh 6.7 inches, 108 MP + 2
Sim, Snapdragon Memory Card
Infinix Note 12 Pro 3G, 4G, 5G, 5G, Octa Core, RAM, Battery with 1080 x 2400 MP + 2 MP
123 125 OnePlus 18999 84.0 3G, 695, Octa Supported, Android v12
5G VoLTE, Wi- 2.4 GHz 128 GB 33W Fast px Display Triple Rear &
1 3 oneplus Nord CE 19989 81.0 4G, True False False Core, 2.2 Snapdragon 695 snapdragon
upto Octa
2 TB Core
Fi Processor inbuilt Charging with Punch ... 16 MP Front...
2 Lite 5G 5G, GHz
VoLTE,
Dual Sim, 8 GB Processor
6000 mAh 6.6 inches,
Samsung Galaxy Wi-Fi Exynos 1280, 50 MP Quad Memory Card
3G, 4G, 5G, RAM, Battery with 1080 x 2400
124 126 M33 5G (8GB 19499 84.0 Octa Core, 2.4 Rear & 8 MP Supported, Android v12
VoLTE, Wi- 128 GB 25W Fast px, 120 Hz
RAM + 128GB) GHz Processor Front Camera upto 1 TB
Fi, NFC inbuilt Charging Display wit...
Dual
Sim,
Dual Sim, Dimensity 920 8 GB 4200Exynos
mAh 6.44 inches, 64 MP + 8 MP
Samsung 3G,3G,
4G, 5G, 5G, Octa Core, RAM, 1330,with
Battery Octa 1080 x 2400 + 2 MP Triple Memory Card
2125 4127 samsung
Vivo V23 5G
Galaxy 24994
16499 83.0
75.0 4G, Wi-True
VoLTE, False
2.5 GHz False
128 GB Core,
44W Fast 2.4 Exynos
px, 90 Hz 1330
Rear & 50 MP exynos Octa Core
Not Supported
Android v12
A14 5G 5G, Fi Processor inbuilt GHz
Charging Display wit... + 8 MP...
VoLTE, Processor
Wi-Fi
Dual Sim, 4 GB 5000 mAh 6.47 inches, 50 MP + 8 MP
Snapdragon 680, Memory Card
Motorola Moto 3G, 4G, RAM, Battery with 1080 x 2400 + 2 MP Triple
126 128 10999 78.0 Octa Core, 2.4 Supported, Android v12
G42 VoLTE, Wi- 64 GB 20W Fast px Display Rear & 16 MP
Dual GHz Processor upto 1 TB
Fi, NFC inbuilt Charging with Punch... Front ...
Sim, Snapdragon
Motorola 3G, Sim,
Dual 8 GB 695,
5000 Octa
mAh 6.59 inches, 64 MP + 2 MP
3 5 OnePlus Nord Moto
motorola CE 14999 81.0 Snapdragon
False 695, Memory Card
3G,4G,
4G, 5G,True False Battery
RAM, Core, 2.2
with Snapdragon
1080 x 2412 695 snapdragon
+ 2 MP Triple Octa Core
127 129 2 Lite 5GG62
(8GB
5G 24999 85.0 5G, Wi- Octa Core, 2.2 GHz (Hybrid), upto Android v12
VoLTE, 256 GB 33W Fast px, 120 Hz Rear & 16 MP
RAM + 256GB) VoLTE, GHz Processor Processor 1 TB
Fi inbuilt Charging Display wi... Front ...
Wi-Fi
Dual Sim, 6 GB 5000 mAh 6.4 inches,
Exynos 1280, 48 MP Quad Memory Card
Samsung Galaxy 3G, 4G, 5G, RAM, Battery with 1080 x 2400
128 130 25999 82.0 Dual Wi- Octa Core, 2.4 Rear & 13 MP (Hybrid), upto Android v12
A33 5G VoLTE, 128 GB 25W Fast px, 90 Hz
Sim, GHz Processor Dimensity Display with... Front Camera 1 TB
Fi, NFC inbuilt Charging
Realme 3G, 1080, Octa
4 6 realme 10 Pro 24999 82.0 Dual
4G, Sim,True False 6 GB
False 4410 mAh
Core, 2.6 6.1 inches, 1080
Dimensity 12.2 MP + 12 dimensity Octa Core
Google Tensor,
Plus 3G,5G,
4G, 5G, RAM, Battery with
GHz 1080 x 2400 MP Dual Rear Memory Card
129 131 Google Pixel 7A 34990 69.0 Octa Core Android v12
VoLTE, Wi-
VoLTE, 128 GB Fast
Processor px, 90 Hz & 8 MP Front Not Supported
Processor
Fi, NFC
Wi-Fi inbuilt Charging Display with... Camera
Dual Sim, 2 GB
Dual 5 inches, 720 5 MP Rear & Memory Card
3G, 4G, Quad Core, 1.4 RAM, 2800 mAh Android v8.1
143 145 Jio Phone 3 4499 NaN Sim, Wi- Snapdragon x 1280 px 2 MP Front Supported,
VoLTE, GHz Processor 64 GB Battery (Oreo)
OnePlus 3G, 695, Octa Display Camera upto 128 GB
Fi inbuilt
1 3 oneplus Nord CE 19989 81.0 4G, True False False Core, 2.2 Snapdragon 695 snapdrago
2 Lite 5G 5G, Sim,
Dual 4 GB 6000 mAh GHz 6.52 inches, 13 MP + 2 MP
VoLTE, Snapdragon 460 Processor
3G, 4G, RAM, Battery with 720 x 1560 px + 2 MP Triple Memory Card
144 146 OnePlus Clover 14999 69.0 Wi-Fi Wi- , Octa Core, 1.8 Android v10
VoLTE, 64 GB 18W Fast Display with Rear & 8 MP Supported
GHz Processor
Fi inbuilt Charging Punch ... Front C...
Dual
Dual Sim, 4 GB 5000 mAh 6.5 inches, 13 MP + 2 MP
Sim, Helio P35, Octa Memory Card
Samsung Galaxy 3G, 4G, RAM, BatteryExynos
with 720 x 1600 px Dual Rear & 5
145 147 8499 68.0 Core, 2.3 GHz Supported, Android v12
Samsung
F04 3G, Wi-
VoLTE, 64 GB 1330,
15W Octa Display with
Fast MP Front
2 4 samsung Galaxy 16499 75.0 4G, Processor upto 1 TB
FiTrue False False Charging
inbuilt Core, 2.4 Exynos
Water D... 1330 Camera exyn
A14 5G 5G, GHz
Dual Sim,
VoLTE, 8 GB 4400 mAh
Processor 6.62 inches, 48 MP + 13
Snapdragon 870,
3G, 4G, 5G,
Wi-Fi RAM, Battery with 1080 x 2400 MP + 2 MP Memory Card
146 148 iQOO 7 24990 83.0 Octa Core, 3.2 Android v11
VoLTE, Wi- 128 GB 66W Fast px, 120 Hz Triple Rear & Not Supported
GHz Processor
Fi, NFC inbuilt Charging Display wi... 16 MP Front...
Dual
Dual Sim,
Sim, 6 GB 6000 mAh
Snapdragon 6.5 inches,
Exynos 1200, 64 MP Quad
Samsung Motorola
Galaxy 3G,3G,
4G, 5G, RAM, Battery
695,with
Octa 1080 x 2400 Memory Card
147 149 16999 84.0 Octa Core, 2.4 Rear & 32 MP Android v12
3 5 motorola M34 Moto
5G 14999 81.0 VoLTE,
4G, Wi-True False 128 GB
False 25W Fast
Core, 2.2 px, 120 Hz 695
Snapdragon snapdrago (Hybrid)
GHz Processor Front Camera
G62 5G Fi, NFC
5G, inbuilt ChargingGHz Display wit...
VoLTE, Processor
Dual Sim,
Wi-Fi 4 GB 6000 mAh 6.7 inches, 50 MP + 2 MP
Snapdragon 680, Memory Card
3G, 4G, RAM, Battery with 720 x 1600 px Dual Rear & 5
148 150 Xiaomi Redmi 10 9589 71.0 Octa Core, 2.4 Supported, Android v11
VoLTE, Wi- 64 GB 18W Fast Display with MP Front
GHz Processor upto 512 GB
Dual Fi inbuilt Charging Water D... Camera
Sim, Dimensity
Realme Dual
3G, Sim, Bionic A14, Hexa 4 GB 1080, Octa 5.4 inches, 12 MP + 12
4149 6151 Apple iPhone
realme 1012
Pro 40999
24999 82.0 3G,4G,
4G, 5G,True False RAM,
False Core, 2.6 1080
Dimensity MP Dual Reardimens
Memory Card
74.0 Core, 3.1 GHz NaN x 2340 1080 iOS v14
Mini
Plus VoLTE,
5G, Wi- 64 GB GHz & 12 MP Front Not Supported
Processor px Display
Fi, NFC
VoLTE, inbuilt Processor Camera
Wi-Fi
Dual Sim, Snapdragon 4 6 GB 5000 mAh 6.67 inches, 48 MP + 8 MP
Xiaomi Redmi Memory Card
3G, 4G, 5G, Gen 1, Octa RAM, Battery with 1080 x 2400 + 2 MP Triple
150 152 Note 12 (6GB 19988 79.0 (Hybrid), upto Android v12
VoLTE, Wi- Core, 2 GHz 128 GB 33W Fast px, 120 Hz Rear & 13 MP
RAM + 128GB) 1 TB
Fi, IR Blaster Processor inbuilt Charging Display wi... Front ...
#df1[df1['camera'] == 'Foldable Display, Dual Display']
Dual Sim, 4 GB 5000 mAh 6.6 inches, 50 MP + 8 MP
df1.loc[69,'camera'] Motorola
== '50 Moto
MP' Snapdragon 680,
3G, 4G, RAM, Battery with 2460 x 1080 + 2 MP Triple Memory Card
151 153 11999 75.0 Octa Core, 2.4 Android v12
G52 VoLTE, Wi- 64 GB 33W Fast px, 90 Hz Rear & 16 MP (Hybrid)
GHz Processor
False Fi, NFC inbuilt Charging Display with... Front ...
May 3, 2024
[ ]: import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
[ ]: pd.set_option('display.max_columns',None)
[ ]: df = pd.read_csv('/content/smartphone_cleaned_v3.csv')
[ ]: df.shape
[ ]: (980, 24)
[ ]: df.head()
1
0 256.0 6.70 120 1440 x 3216 3
1 128.0 6.59 120 1080 x 2412 3
2 64.0 6.60 90 1080 x 2408 3
3 128.0 6.55 120 1080 x 2400 3
4 128.0 6.70 120 1080 x 2412 3
extended_memory
0 0
1 1 TB
2 1 TB
3 1 TB
4 0
[ ]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 980 entries, 0 to 979
Data columns (total 24 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 brand_name 980 non-null object
1 model 980 non-null object
2 price 980 non-null int64
3 rating 879 non-null float64
4 has_5g 980 non-null bool
5 has_nfc 980 non-null bool
6 has_ir_blaster 980 non-null bool
7 processor_name 960 non-null object
8 processor_brand 960 non-null object
9 num_cores 974 non-null object
10 processor_speed 938 non-null float64
11 battery_capacity 969 non-null float64
12 fast_charging 980 non-null int64
13 ram_capacity 980 non-null float64
14 internal_memory 978 non-null float64
15 screen_size 980 non-null float64
16 refresh_rate 980 non-null int64
17 resolution 980 non-null object
18 num_rear_cameras 980 non-null int64
19 num_front_cameras 980 non-null object
2
20 os 980 non-null object
21 primary_camera_rear 980 non-null float64
22 primary_camera_front 976 non-null object
23 extended_memory 980 non-null object
dtypes: bool(3), float64(7), int64(4), object(10)
memory usage: 163.8+ KB
[ ]: df.isnull().sum()
[ ]: brand_name 0
model 0
price 0
rating 101
has_5g 0
has_nfc 0
has_ir_blaster 0
processor_name 20
processor_brand 20
num_cores 6
processor_speed 42
battery_capacity 11
fast_charging 0
ram_capacity 0
internal_memory 2
screen_size 0
refresh_rate 0
resolution 0
num_rear_cameras 0
num_front_cameras 0
os 0
primary_camera_rear 0
primary_camera_front 4
extended_memory 0
dtype: int64
[ ]: # brand name
df['brand_name'].value_counts()
[ ]: xiaomi 134
samsung 132
vivo 111
realme 97
oppo 88
motorola 52
apple 46
oneplus 42
poco 41
3
tecno 33
iqoo 32
infinix 29
huawei 16
google 14
nokia 13
honor 13
itel 10
sony 9
asus 7
nubia 6
nothing 5
lava 4
jio 4
gionee 3
micromax 3
oukitel 3
lg 3
redmi 3
letv 3
ikall 3
royole 2
doogee 2
zte 2
lenovo 2
lyf 2
sharp 1
tcl 1
cat 1
leitz 1
duoqin 1
leeco 1
blu 1
vertu 1
tesla 1
cola 1
blackview 1
Name: brand_name, dtype: int64
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2e62e4c0>
4
[ ]: # price col
df['price'].describe()
[ ]: count 980.000000
mean 32520.504082
std 39531.812669
min 3499.000000
25% 12999.000000
50% 19994.500000
75% 35491.500000
max 650000.000000
Name: price, dtype: float64
[ ]: sns.histplot(df['price'],kde=True)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2e313ca0>
5
[ ]: df['price'].skew()
[ ]: 6.591790999665567
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.boxplot(x=df[df['price'] < 200000]['price'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2c017d90>
6
[ ]: df[df['price']>200000]
7
num_rear_cameras num_front_cameras os \
427 1 1 android
478 3 1 Hongmeng OS v3.0
887 3 1 android
951 4 2 android
[ ]: df['price'].isnull().sum()
[ ]: 0
[ ]: # rating col
[ ]: df['rating'].describe()
[ ]: count 879.000000
mean 78.258248
std 7.402854
min 60.000000
25% 74.000000
50% 80.000000
75% 84.000000
max 89.000000
Name: rating, dtype: float64
[ ]: sns.histplot(df['rating'],kde=True)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2bf9dbe0>
8
[ ]: df['rating'].skew()
[ ]: -0.6989993034105535
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.boxplot(x=df['rating'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2bf3f9d0>
9
[ ]: df['rating'].isnull().sum()
[ ]: 101
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.countplot(x=df['has_5g'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2a3b91c0>
10
[ ]: df['has_5g'].value_counts()
[ ]: True 549
False 431
Name: has_5g, dtype: int64
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.countplot(x=df['has_nfc'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2a3638b0>
11
[ ]: df['has_nfc'].value_counts()
[ ]: False 587
True 393
Name: has_nfc, dtype: int64
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.countplot(x=df['has_ir_blaster'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2a2c64f0>
12
# In[ ]:
# Normalise processor brand labels: fix the 'sanpdragon' misspelling and
# rename the 'apple' / 'samsung' labels to 'bionic' / 'exynos'.
df['processor_brand'] = df['processor_brand'].str.replace('sanpdragon', 'snapdragon')
df['processor_brand'] = df['processor_brand'].str.replace('apple', 'bionic')
df['processor_brand'] = df['processor_brand'].str.replace('samsung', 'exynos')
[ ]: df['processor_brand'].value_counts()
[ ]: snapdragon 413
helio 201
dimensity 177
exynos 50
bionic 45
unisoc 26
tiger 24
google 9
kirin 7
13
spreadtrum 4
sc9863a 2
fusion 1
mediatek 1
Name: processor_brand, dtype: int64
[ ]: df[df['processor_brand'].isnull()]
14
911 False False NaN NaN Octa Core
952 False False NaN NaN Octa Core
965 False False NaN NaN Quad Core
979 False False NaN NaN Octa Core
15
979 128.0 6.50 60 1080 x 2400
primary_camera_front extended_memory
118 32 0
143 2 128 GB
187 8 128 GB
200 8 1 TB
307 8 1 TB
313 8 32 GB
490 8 1 TB
523 16 1 TB
575 8 1 TB
733 13 128 GB
753 8 256 GB
769 5 32 GB
799 32 0
800 32 1 TB
844 5 Memory Card (Hybrid)
910 32 1 TB
911 5 1 TB
952 8 512 GB
965 13 64 GB
979 32 1 TB
[ ]: df['num_cores'].value_counts()
16
[ ]: Octa Core 899
Hexa Core 39
Quad Core 36
Name: num_cores, dtype: int64
[ ]: df['processor_speed'].describe()
[ ]: count 938.000000
mean 2.427217
std 0.464090
min 1.200000
25% 2.050000
50% 2.300000
75% 2.840000
max 3.220000
Name: processor_speed, dtype: float64
[ ]: sns.displot(kind='kde',data=df,x='processor_speed')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f7a36daef10>
17
[ ]: df['processor_speed'].skew()
[ ]: 0.18833557463624606
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.boxplot(x=df['processor_speed'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a2a1ff700>
[ ]: df
18
.. … … … … …
975 motorola Motorola Moto Edge S30 Pro 34990 83.0 True
976 honor Honor X8 5G 14990 75.0 True
977 poco POCO X4 GT 5G (8GB RAM + 256GB) 28990 85.0 True
978 motorola Motorola Moto G91 5G 19990 80.0 True
979 samsung Samsung Galaxy M52s 5G 24990 74.0 True
19
0 3 1 android 50.0
1 3 1 android 64.0
2 3 1 android 50.0
3 3 1 android 50.0
4 3 1 android 108.0
.. … … … …
975 3 1 android 64.0
976 3 1 android 48.0
977 3 1 android 64.0
978 3 1 android 108.0
979 3 1 android 64.0
primary_camera_front extended_memory
0 16 0
1 16 1 TB
2 13 1 TB
3 16 1 TB
4 16 0
.. … …
975 16 0
976 8 1 TB
977 16 0
978 32 1 TB
979 32 1 TB
[ ]: df['battery_capacity'].describe()
[ ]: count 969.000000
mean 4817.748194
std 1009.540054
min 1821.000000
25% 4500.000000
50% 5000.000000
75% 5000.000000
max 22000.000000
Name: battery_capacity, dtype: float64
[ ]: sns.displot(kind='kde',data=df,x='battery_capacity')
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f7a365fab20>
20
[ ]: df[df['battery_capacity'] > 7000]
21
966 8000.0 -1 6.0 128.0
num_front_cameras os primary_camera_rear \
391 1 android 64.0
599 1 Memory Card (Hybrid) 64.0
843 1 android 108.0
966 1 android 16.0
primary_camera_front extended_memory
391 16 Not Specified
599 Main 0
843 32 2 TB
966 8 Not Specified
[ ]: df['fast_charging'].describe()
[ ]: count 980.000000
mean 36.048980
std 35.948034
min -1.000000
25% 15.000000
50% 30.000000
75% 65.000000
max 240.000000
Name: fast_charging, dtype: float64
# In[ ]:
def fast(row):
    """Flag whether a row describes a phone with fast charging.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide a ``'fast_charging'`` entry. The value ``-1`` is treated
        as the "no fast charging" marker.

    Returns
    -------
    int
        ``0`` when ``row['fast_charging']`` equals ``-1``, otherwise ``1``.
    """
    return 0 if row['fast_charging'] == -1 else 1
[ ]: df.columns
22
dtype='object')
[ ]: x = df.apply(fast,axis=1)
df.insert(12,'fast_charging_available',x)
[ ]: df.head()
primary_camera_front extended_memory
0 16 0
23
1 16 1 TB
2 13 1 TB
3 16 1 TB
4 16 0
[ ]: df['ram_capacity'].value_counts()
[ ]: 8.0 339
6.0 234
4.0 217
12.0 86
3.0 54
2.0 32
16.0 9
1.0 7
18.0 2
Name: ram_capacity, dtype: int64
[ ]: temp_df = df[df['internal_memory'].isnull()]
[ ]: df.loc[temp_df.index,['ram_capacity','internal_memory']] = [[4,64],[4,64]]
[ ]: df['internal_memory'].value_counts()
[ ]: 128.0 523
64.0 193
256.0 157
32.0 67
512.0 22
16.0 12
1024.0 5
8.0 1
Name: internal_memory, dtype: int64
[ ]: df['screen_size'].describe()
[ ]: count 980.000000
mean 6.536765
std 0.349162
min 3.540000
25% 6.500000
50% 6.580000
75% 6.670000
max 8.030000
Name: screen_size, dtype: float64
[ ]: sns.displot(kind='kde',data=df,x='screen_size')
24
[ ]: <seaborn.axisgrid.FacetGrid at 0x7f7a279697c0>
[ ]: df['screen_size'].skew()
[ ]: -2.11619902968816
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.boxplot(x=df['screen_size'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f7a276cd160>
25
[ ]: df['extended_memory'].value_counts()
[ ]: 0 362
1 TB 262
512 GB 116
256 GB 100
Not Specified 88
Memory Card (Hybrid) 30
128 GB 9
2 TB 6
32 GB 3
64 GB 3
1000 GB 1
Name: extended_memory, dtype: int64
# In[ ]:
def extended_extractor(row):
    """Translate a row's ``extended_memory`` label into a size in gigabytes.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``'extended_memory_available'`` and ``'extended_memory'``.

    Returns
    -------
    int or float
        The capacity in GB for the recognised labels (``'1 TB'`` -> 1024,
        ``'2 TB'`` -> 2048, ``'N GB'`` -> N). ``np.nan`` when
        ``extended_memory_available`` is 0, or when the label carries no size
        (``'Not Specified'``, ``'Memory Card (Hybrid)'``) or is unrecognised.
    """
    if row['extended_memory_available'] == 0:
        return np.nan

    # Labels listed here are exactly the sized values handled by the cell;
    # anything else is reported as missing rather than an implicit None.
    capacity_gb = {
        '1 TB': 1024,
        '512 GB': 512,
        '256 GB': 256,
        '128 GB': 128,
        '2 TB': 2048,
        '32 GB': 32,
        '64 GB': 64,
        '1000 GB': 1000,
    }
    return capacity_gb.get(row['extended_memory'], np.nan)
# In[ ]:
def extended_extractor(row):
    """Return a row's raw ``extended_memory`` label, or NaN when it has no size.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``'extended_memory_available'`` and ``'extended_memory'``.

    Returns
    -------
    object
        ``np.nan`` when ``extended_memory_available`` is 0 or the label is
        ``'Not Specified'`` / ``'Memory Card (Hybrid)'``; otherwise the
        ``extended_memory`` value unchanged.
    """
    if row['extended_memory_available'] == 0:
        return np.nan

    label = row['extended_memory']
    # These two labels say nothing about the supported capacity.
    if label in ('Not Specified', 'Memory Card (Hybrid)'):
        return np.nan
    return label
[ ]: df['extended_memory']
[ ]: 0 0
1 1 TB
2 1 TB
3 1 TB
4 0
…
975 0
976 1 TB
977 0
978 1 TB
979 1 TB
Name: extended_memory, Length: 980, dtype: object
27
# In[ ]:
# Keep only the leading number of each size label ('512 GB' -> '512').
# Thin spaces (U+2009) are first replaced with ordinary spaces so that the
# split on ' ' separates the number from its unit.
x = (
    df.apply(extended_extractor, axis=1)
    .str.replace('\u2009', ' ')
    .str.split(' ')
    .str.get(0)
)
[ ]: df['extended_memory_available']
[ ]: 0 0
1 1
2 1
3 1
4 0
..
975 0
976 1
977 0
978 1
979 1
Name: extended_memory_available, Length: 980, dtype: int64
[ ]: df['extended_upto'] = x
[ ]: df['extended_upto'].value_counts()
[ ]: 1 262
512 116
256 100
128 9
2 6
32 3
64 3
1000 1
Name: extended_upto, dtype: int64
# In[ ]:
def transform(text):
    """Normalise an ``extended_upto`` token to a gigabyte figure (as a string).

    The tokens ``'1'`` and ``'2'`` become ``'1024'`` and ``'2048'``, and
    ``'1000'`` is folded into ``'1024'``. Any other value (including missing
    values) is returned unchanged.
    """
    as_gigabytes = {'1': '1024', '2': '2048', '1000': '1024'}
    return as_gigabytes.get(text, text)
[ ]: df['extended_upto'] = df['extended_upto'].apply(transform)
[ ]: df['os']
28
[ ]: 0 android
1 android
2 android
3 android
4 android
…
975 android
976 android
977 android
978 android
979 android
Name: os, Length: 980, dtype: object
# In[ ]:
def os_transform(text):
    """Collapse an ``os`` value into android / ios / 'other' / missing.

    Parameters
    ----------
    text : str or missing value
        Raw value of the ``os`` column.

    Returns
    -------
    str or float
        ``np.nan`` for non-string input and for values containing ``'Memory'``
        (memory-card text that ended up in this column); the value itself when
        it contains ``'android'`` or ``'ios'``; ``'other'`` for everything else.
    """
    # Missing values are float NaN, and `'Memory' in <float>` raises TypeError.
    # Passing them through keeps the cell safe to re-run after it has already
    # written NaNs into the column.
    if not isinstance(text, str):
        return np.nan
    if 'Memory' in text:
        return np.nan
    if 'android' in text or 'ios' in text:
        return text
    return 'other'
[ ]: df['os'] = df['os'].apply(os_transform)
[ ]: df.head()
29
4 2.6 5000.0 1 67.0
extended_upto
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
[ ]: df['extended_upto'].value_counts()
[ ]: 1024 263
512 116
256 100
128 9
2048 6
32 3
64 3
Name: extended_upto, dtype: int64
[ ]: df.drop(columns=['processor_name','extended_memory'],inplace=True)
[ ]: df.isnull().sum()
[ ]: brand_name 0
model 0
30
price 0
rating 101
has_5g 0
has_nfc 0
has_ir_blaster 0
processor_name 20
processor_brand 20
num_cores 6
processor_speed 42
battery_capacity 11
fast_charging_available 0
fast_charging 211
ram_capacity 0
internal_memory 0
screen_size 0
refresh_rate 0
resolution 0
num_rear_cameras 0
num_front_cameras 0
os 14
primary_camera_rear 0
primary_camera_front 4
extended_memory 0
extended_memory_available 0
extended_upto 480
dtype: int64
[ ]: df.corr()['rating']
[ ]: price 0.283504
rating 1.000000
has_5g 0.596087
has_nfc 0.474754
has_ir_blaster 0.156421
processor_speed 0.628446
battery_capacity -0.015581
fast_charging_available 0.542814
fast_charging 0.527613
ram_capacity 0.757613
internal_memory 0.481070
screen_size 0.298272
refresh_rate 0.610795
num_rear_cameras 0.515531
primary_camera_rear 0.562046
extended_memory_available -0.415265
Name: rating, dtype: float64
31
[ ]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 980 entries, 0 to 979
Data columns (total 25 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 brand_name 980 non-null object
1 model 980 non-null object
2 price 980 non-null int64
3 rating 879 non-null float64
4 has_5g 980 non-null bool
5 has_nfc 980 non-null bool
6 has_ir_blaster 980 non-null bool
7 processor_brand 960 non-null object
8 num_cores 974 non-null object
9 processor_speed 938 non-null float64
10 battery_capacity 969 non-null float64
11 fast_charging_available 980 non-null int64
12 fast_charging 769 non-null float64
13 ram_capacity 980 non-null float64
14 internal_memory 980 non-null float64
15 screen_size 980 non-null float64
16 refresh_rate 980 non-null int64
17 resolution 980 non-null object
18 num_rear_cameras 980 non-null int64
19 num_front_cameras 976 non-null float64
20 os 966 non-null object
21 primary_camera_rear 980 non-null float64
22 primary_camera_front 975 non-null float64
23 extended_memory_available 980 non-null int64
24 extended_upto 500 non-null object
dtypes: bool(3), float64(10), int64(5), object(7)
memory usage: 171.4+ KB
[ ]: df['primary_camera_front'].value_counts()
[ ]: 16 307
8 178
32 155
5 119
12 50
13 41
20 37
10 24
50 12
60 10
32
44 8
40 6
2 5
7 5
24 3
25 3
10.8 3
48 2
11.1 2
0.3 1
2.1 1
Main 1
10.7 1
10.1 1
12.6 1
Name: primary_camera_front, dtype: int64
[ ]: df['num_cores'].value_counts()
[ ]: 8 899
6 39
4 36
Name: num_cores, dtype: int64
[ ]: df.to_csv('smartphone_cleaned_v4.csv',index=False)
[ ]: new_df = pd.read_csv('smartphone_cleaned_v4.csv')
[ ]: new_df[['brand_name','model','processor_brand']]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 980 entries, 0 to 979
Data columns (total 25 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 brand_name 980 non-null object
1 model 980 non-null object
2 price 980 non-null int64
3 rating 879 non-null float64
4 has_5g 980 non-null bool
33
5 has_nfc 980 non-null bool
6 has_ir_blaster 980 non-null bool
7 processor_brand 960 non-null object
8 num_cores 974 non-null float64
9 processor_speed 938 non-null float64
10 battery_capacity 969 non-null float64
11 fast_charging_available 980 non-null int64
12 fast_charging 769 non-null float64
13 ram_capacity 980 non-null float64
14 internal_memory 980 non-null float64
15 screen_size 980 non-null float64
16 refresh_rate 980 non-null int64
17 resolution 980 non-null object
18 num_rear_cameras 980 non-null int64
19 num_front_cameras 976 non-null float64
20 os 966 non-null object
21 primary_camera_rear 980 non-null float64
22 primary_camera_front 975 non-null float64
23 extended_memory_available 980 non-null int64
24 extended_upto 500 non-null float64
dtypes: bool(3), float64(12), int64(5), object(5)
memory usage: 171.4+ KB
[ ]: new_df['num_cores'] = new_df['num_cores'].astype(float)
[ ]: new_df.isnull().sum()
[ ]: brand_name 0
model 0
price 0
rating 101
has_5g 0
has_nfc 0
has_ir_blaster 0
processor_brand 20
num_cores 6
processor_speed 42
battery_capacity 11
fast_charging_available 0
fast_charging 211
ram_capacity 0
internal_memory 0
screen_size 0
34
refresh_rate 0
resolution 0
num_rear_cameras 0
num_front_cameras 4
os 14
primary_camera_rear 0
primary_camera_front 5
extended_memory_available 0
extended_upto 480
dtype: int64
[ ]: new_df.to_csv('smartphone_cleaned_v5.csv',index=False)
[ ]: new_df.select_dtypes(include=['object'])
os
0 android
1 android
2 android
3 android
4 android
.. …
975 android
976 android
977 android
978 android
979 android
knn = KNNImputer(n_neighbors=3,weights='distance')
35
return_array = knn.fit_transform(x_df)
# In[ ]:
# Correlation of every imputed column with price, as a two-column frame.
x = (
    pd.DataFrame(return_array, columns=x_df.columns)
    .corr()['price']
    .reset_index()
)
[ ]: y = new_df.corr()['price'].reset_index()
[ ]: x.merge(y,on='index')
# In[ ]:
# One-hot encode the categorical columns, dropping the first level of each.
pd.get_dummies(
    new_df,
    columns=['brand_name', 'processor_brand', 'os'],
    drop_first=True,
)
36
2 False 8.0 2.40 5000.0
3 False 8.0 2.20 5000.0
4 False 8.0 2.60 5000.0
.. … … … …
975 False 8.0 3.00 5000.0
976 False 8.0 2.20 5000.0
977 True 8.0 2.85 5080.0
978 False 8.0 2.20 5000.0
979 False 8.0 NaN 5000.0
37
979 1.0 64.0 32.0
38
.. … … … …
975 0 0 0 0
976 0 1 0 0
977 0 0 0 0
978 0 0 0 0
979 0 0 0 0
39
0 0 0 0
1 0 0 0
2 0 0 0
3 1 0 0
4 0 0 0
.. … … …
975 1 0 0
976 0 0 0
977 0 0 0
978 1 0 0
979 0 0 0
40
977 0 0 0 0
978 0 0 0 0
979 0 1 0 0
processor_brand_dimensity processor_brand_exynos \
0 0 0
1 0 0
2 0 1
3 0 0
4 1 0
.. … …
975 0 0
976 0 0
977 1 0
978 0 0
979 0 0
41
3 0 0 0
4 0 0 0
.. … … …
975 0 0 0
976 0 0 0
977 0 0 0
978 0 0 0
979 0 0 0
processor_brand_snapdragon processor_brand_spreadtrum \
0 1 0
1 1 0
2 0 0
3 1 0
4 0 0
.. … …
975 1 0
976 1 0
977 0 0
978 1 0
979 0 0
42
[980 rows x 81 columns]
[ ]: new_df['brand_name'] = new_df['brand_name'].astype('category')
[ ]: new_df.head()
extended_memory_available extended_upto
0 0 NaN
1 1 1024.0
2 1 1024.0
3 1 1024.0
43
4 0 NaN
[ ]:
44
eda-on-smartphone-data-1
May 3, 2024
[ ]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
[ ]: pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)
[ ]: df = pd.read_csv('/content/smartphone_cleaned_v5.csv')
[ ]: df.shape
[ ]: (980, 25)
[ ]: df.head()
1
4 5000.0 1 67.0 6.0
extended_memory_available extended_upto
0 0 NaN
1 1 1024.0
2 1 1024.0
3 1 1024.0
4 0 NaN
[ ]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 980 entries, 0 to 979
Data columns (total 25 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 brand_name 980 non-null object
1 model 980 non-null object
2 price 980 non-null int64
3 rating 879 non-null float64
4 has_5g 980 non-null bool
5 has_nfc 980 non-null bool
6 has_ir_blaster 980 non-null bool
7 processor_brand 960 non-null object
8 num_cores 974 non-null float64
9 processor_speed 938 non-null float64
10 battery_capacity 969 non-null float64
11 fast_charging_available 980 non-null int64
12 fast_charging 769 non-null float64
13 ram_capacity 980 non-null float64
14 internal_memory 980 non-null float64
15 screen_size 980 non-null float64
16 refresh_rate 980 non-null int64
2
17 resolution 980 non-null object
18 num_rear_cameras 980 non-null int64
19 num_front_cameras 976 non-null float64
20 os 966 non-null object
21 primary_camera_rear 980 non-null float64
22 primary_camera_front 975 non-null float64
23 extended_memory_available 980 non-null int64
24 extended_upto 500 non-null float64
dtypes: bool(3), float64(12), int64(5), object(5)
memory usage: 171.4+ KB
[ ]: df.isnull().sum()
[ ]: brand_name 0
model 0
price 0
rating 101
has_5g 0
has_nfc 0
has_ir_blaster 0
processor_brand 20
num_cores 6
processor_speed 42
battery_capacity 11
fast_charging_available 0
fast_charging 211
ram_capacity 0
internal_memory 0
screen_size 0
refresh_rate 0
resolution 0
num_rear_cameras 0
num_front_cameras 4
os 14
primary_camera_rear 0
primary_camera_front 5
extended_memory_available 0
extended_upto 480
dtype: int64
[ ]: df.head()
3
4 realme Realme 10 Pro Plus 24999 82.0 True False
extended_memory_available extended_upto
0 0 NaN
1 1 1024.0
2 1 1024.0
3 1 1024.0
4 0 NaN
[ ]: # brand_name
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9a0284ca0>
4
[ ]: # pie chart
df['brand_name'].value_counts().plot(kind='pie',autopct='%0.1f%%')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9a01e57c0>
5
[ ]: df['brand_name'].isnull().sum()
[ ]: 0
[ ]: # model
df['model'].nunique()
[ ]: 980
[ ]: # price
df['price'].describe()
[ ]: count 980.000000
mean 32520.504082
std 39531.812669
min 3499.000000
25% 12999.000000
50% 19994.500000
75% 35491.500000
max 650000.000000
Name: price, dtype: float64
[ ]: sns.displot(kind='hist',data=df,x='price',kde=True)
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fb9a01fbb50>
6
[ ]: df['price'].skew()
[ ]: 6.591790999665567
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.boxplot(x=df['price'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9a0208400>
7
[ ]: df[df['price'] > 250000]
8
756 True True False bionic 6.0
789 True True False bionic 6.0
887 False True False snapdragon 8.0
905 True True False snapdragon 8.0
951 True True True kirin 8.0
954 True True True kirin 8.0
9
887 1080 x 2340 3 1.0 android
905 1812 x 2176 3 2.0 android
951 1176 x 2400 4 2.0 android
954 2200 x 2480 4 NaN NaN
extended_upto
288 NaN
319 NaN
427 NaN
458 NaN
478 256.0
704 NaN
739 NaN
756 NaN
789 NaN
887 NaN
905 NaN
951 256.0
954 NaN
[ ]: df['price'].isnull().sum()
[ ]: 0
[ ]: df['rating'].describe()
[ ]: count 879.000000
mean 78.258248
std 7.402854
min 60.000000
25% 74.000000
50% 80.000000
10
75% 84.000000
max 89.000000
Name: rating, dtype: float64
[ ]: sns.displot(kind='hist',data=df,x='rating',kde=True)
[ ]: <seaborn.axisgrid.FacetGrid at 0x7fb99fe343a0>
[ ]: df['rating'].skew()
[ ]: -0.6989993034105535
# In[ ]:
# Pass the Series as `x=`: from seaborn 0.12 the only valid positional
# argument is `data`, so the positional form is deprecated.
sns.boxplot(x=df['rating'])
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning:
Pass the following variable as a keyword arg: x. From version 0.12, the only
valid positional argument will be `data`, and passing other arguments without an
explicit keyword will result in an error or misinterpretation.
11
warnings.warn(
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb99b3bd9d0>
[ ]: df['rating'].isnull().sum()/980
[ ]: 0.10306122448979592
[ ]: df.head()
12
0 5000.0 1 100.0 12.0
1 5000.0 1 33.0 6.0
2 5000.0 1 15.0 4.0
3 5000.0 1 NaN 6.0
4 5000.0 1 67.0 6.0
extended_memory_available extended_upto
0 0 NaN
1 1 1024.0
2 1 1024.0
3 1 1024.0
4 0 NaN
[ ]: # has_5g
df['has_5g'].value_counts().plot(kind='pie',autopct='%0.1f%%')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb99b360220>
13
[ ]: # has_nfc
df['has_nfc'].value_counts().plot(kind='pie',autopct='%0.1f%%')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb99b27a580>
[ ]: # has_ir_blaster
df['has_ir_blaster'].value_counts().plot(kind='pie',autopct='%0.1f%%')
14
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb99b231730>
[ ]: df[df['has_ir_blaster'] == True]['brand_name'].value_counts()
[ ]: xiaomi 109
poco 30
iqoo 6
huawei 6
vivo 4
redmi 2
honor 1
samsung 1
Name: brand_name, dtype: int64
[ ]: df['processor_brand'].value_counts().plot(kind='pie',autopct="%0.1f%%")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb99b209280>
15
[ ]: df['num_cores'].value_counts().plot(kind='pie',autopct="%0.1f%%")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9996791c0>
[ ]: df['fast_charging_available'].value_counts().plot(kind='pie',autopct="%0.1f%%")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb999643d90>
16
[ ]: df['ram_capacity'].value_counts().plot(kind='pie',autopct="%0.1f%%")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9996f5580>
[ ]: df['internal_memory'].value_counts().plot(kind='pie',autopct="%0.1f%%")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9995f7a30>
17
[ ]: df['refresh_rate'].value_counts().plot(kind='pie',autopct="%0.1f%%")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb999557070>
[ ]: df['refresh_rate'].value_counts()
18
[ ]: 60 368
120 344
90 219
144 39
165 9
240 1
Name: refresh_rate, dtype: int64
# In[ ]:
# Share of phones by total camera count (rear + front).
total_cameras = df['num_rear_cameras'] + df['num_front_cameras']
total_cameras.value_counts().plot(kind='pie', autopct="%0.1f%%")
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9994ba3d0>
[ ]: df['os'].value_counts().plot(kind='pie',autopct='%0.1f%%')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9994ee5b0>
19
# In[ ]:
# extended_memory_available
df['extended_memory_available'].value_counts().plot(kind='pie', autopct='%0.1f%%')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb999456b50>
[ ]: df['extended_upto'].value_counts().plot(kind='pie',autopct='%0.1f%%')
20
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb99941c940>
# In[ ]:
def plot_graphs(column_name, data=None):
    """Draw a histogram (with KDE) and a box plot for one column.

    Parameters
    ----------
    column_name : str
        Name of the column to plot; also used as the histogram label.
    data : DataFrame, optional
        Frame to plot from. Defaults to the notebook-level ``df``, which is
        what the original single-argument call used.
    """
    frame = df if data is None else data
    sns.displot(kind='hist', kde=True, data=frame, x=column_name, label=column_name)
    sns.catplot(kind='box', data=frame, x=column_name)
# In[ ]:
# Pick a subset of the float64/int64 columns by position.
num_columns = (
    df.select_dtypes(include=['float64', 'int64'])
    .iloc[:, [3, 4, 6, 9, 13, 14, 16]]
    .columns
)
21
22
23
24
25
26
27
28
29
30
31
32
33
34
[ ]: df.head()
35
1 5000.0 1 33.0 6.0
2 5000.0 1 15.0 4.0
3 5000.0 1 NaN 6.0
4 5000.0 1 67.0 6.0
extended_memory_available extended_upto
0 0 NaN
1 1 1024.0
2 1 1024.0
3 1 1024.0
4 0 NaN
[ ]: plt.figure(figsize=(20,10))
sns.barplot(data=df,x='brand_name',y='price')
plt.xticks(rotation='vertical')
36
[ ]: x = df.groupby('brand_name').count()['model']
[ ]: plt.figure(figsize=(15,8))
sns.barplot(data=temp_df,x='brand_name',y='price')
plt.xticks(rotation='vertical')
37
[ ]: df.head()
38
2 64.0 6.60 90 1080 x 2408 3
3 128.0 6.55 120 1080 x 2400 3
4 128.0 6.70 120 1080 x 2412 3
extended_memory_available extended_upto
0 0 NaN
1 1 1024.0
2 1 1024.0
3 1 1024.0
4 0 NaN
[ ]: sns.scatterplot(data=df,x='rating',y='price')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9992e1580>
[ ]: sns.barplot(data=temp_df,x='has_5g',y='price',estimator=np.median)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9975fd940>
39
[ ]: sns.pointplot(data=temp_df,x='has_nfc',y='price',estimator=np.median)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb997785280>
40
[ ]: sns.barplot(data=temp_df,x='has_ir_blaster',y='price',estimator=np.median)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9973c4940>
[ ]: sns.barplot(data=temp_df,x='processor_brand',y='price',estimator=np.median)
plt.xticks(rotation='vertical')
41
[ ]: sns.barplot(data=temp_df,x='num_cores',y='price',estimator=np.median)
plt.xticks(rotation='vertical')
42
[ ]: pd.crosstab(df['num_cores'],df['os'])
[ ]: sns.scatterplot(data=df,x='processor_speed',y='price')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb99721f550>
43
[ ]: sns.scatterplot(data=df,x='screen_size',y='price')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7fb9971ed0d0>
44
[ ]: df.corr()['price']
[ ]: price 1.000000
rating 0.283504
has_5g 0.305066
has_nfc 0.470951
has_ir_blaster -0.015807
num_cores -0.048561
processor_speed 0.474049
battery_capacity -0.159232
fast_charging_available 0.116739
fast_charging 0.277591
ram_capacity 0.386002
internal_memory 0.557168
screen_size 0.113253
refresh_rate 0.244115
num_rear_cameras 0.125330
num_front_cameras 0.115228
primary_camera_rear 0.092095
primary_camera_front 0.162995
extended_memory_available -0.448628
extended_upto 0.091945
Name: price, dtype: float64
[ ]: df.isnull().sum()
[ ]: brand_name 0
model 0
price 0
rating 101
has_5g 0
has_nfc 0
has_ir_blaster 0
processor_brand 20
num_cores 6
processor_speed 42
battery_capacity 11
fast_charging_available 0
fast_charging 211
ram_capacity 0
internal_memory 0
screen_size 0
refresh_rate 0
resolution 0
num_rear_cameras 0
num_front_cameras 4
45
os 14
primary_camera_rear 0
primary_camera_front 5
extended_memory_available 0
extended_upto 480
dtype: int64
[ ]: df.corr()['rating']
[ ]: price 0.283504
rating 1.000000
has_5g 0.596087
has_nfc 0.474754
has_ir_blaster 0.156421
num_cores 0.199741
processor_speed 0.628446
battery_capacity -0.015581
fast_charging_available 0.542814
fast_charging 0.527613
ram_capacity 0.757613
internal_memory 0.481070
screen_size 0.298272
refresh_rate 0.610795
num_rear_cameras 0.515531
num_front_cameras 0.131480
primary_camera_rear 0.562046
primary_camera_front 0.577861
extended_memory_available -0.415265
extended_upto 0.346761
Name: rating, dtype: float64
[ ]: # knn imputer
df.shape
[ ]: (980, 25)
[ ]: x_df = df.select_dtypes(include=['int64','float64']).drop(columns='price')
[ ]: imputer = KNNImputer(n_neighbors=5)
[ ]: x_df_values = imputer.fit_transform(x_df)
[ ]: x_df = pd.DataFrame(x_df_values,columns=x_df.columns)
[ ]: x_df['price'] = df['price']
46
[ ]: x_df.head()
extended_upto price
0 870.4 54999
1 1024.0 19989
2 1024.0 16499
3 1024.0 14999
4 1024.0 24999
[ ]: a = x_df.corr()['price'].reset_index()
[ ]: b = df.corr()['price'].reset_index()
[ ]: b.merge(a,on='index')
47
3 processor_speed 0.474049 0.488426
4 battery_capacity -0.159232 -0.166257
5 fast_charging_available 0.116739 0.116739
6 fast_charging 0.277591 0.220688
7 ram_capacity 0.386002 0.386002
8 internal_memory 0.557168 0.557168
9 screen_size 0.113253 0.113253
10 refresh_rate 0.244115 0.244115
11 num_rear_cameras 0.125330 0.125330
12 num_front_cameras 0.115228 0.115787
13 primary_camera_rear 0.092095 0.092095
14 primary_camera_front 0.162995 0.160281
15 extended_memory_available -0.448628 -0.448628
16 extended_upto 0.091945 -0.004073
# In[ ]:
# Correlation of price with every column after one-hot encoding the
# categorical columns (first level of each dropped).
pd.get_dummies(
    df,
    columns=['brand_name', 'processor_brand', 'os'],
    drop_first=True,
).corr()['price']
[ ]: price 1.000000
rating 0.283504
has_5g 0.305066
has_nfc 0.470951
has_ir_blaster -0.015807
num_cores -0.048561
processor_speed 0.474049
battery_capacity -0.159232
fast_charging_available 0.116739
fast_charging 0.277591
ram_capacity 0.386002
internal_memory 0.557168
screen_size 0.113253
refresh_rate 0.244115
num_rear_cameras 0.125330
num_front_cameras 0.115228
primary_camera_rear 0.092095
primary_camera_front 0.162995
extended_memory_available -0.448628
extended_upto 0.091945
brand_name_asus 0.090566
brand_name_blackview -0.019033
brand_name_blu -0.014180
brand_name_cat -0.014173
brand_name_cola -0.014173
brand_name_doogee -0.002314
brand_name_duoqin -0.018224
brand_name_gionee -0.035937
48
brand_name_google 0.035990
brand_name_honor -0.006511
brand_name_huawei 0.155389
brand_name_ikall -0.034904
brand_name_infinix -0.078915
brand_name_iqoo -0.010316
brand_name_itel -0.068636
brand_name_jio -0.041091
brand_name_lava -0.034469
brand_name_leeco -0.017408
brand_name_leitz 0.074797
brand_name_lenovo 0.024005
brand_name_letv -0.036961
brand_name_lg 0.043208
brand_name_lyf -0.032711
brand_name_micromax -0.035890
brand_name_motorola -0.050448
brand_name_nokia -0.023152
brand_name_nothing -0.002402
brand_name_nubia 0.034380
brand_name_oneplus 0.017877
brand_name_oppo -0.022818
brand_name_oukitel -0.008734
brand_name_poco -0.074258
brand_name_realme -0.126322
brand_name_redmi -0.023638
brand_name_royole 0.111565
brand_name_samsung 0.043162
brand_name_sharp 0.022220
brand_name_sony 0.068347
brand_name_tcl -0.019033
brand_name_tecno -0.084924
brand_name_tesla 0.030316
brand_name_vertu 0.499466
brand_name_vivo -0.051903
brand_name_xiaomi -0.045925
brand_name_zte 0.009699
processor_brand_dimensity -0.075534
processor_brand_exynos 0.031226
processor_brand_fusion 0.016557
processor_brand_google 0.056005
processor_brand_helio -0.257915
processor_brand_kirin 0.135626
processor_brand_mediatek -0.022262
processor_brand_sc9863a -0.029067
processor_brand_snapdragon 0.156442
processor_brand_spreadtrum -0.043668
49
processor_brand_tiger -0.093937
processor_brand_unisoc -0.101970
os_ios 0.356357
os_other 0.089325
Name: price, dtype: float64
[ ]:
50
session-on-eda
May 3, 2024
[ ]: import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
• Dataset drive.google.com/drive/folders/1oFZxHRuAw_JI7soe46mmO61s-
WM7jtQg?usp=share_link
[ ]: df = pd.read_csv('train.csv')
[ ]: df.head()
1
• Handling missing values
• feature engineering
• detecting outliers
[ ]: # Remember it is an iterative process
The shape of a data distribution refers to its overall pattern or form as it is represented
on a graph. Some common shapes of data distributions include:
• Normal Distribution: A symmetrical and bell-shaped distribution where the mean,
median, and mode are equal and the majority of the data falls in the middle of the
distribution, with gradually decreasing frequencies towards the tails.
• Skewed Distribution: A distribution that is not symmetrical, with one tail being longer
than the other. It can be either positively skewed (right-skewed) or negatively skewed (left-
skewed).
• Bimodal Distribution: A distribution with two peaks or modes.
• Uniform Distribution: A distribution where all values have an equal chance of occurring.
The shape of the data distribution is important in identifying the presence of outliers, skewness,
and the type of statistical tests and models that can be used for further analysis.
2
• Interquartile range (IQR): The range between the first quartile (25th percentile) and the
third quartile (75th percentile) of the data.
Dispersion helps to describe the spread of the data, which can help to identify the presence of
outliers and skewness in the data.
0.0.5 Age
conclusions
• Age is (almost) normally distributed
• 20% of the values are missing
• There are some outliers
[ ]: df['Age'].describe()
[ ]: count 714.000000
mean 29.699118
std 14.526497
min 0.420000
25% 20.125000
50% 28.000000
75% 38.000000
max 80.000000
Name: Age, dtype: float64
[ ]: df['Age'].plot(kind='hist',bins=20)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f003a0ab1f0>
3
[ ]: df['Age'].plot(kind='kde')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0039fee1f0>
4
[ ]: df['Age'].skew()
[ ]: 0.38910778230082704
[ ]: df['Age'].plot(kind='box')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0039fd0a60>
5
672 male 70.0 0 0 C.A. 24580 10.5000 NaN S
745 male 70.0 1 1 WE/P 5735 71.0000 B22 S
851 male 74.0 0 0 347060 7.7750 NaN S
[ ]: df['Age'].isnull().sum()/len(df['Age'])
[ ]: 0.19865319865319866
0.0.6 Fare
conclusions
• The data is highly(positively) skewed
• Fare col actually contains the group fare and not the individual fare (this might be an issue)
• We need to create a new col called individual fare
[ ]: df['Fare'].describe()
[ ]: count 891.000000
mean 32.204208
std 49.693429
min 0.000000
25% 7.910400
50% 14.454200
75% 31.000000
max 512.329200
Name: Fare, dtype: float64
[ ]: df['Fare'].plot(kind='hist')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0037efa910>
6
[ ]: df['Fare'].plot(kind='kde')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0037f48be0>
7
[ ]: df['Fare'].skew()
[ ]: 4.787316519674893
[ ]: df['Fare'].plot(kind='box')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0037dd50a0>
8
341 female 24.0 3 2 19950 263.0000 C23 C25 C27 S
438 male 64.0 1 4 19950 263.0000 C23 C25 C27 S
679 male 36.0 0 1 PC 17755 512.3292 B51 B53 B55 C
737 male 35.0 0 0 PC 17755 512.3292 B101 C
742 female 21.0 2 2 PC 17608 262.3750 B57 B59 B63 B66 C
[ ]: df['Fare'].isnull().sum()
[ ]: 0
[ ]:
[ ]:
0.0.8 Survived
conclusions
• Parch and SibSp cols can be merged to form a new col called family_size
• Create a new col called is_alone
[ ]: df['Embarked'].value_counts()
[ ]: S 644
C 168
Q 77
Name: Embarked, dtype: int64
[ ]: df['Embarked'].value_counts().plot(kind='bar')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0037a68d00>
9
[ ]: df['Embarked'].value_counts().plot(kind='pie',autopct='%0.1f%%')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0037a9cb50>
[ ]: df['Sex'].isnull().sum()
10
[ ]: 0
[ ]:
[ ]:
[ ]:
[ ]:
[ ]:
11
2 Heikkinen, Miss. Laina female 26.0 0
3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1
4 Allen, Mr. William Henry male 35.0 0
.. … … … …
886 Montvila, Rev. Juozas male 27.0 0
887 Graham, Miss. Margaret Edith female 19.0 0
888 Johnston, Miss. Catherine Helen "Carrie" female NaN 1
889 Behr, Mr. Karl Howell male 26.0 0
890 Dooley, Mr. Patrick male 32.0 0
[ ]: sns.heatmap(pd.crosstab(df['Survived'],df['Pclass'],normalize='columns')*100)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f0037e1adc0>
12
[ ]: pd.crosstab(df['Survived'],df['Sex'],normalize='columns')*100
[ ]: pd.crosstab(df['Survived'],df['Embarked'],normalize='columns')*100
[ ]: Embarked C Q S
Survived
0 44.642857 61.038961 66.304348
1 55.357143 38.961039 33.695652
[ ]: pd.crosstab(df['Sex'],df['Embarked'],normalize='columns')*100
[ ]: Embarked C Q S
Sex
female 43.452381 46.753247 31.521739
male 56.547619 53.246753 68.478261
[ ]: pd.crosstab(df['Pclass'],df['Embarked'],normalize='columns')*100
13
[ ]: Embarked C Q S
Pclass
1 50.595238 2.597403 19.720497
2 10.119048 3.896104 25.465839
3 39.285714 93.506494 54.813665
df[df['Survived'] == 1]['Age'].plot(kind='kde',label='Survived')
df[df['Survived'] == 0]['Age'].plot(kind='kde',label='Not Survived')
plt.legend()
plt.show()
[ ]: df[df['Pclass'] == 1]['Age'].mean()
[ ]: 38.233440860215055
[ ]: df['SibSp'].value_counts()
[ ]: 0 608
1 209
2 28
4 18
14
3 16
8 7
5 5
Name: SibSp, dtype: int64
[ ]: df[df['Name'].str.contains('Sage')]
15
[ ]: df1 = pd.read_csv('/content/test.csv')
[ ]: df = pd.concat([df,df1])
[ ]: df['individual_fare'].plot(kind='box')
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f00347f4c40>
16
[ ]: df[['individual_fare','Fare']].describe()
[ ]: individual_fare Fare
count 1308.000000 1308.000000
mean 20.518215 33.295479
std 35.774337 51.758668
min 0.000000 0.000000
25% 7.452767 7.895800
50% 8.512483 14.454200
75% 24.237500 31.275000
max 512.329200 512.329200
[ ]: df['Fare'].
[ ]: 0 7.2500
1 71.2833
2 7.9250
3 53.1000
4 8.0500
…
413 8.0500
414 108.9000
415 7.2500
416 8.0500
417 22.3583
Name: Fare, Length: 1309, dtype: float64
17
[ ]: df
[ ]: # family_type
# 1 -> alone
18
# 2-4 -> small
# >=5 -> large
def transform_family_size(num):
    """Map a numeric family size to a coarse category label.

    Parameters
    ----------
    num : int or float
        Family size to classify.

    Returns
    -------
    str
        'alone' when ``num == 1``, "small" when ``1 < num < 5``, and
        "large" for every other value (so 5 itself, and anything that is
        neither 1 nor strictly between 1 and 5, is labelled "large").
    """
    if num == 1:
        return 'alone'
    if 1 < num < 5:
        return "small"
    # Catch-all branch: reached for num >= 5 and for any value that failed
    # both comparisons above.
    return "large"
[ ]: df['family_type'] = df['family_size'].apply(transform_family_size)
[ ]: df
19
4 0 373450 8.0500 NaN S 8.050000
.. … … … … … …
413 0 A.5. 3236 8.0500 NaN S 8.050000
414 0 PC 17758 108.9000 C105 C 108.900000
415 0 SOTON/O.Q. 3101262 7.2500 NaN S 7.250000
416 0 359309 8.0500 NaN S 8.050000
417 1 2668 22.3583 NaN C 7.452767
family_size family_type
0 2 small
1 2 small
2 1 alone
3 2 small
4 1 alone
.. … …
413 1 alone
414 1 alone
415 1 alone
416 1 alone
417 3 small
[ ]: pd.crosstab(df['Survived'],df['family_type'],normalize='columns')*100
[ ]: df['surname'] = df['Name'].str.split(',').str.get(0)
[ ]: df
20
Name Sex Age SibSp \
0 Braund, Mr. Owen Harris male 22.0 1
1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1
2 Heikkinen, Miss. Laina female 26.0 0
3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1
4 Allen, Mr. William Henry male 35.0 0
.. … … … …
413 Spector, Mr. Woolf male NaN 0
414 Oliva y Ocana, Dona. Fermina female 39.0 0
415 Saether, Mr. Simon Sivertsen male 38.5 0
416 Ware, Mr. Frederick male NaN 0
417 Peter, Master. Michael J male NaN 1
[ ]: temp_df = df[df['title'].isin(['Mr.','Miss.','Mrs.','Master.','ootherr'])]
[ ]: pd.crosstab(temp_df['Survived'],temp_df['title'],normalize='columns')*100
21
[ ]: title Master. Miss. Mr. Mrs. ootherr
Survived
0.0 42.5 30.21978 84.332689 20.8 72.222222
1.0 57.5 69.78022 15.667311 79.2 27.777778
[ ]: df['title'] = df['title'].str.replace('Rev.','other')
df['title'] = df['title'].str.replace('Dr.','other')
df['title'] = df['title'].str.replace('Col.','other')
df['title'] = df['title'].str.replace('Major.','other')
df['title'] = df['title'].str.replace('Capt.','other')
df['title'] = df['title'].str.replace('the','other')
df['title'] = df['title'].str.replace('Jonkheer.','other')
# ,'Dr.','Col.','Major.','Don.','Capt.','the','Jonkheer.']
[ ]: df['Cabin'].isnull().sum()/len(df['Cabin'])
[ ]: 0.774637127578304
[ ]: df['Cabin'].fillna('M',inplace=True)
[ ]: df['Cabin'].value_counts()
[ ]: M 1014
C23 C25 C27 6
B57 B59 B63 B66 5
G6 5
F33 4
…
22
A14 1
E63 1
E12 1
E38 1
C105 1
Name: Cabin, Length: 187, dtype: int64
[ ]: df['deck'] = df['Cabin'].str[0]
[ ]: df['deck'].value_counts()
[ ]: M 1014
C 94
B 65
D 46
E 41
A 22
F 21
G 5
T 1
Name: deck, dtype: int64
[ ]: pd.crosstab(df['deck'],df['Pclass'])
[ ]: Pclass 1 2 3
deck
A 22 0 0
B 65 0 0
C 94 0 0
D 40 6 0
E 34 4 3
F 0 13 8
G 0 0 5
M 67 254 693
T 1 0 0
[ ]: pd.crosstab(df['deck'],df['Survived'],normalize='index').
↪plot(kind='bar',stacked=True)
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f00343b9af0>
23
[ ]: sns.heatmap(df.corr())
[ ]: <matplotlib.axes._subplots.AxesSubplot at 0x7f00343513d0>
24
[ ]: sns.pairplot(df1)
[ ]: <seaborn.axisgrid.PairGrid at 0x7f003427fdc0>
25
[ ]: df1
26
417 1309 3 Peter, Master. Michael J
[ ]:
27