0% found this document useful (0 votes)
16 views

Python Practice - Jupyter Notebook - 065330

Python data analysis practice

Uploaded by

Shaan
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views

Python Practice - Jupyter Notebook - 065330

Python data analysis practice

Uploaded by

Shaan
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 126

6/10/24, 6:53 AM satan practice - Jupyter Notebook

import numpy as np

In [5]:  data=np.random.randn(2,3)
data

Out[5]: array([[-0.23472036, 0.87146424, -0.37726817],


[-0.29503245, 0.36775991, -1.32637835]])

In [6]:  data1=np.random.rand(5,4)
data1

Out[6]: array([[0.48054898, 0.96911265, 0.91927535, 0.29366841],


[0.30687972, 0.42556136, 0.71776894, 0.47845155],
[0.39952061, 0.67456693, 0.99152282, 0.67126194],
[0.03158015, 0.03472117, 0.67635815, 0.2009031 ],
[0.61736346, 0.55223703, 0.41526688, 0.2752871 ]])

In [7]:  data2=np.random.randint(20,100,(5,4))
data2

Out[7]: array([[35, 45, 99, 43],


[53, 59, 52, 98],
[84, 54, 93, 79],
[39, 22, 65, 56],
[83, 27, 50, 32]])

In [11]:  data=np.arange(10).reshape(5,2)
data

Out[11]: array([[0, 1],


[2, 3],
[4, 5],
[6, 7],
[8, 9]])

In [12]:  data*10

Out[12]: array([[ 0, 10],


[20, 30],
[40, 50],
[60, 70],
[80, 90]])

In [15]:  data-data

Out[15]: array([[0, 0],


[0, 0],
[0, 0],
[0, 0],
[0, 0]])

localhost:8888/notebooks/satan practice.ipynb# 1/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [16]:  data+data

Out[16]: array([[ 0, 2],


[ 4, 6],
[ 8, 10],
[12, 14],
[16, 18]])

In [17]:  data**2

Out[17]: array([[ 0, 1],


[ 4, 9],
[16, 25],
[36, 49],
[64, 81]])

In [18]:  1/data

C:\Users\learner\AppData\Local\Temp\ipykernel_4196\1424048073.py:1: RuntimeWarning: divide by zero encountered in divide
1/data

Out[18]: array([[ inf, 1. ],


[0.5 , 0.33333333],
[0.25 , 0.2 ],
[0.16666667, 0.14285714],
[0.125 , 0.11111111]])

In [19]:  np.log2(data)

C:\Users\learner\AppData\Local\Temp\ipykernel_4196\2371420318.py:1: RuntimeWarning: divide by zero encountered in log2
np.log2(data)

Out[19]: array([[ -inf, 0. ],


[1. , 1.5849625 ],
[2. , 2.32192809],
[2.5849625 , 2.80735492],
[3. , 3.169925 ]])

In [21]:  np.log(data)

C:\Users\learner\AppData\Local\Temp\ipykernel_4196\182692439.py:1: RuntimeWarning: divide by zero encountered in log
np.log(data)

Out[21]: array([[ -inf, 0. ],


[0.69314718, 1.09861229],
[1.38629436, 1.60943791],
[1.79175947, 1.94591015],
[2.07944154, 2.19722458]])

localhost:8888/notebooks/satan practice.ipynb# 2/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [22]:  np.abs(data)

Out[22]: array([[0, 1],


[2, 3],
[4, 5],
[6, 7],
[8, 9]])

In [23]:  data**1/2

Out[23]: array([[0. , 0.5],


[1. , 1.5],
[2. , 2.5],
[3. , 3.5],
[4. , 4.5]])

In [24]:  data/data

C:\Users\learner\AppData\Local\Temp\ipykernel_4196\2225509407.py:1: RuntimeWarning: invalid value encountered in divide
data/data

Out[24]: array([[nan, 1.],


[ 1., 1.],
[ 1., 1.],
[ 1., 1.],
[ 1., 1.]])

In [25]:  data.shape

Out[25]: (5, 2)

In [26]:  data.dtype
Out[26]: dtype('int32')

In [27]:  arr=np.array((5,3,1,8,0))
arr

Out[27]: array([5, 3, 1, 8, 0])

In [29]:  arr2=np.array([[1,2,3,4,5],[6,7,8,9,0]])
arr2

Out[29]: array([[1, 2, 3, 4, 5],


[6, 7, 8, 9, 0]])

In [30]:  arr2.ndim

Out[30]: 2

localhost:8888/notebooks/satan practice.ipynb# 3/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [31]:  arr2.shape

Out[31]: (2, 5)

In [32]:  arr2.dtype

Out[32]: dtype('int32')

In [33]:  arr2.size

Out[33]: 10

In [37]:  arr2.itemsize

Out[37]: 4

In [39]:  np.zeros((2,6))
Out[39]: array([[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.]])

In [40]:  np.ones((5,4))

Out[40]: array([[1., 1., 1., 1.],


[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])

In [41]:  np.empty((3,7))

Out[41]: array([[6.23042070e-307, 3.56043053e-307, 1.60219306e-306,


7.56571288e-307, 7.56587584e-307, 1.37961302e-306,
1.05699242e-307],
[8.01097889e-307, 1.78020169e-306, 7.56601165e-307,
1.02359984e-306, 1.42417221e-306, 1.60218220e-306,
1.86921279e-306],
[1.69119330e-306, 9.34611148e-307, 1.60220393e-306,
1.11260144e-306, 6.89812281e-307, 2.22522596e-306,
1.86919785e-306]])

In [42]:  np.ones_like(data2)

Out[42]: array([[1, 1, 1, 1],


[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]])

localhost:8888/notebooks/satan practice.ipynb# 4/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [43]:  np.zeros_like(data2)

Out[43]: array([[0, 0, 0, 0],


[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])

In [48]:  np.empty_like(data2)

Out[48]: array([[-1, -1, 0, 0],


[ 0, 0, 0, 0],
[ 0, 0, 0, 0],
[ 0, 0, 0, 0],
[ 0, 0, 0, 0]])

In [50]:  np.full((3,7),4)

Out[50]: array([[4, 4, 4, 4, 4, 4, 4],


[4, 4, 4, 4, 4, 4, 4],
[4, 4, 4, 4, 4, 4, 4]])

In [51]:  np.full_like(data2,1000)

Out[51]: array([[1000, 1000, 1000, 1000],


[1000, 1000, 1000, 1000],
[1000, 1000, 1000, 1000],
[1000, 1000, 1000, 1000],
[1000, 1000, 1000, 1000]])

In [54]:  np.arange(165).reshape(5,-1).shape

Out[54]: (5, 33)

In [56]:  np.eye(4,5)

Out[56]: array([[1., 0., 0., 0., 0.],


[0., 1., 0., 0., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 0., 1., 0.]])

In [57]:  np.identity(9)

Out[57]: array([[1., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 1., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 1., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 1.]])

localhost:8888/notebooks/satan practice.ipynb# 5/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [65]:  arr=np.arange(20,dtype='float')
arr

Out[65]: array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19.])

In [69]:  x=np.asarray(data2,dtype=np.float64)
x

Out[69]: array([[35., 45., 99., 43.],


[53., 59., 52., 98.],
[84., 54., 93., 79.],
[39., 22., 65., 56.],
[83., 27., 50., 32.]])

In [71]:  data2.astype('S')

Out[71]: array([[b'35', b'45', b'99', b'43'],


[b'53', b'59', b'52', b'98'],
[b'84', b'54', b'93', b'79'],
[b'39', b'22', b'65', b'56'],
[b'83', b'27', b'50', b'32']], dtype='|S11')

In [73]:  a=np.random.randint(1,100,(4,3))
b=np.random.randint(1,100,(4,3))

In [74]:  a,b

Out[74]: (array([[79, 64, 13],


[82, 85, 11],
[15, 45, 83],
[87, 91, 16]]),
array([[75, 44, 93],
[69, 81, 56],
[99, 46, 59],
[94, 50, 46]]))

In [75]:  a*b
Out[75]: array([[5925, 2816, 1209],
[5658, 6885, 616],
[1485, 2070, 4897],
[8178, 4550, 736]])

In [76]:  a-b

Out[76]: array([[ 4, 20, -80],


[ 13, 4, -45],
[-84, -1, 24],
[ -7, 41, -30]])

localhost:8888/notebooks/satan practice.ipynb# 6/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [77]:  b-a

Out[77]: array([[ -4, -20, 80],


[-13, -4, 45],
[ 84, 1, -24],
[ 7, -41, 30]])

In [78]:  a/b

Out[78]: array([[1.05333333, 1.45454545, 0.13978495],


[1.1884058 , 1.04938272, 0.19642857],
[0.15151515, 0.97826087, 1.40677966],
[0.92553191, 1.82 , 0.34782609]])

In [79]:  b/a

Out[79]: array([[0.94936709, 0.6875 , 7.15384615],


[0.84146341, 0.95294118, 5.09090909],
[6.6 , 1.02222222, 0.71084337],
[1.08045977, 0.54945055, 2.875 ]])

In [80]:  a**2

Out[80]: array([[6241, 4096, 169],


[6724, 7225, 121],
[ 225, 2025, 6889],
[7569, 8281, 256]])

In [83]:  a[0]

Out[83]: array([79, 64, 13])

In [84]:  a[0:2]

Out[84]: array([[79, 64, 13],


[82, 85, 11]])

In [85]:  a

Out[85]: array([[79, 64, 13],


[82, 85, 11],
[15, 45, 83],
[87, 91, 16]])

In [86]:  a[2:]=0

In [87]:  a

Out[87]: array([[79, 64, 13],


[82, 85, 11],
[ 0, 0, 0],
[ 0, 0, 0]])

localhost:8888/notebooks/satan practice.ipynb# 7/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [88]:  c=a[0:2]

In [90]:  c[0]=100

In [91]:  c

Out[91]: array([[100, 100, 100],


[ 82, 85, 11]])

In [92]:  a

Out[92]: array([[100, 100, 100],


[ 82, 85, 11],
[ 0, 0, 0],
[ 0, 0, 0]])

In [98]:  a

Out[98]: array([[100, 100, 100],


[ 82, 85, 11],
[ 0, 0, 0],
[ 0, 0, 0]])

In [99]:  b
Out[99]: array([[75, 44, 93],
[69, 81, 56],
[99, 46, 59],
[94, 50, 46]])

In [100]:  b[2][1]

Out[100]: 46

In [101]:  b[0,2]

Out[101]: 93

localhost:8888/notebooks/satan practice.ipynb# 8/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [103]:  d=np.arange(30).reshape(3,5,-1)
d

Out[103]: array([[[ 0, 1],


[ 2, 3],
[ 4, 5],
[ 6, 7],
[ 8, 9]],

[[10, 11],
[12, 13],
[14, 15],
[16, 17],
[18, 19]],

[[20, 21],
[22, 23],
[24, 25],
[26, 27],
[28, 29]]])

In [104]:  d[2]

Out[104]: array([[20, 21],


[22, 23],
[24, 25],
[26, 27],
[28, 29]])

In [106]:  x=d[0:2].copy()
x

Out[106]: array([[[ 0, 1],


[ 2, 3],
[ 4, 5],
[ 6, 7],
[ 8, 9]],

[[10, 11],
[12, 13],
[14, 15],
[16, 17],
[18, 19]]])

In [107]:  x[1]=0

localhost:8888/notebooks/satan practice.ipynb# 9/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [108]:  x

Out[108]: array([[[0, 1],


[2, 3],
[4, 5],
[6, 7],
[8, 9]],

[[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0]]])

In [109]:  d

Out[109]: array([[[ 0, 1],


[ 2, 3],
[ 4, 5],
[ 6, 7],
[ 8, 9]],

[[10, 11],
[12, 13],
[14, 15],
[16, 17],
[18, 19]],

[[20, 21],
[22, 23],
[24, 25],
[26, 27],
[28, 29]]])

In [115]:  d[0:,0:,0:]

Out[115]: array([[[ 0, 1],


[ 2, 3],
[ 4, 5],
[ 6, 7],
[ 8, 9]],

[[10, 11],
[12, 13],
[14, 15],
[16, 17],
[18, 19]],

[[20, 21],
[22, 23],
[24, 25],
[26, 27],
[28, 29]]])

localhost:8888/notebooks/satan practice.ipynb# 10/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [124]:  name=np.array(['gopi','suraj','gopi','sunil','praveen','suraj','harsh'])

In [125]:  name

Out[125]: array(['gopi', 'suraj', 'gopi', 'sunil', 'praveen', 'suraj', 'harsh'],


dtype='<U7')

In [119]:  name.dtype
Out[119]: dtype('<U7')

In [123]:  marks=np.random.randint(20,100,(7,10))
marks

Out[123]: array([[47, 24, 85, 27, 24, 98, 89, 26, 49, 73],
[41, 82, 91, 69, 53, 22, 23, 80, 78, 96],
[44, 85, 51, 29, 46, 99, 27, 20, 88, 31],
[94, 84, 42, 59, 23, 57, 97, 64, 76, 53],
[20, 29, 20, 75, 47, 92, 66, 82, 53, 45],
[89, 55, 68, 87, 32, 29, 36, 99, 48, 72],
[41, 71, 76, 47, 29, 50, 93, 71, 25, 21]])

In [126]:  name=='gopi'

Out[126]: array([ True, False, True, False, False, False, False])

In [128]:  marks[name=='gopi']
Out[128]: array([[47, 24, 85, 27, 24, 98, 89, 26, 49, 73],
[44, 85, 51, 29, 46, 99, 27, 20, 88, 31]])

In [130]:  marks[name=='suraj',0:2]

Out[130]: array([[41, 82],


[89, 55]])

In [136]:  x=(name!='gopi') & (name!='sunil')


x

Out[136]: array([False, True, False, False, True, True, True])

In [138]:  marks[~x]

Out[138]: array([[47, 24, 85, 27, 24, 98, 89, 26, 49, 73],
[44, 85, 51, 29, 46, 99, 27, 20, 88, 31],
[94, 84, 42, 59, 23, 57, 97, 64, 76, 53]])

localhost:8888/notebooks/satan practice.ipynb# 11/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [139]:  data2

Out[139]: array([[35, 45, 99, 43],


[53, 59, 52, 98],
[84, 54, 93, 79],
[39, 22, 65, 56],
[83, 27, 50, 32]])

In [140]:  data2>50

Out[140]: array([[False, False, True, False],


[ True, True, True, True],
[ True, True, True, True],
[False, False, True, True],
[ True, False, False, False]])

In [145]:  data2[data2>50]=0

In [146]:  data2

Out[146]: array([[35, 45, 0, 43],


[ 0, 0, 0, 0],
[ 0, 0, 0, 0],
[39, 22, 0, 0],
[ 0, 27, 50, 32]])

In [147]:  b

Out[147]: array([[75, 44, 93],


[69, 81, 56],
[99, 46, 59],
[94, 50, 46]])

In [148]:  b[[2,3]]

Out[148]: array([[99, 46, 59],


[94, 50, 46]])

In [149]:  b[[1,2,3],[0,1,2]]

Out[149]: array([69, 46, 46])

In [ ]:  ​

In [1]:  import pandas as pd

In [151]:  obj=pd.Series([1,2,4,5,67,9])

In [152]:  obj1=pd.Series((1,2,4,5,67,90))

localhost:8888/notebooks/satan practice.ipynb# 12/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [153]:  obj,obj1

Out[153]: (0 1
1 2
2 4
3 5
4 67
5 9
dtype: int64,
0 1
1 2
2 4
3 5
4 67
5 90
dtype: int64)

In [154]:  x=pd.Series({'a':20,'b':30,'c':30,'d':40})

In [155]:  x
Out[155]: a 20
b 30
c 30
d 40
dtype: int64

In [156]:  x[0:2]

Out[156]: a 20
b 30
dtype: int64

In [157]:  x['a':'c']

Out[157]: a 20
b 30
c 30
dtype: int64

In [158]:  x.index=['A','B','C','D']

In [163]:  x['A':'C']

Out[163]: A 20
B 30
C 30
dtype: int64

In [164]:  x.values

Out[164]: array([20, 30, 30, 40], dtype=int64)

localhost:8888/notebooks/satan practice.ipynb# 13/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [165]:  x.index

Out[165]: Index(['A', 'B', 'C', 'D'], dtype='object')

In [166]:  x[0]

Out[166]: 20

In [168]:  x[1:3]=0

In [169]:  x

Out[169]: A 20
B 0
C 0
D 40
dtype: int64

In [171]:  x[['B','C']]=[3,4]

In [172]:  x

Out[172]: A 20
B 3
C 4
D 40
dtype: int64

In [174]:  x[x>10]

Out[174]: A 20
D 40
dtype: int64

In [175]:  x*2

Out[175]: A 40
B 6
C 8
D 80
dtype: int64

In [176]:  np.exp(x)

Out[176]: A 4.851652e+08
B 2.008554e+01
C 5.459815e+01
D 2.353853e+17
dtype: float64

localhost:8888/notebooks/satan practice.ipynb# 14/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [179]:  3 in x.values

Out[179]: True

In [44]:  states=pd.Series({'mumbai':1000,'delhi':2000,'kolkata':500,'chennai':200,'kanpur':100,'patna':100})
states

Out[44]: mumbai 1000


delhi 2000
kolkata 500
chennai 200
kanpur 100
patna 100
dtype: int64

In [184]:  states['chennai']=None

In [185]:  states

Out[185]: mumbai 1000.0


delhi 2000.0
kolkata 500.0
chennai NaN
kanpur 100.0
patna NaN
dtype: float64

In [186]:  pd.isnull(states)

Out[186]: mumbai False


delhi False
kolkata False
chennai True
kanpur False
patna True
dtype: bool

In [187]:  pd.notnull(states)

Out[187]: mumbai True


delhi True
kolkata True
chennai False
kanpur True
patna False
dtype: bool

localhost:8888/notebooks/satan practice.ipynb# 15/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [188]:  x,states

Out[188]: (A 20
B 3
C 4
D 40
dtype: int64,
mumbai 1000.0
delhi 2000.0
kolkata 500.0
chennai NaN
kanpur 100.0
patna NaN
dtype: float64)

In [189]:  x+states

Out[189]: A NaN
B NaN
C NaN
D NaN
chennai NaN
delhi NaN
kanpur NaN
kolkata NaN
mumbai NaN
patna NaN
dtype: float64

In [38]:  usa=pd.Series({'california':1000,'delhi':2000,'kolkata':500,'miami':200,'kanpur':100,'orlando':200})
usa

Out[38]: california 1000


delhi 2000
kolkata 500
miami 200
kanpur 100
orlando 200
dtype: int64

In [191]:  usa+states

Out[191]: california NaN


chennai NaN
delhi 4000.0
kanpur 200.0
kolkata 1000.0
miami NaN
mumbai NaN
orlando NaN
patna NaN
dtype: float64

localhost:8888/notebooks/satan practice.ipynb# 16/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [193]:  states.name='INDIA_STATES'

In [194]:  states

Out[194]: mumbai 1000.0


delhi 2000.0
kolkata 500.0
chennai NaN
kanpur 100.0
patna NaN
Name: INDIA_STATES, dtype: float64

In [197]:  usa.name='USA_STATES'
usa

Out[197]: california 1000.0


delhi 2000.0
kolkata 500.0
miami 200.0
kanpur 100.0
orlando NaN
Name: USA_STATES, dtype: float64

In [2]:  df=pd.DataFrame({'city':['mumbai','delhi','kolkata','patna','lucknow','chennai','assam'],'code':[1100,1200,4000,3400,2980,2330,6660],

'rank':['I','II','IV','III','VI','V','VII']})

In [3]:  df

Out[3]: city code rank

0 mumbai 1100 I

1 delhi 1200 II

2 kolkata 4000 IV

3 patna 3400 III

4 lucknow 2980 VI

5 chennai 2330 V

6 assam 6660 VII

In [5]:  df.head()

Out[5]: city code rank

0 mumbai 1100 I

1 delhi 1200 II

2 kolkata 4000 IV

3 patna 3400 III

4 lucknow 2980 VI

localhost:8888/notebooks/satan practice.ipynb# 17/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [6]:  df.tail()

Out[6]: city code rank

2 kolkata 4000 IV

3 patna 3400 III

4 lucknow 2980 VI

5 chennai 2330 V

6 assam 6660 VII

In [7]:  df['SDP in $B']=[100,80,20,30,55,10,23]

In [8]:  df

Out[8]: city code rank SDP in $B

0 mumbai 1100 I 100

1 delhi 1200 II 80

2 kolkata 4000 IV 20

3 patna 3400 III 30

4 lucknow 2980 VI 55

5 chennai 2330 V 10

6 assam 6660 VII 23

In [9]:  df.index=['A','B','C','D','E','F','G']

In [11]:  df.name='states data'

In [14]:  df.index.name='SERIAL'

In [15]:  df.columns.name='ATTRIBUTES'

In [16]:  df

Out[16]: ATTRIBUTES city code rank SDP in $B

SERIAL

A mumbai 1100 I 100

B delhi 1200 II 80

C kolkata 4000 IV 20

D patna 3400 III 30

E lucknow 2980 VI 55

F chennai 2330 V 10

G assam 6660 VII 23

localhost:8888/notebooks/satan practice.ipynb# 18/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [17]:  df['ECONOMY']=df['SDP in $B']>50

In [18]:  df

Out[18]: ATTRIBUTES city code rank SDP in $B ECONOMY

SERIAL

A mumbai 1100 I 100 True

B delhi 1200 II 80 True

C kolkata 4000 IV 20 False

D patna 3400 III 30 False

E lucknow 2980 VI 55 True

F chennai 2330 V 10 False

G assam 6660 VII 23 False

In [20]:  df.replace({True:'developed',False:'developing'})
Out[20]: ATTRIBUTES city code rank SDP in $B ECONOMY

SERIAL

A mumbai 1100 I 100 developed

B delhi 1200 II 80 developed

C kolkata 4000 IV 20 developing

D patna 3400 III 30 developing

E lucknow 2980 VI 55 developed

F chennai 2330 V 10 developing

G assam 6660 VII 23 developing

In [27]:  df.T

Out[27]: SERIAL A B C D E F G

ATTRIBUTES

city mumbai delhi kolkata patna lucknow chennai assam

code 1100 1200 4000 3400 2980 2330 6660

rank I II IV III VI V VII

SDP in $B 100 80 20 30 55 10 23

ECONOMY True True False False True False False

In [28]:  df.index

Out[28]: Index(['A', 'B', 'C', 'D', 'E', 'F', 'G'], dtype='object', name='SERIAL')

localhost:8888/notebooks/satan practice.ipynb# 19/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [29]:  df.columns

Out[29]: Index(['city', 'code', 'rank', 'SDP in $B', 'ECONOMY'], dtype='object', name='ATTRIBUTES')

In [30]:  df.values

Out[30]: array([['mumbai', 1100, 'I', 100, True],


['delhi', 1200, 'II', 80, True],
['kolkata', 4000, 'IV', 20, False],
['patna', 3400, 'III', 30, False],
['lucknow', 2980, 'VI', 55, True],
['chennai', 2330, 'V', 10, False],
['assam', 6660, 'VII', 23, False]], dtype=object)

In [31]:  'city' in df.columns

Out[31]: True

In [32]:  'D' in df.index

Out[32]: True

In [34]:  state

-------------------------------------------------------------------------
--
NameError Traceback (most recent call las
t)
Cell In[34], line 1
----> 1 state

NameError: name 'state' is not defined

In [41]:  usa

Out[41]: california 1000


delhi 2000
kolkata 500
miami 200
kanpur 100
orlando 200
dtype: int64

In [45]:  states

Out[45]: mumbai 1000


delhi 2000
kolkata 500
chennai 200
kanpur 100
patna 100
dtype: int64

localhost:8888/notebooks/satan practice.ipynb# 20/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [46]:  usa+states

Out[46]: california NaN


chennai NaN
delhi 4000.0
kanpur 200.0
kolkata 1000.0
miami NaN
mumbai NaN
orlando NaN
patna NaN
dtype: float64

In [51]:  pd.append(usa)

-------------------------------------------------------------------------
--
AttributeError Traceback (most recent call las
t)
Cell In[51], line 1
----> 1 pd.append(usa)

AttributeError: module 'pandas' has no attribute 'append'

In [52]:  usa

Out[52]: california 1000


delhi 2000
kolkata 500
miami 200
kanpur 100
orlando 200
dtype: int64

In [55]:  usa2=usa.reindex(['kanpur','miami','orlando','delhi'])

In [56]:  usa2

Out[56]: kanpur 100


miami 200
orlando 200
delhi 2000
dtype: int64

In [58]:  states.drop('kanpur')
Out[58]: mumbai 1000
delhi 2000
kolkata 500
chennai 200
patna 100
dtype: int64

localhost:8888/notebooks/satan practice.ipynb# 21/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [59]:  states

Out[59]: mumbai 1000


delhi 2000
kolkata 500
chennai 200
kanpur 100
patna 100
dtype: int64

In [60]:  usa.drop(['delhi','kolkata','kanpur'])

Out[60]: california 1000


miami 200
orlando 200
dtype: int64

In [61]:  df

Out[61]: ATTRIBUTES city code rank SDP in $B ECONOMY

SERIAL

A mumbai 1100 I 100 True

B delhi 1200 II 80 True

C kolkata 4000 IV 20 False

D patna 3400 III 30 False

E lucknow 2980 VI 55 True

F chennai 2330 V 10 False

G assam 6660 VII 23 False

In [63]:  x=df

In [78]:  usa

Out[78]: california 1000


delhi 2000
kolkata 500
miami 200
kanpur 100
orlando 200
dtype: int64

In [80]:  usa.drop(usa.index[[1,2,4]])
Out[80]: california 1000
miami 200
orlando 200
dtype: int64

localhost:8888/notebooks/satan practice.ipynb# 22/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [81]:  x

Out[81]: ATTRIBUTES city code rank SDP in $B ECONOMY

SERIAL

A mumbai 1100 I 100 True

B delhi 1200 II 80 True

C kolkata 4000 IV 20 False

D patna 3400 III 30 False

E lucknow 2980 VI 55 True

F chennai 2330 V 10 False

G assam 6660 VII 23 False

In [82]:  x.drop(x.index[[-1,-2]])

Out[82]: ATTRIBUTES city code rank SDP in $B ECONOMY

SERIAL

A mumbai 1100 I 100 True

B delhi 1200 II 80 True

C kolkata 4000 IV 20 False

D patna 3400 III 30 False

E lucknow 2980 VI 55 True

In [95]:  x.drop(['rank','ECONOMY'],axis=1,inplace=True)

In [96]:  x

Out[96]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

In [98]:  usa['delhi':'kolkata']=1000000

localhost:8888/notebooks/satan practice.ipynb# 23/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [99]:  usa

Out[99]: california 1000


delhi 1000000
kolkata 1000000
miami 200
kanpur 100
orlando 200
dtype: int64

In [100]:  df

Out[100]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

In [106]:  df['city']

Out[106]: SERIAL
A mumbai
B delhi
C kolkata
D patna
E lucknow
F chennai
G assam
Name: city, dtype: object

In [114]:  df[df.code>2000]

Out[114]: ATTRIBUTES city code SDP in $B

SERIAL

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

localhost:8888/notebooks/satan practice.ipynb# 24/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [115]:  df

Out[115]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

In [116]:  df.loc['A':'D']

Out[116]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

In [118]:  df.iloc[0:3]

Out[118]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

In [119]:  df

Out[119]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

localhost:8888/notebooks/satan practice.ipynb# 25/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [120]:  df.iloc[0,'city']

localhost:8888/notebooks/satan practice.ipynb# 26/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

-------------------------------------------------------------------------
--
ValueError Traceback (most recent call las
t)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:9
04, in _LocationIndexer._validate_tuple_indexer(self, key)
903 try:
--> 904 self._validate_key(k, i)
905 except ValueError as err:

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
518, in _iLocIndexer._validate_key(self, key, axis)
1517 else:
-> 1518 raise ValueError(f"Can only index by location with a [{self._
valid_types}]")

ValueError: Can only index by location with a [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array]

The above exception was the direct cause of the following exception:

ValueError Traceback (most recent call las


t)
Cell In[120], line 1
----> 1 df.iloc[0,'city']

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
097, in _LocationIndexer.__getitem__(self, key)
1095 if self._is_scalar_access(key):
1096 return self.obj._get_value(*key, takeable=self._takeable)
-> 1097 return self._getitem_tuple(key)
1098 else:
1099 # we by definition only have the 0th axis
1100 axis = self.axis or 0

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
594, in _iLocIndexer._getitem_tuple(self, tup)
1593 def _getitem_tuple(self, tup: tuple):
-> 1594 tup = self._validate_tuple_indexer(tup)
1595 with suppress(IndexingError):
1596 return self._getitem_lowerdim(tup)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:9
06, in _LocationIndexer._validate_tuple_indexer(self, key)
904 self._validate_key(k, i)
905 except ValueError as err:
--> 906 raise ValueError(
907 "Location based indexing can only have "
908 f"[{self._valid_types}] types"
909 ) from err
910 return key

ValueError: Location based indexing can only have [integer, integer slice
(START point is INCLUDED, END point is EXCLUDED), listlike of integers, b

localhost:8888/notebooks/satan practice.ipynb# 27/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook
oolean array] types

localhost:8888/notebooks/satan practice.ipynb# 28/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [122]:  df.iloc['A',0]

localhost:8888/notebooks/satan practice.ipynb# 29/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

-------------------------------------------------------------------------
--
ValueError Traceback (most recent call las
t)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:9
04, in _LocationIndexer._validate_tuple_indexer(self, key)
903 try:
--> 904 self._validate_key(k, i)
905 except ValueError as err:

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
518, in _iLocIndexer._validate_key(self, key, axis)
1517 else:
-> 1518 raise ValueError(f"Can only index by location with a [{self._
valid_types}]")

ValueError: Can only index by location with a [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array]

The above exception was the direct cause of the following exception:

ValueError Traceback (most recent call las


t)
Cell In[122], line 1
----> 1 df.iloc['A',0]

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
097, in _LocationIndexer.__getitem__(self, key)
1095 if self._is_scalar_access(key):
1096 return self.obj._get_value(*key, takeable=self._takeable)
-> 1097 return self._getitem_tuple(key)
1098 else:
1099 # we by definition only have the 0th axis
1100 axis = self.axis or 0

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
594, in _iLocIndexer._getitem_tuple(self, tup)
1593 def _getitem_tuple(self, tup: tuple):
-> 1594 tup = self._validate_tuple_indexer(tup)
1595 with suppress(IndexingError):
1596 return self._getitem_lowerdim(tup)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:9
06, in _LocationIndexer._validate_tuple_indexer(self, key)
904 self._validate_key(k, i)
905 except ValueError as err:
--> 906 raise ValueError(
907 "Location based indexing can only have "
908 f"[{self._valid_types}] types"
909 ) from err
910 return key

ValueError: Location based indexing can only have [integer, integer slice
(START point is INCLUDED, END point is EXCLUDED), listlike of integers, b

localhost:8888/notebooks/satan practice.ipynb# 30/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook
oolean array] types

localhost:8888/notebooks/satan practice.ipynb# 31/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [123]:  df.loc['c',0]

localhost:8888/notebooks/satan practice.ipynb# 32/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

-------------------------------------------------------------------------
--
KeyError Traceback (most recent call las
t)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexes\base.
py:3653, in Index.get_loc(self, key)
3652 try:
-> 3653 return self._engine.get_loc(casted_key)
3654 except KeyError as err:

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\_libs\index.pyx:14
7, in pandas._libs.index.IndexEngine.get_loc()

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\_libs\index.pyx:17
6, in pandas._libs.index.IndexEngine.get_loc()

File pandas\_libs\hashtable_class_helper.pxi:7080, in pandas._libs.hashta


ble.PyObjectHashTable.get_item()

File pandas\_libs\hashtable_class_helper.pxi:7088, in pandas._libs.hashta


ble.PyObjectHashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError Traceback (most recent call las


t)
Cell In[123], line 1
----> 1 df.loc['c',0]

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
096, in _LocationIndexer.__getitem__(self, key)
1094 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
1095 if self._is_scalar_access(key):
-> 1096 return self.obj._get_value(*key, takeable=self._takeable)
1097 return self._getitem_tuple(key)
1098 else:
1099 # we by definition only have the 0th axis

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\frame.py:387
0, in DataFrame._get_value(self, index, col, takeable)
3867 series = self._ixs(col, axis=1)
3868 return series._values[index]
-> 3870 series = self._get_item_cache(col)
3871 engine = self.index._engine
3873 if not isinstance(self.index, MultiIndex):
3874 # CategoricalIndex: Trying to use the engine fastpath may giv
e incorrect
3875 # results if our categories are integers that dont match our
codes
3876 # IntervalIndex: IntervalTree has no get_loc

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\frame.py:425
5, in DataFrame._get_item_cache(self, item)
4250 res = cache.get(item)
4251 if res is None:
localhost:8888/notebooks/satan practice.ipynb# 33/126
6/10/24, 6:53 AM satan practice - Jupyter Notebook
4252 # All places that call _get_item_cache have unique columns,
4253 # pending resolution of GH#33047
-> 4255 loc = self.columns.get_loc(item)
4256 res = self._ixs(loc, axis=1)
4258 cache[item] = res

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexes\base.
py:3655, in Index.get_loc(self, key)
3653 return self._engine.get_loc(casted_key)
3654 except KeyError as err:
-> 3655 raise KeyError(key) from err
3656 except TypeError:
3657 # If we have a listlike key, _check_indexing_error will raise
3658 # InvalidIndexError. Otherwise we fall through and re-raise
3659 # the TypeError.
3660 self._check_indexing_error(key)

KeyError: 0

localhost:8888/notebooks/satan practice.ipynb# 34/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [124]:  df.loc[0,'city']

localhost:8888/notebooks/satan practice.ipynb# 35/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

-------------------------------------------------------------------------
--
KeyError Traceback (most recent call las
t)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexes\base.
py:3653, in Index.get_loc(self, key)
3652 try:
-> 3653 return self._engine.get_loc(casted_key)
3654 except KeyError as err:

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\_libs\index.pyx:14
7, in pandas._libs.index.IndexEngine.get_loc()

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\_libs\index.pyx:17
6, in pandas._libs.index.IndexEngine.get_loc()

File pandas\_libs\hashtable_class_helper.pxi:7080, in pandas._libs.hashta


ble.PyObjectHashTable.get_item()

File pandas\_libs\hashtable_class_helper.pxi:7088, in pandas._libs.hashta


ble.PyObjectHashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError Traceback (most recent call las


t)
Cell In[124], line 1
----> 1 df.loc[0,'city']

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1
096, in _LocationIndexer.__getitem__(self, key)
1094 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
1095 if self._is_scalar_access(key):
-> 1096 return self.obj._get_value(*key, takeable=self._takeable)
1097 return self._getitem_tuple(key)
1098 else:
1099 # we by definition only have the 0th axis

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\frame.py:387
7, in DataFrame._get_value(self, index, col, takeable)
3871 engine = self.index._engine
3873 if not isinstance(self.index, MultiIndex):
3874 # CategoricalIndex: Trying to use the engine fastpath may giv
e incorrect
3875 # results if our categories are integers that dont match our
codes
3876 # IntervalIndex: IntervalTree has no get_loc
-> 3877 row = self.index.get_loc(index)
3878 return series._values[row]
3880 # For MultiIndex going through engine effectively restricts us to
3881 # same-length tuples; see test_get_set_value_no_partial_indexing

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexes\base.
py:3655, in Index.get_loc(self, key)
3653 return self._engine.get_loc(casted_key)
localhost:8888/notebooks/satan practice.ipynb# 36/126
6/10/24, 6:53 AM satan practice - Jupyter Notebook
3654 except KeyError as err:
-> 3655 raise KeyError(key) from err
3656 except TypeError:
3657 # If we have a listlike key, _check_indexing_error will raise
3658 # InvalidIndexError. Otherwise we fall through and re-raise
3659 # the TypeError.
3660 self._check_indexing_error(key)

KeyError: 0

In [127]:  df.iloc[[1,2,3],0]

Out[127]: SERIAL
B delhi
C kolkata
D patna
Name: city, dtype: object

In [131]:  df.loc[['A','E'],'city']

Out[131]: SERIAL
A mumbai
E lucknow
Name: city, dtype: object

In [133]:  df,x

Out[133]: (ATTRIBUTES city code SDP in $B


SERIAL
A mumbai 1100 100
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23,
ATTRIBUTES city code SDP in $B
SERIAL
A mumbai 1100 100
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23)

In [348]:  y=pd.DataFrame({'city':['miami','delhi','kolkata','orlando','lucknow','chennai','las vegas'],'code':[1200,1220,4000,3200,2980,2330,6360],

'SPEED':['V','II','IV','III','VI','I','VII']},index=['G','B','C','H','E','F','I'])

localhost:8888/notebooks/satan practice.ipynb# 37/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [138]:  y

Out[138]: city code SPEED

G miami 1200 V

B delhi 1220 II

C kolkata 4000 IV

H orlando 3200 III

E lucknow 2980 VI

F chennai 2330 I

I las vegas 6360 VII

In [136]:  df

Out[136]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

In [139]:  df+y

Out[139]: SDP in $B SPEED city code

A NaN NaN NaN NaN

B NaN NaN delhidelhi 2420.0

C NaN NaN kolkatakolkata 8000.0

D NaN NaN NaN NaN

E NaN NaN lucknowlucknow 5960.0

F NaN NaN chennaichennai 4660.0

G NaN NaN assammiami 7860.0

H NaN NaN NaN NaN

I NaN NaN NaN NaN

In [146]:  df.add?

localhost:8888/notebooks/satan practice.ipynb# 38/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [156]:  df.sum(numeric_only=True)

Out[156]: ATTRIBUTES
code 21670
SDP in $B 318
dtype: int64

In [162]:  df.idxmax(numeric_only=True)

Out[162]: ATTRIBUTES
code G
SDP in $B A
dtype: object

In [164]:  df.cumsum()

Out[164]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B mumbaidelhi 2300 180

C mumbaidelhikolkata 6300 200

D mumbaidelhikolkatapatna 9700 230

E mumbaidelhikolkatapatnalucknow 12680 285

F mumbaidelhikolkatapatnalucknowchennai 15010 295

G mumbaidelhikolkatapatnalucknowchennaiassam 21670 318

In [166]:  import numpy as np


import pandas as pd

In [167]:  px=pd.DataFrame(np.arange(25).reshape(5,5))

In [170]:  py=pd.DataFrame(np.arange(50,75).reshape(5,5))

In [171]:  px
Out[171]: 0 1 2 3 4

0 0 1 2 3 4

1 5 6 7 8 9

2 10 11 12 13 14

3 15 16 17 18 19

4 20 21 22 23 24

localhost:8888/notebooks/satan practice.ipynb# 39/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [172]:  py

Out[172]: 0 1 2 3 4

0 50 51 52 53 54

1 55 56 57 58 59

2 60 61 62 63 64

3 65 66 67 68 69

4 70 71 72 73 74

In [173]:  px+py

Out[173]: 0 1 2 3 4

0 50 52 54 56 58

1 60 62 64 66 68

2 70 72 74 76 78

3 80 82 84 86 88

4 90 92 94 96 98

In [174]:  px.index=['a','b','c','d','e']
py.index=['c','d','e','f','g']

In [175]:  px

Out[175]: 0 1 2 3 4

a 0 1 2 3 4

b 5 6 7 8 9

c 10 11 12 13 14

d 15 16 17 18 19

e 20 21 22 23 24

In [176]:  py

Out[176]: 0 1 2 3 4

c 50 51 52 53 54

d 55 56 57 58 59

e 60 61 62 63 64

f 65 66 67 68 69

g 70 71 72 73 74

localhost:8888/notebooks/satan practice.ipynb# 40/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [177]:  px+py

Out[177]: 0 1 2 3 4

a NaN NaN NaN NaN NaN

b NaN NaN NaN NaN NaN

c 60.0 62.0 64.0 66.0 68.0

d 70.0 72.0 74.0 76.0 78.0

e 80.0 82.0 84.0 86.0 88.0

f NaN NaN NaN NaN NaN

g NaN NaN NaN NaN NaN

In [178]:  px.add(py,fill_value=0)

Out[178]: 0 1 2 3 4

a 0.0 1.0 2.0 3.0 4.0

b 5.0 6.0 7.0 8.0 9.0

c 60.0 62.0 64.0 66.0 68.0

d 70.0 72.0 74.0 76.0 78.0

e 80.0 82.0 84.0 86.0 88.0

f 65.0 66.0 67.0 68.0 69.0

g 70.0 71.0 72.0 73.0 74.0

In [179]:  df

Out[179]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

localhost:8888/notebooks/satan practice.ipynb# 41/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [180]:  df.describe()

Out[180]: ATTRIBUTES code SDP in $B

count 7.000000 7.000000

mean 3095.714286 45.428571

std 1905.726958 33.930601

min 1100.000000 10.000000

25% 1765.000000 21.500000

50% 2980.000000 30.000000

75% 3700.000000 67.500000

max 6660.000000 100.000000

In [182]:  df.corr(numeric_only=True)

Out[182]: ATTRIBUTES code SDP in $B

ATTRIBUTES

code 1.000000 -0.659984

SDP in $B -0.659984 1.000000

In [183]:  df.cov(numeric_only=True)

Out[183]: ATTRIBUTES code SDP in $B

ATTRIBUTES

code 3.631795e+06 -42676.190476

SDP in $B -4.267619e+04 1151.285714

In [184]:  py.corr()

Out[184]: 0 1 2 3 4

0 1.0 1.0 1.0 1.0 1.0

1 1.0 1.0 1.0 1.0 1.0

2 1.0 1.0 1.0 1.0 1.0

3 1.0 1.0 1.0 1.0 1.0

4 1.0 1.0 1.0 1.0 1.0

In [ ]:  py.cov()

localhost:8888/notebooks/satan practice.ipynb# 42/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [186]:  px.corrwith(py)

Out[186]: 0 1.0
1 1.0
2 1.0
3 1.0
4 1.0
dtype: float64

In [187]:  usa

Out[187]: california 1000


delhi 1000000
kolkata 1000000
miami 200
kanpur 100
orlando 200
dtype: int64

In [201]:  x=np.random.randint(1,101,100)

In [202]:  x

Out[202]: array([18, 91, 65, 56, 99, 28, 31, 36, 31, 99, 22, 92, 39, 82, 27, 67, 1
3,
97, 41, 85, 55, 67, 14, 30, 25, 12, 24, 78, 37, 19, 2, 6, 93, 5
3,
16, 93, 12, 30, 21, 88, 9, 96, 15, 71, 61, 82, 1, 35, 94, 97, 1
3,
48, 32, 78, 13, 77, 10, 5, 28, 77, 29, 33, 26, 6, 26, 58, 18, 7
5,
36, 43, 67, 67, 53, 67, 62, 44, 93, 70, 43, 40, 42, 17, 54, 22, 8
8,
50, 72, 78, 95, 29, 94, 19, 71, 3, 38, 54, 70, 58, 93, 51])

In [203]:  y=pd.Series(x)

In [208]:  y.value_counts()

Out[208]: 67 5
93 4
78 3
13 3
18 2
..
61 1
1 1
35 1
92 1
51 1
Name: count, Length: 66, dtype: int64

localhost:8888/notebooks/satan practice.ipynb# 43/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [ ]:  ​

In [211]:  y.isin([1,2,18])

Out[211]: 0 True
1 False
2 False
3 False
4 False
...
95 False
96 False
97 False
98 False
99 False
Length: 100, dtype: bool

In [212]:  usa

Out[212]: california 1000


delhi 1000000
kolkata 1000000
miami 200
kanpur 100
orlando 200
dtype: int64

In [213]:  states

Out[213]: mumbai 1000


delhi 2000
kolkata 500
chennai 200
kanpur 100
patna 100
dtype: int64

In [193]:  px.values

Out[193]: array([[ 0, 1, 2, 3, 4],


[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]])

localhost:8888/notebooks/satan practice.ipynb# 44/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [214]:  x,y

Out[214]: (array([18, 91, 65, 56, 99, 28, 31, 36, 31, 99, 22, 92, 39, 82, 27, 67, 1
3,
97, 41, 85, 55, 67, 14, 30, 25, 12, 24, 78, 37, 19, 2, 6, 93, 5
3,
16, 93, 12, 30, 21, 88, 9, 96, 15, 71, 61, 82, 1, 35, 94, 97, 1
3,
48, 32, 78, 13, 77, 10, 5, 28, 77, 29, 33, 26, 6, 26, 58, 18, 7
5,
36, 43, 67, 67, 53, 67, 62, 44, 93, 70, 43, 40, 42, 17, 54, 22, 8
8,
50, 72, 78, 95, 29, 94, 19, 71, 3, 38, 54, 70, 58, 93, 51]),
0 18
1 91
2 65
3 56
4 99
..
95 54
96 70
97 58
98 93
99 51
Length: 100, dtype: int32)

In [215]:  df

Out[215]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

In [244]:  data=pd.read_csv('mac.txt')

localhost:8888/notebooks/satan practice.ipynb# 45/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [224]:  data

Out[224]: Name Age City

0 John 30 New York

1 Alice 25 Los Angeles

2 Bob 35 Chicago

3 Emily 28 San Francisco

4 Michael 40 Houston

5 Samantha 33 Miami

6 David 27 Seattle

7 Emma 31 Boston

8 Daniel 29 Denver

9 Olivia 36 Atlanta

In [241]:  sale=pd.read_table('sale.txt',sep='|')

In [242]:  sale

Out[242]: Product ID Product Name Price Quantity

0 101 Laptop 1200 10

1 102 Smartphone 800 20

2 103 Tablet 500 15

3 104 Headphones 100 50

4 105 Smartwatch 300 30

5 106 Camera 700 12

6 107 Printer 250 25

7 108 External HDD 150 40

8 109 Wireless Mouse 30 100

9 110 Keyboard 50 80

In [256]:  data2=pd.read_csv('mac.txt',header=None,names=['A','B','C'],index_col='C')

localhost:8888/notebooks/satan practice.ipynb# 46/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [257]:  data2

Out[257]: A B

City Name Age

New York John 30

Los Angeles Alice 25

Chicago Bob 35

San Francisco Emily 28

Houston Michael 40

Miami Samantha 33

Seattle David 27

Boston Emma 31

Denver Daniel 29

Atlanta Olivia 36

In [258]:  data3=pd.read_csv('mac.txt',header=None,names=['A','B','C'],index_col=['C'

In [259]:  data3

Out[259]: A

C B

City Age Name

New York 30 John

Los Angeles 25 Alice

Chicago 35 Bob

San Francisco 28 Emily

Houston 40 Michael

Miami 33 Samantha

Seattle 27 David

Boston 31 Emma

Denver 29 Daniel

Atlanta 36 Olivia

In [263]:  dell=pd.read_table('hell.txt',sep='\s+')

localhost:8888/notebooks/satan practice.ipynb# 47/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [264]:  dell

Out[264]: Product ID Product.1 Name Price Quantity

0 101 Laptop 1200 10 NaN NaN

1 102 Smartphone 800 20 NaN NaN

2 103 Tablet 500 15 NaN NaN

3 104 Headphones 100 50 NaN NaN

4 105 Smartwatch 300 30 NaN NaN

5 106 Camera 700 12 NaN NaN

6 107 Printer 250 25 NaN NaN

7 108 External HDD 150 40.0 NaN

8 109 Wireless Mouse 30 100.0 NaN

9 110 Keyboard 50 80 NaN NaN

In [265]:  df

Out[265]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

In [267]:  df.to_csv('indian_states.csv',sep='|')

In [268]:  import seaborn as sns

localhost:8888/notebooks/satan practice.ipynb# 48/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [270]:  sns.get_dataset_names()

localhost:8888/notebooks/satan practice.ipynb# 49/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

Out[270]: ['anagrams',
'anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'dowjones',
'exercise',
'flights',
'fmri',
'geyser',
'glue',
'healthexp',
'iris',
'mpg',
'penguins',
'planets',
'seaice',
'taxis',
'tips',
'titanic',
'anagrams',
'anagrams',
'anscombe',
'anscombe',
'attention',
'attention',
'brain_networks',
'brain_networks',
'car_crashes',
'car_crashes',
'diamonds',
'diamonds',
'dots',
'dots',
'dowjones',
'dowjones',
'exercise',
'exercise',
'flights',
'flights',
'fmri',
'fmri',
'geyser',
'geyser',
'glue',
'glue',
'healthexp',
'healthexp',
'iris',
'iris',
'mpg',
'mpg',
'penguins',
'penguins',
'planets',
localhost:8888/notebooks/satan practice.ipynb# 50/126
6/10/24, 6:53 AM satan practice - Jupyter Notebook
'planets',
'seaice',
'seaice',
'taxis',
'taxis',
'tips',
'tips',
'titanic',
'titanic',
'anagrams',
'anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'dowjones',
'exercise',
'flights',
'fmri',
'geyser',
'glue',
'healthexp',
'iris',
'mpg',
'penguins',
'planets',
'seaice',
'taxis',
'tips',
'titanic']

In [281]:  fog=sns.load_dataset('dots')
fog

Out[281]: align choice time coherence firing_rate

0 dots T1 -80 0.0 33.189967

1 dots T1 -80 3.2 31.691726

2 dots T1 -80 6.4 34.279840

3 dots T1 -80 12.8 32.631874

4 dots T1 -80 25.6 35.060487

... ... ... ... ... ...

843 sacc T2 300 3.2 33.281734

844 sacc T2 300 6.4 27.583979

845 sacc T2 300 12.8 28.511530

846 sacc T2 300 25.6 27.009804

847 sacc T2 300 51.2 30.959302

848 rows × 5 columns

localhost:8888/notebooks/satan practice.ipynb# 51/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [274]:  fog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 90 non-null int64
1 id 90 non-null int64
2 diet 90 non-null category
3 pulse 90 non-null int64
4 time 90 non-null category
5 kind 90 non-null category
dtypes: category(3), int64(3)
memory usage: 2.9 KB

localhost:8888/notebooks/satan practice.ipynb# 52/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [284]:  cv=pd.DataFrame(mx)

-------------------------------------------------------------------------
--
ValueError Traceback (most recent call las
t)
Cell In[284], line 1
----> 1 cv=pd.DataFrame(mx)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\frame.py:709,
in DataFrame.__init__(self, data, index, columns, dtype, copy)
703 mgr = self._init_mgr(
704 data, axes={"index": index, "columns": columns}, dtype=dt
ype, copy=copy
705 )
707 elif isinstance(data, dict):
708 # GH#38939 de facto copy defaults to False only in non-dict c
ases
--> 709 mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=cop
y, typ=manager)
710 elif isinstance(data, ma.MaskedArray):
711 from numpy.ma import mrecords

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\internals\con
struction.py:481, in dict_to_mgr(data, index, columns, dtype, typ, copy)
477 else:
478 # dtype check to exclude e.g. range objects, scalars
479 arrays = [x.copy() if hasattr(x, "dtype") else x for x in
arrays]
--> 481 return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=ty
p, consolidate=copy)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\internals\con
struction.py:115, in arrays_to_mgr(arrays, columns, index, dtype, verify_
integrity, typ, consolidate)
112 if verify_integrity:
113 # figure out the index, if necessary
114 if index is None:
--> 115 index = _extract_index(arrays)
116 else:
117 index = ensure_index(index)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\internals\con
struction.py:655, in _extract_index(data)
653 lengths = list(set(raw_lengths))
654 if len(lengths) > 1:
--> 655 raise ValueError("All arrays must be of the same length")
657 if have_dicts:
658 raise ValueError(
659 "Mixing dicts with non-Series may lead to ambiguous order
ing."
660 )

ValueError: All arrays must be of the same length

localhost:8888/notebooks/satan practice.ipynb# 53/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [308]:  import random



# Per-attribute value lists. Every list has five entries; None stands in for
# a value that is missing for that row.
color_list = ["red", "blue", "green", "yellow", None]
size_list = ["small", "medium", "large", None, None]
shape_list = ["circle", "square", "triangle", None, None]
material_list = ["plastic", "wood", "metal", None, None]
weight_list = [100, 200, 300, None, None]
price_list = [10.5, 20.3, 15.0, None, None]
quantity_list = [1, 2, 3, None, None]
availability_list = ["in stock", "out of stock", None, None, None]
brand_list = ["Brand1", "Brand2", None, None, None]
category_list = ["electronics", "clothing", "home", None, None]

# Column name -> column values, in the order the columns should appear.
cv = dict(
    color=color_list,
    size=size_list,
    shape=shape_list,
    material=material_list,
    weight=weight_list,
    price=price_list,
    quantity=quantity_list,
    availability=availability_list,
    brand=brand_list,
    category=category_list,
)


In [309]:  mx=pd.DataFrame(cv)

In [288]:  mx

Out[288]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN NaN NaN None None None

4 None None None None NaN NaN NaN None None None

localhost:8888/notebooks/satan practice.ipynb# 54/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [289]:  mx.dropna(how='all')

Out[289]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN NaN NaN None None None

In [291]:  mx.replace([None, np.nan])

Out[291]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 out of stock Brand2 home

3 yellow large triangle metal 300.0 15.0 3.0 out of stock Brand2 home

4 yellow large triangle metal 300.0 15.0 3.0 out of stock Brand2 home

In [295]:  mx.replace({'None':np.nan})

Out[295]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN NaN NaN None None None

4 None None None None NaN NaN NaN None None None

In [297]:  mx.dropna()

Out[297]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

localhost:8888/notebooks/satan practice.ipynb# 55/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [303]:  mx.dropna(thresh=1)

Out[303]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN NaN NaN None None None

In [302]:  fog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 848 entries, 0 to 847
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 align 848 non-null object
1 choice 848 non-null object
2 time 848 non-null int64
3 coherence 848 non-null float64
4 firing_rate 848 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 33.3+ KB

In [ ]:  mx.dropna()

In [306]:  mx.fillna(method='bfill')

Out[306]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN NaN NaN None None None

4 None None None None NaN NaN NaN None None None

In [312]:  mx.loc[0,'brand']=np.nan

localhost:8888/notebooks/satan practice.ipynb# 56/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [313]:  mx

Out[313]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock NaN electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN NaN NaN None None None

4 None None None None NaN NaN NaN None None None

In [318]:  mx.fillna(method='backfill')

Out[318]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock Brand2 electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN NaN NaN None None None

4 None None None None NaN NaN NaN None None None

In [322]:  mx.dropna(axis=1,thresh=3)

Out[322]: color size shape material weight price quantity category

0 red small circle plastic 100.0 10.5 1.0 electronics

1 blue medium square wood 200.0 20.3 2.0 clothing

2 green large triangle metal 300.0 15.0 3.0 home

3 yellow None None None NaN NaN NaN None

4 None None None None NaN NaN NaN None

In [323]:  mx.fillna({'price':0,'category':'local'})

Out[323]: color size shape material weight price quantity availability brand category

0 red small circle plastic 100.0 10.5 1.0 in stock NaN electronics

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing

2 green large triangle metal 300.0 15.0 3.0 None None home

3 yellow None None None NaN 0.0 NaN None None local

4 None None None None NaN 0.0 NaN None None local

localhost:8888/notebooks/satan practice.ipynb# 57/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [326]:  mx

Out[326]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN

0 red small circle plastic 100.0 10.5 1.0 in stock Na

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand

2 green large triangle metal 300.0 15.0 3.0 None Non

3 yellow None None None NaN NaN NaN None Non

4 None None None None NaN NaN NaN None Non

In [333]:  mx.replace([None],100)

Out[333]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN

0 red small circle plastic 100.0 10.5 1.0 in stock 10

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand

2 green large triangle metal 300.0 15.0 3.0 100 10

3 yellow 100 100 100 NaN NaN NaN 100 10

4 100 100 100 100 NaN NaN NaN 100 10

In [ ]:  ​

In [331]:  mx.replace(0,'None')

Out[331]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN

0 red small circle plastic 100.0 10.5 1.0 in stock Na

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand

2 green large triangle metal 300.0 15.0 3.0 None Non

3 yellow None None None NaN NaN NaN None Non

4 None None None None NaN NaN NaN None Non

localhost:8888/notebooks/satan practice.ipynb# 58/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [334]:  df

Out[334]: ATTRIBUTES city code SDP in $B

SERIAL

A mumbai 1100 100

B delhi 1200 80

C kolkata 4000 20

D patna 3400 30

E lucknow 2980 55

F chennai 2330 10

G assam 6660 23

In [336]:  df.rename(index=str.lower,columns=str.upper)

Out[336]: ATTRIBUTES CITY CODE SDP IN $B

SERIAL

a mumbai 1100 100

b delhi 1200 80

c kolkata 4000 20

d patna 3400 30

e lucknow 2980 55

f chennai 2330 10

g assam 6660 23

In [343]:  transform=[lambda x : x+'i' for i in [1,2,3,4,5,6,7]]

In [342]:  df.index=df.index.apply(transform)

-------------------------------------------------------------------------
--
AttributeError Traceback (most recent call las
t)
Cell In[342], line 1
----> 1 df.index=df.index.apply(transform)

AttributeError: 'Index' object has no attribute 'apply'

localhost:8888/notebooks/satan practice.ipynb# 59/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [341]:  df

Out[341]: ATTRIBUTES city code SDP in $B

SERIAL

A1 mumbai 1100 100

B1 delhi 1200 80

C1 kolkata 4000 20

D1 patna 3400 30

E1 lucknow 2980 55

F1 chennai 2330 10

G1 assam 6660 23

In [347]:  df['code'].apply(np.sum)

Out[347]: SERIAL
A1 1100
B1 1200
C1 4000
D1 3400
E1 2980
F1 2330
G1 6660
Name: code, dtype: int32

localhost:8888/notebooks/satan practice.ipynb# 60/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [349]:  import random



# Per-attribute value lists. Every list has five entries; None stands in for
# a value that is missing for that row.
color_list = ["red", "blue", "green", "yellow", None]
size_list = ["small", "medium", "large", None, None]
shape_list = ["circle", "square", "triangle", None, None]
material_list = ["plastic", "wood", "metal", None, None]
weight_list = [100, 200, 300, None, None]
price_list = [10.5, 20.3, 15.0, None, None]
quantity_list = [1, 2, 3, None, None]
availability_list = ["in stock", "out of stock", None, None, None]
brand_list = ["Brand1", "Brand2", None, None, None]
category_list = ["electronics", "clothing", "home", None, None]

# Attribute name -> list of values, keyed in display order.
attributes_dict = dict(
    color=color_list,
    size=size_list,
    shape=shape_list,
    material=material_list,
    weight=weight_list,
    price=price_list,
    quantity=quantity_list,
    availability=availability_list,
    brand=brand_list,
    category=category_list,
)

# Show the assembled mapping.
print(attributes_dict)

Out[349]: city code SPEED

G miami 1200 V

B delhi 1220 II

C kolkata 4000 IV

H orlando 3200 III

E lucknow 2980 VI

F chennai 2330 I

I las vegas 6360 VII

In [357]:  y=pd.DataFrame({'city':['miami','delhi','las_vegas','kolkata','ohio','orla
,2330,1100,2340,1221,1111,6360
'SPEED':['V','II','IV','VIII','IX','X','XI','III','VI','I'

localhost:8888/notebooks/satan practice.ipynb# 61/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [358]:  y

Out[358]: city code SPEED

a miami 1200 V
A
a delhi 1220 II

B b las_vegas 4000 IV

c kolkata 3200 VIII


C
c ohio 2980 IX

d orlando 2330 X
D
d tokyo 1100 XI

E e lucknow 2340 III

F f shanghai 1221 VI

g chennai 1111 I
G
g las vegas 6360 VII

In [359]:  df

Out[359]: ATTRIBUTES city code SDP in $B

SERIAL

A1 mumbai 1100 100

B1 delhi 1200 80

C1 kolkata 4000 20

D1 patna 3400 30

E1 lucknow 2980 55

F1 chennai 2330 10

G1 assam 6660 23

localhost:8888/notebooks/satan practice.ipynb# 62/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [360]:  import pandas as pd



# Sample DataFrame
# Sample records: two categorical attributes and two numeric measures.
# The (color, size) pairs repeat, so the index built below contains
# duplicate entries.
data = {
    'color': ['red', 'blue', 'green', 'red', 'blue'],
    'size': ['small', 'medium', 'large', 'small', 'medium'],
    'quantity': [1, 2, 3, 1, 2],
    'price': [10.5, 20.3, 15.0, 10.5, 20.3]
}

# Build the frame and move 'color' and 'size' into a two-level row index in
# one expression. set_index returns a new frame, so no in-place mutation of
# an already-bound DataFrame is needed.
df = pd.DataFrame(data).set_index(['color', 'size'])

# Print DataFrame with multi-index
print(df)

quantity price
color size
red small 1 10.5
blue medium 2 20.3
green large 3 15.0
red small 1 10.5
blue medium 2 20.3

In [361]:  y

Out[361]: city code SPEED

a miami 1200 V
A
a delhi 1220 II

B b las_vegas 4000 IV

c kolkata 3200 VIII


C
c ohio 2980 IX

d orlando 2330 X
D
d tokyo 1100 XI

E e lucknow 2340 III

F f shanghai 1221 VI

g chennai 1111 I
G
g las vegas 6360 VII

localhost:8888/notebooks/satan practice.ipynb# 63/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [363]:  y.index

Out[363]: MultiIndex([('A', 'a'),


('A', 'a'),
('B', 'b'),
('C', 'c'),
('C', 'c'),
('D', 'd'),
('D', 'd'),
('E', 'e'),
('F', 'f'),
('G', 'g'),
('G', 'g')],
)

localhost:8888/notebooks/satan practice.ipynb# 64/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [364]:  y.unstack()

localhost:8888/notebooks/satan practice.ipynb# 65/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

-------------------------------------------------------------------------
--
ValueError Traceback (most recent call las
t)
Cell In[364], line 1
----> 1 y.unstack()

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\frame.py:896
1, in DataFrame.unstack(self, level, fill_value)
8899 """
8900 Pivot a level of the (necessarily hierarchical) index labels.
8901
(...)
8957 dtype: float64
8958 """
8959 from pandas.core.reshape.reshape import unstack
-> 8961 result = unstack(self, level, fill_value)
8963 return result.__finalize__(self, method="unstack")

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\resha
pe.py:475, in unstack(obj, level, fill_value)
473 if isinstance(obj, DataFrame):
474 if isinstance(obj.index, MultiIndex):
--> 475 return _unstack_frame(obj, level, fill_value=fill_value)
476 else:
477 return obj.T.stack(dropna=False)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\resha
pe.py:498, in _unstack_frame(obj, level, fill_value)
496 def _unstack_frame(obj: DataFrame, level, fill_value=None):
497 assert isinstance(obj.index, MultiIndex) # checked by caller
--> 498 unstacker = _Unstacker(obj.index, level=level, constructor=ob
j._constructor)
500 if not obj._can_fast_transpose:
501 mgr = obj._mgr.unstack(unstacker, fill_value=fill_value)

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\resha
pe.py:136, in _Unstacker.__init__(self, index, level, constructor)
128 if num_cells > np.iinfo(np.int32).max:
129 warnings.warn(
130 f"The following operation may generate {num_cells} cells
"
131 f"in the resulting pandas object.",
132 PerformanceWarning,
133 stacklevel=find_stack_level(),
134 )
--> 136 self._make_selectors()

File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\resha
pe.py:188, in _Unstacker._make_selectors(self)
185 mask.put(selector, True)
187 if mask.sum() < len(self.index):
--> 188 raise ValueError("Index contains duplicate entries, cannot re
shape")
190 self.group_index = comp_index
191 self.mask = mask

localhost:8888/notebooks/satan practice.ipynb# 66/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook
ValueError: Index contains duplicate entries, cannot reshape

In [ ]:  ​

In [370]:  df.set_index(['color','price'])

-------------------------------------------------------------------------
--
KeyError Traceback (most recent call las
t)
~\AppData\Local\Temp\ipykernel_3180\3070971697.py in ?()
----> 1 df.set_index(['color','price'])

C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\frame.py in ?(sel
f, keys, drop, append, inplace, verify_integrity)
5855 if not found:
5856 missing.append(col)
5857
5858 if missing:
-> 5859 raise KeyError(f"None of {missing} are in the column
s")
5860
5861 if inplace:
5862 frame = self

KeyError: "None of ['color'] are in the columns"

In [368]:  x

Out[368]: array([18, 91, 65, 56, 99, 28, 31, 36, 31, 99, 22, 92, 39, 82, 27, 67, 1
3,
97, 41, 85, 55, 67, 14, 30, 25, 12, 24, 78, 37, 19, 2, 6, 93, 5
3,
16, 93, 12, 30, 21, 88, 9, 96, 15, 71, 61, 82, 1, 35, 94, 97, 1
3,
48, 32, 78, 13, 77, 10, 5, 28, 77, 29, 33, 26, 6, 26, 58, 18, 7
5,
36, 43, 67, 67, 53, 67, 62, 44, 93, 70, 43, 40, 42, 17, 54, 22, 8
8,
50, 72, 78, 95, 29, 94, 19, 71, 3, 38, 54, 70, 58, 93, 51])

In [369]:  mx

Out[369]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN

0 red small circle plastic 100.0 10.5 1.0 in stock Na

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand

2 green large triangle metal 300.0 15.0 3.0 None Non

3 yellow None None None NaN NaN NaN None Non

4 None None None None NaN NaN NaN None Non

localhost:8888/notebooks/satan practice.ipynb# 67/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [372]:  pd.DataFrame(data)

Out[372]: color size quantity price

0 red small 1 10.5

1 blue medium 2 20.3

2 green large 3 15.0

3 red small 1 10.5

4 blue medium 2 20.3

In [373]:  # Sample data


colors = ["red", "blue", "green", "yellow"]
sizes = ["small", "medium", "large", "extra-large"]
quantities = [10, 20, 30, 40]

# Check if all lists have the same length
# (a dict of lists with unequal lengths cannot be turned into a DataFrame).
assert len(set(map(len, [colors, sizes, quantities]))) == 1, \
    "Lists must have the same length"

# Check if all elements in each list are distinct
# (duplicate index entries are what made the earlier unstack() call fail).
assert all(len(set(lst)) == len(lst) for lst in [colors, sizes, quantities]), \
    "Elements in each list must be distinct"

# Create dictionary
data_dict = {
    "colors": colors,
    "sizes": sizes,
    "quantities": quantities
}

# Print the dictionary
print(data_dict)

{'colors': ['red', 'blue', 'green', 'yellow'], 'sizes': ['small', 'mediu


m', 'large', 'extra-large'], 'quantities': [10, 20, 30, 40]}

In [374]:  x=pd.DataFrame(data_dict)

In [377]:  x=x.set_index(['colors','sizes'])

localhost:8888/notebooks/satan practice.ipynb# 68/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [384]:  x.unstack().unstack().unstack

Out[384]: <bound method Series.unstack of sizes colors


quantities extra-large blue NaN
green NaN
red NaN
yellow 40.0
large blue NaN
green 30.0
red NaN
yellow NaN
medium blue 20.0
green NaN
red NaN
yellow NaN
small blue NaN
green NaN
red 10.0
yellow NaN
dtype: float64>

In [388]:  import pandas as pd



# One entry per employee. DepartmentID is the column later used as the
# merge key against the departments frame.
employees_data = dict(
    EmployeeID=[1, 2, 3, 4],
    Name=['John', 'Alice', 'Bob', 'Mary'],
    DepartmentID=[101, 102, 101, 103],
)
emp1 = pd.DataFrame(employees_data)

# Heading and frame on separate lines, emitted by a single print call.
print("Employees DataFrame:", emp1, sep="\n")

Employees DataFrame:
EmployeeID Name DepartmentID
0 1 John 101
1 2 Alice 102
2 3 Bob 101
3 4 Mary 103

localhost:8888/notebooks/satan practice.ipynb# 69/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [390]:  departments_data = {
'DepartmentID': [101, 102, 103],
'DepartmentName': ['HR', 'Finance', 'IT']
}

# Department lookup table built from the mapping defined just above.
emp2 = pd.DataFrame(departments_data)

# Blank line, heading, then the frame, emitted by a single print call.
print("\nDepartments DataFrame:", emp2, sep="\n")

Departments DataFrame:
DepartmentID DepartmentName
0 101 HR
1 102 Finance
2 103 IT

In [391]:  emp1

Out[391]: EmployeeID Name DepartmentID

0 1 John 101

1 2 Alice 102

2 3 Bob 101

3 4 Mary 103

In [392]:  emp2

Out[392]: DepartmentID DepartmentName

0 101 HR

1 102 Finance

2 103 IT

In [393]:  pd.merge(emp1,emp2)

Out[393]: EmployeeID Name DepartmentID DepartmentName

0 1 John 101 HR

1 3 Bob 101 HR

2 2 Alice 102 Finance

3 4 Mary 103 IT

localhost:8888/notebooks/satan practice.ipynb# 70/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [394]:  pd.merge(emp1,emp2,on='DepartmentID')

Out[394]: EmployeeID Name DepartmentID DepartmentName

0 1 John 101 HR

1 3 Bob 101 HR

2 2 Alice 102 Finance

3 4 Mary 103 IT

In [396]:  pd.merge(emp1,emp2,on='DepartmentID',how='inner')

Out[396]: EmployeeID Name DepartmentID DepartmentName

0 1 John 101 HR

1 3 Bob 101 HR

2 2 Alice 102 Finance

3 4 Mary 103 IT

In [397]:  pd.merge(emp1,emp2,on='DepartmentID',how='outer')

Out[397]: EmployeeID Name DepartmentID DepartmentName

0 1 John 101 HR

1 3 Bob 101 HR

2 2 Alice 102 Finance

3 4 Mary 103 IT

In [398]:  pd.merge(emp1,emp2,on='DepartmentID',how='right')

Out[398]: EmployeeID Name DepartmentID DepartmentName

0 1 John 101 HR

1 3 Bob 101 HR

2 2 Alice 102 Finance

3 4 Mary 103 IT

In [399]:  pd.merge(emp1,emp2,on='DepartmentID',how='left')
Out[399]: EmployeeID Name DepartmentID DepartmentName

0 1 John 101 HR

1 2 Alice 102 Finance

2 3 Bob 101 HR

3 4 Mary 103 IT

localhost:8888/notebooks/satan practice.ipynb# 71/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [400]:  df

Out[400]: quantity price

color size

red small 1 10.5

blue medium 2 20.3

green large 3 15.0

red small 1 10.5

blue medium 2 20.3

In [421]:  x=df['price'].groupby('size')

In [416]:  df.groupby('color')['quantity'].size()

Out[416]: color
blue 2
green 1
red 2
Name: quantity, dtype: int64

In [418]:  df.groupby(['color','size']).count()

Out[418]: quantity price

color size

blue medium 2 2

green large 1 1

red small 2 2

In [ ]:  ​

localhost:8888/notebooks/satan practice.ipynb# 72/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [395]:  pd.merge(emp1,emp2,on='EmployeeID')

localhost:8888/notebooks/satan practice.ipynb# 73/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

-------------------------------------------------------------------------
--
KeyError Traceback (most recent call las
t)
~\AppData\Local\Temp\ipykernel_3180\224658189.py in ?()
----> 1 pd.merge(emp1,emp2,on='EmployeeID')

C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\merge.py i
n ?(left, right, how, on, left_on, right_on, left_index, right_index, sor
t, suffixes, copy, indicator, validate)
144 copy: bool | None = None,
145 indicator: str | bool = False,
146 validate: str | None = None,
147 ) -> DataFrame:
--> 148 op = _MergeOperation(
149 left,
150 right,
151 how=how,

C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\merge.py i
n ?(self, left, right, how, on, left_on, right_on, axis, left_index, righ
t_index, sort, suffixes, indicator, validate)
733 (
734 self.left_join_keys,
735 self.right_join_keys,
736 self.join_names,
--> 737 ) = self._get_merge_keys()
738
739 # validate the merge keys dtypes. We may need to coerce
740 # to avoid incompatible dtypes

C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\merge.py i
n ?(self)
1199 # Then we're either Hashable or a wrong-l
ength arraylike,
1200 # the latter of which will raise
1201 rk = cast(Hashable, rk)
1202 if rk is not None:
-> 1203 right_keys.append(right._get_label_or
_level_values(rk))
1204 else:
1205 # work-around for merge_asof(right_in
dex=True)
1206 right_keys.append(right.index)

C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\generic.py in ?(se
lf, key, axis)
1774 values = self.xs(key, axis=other_axes[0])._values
1775 elif self._is_level_reference(key, axis=axis):
1776 values = self.axes[axis].get_level_values(key)._value
s
1777 else:
-> 1778 raise KeyError(key)
1779
1780 # Check for duplicates
1781 if values.ndim > 1:

localhost:8888/notebooks/satan practice.ipynb# 74/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook
KeyError: 'EmployeeID'

In [422]:  for i , j in x:
    print(i)
    print(j)

large
color size
green large 15.0
Name: price, dtype: float64
medium
color size
blue medium 20.3
medium 20.3
Name: price, dtype: float64
small
color size
red small 10.5
small 10.5
Name: price, dtype: float64

In [423]:  for i , j in x:
    print(i,j)

large color size


green large 15.0
Name: price, dtype: float64
medium color size
blue medium 20.3
medium 20.3
Name: price, dtype: float64
small color size
red small 10.5
small 10.5
Name: price, dtype: float64

In [424]:  x.describe()

Out[424]: count mean std min 25% 50% 75% max

size

large 1.0 15.0 NaN 15.0 15.0 15.0 15.0 15.0

medium 2.0 20.3 0.0 20.3 20.3 20.3 20.3 20.3

small 2.0 10.5 0.0 10.5 10.5 10.5 10.5 10.5

localhost:8888/notebooks/satan practice.ipynb# 75/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [434]:  x.agg([('quantity','count'),('price','sum')])

Out[434]: quantity price

size

large 1 15.0

medium 2 40.6

small 2 21.0

In [435]:  df

Out[435]: quantity price

color size

red small 1 10.5

blue medium 2 20.3

green large 3 15.0

red small 1 10.5

blue medium 2 20.3

In [439]:  df.pivot_table(index=['color','size'],margins=True)

Out[439]: price quantity

color size

blue medium 20.30 2.0

green large 15.00 3.0

red small 10.50 1.0

All 15.32 1.8

In [445]:  pd.crosstab(df.quantity,df.price)

Out[445]: price 10.5 15.0 20.3

quantity

1 2 0 0

2 0 0 2

3 0 1 0

In [446]:  import matplotlib.pyplot as plt

In [447]:  data=np.arange(10)

localhost:8888/notebooks/satan practice.ipynb# 76/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [522]:  plt.plot(data)

Out[522]: [<matplotlib.lines.Line2D at 0x27001298d90>]

In [543]:  fig=plt.figure(figsize=(10,10))
plt.suptitle('SURVEYS')

Out[543]: Text(0.5, 0.98, 'SURVEYS')

<Figure size 1000x1000 with 0 Axes>

In [544]:  ax1=fig.add_subplot(2,2,1)

In [545]:  ax1.plot(np.arange(10),np.arange(10,101,10),'--g')
ax1.set_xlabel('HEIGHT(in feet)')
ax1.set_ylabel('WEIGHT(in kg)')
ax1.set_xticks([3,5,7,9,11])
ax1.set_yticks([40,55,65,78,90])

Out[545]: [<matplotlib.axis.YTick at 0x270034b4f90>,


<matplotlib.axis.YTick at 0x270034e6e50>,
<matplotlib.axis.YTick at 0x270034777d0>,
<matplotlib.axis.YTick at 0x27003529fd0>,
<matplotlib.axis.YTick at 0x270035342d0>]

In [546]:  ax2=fig.add_subplot(2,2,2)
ax2.scatter(np.random.randint(10,100,50),np.random.randint(10,100,50),colo

Out[546]: <matplotlib.collections.PathCollection at 0x270034ccad0>

localhost:8888/notebooks/satan practice.ipynb# 77/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [475]:  plt.hist?

In [547]:  ax3=fig.add_subplot(2,2,3)
ax3.hist([10,20,30,60,80,100],bins=5,color='green',alpha=0.2)

Out[547]: (array([2., 1., 1., 1., 1.]),


array([ 10., 28., 46., 64., 82., 100.]),
<BarContainer object of 5 artists>)

In [570]:  ax4=fig.add_subplot(2,2,4)
props={'title':'RESULTS','xlabel':'NAMES','ylabel':'MARKS'}
ax4.set(**props)
ax4.bar(['gopi','suraj','raj','rahul'],[10,20,40,100],color=['red','green'

Out[570]: <BarContainer object of 4 artists>

In [571]:  fig

Out[571]:

localhost:8888/notebooks/satan practice.ipynb# 78/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [562]:  figx,axes=plt.subplots(3,3,sharex=True,sharey=True,figsize=(10,10))

In [563]:  axes[0,0].plot(np.random.randint(10,100,30).cumsum(),'--g')

Out[563]: [<matplotlib.lines.Line2D at 0x27005d30910>]

localhost:8888/notebooks/satan practice.ipynb# 79/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [564]:  figx

Out[564]:

In [568]:  axes[0,1].scatter(np.random.randint(1,1000,100),np.random.randint(1,1000,10

Out[568]: <matplotlib.collections.PathCollection at 0x27005b38c10>

localhost:8888/notebooks/satan practice.ipynb# 80/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [569]:  figx

Out[569]:

In [573]:  p=sns.load_dataset('planets')

localhost:8888/notebooks/satan practice.ipynb# 81/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [574]:  p

Out[574]: method number orbital_period mass distance year

0 Radial Velocity 1 269.300000 7.10 77.40 2006

1 Radial Velocity 1 874.774000 2.21 56.95 2008

2 Radial Velocity 1 763.000000 2.60 19.84 2011

3 Radial Velocity 1 326.030000 19.40 110.62 2007

4 Radial Velocity 1 516.220000 10.50 119.47 2009

... ... ... ... ... ... ...

1030 Transit 1 3.941507 NaN 172.00 2006

1031 Transit 1 2.615864 NaN 148.00 2007

1032 Transit 1 3.191524 NaN 174.00 2007

1033 Transit 1 4.125083 NaN 293.00 2008

1034 Transit 1 4.187757 NaN 260.00 2008

1035 rows × 6 columns

localhost:8888/notebooks/satan practice.ipynb# 82/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [830]:  figv=plt.figure()
ax1=figv.add_subplot(1,1,1)
ax1.plot(p['orbital_period'].cumsum(),'g',label='one')
ax1.plot(p['distance'].cumsum(),'r--',label='two')
ax1.plot(p['year'].cumsum(),'b--',label='three')
ax1.legend(loc='best')
att={'title':'ANALYSIS','xlabel':'range','ylabel':'domain','xticklabels':[
ax1.set(**att)
plt.savefig('hello.svg')
plt.annotate('satan',xy=(2,3),xytext=(0,1),arrowprops=dict(arrowstyle='->'
plt.savefig('figpath.png',dpi=500,bbox_inches='tight')

C:\Users\learner\AppData\Local\Temp\ipykernel_3180\3879900435.py:8: UserWarning:

FixedFormatter should only be used together with FixedLocator

In [589]:  plt.savefig('hell.svg')

<Figure size 640x480 with 0 Axes>

In [593]:  import pandas as pd


import seaborn as sns

localhost:8888/notebooks/satan practice.ipynb# 83/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [594]:  p

Out[594]: method number orbital_period mass distance year

0 Radial Velocity 1 269.300000 7.10 77.40 2006

1 Radial Velocity 1 874.774000 2.21 56.95 2008

2 Radial Velocity 1 763.000000 2.60 19.84 2011

3 Radial Velocity 1 326.030000 19.40 110.62 2007

4 Radial Velocity 1 516.220000 10.50 119.47 2009

... ... ... ... ... ... ...

1030 Transit 1 3.941507 NaN 172.00 2006

1031 Transit 1 2.615864 NaN 148.00 2007

1032 Transit 1 3.191524 NaN 174.00 2007

1033 Transit 1 4.125083 NaN 293.00 2008

1034 Transit 1 4.187757 NaN 260.00 2008

1035 rows × 6 columns

In [605]:  p.year.plot(color='green',rot='vertical')

Out[605]: <Axes: >

localhost:8888/notebooks/satan practice.ipynb# 84/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [599]:  df.plot()

Out[599]: <Axes: xlabel='color,size'>

localhost:8888/notebooks/satan practice.ipynb# 85/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [601]:  p.plot(figsize=(10,10))

Out[601]: <Axes: >

In [602]:  t=sns.load_dataset('titanic')

localhost:8888/notebooks/satan practice.ipynb# 86/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [603]:  t.plot()

Out[603]: <Axes: >

localhost:8888/notebooks/satan practice.ipynb# 87/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [616]:  t.plot(subplots=True)

Out[616]: array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >],
dtype=object)

In [615]:  ​

-------------------------------------------------------------------------
--
AttributeError Traceback (most recent call las
t)
Cell In[615], line 1
----> 1 a[0].show()

AttributeError: 'Axes' object has no attribute 'show'

localhost:8888/notebooks/satan practice.ipynb# 88/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [617]:  p

Out[617]: method number orbital_period mass distance year

0 Radial Velocity 1 269.300000 7.10 77.40 2006

1 Radial Velocity 1 874.774000 2.21 56.95 2008

2 Radial Velocity 1 763.000000 2.60 19.84 2011

3 Radial Velocity 1 326.030000 19.40 110.62 2007

4 Radial Velocity 1 516.220000 10.50 119.47 2009

... ... ... ... ... ... ...

1030 Transit 1 3.941507 NaN 172.00 2006

1031 Transit 1 2.615864 NaN 148.00 2007

1032 Transit 1 3.191524 NaN 174.00 2007

1033 Transit 1 4.125083 NaN 293.00 2008

1034 Transit 1 4.187757 NaN 260.00 2008

1035 rows × 6 columns

localhost:8888/notebooks/satan practice.ipynb# 89/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [625]:  fig

Out[625]:

localhost:8888/notebooks/satan practice.ipynb# 90/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [632]:  figz,axes=plt.subplots(2,1)
pd.Series(np.random.randint(1,10,10),index=list('abcdefghij')).plot.bar(ax
pd.Series(np.random.randint(1,10,10),index=list('abcdefghij')).plot.barh(a

Out[632]: <Axes: >

localhost:8888/notebooks/satan practice.ipynb# 91/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [633]:  df.plot.bar()

Out[633]: <Axes: xlabel='color,size'>

localhost:8888/notebooks/satan practice.ipynb# 92/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [636]:  df.plot.barh()

Out[636]: <Axes: ylabel='color,size'>

In [634]:  mx

Out[634]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN

0 red small circle plastic 100.0 10.5 1.0 in stock Na

1 blue medium square wood 200.0 20.3 2.0 out of stock Brand

2 green large triangle metal 300.0 15.0 3.0 None Non

3 yellow None None None NaN NaN NaN None Non

4 None None None None NaN NaN NaN None Non

localhost:8888/notebooks/satan practice.ipynb# 93/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [635]:  mx.plot.bar()

Out[635]: <Axes: >

localhost:8888/notebooks/satan practice.ipynb# 94/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [637]:  df.plot.bar(stacked=True)

Out[637]: <Axes: xlabel='color,size'>

localhost:8888/notebooks/satan practice.ipynb# 95/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [639]:  df.plot.barh(stacked=True,alpha=0.4)

Out[639]: <Axes: ylabel='color,size'>

In [650]:  ​
p

Out[650]: method number orbital_period mass distance year

0 Radial Velocity 1 269.300 7.10 77.40 2006

1 Radial Velocity 1 874.774 2.21 56.95 2008

2 Radial Velocity 1 763.000 2.60 19.84 2011

3 Radial Velocity 1 326.030 19.40 110.62 2007

4 Radial Velocity 1 516.220 10.50 119.47 2009

In [648]:  x=pd.crosstab(p['method'],p['distance'])
x

Out[648]: distance 19.84 56.95 77.40 110.62 119.47

method

Radial Velocity 1 1 1 1 1

localhost:8888/notebooks/satan practice.ipynb# 96/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [649]:  p.plot.bar()

Out[649]: <Axes: >

In [654]:  t
c=t.head()
c

Out[654]: survived pclass sex age sibsp parch fare embarked class who adult_ma

0 0 3 male 22.0 1 0 7.2500 S Third man Tr

1 1 1 female 38.0 1 0 71.2833 C First woman Fa

2 1 3 female 26.0 0 0 7.9250 S Third woman Fa

3 1 1 female 35.0 1 0 53.1000 S First woman Fa

4 0 3 male 35.0 0 0 8.0500 S Third man Tr

localhost:8888/notebooks/satan practice.ipynb# 97/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [656]:  sns.barplot(x=c['fare'],y=c['sex'],hue=c.age)

Out[656]: <Axes: xlabel='fare', ylabel='sex'>

In [657]:  t['age'].plot.density()

Out[657]: <Axes: ylabel='Density'>

localhost:8888/notebooks/satan practice.ipynb# 98/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [658]:  sns.distplot(t['age'])

C:\Users\learner\AppData\Local\Temp\ipykernel_3180\128245390.py:1: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(t['age'])

Out[658]: <Axes: xlabel='age', ylabel='Density'>

localhost:8888/notebooks/satan practice.ipynb# 99/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [660]:  sns.regplot('age','fare',data=t)

-------------------------------------------------------------------------
--
TypeError Traceback (most recent call las
t)
Cell In[660], line 1
----> 1 sns.regplot('age','fare',data=t)

TypeError: regplot() got multiple values for argument 'data'

localhost:8888/notebooks/satan practice.ipynb# 100/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [661]:  sns.pairplot(t)

<__array_function__ internals>:200: RuntimeWarning: Converting input from


bool to <class 'numpy.uint8'> for compatibility.
<__array_function__ internals>:200: RuntimeWarning: Converting input from
bool to <class 'numpy.uint8'> for compatibility.
C:\ProgramData\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserW
arning: The figure layout has changed to tight
self._figure.tight_layout(*args, **kwargs)

Out[661]: <seaborn.axisgrid.PairGrid at 0x27028d45850>

localhost:8888/notebooks/satan practice.ipynb# 101/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [667]:  sns.barplot(x=c['age'],y=c['fare'])

Out[667]: <Axes: xlabel='age', ylabel='fare'>

In [662]:  c
Out[662]: survived pclass sex age sibsp parch fare embarked class who adult_ma

0 0 3 male 22.0 1 0 7.2500 S Third man Tr

1 1 1 female 38.0 1 0 71.2833 C First woman Fa

2 1 3 female 26.0 0 0 7.9250 S Third woman Fa

3 1 1 female 35.0 1 0 53.1000 S First woman Fa

4 0 3 male 35.0 0 0 8.0500 S Third man Tr

In [668]:  import plotly.express as px


import numpy as np

In [679]:  x=np.arange(20)

In [676]:  y=np.arange(20,0,-1)

localhost:8888/notebooks/satan practice.ipynb# 102/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [677]:  z=np.random.randint(1,10,20)

In [680]:  plot=px.scatter(x=x,y=y,size=z,hover_data=[x,y])

In [681]:  plot.show()

In [683]:  c
Out[683]: survived pclass sex age sibsp parch fare embarked class who adult_ma

0 0 3 male 22.0 1 0 7.2500 S Third man Tr

1 1 1 female 38.0 1 0 71.2833 C First woman Fa

2 1 3 female 26.0 0 0 7.9250 S Third woman Fa

3 1 1 female 35.0 1 0 53.1000 S First woman Fa

4 0 3 male 35.0 0 0 8.0500 S Third man Tr

localhost:8888/notebooks/satan practice.ipynb# 103/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [687]:  slot=px.scatter(x=c['age'],y=c['fare'])
slot.show()

In [700]:  import plotly.graph_objects as go


x=np.arange(10,101,10)
y=np.arange(1,11)

localhost:8888/notebooks/satan practice.ipynb# 104/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [705]:  fig=go.Figure(data = [go.Scatter(x=np.arange(10,101,10),y=np.arange(1,11))

fig.show()

In [707]:  c

Out[707]: survived pclass sex age sibsp parch fare embarked class who adult_ma

0 0 3 male 22.0 1 0 7.2500 S Third man Tr

1 1 1 female 38.0 1 0 71.2833 C First woman Fa

2 1 3 female 26.0 0 0 7.9250 S Third woman Fa

3 1 1 female 35.0 1 0 53.1000 S First woman Fa

4 0 3 male 35.0 0 0 8.0500 S Third man Tr

In [708]:  x=px.pie(c,values='fare',names='class')

localhost:8888/notebooks/satan practice.ipynb# 105/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [709]:  x.show()

In [710]:  figx=go.Figure(data=[go.Pie(labels=c.age,values=c.fare)])
figx.show()

localhost:8888/notebooks/satan practice.ipynb# 106/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [714]:  figv=go.Figure(data=[go.Bar(y=c.fare,x=c.age)])
figv.show()

In [715]:  from sklearn import datasets


In [716]:  iris=datasets.load_iris()

localhost:8888/notebooks/satan practice.ipynb# 107/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [717]:  iris

Out[717]: {'data': array([[5.1, 3.5, 1.4, 0.2],


[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
[5.4, 3.9, 1.7, 0.4],
[4.6, 3.4, 1.4, 0.3],
[5. , 3.4, 1.5, 0.2],
[4.4, 2.9, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.1],
[5.4, 3.7, 1.5, 0.2],
[4.8, 3.4, 1.6, 0.2],
[4.8, 3. , 1.4, 0.1],
[4.3, 3. , 1.1, 0.1],
[5.8, 4. , 1.2, 0.2],
[5.7, 4.4, 1.5, 0.4],
[5.4, 3.9, 1.3, 0.4],
[5.1, 3.5, 1.4, 0.3],
[5.7, 3.8, 1.7, 0.3],
[5.1, 3.8, 1.5, 0.3],
In [718]:  df=px.data.gapminder()

In [719]:  df

Out[719]: country continent year lifeExp pop gdpPercap iso_alpha iso_num

0 Afghanistan Asia 1952 28.801 8425333 779.445314 AFG 4

1 Afghanistan Asia 1957 30.332 9240934 820.853030 AFG 4

2 Afghanistan Asia 1962 31.997 10267083 853.100710 AFG 4

3 Afghanistan Asia 1967 34.020 11537966 836.197138 AFG 4

4 Afghanistan Asia 1972 36.088 13079460 739.981106 AFG 4

... ... ... ... ... ... ... ... ...

1699 Zimbabwe Africa 1987 62.351 9216418 706.157306 ZWE 716

1700 Zimbabwe Africa 1992 60.377 10704340 693.420786 ZWE 716

1701 Zimbabwe Africa 1997 46.809 11404948 792.449960 ZWE 716

1702 Zimbabwe Africa 2002 39.989 11926563 672.038623 ZWE 716

1703 Zimbabwe Africa 2007 43.487 12311143 469.709298 ZWE 716

1704 rows × 8 columns

In [720]:  x=px.data

In [725]:  x

Out[725]: <module 'plotly.express.data' from 'C:\\ProgramData\\anaconda3\\Lib\\site


-packages\\plotly\\express\\data\\__init__.py'>

localhost:8888/notebooks/satan practice.ipynb# 108/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [733]:  g=df.head()

In [734]:  l=px.line(g,g['lifeExp'],g['year'])

In [735]:  l.show()

In [737]:  fig=go.Figure(data=[go.Pie(labels=df.continent,values=df.lifeExp)])

localhost:8888/notebooks/satan practice.ipynb# 109/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [738]:  fig.show()

localhost:8888/notebooks/satan practice.ipynb# 110/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [739]:  fig=go.Figure(data=[go.Pie(labels=df.continent,values=df.lifeExp,hole=0.5)
fig.show()

localhost:8888/notebooks/satan practice.ipynb# 111/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [745]:  fig=go.Figure(data=[go.Pie(labels=df.continent,values=df.lifeExp,pull=[0,0
fig.show()

In [746]:  ​
df.head()

Out[746]: country continent year lifeExp pop gdpPercap iso_alpha iso_num

0 Afghanistan Asia 1952 28.801 8425333 779.445314 AFG 4

1 Afghanistan Asia 1957 30.332 9240934 820.853030 AFG 4

2 Afghanistan Asia 1962 31.997 10267083 853.100710 AFG 4

3 Afghanistan Asia 1967 34.020 11537966 836.197138 AFG 4

4 Afghanistan Asia 1972 36.088 13079460 739.981106 AFG 4

localhost:8888/notebooks/satan practice.ipynb# 112/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [761]:  #country=['red','green','blue','black','yellow']
fig=px.bar(x=df.continent,y=df.year,barmode='group')
fig.show()

localhost:8888/notebooks/satan practice.ipynb# 113/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

-------------------------------------------------------------------------
--
ValueError Traceback (most recent call las
t)
Cell In[761], line 2
1 country=['red','green','blue','black','yellow']
----> 2 fig=px.bar(x=df.continent,y=df.year,barmode='group',color='countr
y')
3 fig.show()

File C:\ProgramData\anaconda3\Lib\site-packages\plotly\express\_chart_typ
es.py:373, in bar(data_frame, x, y, color, pattern_shape, facet_row, face
t_col, facet_col_wrap, facet_row_spacing, facet_col_spacing, hover_name,
hover_data, custom_data, text, base, error_x, error_x_minus, error_y, err
or_y_minus, animation_frame, animation_group, category_orders, labels, co
lor_discrete_sequence, color_discrete_map, color_continuous_scale, patter
n_shape_sequence, pattern_shape_map, range_color, color_continuous_midpoi
nt, opacity, orientation, barmode, log_x, log_y, range_x, range_y, text_a
uto, title, template, width, height)
325 def bar(
326 data_frame=None,
327 x=None,
(...)
367 height=None,
368 ) -> go.Figure:
369 """
370 In a bar plot, each row of `data_frame` is represented as a r
ectangular
371 mark.
372 """
--> 373 return make_figure(
374 args=locals(),
375 constructor=go.Bar,
376 trace_patch=dict(textposition="auto"),
377 layout_patch=dict(barmode=barmode),
378 )

File C:\ProgramData\anaconda3\Lib\site-packages\plotly\express\_core.py:1
990, in make_figure(args, constructor, trace_patch, layout_patch)
1987 layout_patch = layout_patch or {}
1988 apply_default_cascade(args)
-> 1990 args = build_dataframe(args, constructor)
1991 if constructor in [go.Treemap, go.Sunburst, go.Icicle] and args
["path"] is not None:
1992 args = process_dataframe_hierarchy(args)

File C:\ProgramData\anaconda3\Lib\site-packages\plotly\express\_core.py:1
405, in build_dataframe(args, constructor)
1402 args["color"] = None
1403 # now that things have been prepped, we do the systematic rewriti
ng of `args`
-> 1405 df_output, wide_id_vars = process_args_into_dataframe(
1406 args, wide_mode, var_name, value_name
1407 )
1409 # now that `df_output` exists and `args` contains only reference
s, we complete
1410 # the special-case and wide-mode handling by further rewriting ar
localhost:8888/notebooks/satan practice.ipynb# 114/126
6/10/24, 6:53 AM satan practice - Jupyter Notebook
gs and/or mutating
1411 # df_output
1413 count_name = _escape_col_name(df_output, "count", [var_name, valu
e_name])

File C:\ProgramData\anaconda3\Lib\site-packages\plotly\express\_core.py:1
189, in process_args_into_dataframe(args, wide_mode, var_name, value_nam
e)
1187 df_output[col_name] = to_unindexed_series(real_argument)
1188 elif not df_provided:
-> 1189 raise ValueError(
1190 "String or int arguments are only possible when a "
1191 "DataFrame or an array is provided in the `data_frame` "
1192 "argument. No DataFrame was provided, but argument "
1193 "'%s' is of type str or int." % field
1194 )
1195 # Check validity of column name
1196 elif argument not in df_input.columns:

ValueError: String or int arguments are only possible when a DataFrame or
an array is provided in the `data_frame` argument. No DataFrame was
provided, but argument 'color' is of type str or int.

In [751]:  go.Bar?

In [762]:  import numpy as np


import pandas as pd
import matplotlib.pyplot as plt

localhost:8888/notebooks/satan practice.ipynb# 115/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [772]:  plt.plot(np.arange(1,51,10),np.arange(10,160,30))
plt.title('DRAW A LINE')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.xlim(0,50)
plt.ylim(0,160)

Out[772]: (0.0, 160.0)

localhost:8888/notebooks/satan practice.ipynb# 116/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [818]:  plt.plot([1,2,3],[2,4,1])
plt.title('DRAW A LINE')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.xlim(1.0,3.0)
plt.ylim(1.0,4.0)
plt.annotate('satan landed',color='red',xy=(2,2),xytext=(1,2),weight='bold
plt.annotate('satan flew away',xy=(2.5,2.5),xytext=(1,3),arrowprops=dict(a
plt.grid()

In [781]:  df=pd.read_csv('bus.txt')

In [785]:  import seaborn as sns


import plotly.express as px
import plotly.graph_objects as go

localhost:8888/notebooks/satan practice.ipynb# 117/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [794]:  df.plot()

Out[794]: <Axes: >

localhost:8888/notebooks/satan practice.ipynb# 118/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [784]:  sns.lineplot(df)

Out[784]: <Axes: >

localhost:8888/notebooks/satan practice.ipynb# 119/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [807]:  plt.plot(df.iloc[:,1],label='open')
plt.plot(df.iloc[:,2],label='high')
plt.plot(df.iloc[:,3],label='low')
plt.plot(df.iloc[:,4],label='close')
plt.legend(loc='best')
plt.grid()

In [798]:  df

Out[798]: Date Open High Low Close

0 10-03-16 774.250000 776.065002 769.500000 772.559998

1 10-04-16 776.030029 778.710022 772.890015 776.429993

2 10-05-16 779.309998 782.070007 775.650024 776.469971

3 10-06-16 779.000000 780.479980 775.539978 776.859985

4 10-07-16 779.659973 779.659973 770.750000 775.080017

localhost:8888/notebooks/satan practice.ipynb# 120/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [808]:  import matplotlib.pyplot as plt



# Five sample points on the line y = 2x
x = [1, 2, 3, 4, 5]
y = [2 * value for value in x]

# Solid blue line with circle markers, then title and axis labels
plt.plot(x, y, marker='o', linestyle='-', color='blue')
plt.title('Customized Plot with Annotations', fontsize=14)
plt.xlabel('X-axis Label', fontsize=12)
plt.ylabel('Y-axis Label', fontsize=12)

# Two annotations on the point (3, 6): plain text placed at the point itself,
# and a label at (2, 8) with an arrow pointing back to it
plt.text(3, 6, 'Example Text Annotation', fontsize=10)
plt.annotate(
    'Example Arrow Annotation',
    xy=(3, 6),
    xytext=(2, 8),
    arrowprops={'facecolor': 'black', 'arrowstyle': '->'},
)

plt.grid(True)
plt.show()

localhost:8888/notebooks/satan practice.ipynb# 121/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [822]:  import pandas as pd


import numpy as np

# Create sample sales data: 100 random sales records.
# The seed is fixed so the random draws (and therefore the tables derived
# from them) are repeatable from run to run.
np.random.seed(0)
sales_data = {
    'Salesperson': np.random.choice(['Alice', 'Bob', 'Charlie'], size=100),
    'Region': np.random.choice(['North', 'South', 'East', 'West'], size=100),
    'Category': np.random.choice(['Electronics', 'Clothing', 'Books'], size=100),
    'Amount': np.random.randint(100, 1000, size=100),
}
sales_df = pd.DataFrame(sales_data)

# Create a pivot table: total Amount per Salesperson (rows) and Category
# (columns). fill_value=0 replaces empty combinations with 0, and
# margins=True appends the 'All' row/column holding the totals.
pivot_table = pd.pivot_table(sales_df, index='Salesperson', columns='Category',
                             values='Amount', aggfunc='sum', fill_value=0,
                             margins=True)

print("Pivot Table:")
pivot_table

Pivot Table:

Out[822]: Category Books Clothing Electronics All

Salesperson

Alice 9639 8136 4164 21939

Bob 3224 7998 7818 19040

Charlie 2984 5816 5446 14246

All 15847 21950 17428 55225

In [824]:  # Create a cross-tabulation


# Count of sales records per Region (rows) and Category (columns);
# margins=True appends the 'All' row/column with the row and column totals.
cross_tab = pd.crosstab(sales_df['Region'], sales_df['Category'], margins=True)

print("\nCross-Tabulation:")
cross_tab

Cross-Tabulation:

Out[824]: Category Books Clothing Electronics All

Region

East 7 15 8 30

North 11 10 6 27

South 5 5 9 19

West 4 11 9 24

All 27 41 32 100

localhost:8888/notebooks/satan practice.ipynb# 122/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [828]:  import plotly.express as px


import pandas as pd

# Sample data: 100 standard-normal (X, Y) points, each tagged with a random
# category label. The seed is fixed so the draws are repeatable.
np.random.seed(0)
df = pd.DataFrame(dict(
    X=np.random.randn(100),
    Y=np.random.randn(100),
    Category=np.random.choice(['A', 'B', 'C'], 100),
))

# Scatter plot coloured by category; all three columns are shown on hover
fig = px.scatter(
    df,
    x='X',
    y='Y',
    color='Category',
    title='Interactive Scatter Plot with Hover Effects',
    hover_data=dict.fromkeys(['X', 'Y', 'Category'], True),
)

# Apply the same styling to both axes, hover on the closest point, and
# tighten the figure margins
fig.update_layout(
    **{
        axis_name: dict(
            autorange=True,
            showgrid=True,
            zeroline=True,
            showline=True,
            ticks='',
            showticklabels=True,
        )
        for axis_name in ('xaxis', 'yaxis')
    },
    hovermode='closest',
    margin=dict(l=0, r=0, t=30, b=30),
)

fig.show()

localhost:8888/notebooks/satan practice.ipynb# 123/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

localhost:8888/notebooks/satan practice.ipynb# 124/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

In [829]:  import plotly.graph_objs as go


import numpy as np

# Sample data
np.random.seed(0)
x = np.random.randn(100)
y = np.random.randn(100)

# Create a scatter plot with hover effects
fig = go.Figure(data=go.Scatter(x=x, y=y, mode='markers', marker=dict(colo
hoverinfo='text', text=['Point {}'.format(

# Set the figure title and axis titles, make hover pick the closest point,
# and set the drag interaction mode to 'zoom'
layout_options = {
    'title': 'Interactive Scatter Plot',
    'xaxis': {'title': 'X-axis'},
    'yaxis': {'title': 'Y-axis'},
    'hovermode': 'closest',
    'dragmode': 'zoom',
}
fig.update_layout(**layout_options)

fig.show()

localhost:8888/notebooks/satan practice.ipynb# 125/126


6/10/24, 6:53 AM satan practice - Jupyter Notebook

localhost:8888/notebooks/satan practice.ipynb# 126/126

You might also like