Python Practice - Jupyter Notebook - 065330
import numpy as np
In [5]: data=np.random.randn(2,3)
data
In [6]: data1=np.random.rand(5,4)
data1
In [7]: data2=np.random.randint(20,100,(5,4))
data2
In [11]: data=np.arange(10).reshape(5,2)
data
In [12]: data*10
In [15]: data-data
In [16]: data+data
In [17]: data**2
In [18]: 1/data
C:\Users\learner\AppData\Local\Temp\ipykernel_4196\1424048073.py:1: RuntimeWarning: divide by zero encountered in divide
  1/data
In [19]: np.log2(data)
C:\Users\learner\AppData\Local\Temp\ipykernel_4196\2371420318.py:1: RuntimeWarning: divide by zero encountered in log2
  np.log2(data)
In [21]: np.log(data)
C:\Users\learner\AppData\Local\Temp\ipykernel_4196\182692439.py:1: RuntimeWarning: divide by zero encountered in log
  np.log(data)
In [22]: np.abs(data)
In [23]: data**1/2
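Note that ** binds tighter than /, so data**1/2 above evaluates as (data**1)/2 rather than a square root. A minimal sketch of the intended root, assuming data as built above:
In [ ]: np.sqrt(data)     # element-wise square root; equivalently data**0.5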
In [24]: data/data
C:\Users\learner\AppData\Local\Temp\ipykernel_4196\2225509407.py:1: RuntimeWarning: invalid value encountered in divide
  data/data
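The RuntimeWarnings above come from the zero entry in data (1/0, log(0) and 0/0); np.errstate can scope how NumPy reports such floating-point errors. A minimal sketch, assuming data as built above:
In [ ]: with np.errstate(divide='ignore', invalid='ignore'):
            inv = 1/data          # 0 -> inf, without a warning
            ratio = data/data     # 0/0 -> nan, without a warning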
In [25]: data.shape
Out[25]: (5, 2)
In [26]: data.dtype
Out[26]: dtype('int32')
In [27]: arr=np.array((5,3,1,8,0))
arr
In [29]: arr2=np.array([[1,2,3,4,5],[6,7,8,9,0]])
arr2
In [30]: arr2.ndim
Out[30]: 2
In [31]: arr2.shape
Out[31]: (2, 5)
In [32]: arr2.dtype
Out[32]: dtype('int32')
In [33]: arr2.size
Out[33]: 10
In [37]: arr2.itemsize
Out[37]: 4
In [39]: np.zeros((2,6))
Out[39]: array([[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.]])
In [40]: np.ones((5,4))
In [41]: np.empty((3,7))
In [42]: np.ones_like(data2)
In [43]: np.zeros_like(data2)
In [48]: np.empty_like(data2)
In [50]: np.full((3,7),4)
In [51]: np.full_like(data2,1000)
In [54]: np.arange(165).reshape(5,-1).shape
In [56]: np.eye(4,5)
In [57]: np.identity(9)
Out[57]: array([[1., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 1., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 1., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 1.]])
In [65]: arr=np.arange(20,dtype='float')
arr
Out[65]: array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19.])
In [69]: x=np.asarray(data2,dtype=np.float64)
x
In [71]: data2.astype('S')
In [73]: a=np.random.randint(1,100,(4,3))
b=np.random.randint(1,100,(4,3))
In [74]: a,b
In [75]: a*b
Out[75]: array([[5925, 2816, 1209],
[5658, 6885, 616],
[1485, 2070, 4897],
[8178, 4550, 736]])
In [76]: a-b
In [77]: b-a
In [78]: a/b
In [79]: b/a
In [80]: a**2
In [83]: a[0]
In [84]: a[0:2]
In [85]: a
In [86]: a[2:]=0
In [87]: a
In [88]: c=a[0:2]
In [90]: c[0]=100
In [91]: c
In [92]: a
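c created at In [88] is a view of a, which is why the assignment c[0]=100 also overwrote the first rows of a. A hedged sketch of the independent-copy variant:
In [ ]: c = a[0:2].copy()   # .copy() detaches the slice; writing to c would leave a unchanged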
In [98]: a
In [99]: b
Out[99]: array([[75, 44, 93],
[69, 81, 56],
[99, 46, 59],
[94, 50, 46]])
In [100]: b[2][1]
Out[100]: 46
In [101]: b[0,2]
Out[101]: 93
In [103]: d=np.arange(30).reshape(3,5,-1)
d
Out[103]: array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9]],
       [[10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]],
       [[20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29]]])
In [104]: d[2]
In [106]: x=d[0:2].copy()
x
Out[106]: array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9]],
       [[10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]]])
In [107]: x[1]=0
In [108]: x
Out[108]: array([[[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]],
       [[0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0]]])
In [109]: d
Out[109]: array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9]],
       [[10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]],
       [[20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29]]])
In [115]: d[0:,0:,0:]
Out[115]: array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9]],
       [[10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]],
       [[20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29]]])
In [124]: name=np.array(['gopi','suraj','gopi','sunil','praveen','suraj','harsh'])
In [125]: name
In [119]: name.dtype
Out[119]: dtype('<U7')
In [123]: marks=np.random.randint(20,100,(7,10))
marks
Out[123]: array([[47, 24, 85, 27, 24, 98, 89, 26, 49, 73],
[41, 82, 91, 69, 53, 22, 23, 80, 78, 96],
[44, 85, 51, 29, 46, 99, 27, 20, 88, 31],
[94, 84, 42, 59, 23, 57, 97, 64, 76, 53],
[20, 29, 20, 75, 47, 92, 66, 82, 53, 45],
[89, 55, 68, 87, 32, 29, 36, 99, 48, 72],
[41, 71, 76, 47, 29, 50, 93, 71, 25, 21]])
In [126]: name=='gopi'
In [128]: marks[name=='gopi']
Out[128]: array([[47, 24, 85, 27, 24, 98, 89, 26, 49, 73],
[44, 85, 51, 29, 46, 99, 27, 20, 88, 31]])
In [130]: marks[name=='suraj',0:2]
In [138]: marks[~x]
Out[138]: array([[47, 24, 85, 27, 24, 98, 89, 26, 49, 73],
[44, 85, 51, 29, 46, 99, 27, 20, 88, 31],
[94, 84, 42, 59, 23, 57, 97, 64, 76, 53]])
In [139]: data2
In [140]: data2>50
In [145]: data2[data2>50]=0
In [146]: data2
In [147]: b
In [148]: b[[2,3]]
In [149]: b[[1,2,3],[0,1,2]]
In [ ]:
In [151]: import pandas as pd
obj=pd.Series([1,2,4,5,67,9])
In [152]: obj1=pd.Series((1,2,4,5,67,90))
In [153]: obj,obj1
Out[153]: (0 1
1 2
2 4
3 5
4 67
5 9
dtype: int64,
0 1
1 2
2 4
3 5
4 67
5 90
dtype: int64)
In [154]: x=pd.Series({'a':20,'b':30,'c':30,'d':40})
In [155]: x
Out[155]: a 20
b 30
c 30
d 40
dtype: int64
In [156]: x[0:2]
Out[156]: a 20
b 30
dtype: int64
In [157]: x['a':'c']
Out[157]: a 20
b 30
c 30
dtype: int64
In [158]: x.index=['A','B','C','D']
In [163]: x['A':'C']
Out[163]: A 20
B 30
C 30
dtype: int64
In [164]: x.values
In [165]: x.index
In [166]: x[0]
Out[166]: 20
In [168]: x[1:3]=0
In [169]: x
Out[169]: A 20
B 0
C 0
D 40
dtype: int64
In [171]: x[['B','C']]=[3,4]
In [172]: x
Out[172]: A 20
B 3
C 4
D 40
dtype: int64
In [174]: x[x>10]
Out[174]: A 20
D 40
dtype: int64
In [175]: x*2
Out[175]: A 40
B 6
C 8
D 80
dtype: int64
In [176]: np.exp(x)
Out[176]: A 4.851652e+08
B 2.008554e+01
C 5.459815e+01
D 2.353853e+17
dtype: float64
In [179]: 3 in x.values
Out[179]: True
In [44]: states=pd.Series({'mumbai':1000,'delhi':2000,'kolkata':500,'chennai':200,'
states
In [184]: states['chennai']=None
In [185]: states
In [186]: pd.isnull(states)
In [187]: pd.notnull(states)
In [188]: x,states
Out[188]: (A 20
B 3
C 4
D 40
dtype: int64,
mumbai 1000.0
delhi 2000.0
kolkata 500.0
chennai NaN
kanpur 100.0
patna NaN
dtype: float64)
In [189]: x+states
Out[189]: A NaN
B NaN
C NaN
D NaN
chennai NaN
delhi NaN
kanpur NaN
kolkata NaN
mumbai NaN
patna NaN
dtype: float64
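x+states is all NaN because Series arithmetic aligns on index labels and the two Series share none (A-D versus city names). A minimal sketch of the fill_value form, assuming the same two Series:
In [ ]: x.add(states, fill_value=0)   # unmatched labels keep their own value instead of NaN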
In [38]: usa=pd.Series({'california':1000,'delhi':2000,'kolkata':500,'miami':200,'k
usa
In [191]: usa+states
In [193]: states.name='INDIA_STATES'
In [194]: states
In [197]: usa.name='USA_STATES'
usa
In [2]: df=pd.DataFrame({'city':['mumbai','delhi','kolkata','patna','lucknow','chennai','assam'],
        'code':[1100,1200,4000,3400,2980,2330,6660],
        'rank':['I','II','IV','III','VI','V','VII']})
In [3]: df
      city  code rank
0   mumbai  1100    I
1    delhi  1200   II
2  kolkata  4000   IV
3    patna  3400  III
4  lucknow  2980   VI
5  chennai  2330    V
6    assam  6660  VII
In [5]: df.head()
      city  code rank
0   mumbai  1100    I
1    delhi  1200   II
2  kolkata  4000   IV
3    patna  3400  III
4  lucknow  2980   VI
In [6]: df.tail()
      city  code rank
2  kolkata  4000   IV
3    patna  3400  III
4  lucknow  2980   VI
5  chennai  2330    V
6    assam  6660  VII
In [8]: df
      city  code rank  SDP in $B
0   mumbai  1100    I        100
1    delhi  1200   II         80
2  kolkata  4000   IV         20
3    patna  3400  III         30
4  lucknow  2980   VI         55
5  chennai  2330    V         10
6    assam  6660  VII         23
In [9]: df.index=['A','B','C','D','E','F','G']
In [14]: df.index.name='SERIAL'
In [15]: df.columns.name='ATTRIBUTES'
In [16]: df
ATTRIBUTES     city  code rank  SDP in $B
SERIAL
A            mumbai  1100    I        100
B             delhi  1200   II         80
C           kolkata  4000   IV         20
D             patna  3400  III         30
E           lucknow  2980   VI         55
F           chennai  2330    V         10
G             assam  6660  VII         23
In [18]: df
SERIAL
In [20]: df.replace({True:'developed',False:'developing'})
Out[20]: ATTRIBUTES city code rank SDP in $B ECONOMY
SERIAL
In [27]: df.T
Out[27]: SERIAL A B C D E F G
ATTRIBUTES
SDP in $B 100 80 20 30 55 10 23
In [28]: df.index
Out[28]: Index(['A', 'B', 'C', 'D', 'E', 'F', 'G'], dtype='object', name='SERIAL')
In [29]: df.columns
In [30]: df.values
In [34]: state
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[34], line 1
----> 1 state
NameError: name 'state' is not defined
In [41]: usa
In [45]: states
In [46]: usa+states
In [51]: pd.append(usa)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[51], line 1
----> 1 pd.append(usa)
AttributeError: module 'pandas' has no attribute 'append'
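pandas has no module-level append, hence the AttributeError; two Series are usually combined with pd.concat. A minimal sketch, assuming usa and states as defined above:
In [ ]: pd.concat([usa, states])   # stacks the two Series end to end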
In [52]: usa
In [55]: usa2=usa.reindex(['kanpur','miami','orlando','delhi'])
In [56]: usa2
In [58]: states.drop('kanpur')
Out[58]: mumbai 1000
delhi 2000
kolkata 500
chennai 200
patna 100
dtype: int64
In [59]: states
In [60]: usa.drop(['delhi','kolkata','kanpur'])
In [61]: df
SERIAL
In [63]: x=df
In [78]: usa
In [80]: usa.drop(usa.index[[1,2,4]])
Out[80]: california 1000
miami 200
orlando 200
dtype: int64
In [81]: x
SERIAL
In [82]: x.drop(x.index[[-1,-2]])
SERIAL
In [95]: x.drop(['rank','ECONOMY'],axis=1,inplace=True)
In [96]: x
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [98]: usa['delhi':'kolkata']=1000000
In [99]: usa
In [100]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [106]: df['city']
Out[106]: SERIAL
A mumbai
B delhi
C kolkata
D patna
E lucknow
F chennai
G assam
Name: city, dtype: object
In [114]: df[df.code>2000]
SERIAL
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [115]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [116]: df.loc['A':'D']
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
In [118]: df.iloc[0:3]
SERIAL
B delhi 1200 80
C kolkata 4000 20
In [119]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [120]: df.iloc[0,'city']
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1518, in _iLocIndexer._validate_key(self, key, axis)
   1517 else:
-> 1518     raise ValueError(f"Can only index by location with a [{self._valid_types}]")
ValueError: Can only index by location with a [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array]

The above exception was the direct cause of the following exception:

ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types
In [122]: df.iloc['A',0]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexing.py:1518, in _iLocIndexer._validate_key(self, key, axis)
   1517 else:
-> 1518     raise ValueError(f"Can only index by location with a [{self._valid_types}]")
ValueError: Can only index by location with a [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array]

The above exception was the direct cause of the following exception:

ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types
In [123]: df.loc['c',0]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3653, in Index.get_loc(self, key)
   3652 try:
-> 3653     return self._engine.get_loc(casted_key)
   3654 except KeyError as err:
KeyError: 0

The above exception was the direct cause of the following exception:

KeyError: 0
In [124]: df.loc[0,'city']
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3653, in Index.get_loc(self, key)
   3652 try:
-> 3653     return self._engine.get_loc(casted_key)
   3654 except KeyError as err:
KeyError: 0

The above exception was the direct cause of the following exception:

KeyError: 0
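.iloc accepts only integer positions and .loc only labels, which is why the four mixed lookups above fail. A minimal sketch of the intended scalar lookups, assuming df as shown:
In [ ]: df.iloc[0, 0]         # by position: first row, first column
        df.loc['A', 'city']   # by label: row 'A', column 'city'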
In [127]: df.iloc[[1,2,3],0]
Out[127]: SERIAL
B delhi
C kolkata
D patna
Name: city, dtype: object
In [131]: df.loc[['A','E'],'city']
Out[131]: SERIAL
A mumbai
E lucknow
Name: city, dtype: object
In [133]: df,x
In [348]: y=pd.DataFrame({'city':['miami','delhi','kolkata','orlando','lucknow','che
'SPEED':['V','II','IV','III','VI','I','VII']},index=['G','
In [138]: y
G miami 1200 V
B delhi 1220 II
C kolkata 4000 IV
E lucknow 2980 VI
F chennai 2330 I
In [136]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [139]: df+y
In [146]: df.add?
In [156]: df.sum(numeric_only=True)
Out[156]: ATTRIBUTES
code 21670
SDP in $B 318
dtype: int64
In [162]: df.idxmax(numeric_only=True)
Out[162]: ATTRIBUTES
code G
SDP in $B A
dtype: object
In [164]: df.cumsum()
SERIAL
In [167]: px=pd.DataFrame(np.arange(25).reshape(5,5))
In [170]: py=pd.DataFrame(np.arange(50,75).reshape(5,5))
In [171]: px
Out[171]: 0 1 2 3 4
0 0 1 2 3 4
1 5 6 7 8 9
2 10 11 12 13 14
3 15 16 17 18 19
4 20 21 22 23 24
In [172]: py
Out[172]: 0 1 2 3 4
0 50 51 52 53 54
1 55 56 57 58 59
2 60 61 62 63 64
3 65 66 67 68 69
4 70 71 72 73 74
In [173]: px+py
Out[173]: 0 1 2 3 4
0 50 52 54 56 58
1 60 62 64 66 68
2 70 72 74 76 78
3 80 82 84 86 88
4 90 92 94 96 98
In [174]: px.index=['a','b','c','d','e']
py.index=['c','d','e','f','g']
In [175]: px
Out[175]: 0 1 2 3 4
a 0 1 2 3 4
b 5 6 7 8 9
c 10 11 12 13 14
d 15 16 17 18 19
e 20 21 22 23 24
In [176]: py
Out[176]: 0 1 2 3 4
c 50 51 52 53 54
d 55 56 57 58 59
e 60 61 62 63 64
f 65 66 67 68 69
g 70 71 72 73 74
In [177]: px+py
Out[177]: 0 1 2 3 4
In [178]: px.add(py,fill_value=0)
Out[178]: 0 1 2 3 4
In [179]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [180]: df.describe()
In [182]: df.corr(numeric_only=True)
ATTRIBUTES
In [183]: df.cov(numeric_only=True)
ATTRIBUTES
In [184]: py.corr()
Out[184]: 0 1 2 3 4
In [ ]: py.cov()
In [186]: px.corrwith(py)
Out[186]: 0 1.0
1 1.0
2 1.0
3 1.0
4 1.0
dtype: float64
In [187]: usa
In [201]: x=np.random.randint(1,101,100)
In [202]: x
Out[202]: array([18, 91, 65, 56, 99, 28, 31, 36, 31, 99, 22, 92, 39, 82, 27, 67, 13,
       97, 41, 85, 55, 67, 14, 30, 25, 12, 24, 78, 37, 19,  2,  6, 93, 53,
       16, 93, 12, 30, 21, 88,  9, 96, 15, 71, 61, 82,  1, 35, 94, 97, 13,
       48, 32, 78, 13, 77, 10,  5, 28, 77, 29, 33, 26,  6, 26, 58, 18, 75,
       36, 43, 67, 67, 53, 67, 62, 44, 93, 70, 43, 40, 42, 17, 54, 22, 88,
       50, 72, 78, 95, 29, 94, 19, 71,  3, 38, 54, 70, 58, 93, 51])
In [203]: y=pd.Series(x)
In [208]: y.value_counts()
Out[208]: 67 5
93 4
78 3
13 3
18 2
..
61 1
1 1
35 1
92 1
51 1
Name: count, Length: 66, dtype: int64
In [ ]:
In [211]: y.isin([1,2,18])
Out[211]: 0 True
1 False
2 False
3 False
4 False
...
95 False
96 False
97 False
98 False
99 False
Length: 100, dtype: bool
In [212]: usa
In [213]: states
In [193]: px.values
In [214]: x,y
Out[214]: (array([18, 91, 65, 56, 99, 28, 31, 36, 31, 99, 22, 92, 39, 82, 27, 67, 13,
        97, 41, 85, 55, 67, 14, 30, 25, 12, 24, 78, 37, 19,  2,  6, 93, 53,
        16, 93, 12, 30, 21, 88,  9, 96, 15, 71, 61, 82,  1, 35, 94, 97, 13,
        48, 32, 78, 13, 77, 10,  5, 28, 77, 29, 33, 26,  6, 26, 58, 18, 75,
        36, 43, 67, 67, 53, 67, 62, 44, 93, 70, 43, 40, 42, 17, 54, 22, 88,
        50, 72, 78, 95, 29, 94, 19, 71,  3, 38, 54, 70, 58, 93, 51]),
 0     18
 1     91
 2     65
 3     56
 4     99
       ..
 95    54
 96    70
 97    58
 98    93
 99    51
 Length: 100, dtype: int32)
In [215]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [244]: data=pd.read_csv('mac.txt')
In [224]: data
2 Bob 35 Chicago
4 Michael 40 Houston
5 Samantha 33 Miami
6 David 27 Seattle
7 Emma 31 Boston
8 Daniel 29 Denver
9 Olivia 36 Atlanta
In [241]: sale=pd.read_table('sale.txt',sep='|')
In [242]: sale
9 110 Keyboard 50 80
In [256]: data2=pd.read_csv('mac.txt',header=None,names=['A','B','C'],index_col='C')
In [257]: data2
Out[257]: A B
Chicago Bob 35
Houston Michael 40
Miami Samantha 33
Seattle David 27
Boston Emma 31
Denver Daniel 29
Atlanta Olivia 36
In [258]: data3=pd.read_csv('mac.txt',header=None,names=['A','B','C'],index_col=['C','B'])
In [259]: data3
Out[259]: A
C B
Chicago 35 Bob
Houston 40 Michael
Miami 33 Samantha
Seattle 27 David
Boston 31 Emma
Denver 29 Daniel
Atlanta 36 Olivia
In [263]: dell=pd.read_table('hell.txt',sep='\s+')
In [264]: dell
In [265]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [267]: df.to_csv('indian_states.csv',sep='|')
In [270]: import seaborn as sns
sns.get_dataset_names()
Out[270]: ['anagrams',
'anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'dowjones',
'exercise',
'flights',
'fmri',
'geyser',
'glue',
'healthexp',
'iris',
'mpg',
'penguins',
'planets',
'seaice',
'taxis',
'tips',
'titanic',
'anagrams',
'anagrams',
'anscombe',
'anscombe',
'attention',
'attention',
'brain_networks',
'brain_networks',
'car_crashes',
'car_crashes',
'diamonds',
'diamonds',
'dots',
'dots',
'dowjones',
'dowjones',
'exercise',
'exercise',
'flights',
'flights',
'fmri',
'fmri',
'geyser',
'geyser',
'glue',
'glue',
'healthexp',
'healthexp',
'iris',
'iris',
'mpg',
'mpg',
'penguins',
'penguins',
'planets',
'planets',
'seaice',
'seaice',
'taxis',
'taxis',
'tips',
'tips',
'titanic',
'titanic',
'anagrams',
'anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'dowjones',
'exercise',
'flights',
'fmri',
'geyser',
'glue',
'healthexp',
'iris',
'mpg',
'penguins',
'planets',
'seaice',
'taxis',
'tips',
'titanic']
In [281]: fog=sns.load_dataset('dots')
fog
In [274]: fog.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 90 non-null int64
1 id 90 non-null int64
2 diet 90 non-null category
3 pulse 90 non-null int64
4 time 90 non-null category
5 kind 90 non-null category
dtypes: category(3), int64(3)
memory usage: 2.9 KB
In [284]: cv=pd.DataFrame(mx)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[284], line 1
----> 1 cv=pd.DataFrame(mx)
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\internals\construction.py:655, in _extract_index(data)
    653 lengths = list(set(raw_lengths))
    654 if len(lengths) > 1:
--> 655     raise ValueError("All arrays must be of the same length")
ValueError: All arrays must be of the same length
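The ValueError above is pandas refusing a dict whose list values have different lengths; wrapping each list in a Series lets shorter columns be padded with NaN. A hedged sketch with a small hypothetical dict (the real mx source is not shown here):
In [ ]: raw = {'color': ['red', 'blue', 'green', 'yellow'],   # hypothetical lists of unequal length
               'weight': [100.0, 200.0, 300.0]}
        cv = pd.DataFrame({k: pd.Series(v) for k, v in raw.items()})
        cv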
In [309]: mx=pd.DataFrame(cv)
In [288]: mx
Out[288]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN NaN NaN None None None
4 None None None None NaN NaN NaN None None None
In [289]: mx.dropna(how='all')
Out[289]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN NaN NaN None None None
Out[291]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 out of stock Brand2 home
3 yellow large triangle metal 300.0 15.0 3.0 out of stock Brand2 home
4 yellow large triangle metal 300.0 15.0 3.0 out of stock Brand2 home
In [295]: mx.replace({'None':np.nan})
Out[295]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN NaN NaN None None None
4 None None None None NaN NaN NaN None None None
In [297]: mx.dropna()
Out[297]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
In [303]: mx.dropna(thresh=1)
Out[303]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN NaN NaN None None None
In [302]: fog.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 848 entries, 0 to 847
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 align 848 non-null object
1 choice 848 non-null object
2 time 848 non-null int64
3 coherence 848 non-null float64
4 firing_rate 848 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 33.3+ KB
In [ ]: mx.dropna()
In [306]: mx.fillna(method='bfill')
Out[306]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand1 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN NaN NaN None None None
4 None None None None NaN NaN NaN None None None
In [312]: mx.loc[0,'brand']=np.nan
In [313]: mx
Out[313]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock NaN electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN NaN NaN None None None
4 None None None None NaN NaN NaN None None None
In [318]: mx.fillna(method='backfill')
Out[318]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock Brand2 electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN NaN NaN None None None
4 None None None None NaN NaN NaN None None None
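Recent pandas releases deprecate the method= argument of fillna; the dedicated fill methods are the forward-compatible spelling. A minimal sketch, assuming mx as above:
In [ ]: mx.bfill()   # same result as fillna(method='bfill'); mx.ffill() is the forward-fill counterpart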
In [322]: mx.dropna(axis=1,thresh=3)
In [323]: mx.fillna({'price':0,'category':'local'})
Out[323]: color size shape material weight price quantity availability brand category
0 red small circle plastic 100.0 10.5 1.0 in stock NaN electronics
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand2 clothing
2 green large triangle metal 300.0 15.0 3.0 None None home
3 yellow None None None NaN 0.0 NaN None None local
4 None None None None NaN 0.0 NaN None None local
In [326]: mx
Out[326]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand
In [333]: mx.replace([None],100)
Out[333]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand
In [ ]:
In [331]: mx.replace(0,'None')
Out[331]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand
In [334]: df
SERIAL
B delhi 1200 80
C kolkata 4000 20
D patna 3400 30
E lucknow 2980 55
F chennai 2330 10
G assam 6660 23
In [336]: df.rename(index=str.lower,columns=str.upper)
SERIAL
b delhi 1200 80
c kolkata 4000 20
d patna 3400 30
e lucknow 2980 55
f chennai 2330 10
g assam 6660 23
In [342]: df.index=df.index.apply(transform)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[342], line 1
----> 1 df.index=df.index.apply(transform)
AttributeError: 'Index' object has no attribute 'apply'
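A pandas Index has no apply method, hence the AttributeError; Index.map does the element-wise transformation. A hedged sketch with a hypothetical transform that matches the A1..G1 labels shown below:
In [ ]: transform = lambda label: label + '1'   # hypothetical; any str -> str callable works
        df.index = df.index.map(transform)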
In [341]: df
SERIAL
B1 delhi 1200 80
C1 kolkata 4000 20
D1 patna 3400 30
E1 lucknow 2980 55
F1 chennai 2330 10
G1 assam 6660 23
In [347]: df['code'].apply(np.sum)
Out[347]: SERIAL
A1 1100
B1 1200
C1 4000
D1 3400
E1 2980
F1 2330
G1 6660
Name: code, dtype: int32
G miami 1200 V
B delhi 1220 II
C kolkata 4000 IV
E lucknow 2980 VI
F chennai 2330 I
In [357]: y=pd.DataFrame({'city':['miami','delhi','las_vegas','kolkata','ohio','orla
,2330,1100,2340,1221,1111,6360
'SPEED':['V','II','IV','VIII','IX','X','XI','III','VI','I'
In [358]: y
a miami 1200 V
A
a delhi 1220 II
B b las_vegas 4000 IV
d orlando 2330 X
D
d tokyo 1100 XI
F f shanghai 1221 VI
g chennai 1111 I
G
g las vegas 6360 VII
In [359]: df
SERIAL
B1 delhi 1200 80
C1 kolkata 4000 20
D1 patna 3400 30
E1 lucknow 2980 55
F1 chennai 2330 10
G1 assam 6660 23
quantity price
color size
red small 1 10.5
blue medium 2 20.3
green large 3 15.0
red small 1 10.5
blue medium 2 20.3
In [361]: y
a miami 1200 V
A
a delhi 1220 II
B b las_vegas 4000 IV
d orlando 2330 X
D
d tokyo 1100 XI
F f shanghai 1221 VI
g chennai 1111 I
G
g las vegas 6360 VII
In [363]: y.index
In [364]: y.unstack()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[364], line 1
----> 1 y.unstack()
File C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\reshape\reshape.py:188, in _Unstacker._make_selectors(self)
    187 if mask.sum() < len(self.index):
--> 188     raise ValueError("Index contains duplicate entries, cannot reshape")
ValueError: Index contains duplicate entries, cannot reshape
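unstack refuses a MultiIndex with repeated (outer, inner) label pairs; dropping the duplicated index rows first is one way around it. A minimal sketch, assuming y as above:
In [ ]: y[~y.index.duplicated(keep='first')].unstack()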
In [ ]:
In [370]: df.set_index(['color','price'])
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_3180\3070971697.py in ?()
----> 1 df.set_index(['color','price'])
C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\frame.py in ?(self, keys, drop, append, inplace, verify_integrity)
   5858     if missing:
-> 5859         raise KeyError(f"None of {missing} are in the columns")
KeyError: "None of ['color', 'price'] are in the columns"
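The KeyError means this df has no 'color' or 'price' columns; set_index only accepts labels that actually exist. A minimal sketch of the usual check, assuming df as above:
In [ ]: df.columns                    # confirm which labels are available first
        # e.g. df.set_index(['city']) on the city/code frame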
In [368]: x
Out[368]: array([18, 91, 65, 56, 99, 28, 31, 36, 31, 99, 22, 92, 39, 82, 27, 67, 13,
       97, 41, 85, 55, 67, 14, 30, 25, 12, 24, 78, 37, 19,  2,  6, 93, 53,
       16, 93, 12, 30, 21, 88,  9, 96, 15, 71, 61, 82,  1, 35, 94, 97, 13,
       48, 32, 78, 13, 77, 10,  5, 28, 77, 29, 33, 26,  6, 26, 58, 18, 75,
       36, 43, 67, 67, 53, 67, 62, 44, 93, 70, 43, 40, 42, 17, 54, 22, 88,
       50, 72, 78, 95, 29, 94, 19, 71,  3, 38, 54, 70, 58, 93, 51])
In [369]: mx
Out[369]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand
In [372]: pd.DataFrame(data)
In [374]: x=pd.DataFrame(data_dict)
In [377]: x=x.set_index(['colors','sizes'])
In [384]: x.unstack().unstack().unstack
Employees DataFrame:
EmployeeID Name DepartmentID
0 1 John 101
1 2 Alice 102
2 3 Bob 101
3 4 Mary 103
In [390]: departments_data = {
'DepartmentID': [101, 102, 103],
'DepartmentName': ['HR', 'Finance', 'IT']
}
emp2 = pd.DataFrame(departments_data)
print("\nDepartments DataFrame:")
print(emp2)
Departments DataFrame:
DepartmentID DepartmentName
0 101 HR
1 102 Finance
2 103 IT
In [391]: emp1
0 1 John 101
1 2 Alice 102
2 3 Bob 101
3 4 Mary 103
In [392]: emp2
0 101 HR
1 102 Finance
2 103 IT
In [393]: pd.merge(emp1,emp2)
0 1 John 101 HR
1 3 Bob 101 HR
3 4 Mary 103 IT
In [394]: pd.merge(emp1,emp2,on='DepartmentID')
0 1 John 101 HR
1 3 Bob 101 HR
3 4 Mary 103 IT
In [396]: pd.merge(emp1,emp2,on='DepartmentID',how='inner')
0 1 John 101 HR
1 3 Bob 101 HR
3 4 Mary 103 IT
In [397]: pd.merge(emp1,emp2,on='DepartmentID',how='outer')
0 1 John 101 HR
1 3 Bob 101 HR
3 4 Mary 103 IT
In [398]: pd.merge(emp1,emp2,on='DepartmentID',how='right')
0 1 John 101 HR
1 3 Bob 101 HR
3 4 Mary 103 IT
In [399]: pd.merge(emp1,emp2,on='DepartmentID',how='left')
Out[399]: EmployeeID Name DepartmentID DepartmentName
0 1 John 101 HR
2 3 Bob 101 HR
3 4 Mary 103 IT
In [400]: df
color size
In [421]: x=df['price'].groupby('size')
In [416]: df.groupby('color')['quantity'].size()
Out[416]: color
blue 2
green 1
red 2
Name: quantity, dtype: int64
In [418]: df.groupby(['color','size']).count()
color size
blue medium 2 2
green large 1 1
red small 2 2
In [ ]:
In [395]: pd.merge(emp1,emp2,on='EmployeeID')
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_3180\224658189.py in ?()
----> 1 pd.merge(emp1,emp2,on='EmployeeID')
C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\generic.py in ?(self, key, axis)
   1777     else:
-> 1778         raise KeyError(key)
KeyError: 'EmployeeID'
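The merge fails because 'EmployeeID' exists only in emp1; the on= key must be present in both frames (or be named per side with left_on/right_on). A minimal sketch, assuming emp1 and emp2 as above:
In [ ]: pd.merge(emp1, emp2, on='DepartmentID', how='inner')   # key shared by both frames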
In [422]: for i , j in x:
print(i)
print(j)
large
color size
green large 15.0
Name: price, dtype: float64
medium
color size
blue medium 20.3
medium 20.3
Name: price, dtype: float64
small
color size
red small 10.5
small 10.5
Name: price, dtype: float64
In [423]: for i , j in x:
print(i,j)
In [424]: x.describe()
size
In [434]: x.agg([('quantity','count'),('price','sum')])
size
large 1 15.0
medium 2 40.6
small 2 21.0
In [435]: df
color size
In [439]: df.pivot_table(index=['color','size'],margins=True)
color size
In [445]: pd.crosstab(df.quantity,df.price)
price     10.5  15.0  20.3
quantity
1            2     0     0
2            0     0     2
3            0     1     0
In [447]: data=np.arange(10)
In [522]: import matplotlib.pyplot as plt
plt.plot(data)
In [543]: fig=plt.figure(figsize=(10,10))
plt.suptitle('SURVEYS')
In [544]: ax1=fig.add_subplot(2,2,1)
In [545]: ax1.plot(np.arange(10),np.arange(10,101,10),'--g')
ax1.set_xlabel('HEIGHT(in feet)')
ax1.set_ylabel('WEIGHT(in kg)')
ax1.set_xticks([3,5,7,9,11])
ax1.set_yticks([40,55,65,78,90])
In [546]: ax2=fig.add_subplot(2,2,2)
ax2.scatter(np.random.randint(10,100,50),np.random.randint(10,100,50),colo
In [475]: plt.hist?
In [547]: ax3=fig.add_subplot(2,2,3)
ax3.hist([10,20,30,60,80,100],bins=5,color='green',alpha=0.2)
In [570]: ax4=fig.add_subplot(2,2,4)
props={'title':'RESULTS','xlabel':'NAMES','ylabel':'MARKS'}
ax4.set(**props)
ax4.bar(['gopi','suraj','raj','rahul'],[10,20,40,100],color=['red','green'
In [571]: fig
Out[571]:
In [562]: figx,axes=plt.subplots(3,3,sharex=True,sharey=True,figsize=(10,10))
In [563]: axes[0,0].plot(np.random.randint(10,100,30).cumsum(),'--g')
In [564]: figx
Out[564]:
In [568]: axes[0,1].scatter(np.random.randint(1,1000,100),np.random.randint(1,1000,100))
In [569]: figx
Out[569]:
In [573]: p=sns.load_dataset('planets')
In [574]: p
In [830]: figv=plt.figure()
ax1=figv.add_subplot(1,1,1)
ax1.plot(p['orbital_period'].cumsum(),'g',label='one')
ax1.plot(p['distance'].cumsum(),'r--',label='two')
ax1.plot(p['year'].cumsum(),'b--',label='three')
ax1.legend(loc='best')
att={'title':'ANALYSIS','xlabel':'range','ylabel':'domain','xticklabels':[
ax1.set(**att)
plt.savefig('hello.svg')
plt.annotate('satan',xy=(2,3),xytext=(0,1),arrowprops=dict(arrowstyle='->'
plt.savefig('figpath.png',dpi=500,bbox_inches='tight')
C:\Users\learner\AppData\Local\Temp\ipykernel_3180\3879900435.py:8: UserWarning:
In [589]: plt.savefig('hell.svg')
In [594]: p
In [605]: p.year.plot(color='green',rot='vertical')
In [599]: df.plot()
In [601]: p.plot(figsize=(10,10))
In [602]: t=sns.load_dataset('titanic')
In [603]: t.plot()
In [616]: t.plot(subplots=True)
Out[616]: array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >],
dtype=object)
In [615]: a[0].show()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[615], line 1
----> 1 a[0].show()
In [617]: p
In [625]: fig
Out[625]:
In [632]: figz,axes=plt.subplots(2,1)
pd.Series(np.random.randint(1,10,10),index=list('abcdefghij')).plot.bar(ax
pd.Series(np.random.randint(1,10,10),index=list('abcdefghij')).plot.barh(a
In [633]: df.plot.bar()
In [636]: df.plot.barh()
In [634]: mx
Out[634]: COLOR SIZE SHAPE MATERIAL WEIGHT PRICE QUANTITY AVAILABILITY BRAN
1 blue medium square wood 200.0 20.3 2.0 out of stock Brand
In [635]: mx.plot.bar()
In [637]: df.plot.bar(stacked=True)
In [639]: df.plot.barh(stacked=True,alpha=0.4)
In [650]:
p
In [648]: x=pd.crosstab(p['method'],p['distance'])
x
method
Radial Velocity 1 1 1 1 1
In [649]: p.plot.bar()
In [654]: t
c=t.head()
c
Out[654]: survived pclass sex age sibsp parch fare embarked class who adult_ma
In [656]: sns.barplot(x=c['fare'],y=c['sex'],hue=c.age)
In [657]: t['age'].plot.density()
In [658]: sns.distplot(t['age'])
C:\Users\learner\AppData\Local\Temp\ipykernel_3180\128245390.py:1: UserWarning:
For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
  sns.distplot(t['age'])
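distplot is deprecated in current seaborn; histplot (axes-level) or displot (figure-level) replace it. A minimal sketch, assuming the titanic frame t loaded above:
In [ ]: sns.histplot(t['age'], kde=True)   # histogram plus KDE, the usual distplot replacement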
In [660]: sns.regplot('age','fare',data=t)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[660], line 1
----> 1 sns.regplot('age','fare',data=t)
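Newer seaborn makes x and y keyword-only in regplot, so the positional call above raises TypeError. A minimal sketch of the keyword form:
In [ ]: sns.regplot(x='age', y='fare', data=t)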
In [661]: sns.pairplot(t)
In [667]: sns.barplot(x=c['age'],y=c['fare'])
In [662]: c
Out[662]: survived pclass sex age sibsp parch fare embarked class who adult_ma
In [679]: x=np.arange(20)
In [676]: y=np.arange(20,0,-1)
In [677]: z=np.random.randint(1,10,20)
In [680]: import plotly.express as px
plot=px.scatter(x=x,y=y,size=z,hover_data=[x,y])
In [681]: plot.show()
In [683]: c
Out[683]: survived pclass sex age sibsp parch fare embarked class who adult_ma
In [687]: slot=px.scatter(x=c['age'],y=c['fare'])
slot.show()
fig.show()
In [707]: c
Out[707]: survived pclass sex age sibsp parch fare embarked class who adult_ma
In [708]: x=px.pie(c,values='fare',names='class')
In [709]: x.show()
In [710]: import plotly.graph_objects as go
figx=go.Figure(data=[go.Pie(labels=c.age,values=c.fare)])
figx.show()
In [714]: figv=go.Figure(data=[go.Bar(y=c.fare,x=c.age)])
figv.show()
In [716]: from sklearn import datasets
iris=datasets.load_iris()
In [717]: iris
In [719]: df
In [720]: x=px.data
In [725]: x
In [733]: g=df.head()
In [734]: l=px.line(g,g['lifeExp'],g['year'])
In [735]: l.show()
In [737]: fig=go.Figure(data=[go.Pie(labels=df.continent,values=df.lifeExp)])
In [738]: fig.show()
In [739]: fig=go.Figure(data=[go.Pie(labels=df.continent,values=df.lifeExp,hole=0.5)
fig.show()
In [745]: fig=go.Figure(data=[go.Pie(labels=df.continent,values=df.lifeExp,pull=[0,0
fig.show()
In [746]:
df.head()
In [761]: #country=['red','green','blue','black','yellow']
fig=px.bar(x=df.continent,y=df.year,barmode='group')
fig.show()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[761], line 2
      1 country=['red','green','blue','black','yellow']
----> 2 fig=px.bar(x=df.continent,y=df.year,barmode='group',color='country')
      3 fig.show()
File C:\ProgramData\anaconda3\Lib\site-packages\plotly\express\_core.py:1189, in process_args_into_dataframe(args, wide_mode, var_name, value_name)
   1188 elif not df_provided:
-> 1189     raise ValueError(
   1190         "String or int arguments are only possible when a "
   1191         "DataFrame or an array is provided in the `data_frame` "
   1192         "argument. No DataFrame was provided, but argument "
   1193         "'%s' is of type str or int." % field
   1194     )
ValueError: String or int arguments are only possible when a DataFrame or an array is provided in the `data_frame` argument. No DataFrame was provided, but argument 'color' is of type str or int.
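Plotly Express only resolves string column names such as color='country' when the DataFrame itself is passed as the first argument. A minimal sketch, assuming df is the gapminder-style frame used above:
In [ ]: fig = px.bar(df, x='continent', y='year', color='country', barmode='group')
        fig.show()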
In [751]: go.Bar?
In [772]: plt.plot(np.arange(1,51,10),np.arange(10,160,30))
plt.title('DRAW A LINE')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.xlim(0,50)
plt.ylim(0,160)
In [818]: plt.plot([1,2,3],[2,4,1])
plt.title('DRAW A LINE')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.xlim(1.0,3.0)
plt.ylim(1.0,4.0)
plt.annotate('satan landed',color='red',xy=(2,2),xytext=(1,2),weight='bold
plt.annotate('satan flew away',xy=(2.5,2.5),xytext=(1,3),arrowprops=dict(a
plt.grid()
In [781]: df=pd.read_csv('bus.txt')
In [794]: df.plot()
In [784]: sns.lineplot(df)
In [807]: plt.plot(df.iloc[:,1],label='open')
plt.plot(df.iloc[:,2],label='high')
plt.plot(df.iloc[:,3],label='low')
plt.plot(df.iloc[:,4],label='close')
plt.legend(loc='best')
plt.grid()
In [798]: df
Pivot Table:
Salesperson
Cross-Tabulation:
Region
East 7 15 8 30
North 11 10 6 27
South 5 5 9 19
West 4 11 9 24
All 27 41 32 100