pandas.py
pandas.py
import pandas as pd
In [2]: dict1 = {
"name":['harry', 'rohan','skillf','shubh'],
"marks":[92,34,24,17],
"city":['rampur','kolkata','barelly','antarctica']
}
In [3]: df = pd.DataFrame(dict1)
In [4]: df
0 harry 92 rampur
1 rohan 34 kolkata
2 skillf 24 barelly
3 shubh 17 antarctica
In [5]: df.to_csv('friends.csv')
In [8]: df.head(2)
0 harry 92 rampur
1 rohan 34 kolkata
In [9]: df.tail(2)
2 skillf 24 barelly
3 shubh 17 antarctica
In [10]: df.describe()
Loading [MathJax]/extensions/Safe.js
Out[10]: marks
count 4.00000
mean 41.75000
std 34.21866
min 17.00000
25% 22.25000
50% 29.00000
75% 48.50000
max 92.00000
In [12]: vinay
0 0 0 1521644 50 rampur
1 1 1 24165 34 kolkata
2 2 2 54876 24 barelly
3 3 3 5157 17 antarctica
In [13]: vinay['speed'][0] = 50
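C:\Users\vinay\AppData\Local\Temp\ipykernel_12824\473427975.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
Note: this warning comes from the chained indexing in the cell above (vinay['speed'][0] = 50); a single indexer such as vinay.loc[0, 'speed'] = 50 assigns directly on the DataFrame and avoids it.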
In [14]: vinay
0 0 0 1521644 50 rampur
1 1 1 24165 34 kolkata
2 2 2 54876 24 barelly
3 3 3 5157 17 antarctica
In [15]: vinay.to_csv('vinay.csv')
In [17]: vinay
Loading [MathJax]/extensions/Safe.js
Out[17]: Unnamed: 0.1 Unnamed: 0 train no. speed city
In [19]: type(ser)
pandas.core.series.Series
Out[19]:
In [21]: newdf.head()
Out[21]: 0 1 2 3 4
In [22]: type(newdf)
pandas.core.frame.DataFrame
Out[22]:
In [23]: newdf.describe()
Out[23]: 0 1 2 3 4
In [24]: newdf.dtypes
0 float64
Out[24]:
1 float64
2 float64
3 float64
4 float64
dtype: object
In [26]: newdf.head()
Out[26]: 0 1 2 3 4
In [27]: newdf.index
Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
Out[27]:
...
324, 325, 326, 327, 328, 329, 330, 331, 332, 333],
dtype='int32', length=334)
In [28]: newdf.columns
In [29]: newdf.to_numpy()
In [31]: newdf.head()
Out[31]: 0 1 2 3 4
In [32]: newdf.T
Loading [MathJax]/extensions/Safe.js
Out[32]: 0 1 2 3 4 5 6 7 8 9 ...
0 0.3 vinay 0.351126 0.808912 0.121119 0.541671 0.810778 0.013301 0.970215 0.834933 ... 0.443
1 0.483302 0.358511 0.453518 0.194086 0.840377 0.332581 0.49378 0.546343 0.357016 0.844727 ... 0.215
2 0.182232 0.836136 0.532963 0.2441 0.933503 0.743576 0.173255 0.78586 0.456049 0.842426 ... 0.821
3 0.109495 0.389201 0.806051 0.224745 0.33241 0.498823 0.027296 0.580119 0.22295 0.937127 ... 0.761
4 0.346556 0.662256 0.880142 0.603455 0.57951 0.498658 0.963489 0.033478 0.524955 0.784691 ... 0.611
In [33]: newdf.head()
Out[33]: 0 1 2 3 4
Out[34]: 0 1 2 3 4
In [35]: newdf.head()
Out[35]: 0 1 2 3 4
Loading [MathJax]/extensions/Safe.js
In [36]: type(newdf[0])
pandas.core.series.Series
Out[36]:
In [39]: newdf
Out[39]: 0 1 2 3 4
In [40]: # How to copy a DataFrame
In [42]: newdf2[0][0] = 2
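C:\Users\vinay\AppData\Local\Temp\ipykernel_12824\2252306501.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
Note: this warning comes from the chained indexing in the cell above (newdf2[0][0] = 2); a single indexer such as newdf2.loc[0, 0] = 2 assigns directly on the DataFrame and avoids it.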
In [43]: newdf
Loading [MathJax]/extensions/Safe.js
Out[43]: 0 1 2 3 4
In [45]: newdf.head(3)
Out[45]: 0 1 2 3 4
In [47]: newdf.head()
Out[47]: A B C D E
Loading [MathJax]/extensions/Safe.js
Out[48]: A B C D E 0
Out[49]: A B C D E 0
Loading [MathJax]/extensions/Safe.js
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[50], line 1
----> 1 newdf = newdf.drop(1, axis=1)
In [51]: newdf.head()
Loading [MathJax]/extensions/Safe.js
Out[51]: A B C D E 0
In [52]: newdf.loc[[1,2],['C','D']]
Out[52]: C D
1 0.836136 0.389201
2 0.532963 0.806051
In [53]: newdf.head()
Out[53]: A B C D E 0
In [54]: newdf.loc[[1,2],:]
Out[54]: A B C D E 0
In [55]: newdf.loc[:,['C','D']]
Loading [MathJax]/extensions/Safe.js
Out[55]: C D
0 654.000000 0.109495
1 0.836136 0.389201
2 0.532963 0.806051
3 0.244100 0.224745
4 0.933503 0.332410
In [56]: newdf.loc[(newdf['A']<0.3)]
Out[56]: A B C D E 0
92 rows × 6 columns
Loading [MathJax]/extensions/Safe.js
Out[57]: A B C D E 0
92 rows × 6 columns
In [58]: newdf.head(2)
Out[58]: A B C D E 0
In [59]: newdf.iloc[0,4]
0.346555717762674
Out[59]:
In [60]: newdf.iloc[[0,5],[1,2]]
Out[60]: B C
0 0.483302 654.000000
5 0.332581 0.743576
In [61]: newdf.head(3)
Out[61]: A B C D E 0
In [62]: newdf.drop([0])
Loading [MathJax]/extensions/Safe.js
Out[62]: A B C D E 0
In [63]: newdf.head(2)
Out[63]: A B C D E 0
In [64]: newdf.iloc[0,4]
0.346555717762674
Out[64]:
In [65]: newdf.iloc[[0,1],[1,2]]
Out[65]: B C
0 0.483302 654.000000
1 0.358511 0.836136
In [66]: newdf.head(3)
Out[66]: A B C D E 0
In [67]: newdf.drop([0])
Loading [MathJax]/extensions/Safe.js
Out[67]: A B C D E 0
Out[69]: B D E 0
In [75]: newdf.head(3)
Out[75]: A B C D E 0
Loading [MathJax]/extensions/Safe.js
In [77]: newdf.head(3)
Out[77]: A B C D E 0
In [80]: newdf.head()
Out[80]: A B C D E 0
In [ ]:
In [ ]:
In [ ]:
NUMPY
In [81]: import numpy as np
# The np.int<size> dtypes (for example np.int8 or np.int32) set how many bits each integer element of the array uses.
In [92]: myarr.shape
(1, 4)
Out[92]:
In [93]: myarr.dtype
dtype('int8')
Out[93]:
In [94]: myarr[0,1]
6
Out[94]:
Loading [MathJax]/extensions/Safe.js
array([[14, 45, 32, 7]], dtype=int8)
Out[95]:
In [97]: listarry
array([[1, 2, 3],
Out[97]:
[8, 6, 4],
[2, 6, 7]])
In [99]: listarry.shape
(3, 3)
Out[99]:
In [100… listarry.size
9
Out[100]:
In [103… zeros
Loading [MathJax]/extensions/Safe.js
array([[1., 0., 0., ..., 0., 0., 0.],
Out[116]:
[0., 1., 0., ..., 0., 0., 0.],
[0., 0., 1., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 1., 0., 0.],
[0., 0., 0., ..., 0., 1., 0.],
[0., 0., 0., ..., 0., 0., 1.]])
In [117… ide.shape
(45, 45)
Out[117]:
In [120… arr.reshape(3,33)
In [121… arr.reshape(3,31)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[121], line 1
----> 1 arr.reshape(3,31)
In [122… arr.ravel()
In [123… x = [[1,2,3],[4,5,6],[7,1,0]]
In [126… ar = np.array(x)
ar
array([[1, 2, 3],
Out[126]:
[4, 5, 6],
[7, 1, 0]])
In [127… ar.sum(axis=0)
array([12, 8, 9])
Out[127]:
Loading [MathJax]/extensions/Safe.js
In [128… ar.sum(axis=1)
In [130… ar.T
array([[1, 4, 7],
Out[130]:
[2, 5, 1],
[3, 6, 0]])
In [131… ar.flat
<numpy.flatiter at 0x21230427be0>
Out[131]:
1
2
3
4
5
6
7
1
0
2
Out[134]:
In [135… ar.size
9
Out[135]:
In [136… ar.nbytes
36
Out[136]:
3
Out[140]:
In [142… one.argmin()
0
Out[142]:
In [143… one.argsort()
In [144… ar
array([[1, 2, 3],
Out[144]:
[4, 5, 6],
[7, 1, 0]])
In [146… ar.argmin()
Loading [MathJax]/extensions/Safe.js
8
Out[146]:
In [147… ar.argmax(axis=0)
In [148… ar.argmax(axis=1)
In [149… ar.argsort(axis=0)
array([[0, 2, 2],
Out[149]:
[1, 0, 0],
[2, 1, 1]], dtype=int64)
In [150… ar.ravel()
array([1, 2, 3, 4, 5, 6, 7, 1, 0])
Out[150]:
In [151… ar.reshape((9,1))
array([[1],
Out[151]:
[2],
[3],
[4],
[5],
[6],
[7],
[1],
[0]])
In [152… ar
array([[1, 2, 3],
Out[152]:
[4, 5, 6],
[7, 1, 0]])
array([[ 1, 2, 1],
Out[157]:
[ 8, 5, 12],
[ 4, 0, 6]])
In [156… ar + ar2
array([[ 2, 4, 4],
Out[156]:
[12, 10, 18],
[11, 1, 6]])
In [158… ar * ar2
array([[ 1, 4, 3],
Out[158]:
[32, 25, 72],
[28, 0, 0]])
In [159… np.sqrt(ar)
In [160… ar.sum()
Loading [MathJax]/extensions/Safe.js
29
Out[160]:
In [161… ar.max()
7
Out[161]:
In [162… ar.min()
0
Out[162]:
In [163… ar
array([[1, 2, 3],
Out[163]:
[4, 5, 6],
[7, 1, 0]])
In [164… np.where(ar>5)
In [165… np.count_nonzero(ar)
8
Out[165]:
In [166… np.nonzero(ar)
In [167… ar[1,2] = 0
In [168… np.nonzero(ar)
In [172… sys.getsizeof(1)*len(py_ar)
112
Out[172]:
16
Out[174]:
The two results above (112 versus 16) show that NumPy stores the same values in less memory.
In [ ]:
Loading [MathJax]/extensions/Safe.js