UQ21CA632B Unit2 Class12&13 Pandas Basics
UQ21CA632B Unit2 Class12&13 Pandas Basics
In [2]:
import pandas as pd  # no import cell is visible in this notebook; harmless if pandas is already loaded

# Load the salaries dataset from the current working directory.
# NOTE(review): the loaded frame contains an 'Unnamed: 0' column (see the
# data.info() output), presumably a row index saved into the CSV. Passing
# index_col=0 would absorb it, but that would shift the positional
# iloc[:, 6] / iloc[:, 2] lookups used later, so the call is left as-is.
data=pd.read_csv("Salaries.csv")
Initial Inspection
In [3]:
# Preview the first rows of the frame (head() shows 5 rows when no count is given).
data.head()
In [4]:
# Preview the first ten rows.
data.head(n=10)
In [5]:
# Preview the last ten rows.
data.tail(n=10)
In [6]:
# Print a structural summary: index range, column names, non-null counts,
# dtypes and approximate memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 397 entries, 0 to 396
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unnamed: 0 397 non-null int64
1 rank 397 non-null object
2 discipline 397 non-null object
3 yrs.since.phd 397 non-null int64
4 yrs.service 397 non-null int64
5 sex 397 non-null object
6 salary 397 non-null int64
dtypes: int64(4), object(3)
memory usage: 21.8+ KB
In [7]:
# Column labels of the frame.
data.columns
In [8]:
# Row labels of the frame (a RangeIndex 0..396 according to data.info()).
data.index
In [9]:
# (number of rows, number of columns).
data.shape
Out[9]: (397, 7)
In [10]:
# Number of axes; a DataFrame is two-dimensional (rows x columns).
data.ndim
Out[10]: 2
In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
data.describe() # by default only numeric columns are summarised; pass include='all' to cover the object columns too
Out[12]: salary
0 139750
1 173200
2 79750
3 115000
4 141500
... ...
392 103106
393 150564
394 101738
395 95329
396 81035
In [13]:
# Positional selection: every row, column at position 6 (the 'salary' column,
# per the Name shown in the output). A single column comes back as a Series.
data.iloc[:,6]
Out[13]: 0 139750
1 173200
2 79750
3 115000
4 141500
...
392 103106
393 150564
394 101738
395 95329
396 81035
Name: salary, Length: 397, dtype: int64
In [14]:
# Positional slice: rows 10..15 and columns at positions 2..3
# ('discipline' and 'yrs.since.phd' per data.info()). End bounds are exclusive.
df=data.iloc[10:16,2:4]
In [15]:
# Display the 6-row, 2-column slice built in the previous cell.
df
10 B 12
11 B 7
12 B 1
13 B 2
14 B 20
15 B 12
In [28]:
# Label-based selection of two columns; a list of names yields a DataFrame
# with the columns in the order requested.
d1 = data.loc[:, ['discipline', 'rank']]
d1
0 B Prof
1 B Prof
2 B AsstProf
3 B Prof
4 B Prof
392 A Prof
393 A Prof
394 A Prof
395 A Prof
396 A AsstProf
In [30]:
# Positional selection: every row, column at position 2 (the 'discipline'
# column, per the Name shown in the output).
data.iloc[:,2]
Out[30]: 0 B
1 B
2 B
3 B
4 B
..
392 A
393 A
394 A
395 A
396 A
Name: discipline, Length: 397, dtype: object
Grouping of Data
In [33]:
# Group the rows by the value of the 'discipline' column.
# k is a GroupBy object; nothing is aggregated at this point.
k=data.groupby('discipline')
In [34]:
k.groups.keys()
In [35]:
# Pull the sub-frame for discipline 'A' out of the grouped object and preview it.
d2=k.get_group('A')
d2.head()
In [36]:
# Number of non-null values in each column of the discipline-'A' subset.
d2.count()
In [37]:
# Number of rows in the discipline-'A' subset.
len(d2)
Out[37]: 181
In [39]:
# (rows, columns) of the discipline-'A' subset; the column count matches the full frame.
d2.shape
Out[39]: (181, 7)
In [44]:
# NOTE(review): d3 is not created in any visible cell (execution counts jump
# from In [39] to In [44]). Presumably it is another way of selecting
# discipline 'A', since its length equals len(d2) — confirm and restore the
# defining cell, otherwise this raises NameError on a fresh kernel.
len(d3)
Out[44]: 181
In [45]:
# Boolean filtering: keep only the rows with more than 30 years of service.
data.loc[data['yrs.service'].gt(30)]
70 rows × 7 columns
In [48]:
# Boolean mask: discipline 'A' AND more than 30 years of service.
# Each comparison is parenthesised because & binds tighter than == and >.
f=(data['discipline']=='A') & (data['yrs.service']>30)
# End the cell on the mask itself so it is displayed; a bare assignment
# produces no cell output.
f
Out[48]: 0 False
1 False
2 False
3 False
4 False
...
392 False
393 False
394 False
395 False
396 False
Length: 397, dtype: bool
In [49]:
# Apply the boolean mask f: keep only the rows where it is True.
data[f]
0 18 Prof
1 16 Prof
yrs.service rank
2 3 AsstProf
3 39 Prof
4 41 Prof
In [52]:
# Keep only the rows of d4 whose rank is 'Prof'.
# NOTE(review): d4 is not created in any visible cell — presumably
# data[['yrs.service','rank']] judging by the displayed columns; confirm,
# otherwise this raises NameError on a fresh kernel.
d4n=d4[d4['rank']=='Prof']
In [53]:
# Number of non-null values per column among the 'Prof' rows.
d4n.count()
In [54]:
# Average years of service across the 'Prof' rows.
d4n['yrs.service'].mean()
Out[54]: 22.81578947368421
In [55]:
# Display the filtered frame; the row labels keep their original values (gaps where rows were dropped).
d4n
0 18 Prof
1 16 Prof
3 39 Prof
4 41 Prof
6 23 Prof
391 19 Prof
392 30 Prof
393 19 Prof
394 25 Prof
395 15 Prof
In [56]:
# Row labels that survived the 'Prof' filter; they are not renumbered.
d4n.index
In [57]:
# Show d4n with the 'rank' column promoted to the row index.
# The result is not assigned, so d4n itself keeps its original index.
d4n.set_index('rank')
Out[57]: yrs.service
rank yrs.service
rank
Prof 18
Prof 16
Prof 39
Prof 41
Prof 23
... ...
Prof 19
Prof 30
Prof 19
Prof 25
Prof 15
In [59]:
# Display the full frame again (pandas elides the middle rows of long frames).
data
In [64]:
# Show the frame without the 'rank' column. drop() returns a new frame;
# data itself is unchanged because the result is not assigned.
data.drop(columns='rank')
0 B 19 18 Male 139750
1 B 20 16 Male 173200
2 B 4 3 Male 79750
3 B 45 39 Male 115000
4 B 40 41 Male 141500
In [65]:
# Add a 'New rank' column to data, taken from d4's 'rank' column
# (Series assignment matches values on the row index).
# NOTE(review): this mutates data in place, and d4 is not created in any
# visible cell — confirm it exists before this cell runs.
data['New rank']=d4['rank']
In [66]:
# Display the frame to confirm the 'New rank' column was appended.
data
In [69]:
# Add column 'A' holding a running counter 0, 1, 2, ... down the rows.
# The length comes from the frame itself instead of a hard-coded 397:
# assigning a sequence whose length differs from the row count raises
# ValueError, so the literal would break as soon as the CSV changes size.
data['A']=range(len(data))
In [70]:
# Display the frame to confirm the counter column 'A' was appended.
data
In [ ]: