Python pandas library
Python pandas library
import numpy as np
import pandas as pd
1. Read CSV
In [2]: # Reading data from a CSV file
In [ ]:
Out[3]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
Out[5]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
888 889 0 3 female NaN 1 2 W./C. 6607 23.45 NaN S
883 884 0 2 male 28.0 0 0 C.A./SOTON 34068 10.5000 NaN
884 885 0 3 male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN
In [7]: data
Out[7]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embark
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN
... ... ... ... ... ... ... ... ... ... ...
888 889 0 3 female NaN 1 2 W./C. 6607 23.4500 NaN
In [ ]:
In [9]: data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Sex 891 non-null object
4 Age 714 non-null float64
5 SibSp 891 non-null int64
6 Parch 891 non-null int64
7 Ticket 891 non-null object
8 Fare 891 non-null float64
9 Cabin 204 non-null object
10 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(4)
memory usage: 76.7+ KB
In [ ]:
data.head()
Out[10]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
Out[11]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
In [12]: data
Out[12]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embark
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN
... ... ... ... ... ... ... ... ... ... ...
888 889 0 3 female NaN 1 2 W./C. 6607 23.4500 NaN
In [14]: data.head()
Out[14]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
In [ ]:
data.set_index('PassengerId')
Out[15]: Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
PassengerId
1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
... ... ... ... ... ... ... ... ... ... ...
889 0 3 female NaN 1 2 W./C. 6607 23.4500 NaN S
In [16]: data.head()
Out[16]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
In [ ]:
Out[17]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embark
309 310 1 1 female 30.0 0 0 PC 17485 56.9292 E36
307 308 1 1 female 17.0 1 0 PC 17758 108.9000 C65
... ... ... ... ... ... ... ... ... ... ...
382 383 0 3 male 32.0 0 0 STON/O 2. 3101293 7.9250 NaN
511 512 0 3 male NaN 0 0 SOTON/OQ 3101316 8.0500 NaN
... ... ... ... ... ... ... ... ... ... ...
In [19]: data.head()
Out[19]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 male 22.0 1 0 A/5 21171 7.2500 NaN S
2 3 1 3 female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
In [ ]:
data.shape
Out[20]: (891, 11)
data.columns
data.index
In [ ]:
data.count()
Out[23]:
PassengerId 891
Survived 891
Pclass 891
Sex 891
Age 714
SibSp 891
Parch 891
Ticket 891
Fare 891
Cabin 204
Embarked 889
dtype: int64
data.isna()
Out[24]: PassengerId Survived Pclass Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 False False False False False False False False False True False
1 False False False False False False False False False False False
2 False False False False False False False False False True False
3 False False False False False False False False False False False
4 False False False False False False False False False True False
... ... ... ... ... ... ... ... ... ... ... ...
886 False False False False False False False False False True False
887 False False False False False False False False False False False
888 False False False False True False False False False True False
889 False False False False False False False False False False False
890 False False False False False False False False False True False
Out[25]:
PassengerId 0
Survived 0
Pclass 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
In [ ]:
Out[26]:
PassengerId int64
Survived int64
Pclass int64
Sex object
Age float64
SibSp int64
Parch int64
Ticket object
Fare float64
Cabin object
Embarked object
dtype: object
In [ ]: