matplotlib
matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [3]: plt.plot(list1)
#NOTE(review): list1 is not defined in any cell visible here - confirm an earlier cell creates it, otherwise this fails on a fresh kernel
In [5]: cars.head()
Out[5]: Model Mileage Cylinders Displacement Horsepower Weight Acceleration Year Origin
In [6]: cars['Mileage'].plot.hist()
#highest number of cars have mileage 12-15
#lowest number of cars have mileage 38-43 (NOTE(review): the original comment said "highest" for both ranges - confirm against the histogram)
In [7]: cars['Mileage'].plot.hist(bins=5)
#same Mileage histogram redrawn with 5 bins for a coarser view
In [9]: cars['Horsepower'].plot.box()
#Q2 (the median) is around 90 - 50% of the data is less than 90
#Q1 is around 75 - 25% of the data is less than 75
#there are outliers above 200
In [10]: cars['Mileage'].plot.box()
#same box-plot view for Mileage
In [12]: cars['Year'].value_counts().sort_index().plot.bar()
#number of cars per model Year; value_counts() alone orders the bars by frequency,
#so sort_index() is applied to put the years in chronological order on the x-axis
In [13]: cars.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 394 entries, 0 to 393
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Model 394 non-null object
1 Mileage 394 non-null float64
2 Cylinders 394 non-null int64
3 Displacement 394 non-null float64
4 Horsepower 392 non-null float64
5 Weight 394 non-null int64
6 Acceleration 394 non-null float64
7 Year 394 non-null int64
8 Origin 394 non-null object
dtypes: float64(4), int64(3), object(2)
memory usage: 27.8+ KB
In [23]: sns.countplot(y='Origin',data=cars)
#univariate, categorical: number of cars per Origin (y= puts the categories on the y-axis, giving horizontal bars)
In [55]: sns.boxplot(y='Mileage',data=cars)
#univariate, numerical: box plot of Mileage over all cars
#NOTE(review): execution count 55 sits between cells 23 and 25, which suggests this cell was run out of order - confirm the notebook still passes Restart & Run All
In [25]: #bivariate analysis
# categorical vs. numerical
# distribution of Mileage for each country of Origin
sns.boxplot(x='Origin',y='Mileage',data=cars)
#observation: median mileage of Japan > Europe > US
In [29]: plt.scatter(cars['Mileage'],cars['Horsepower'])
#numerical vs. numerical: plt.scatter does not label the axes on its own,
#so name them explicitly (x = Mileage, y = Horsepower) to let the figure stand alone
plt.xlabel('Mileage')
plt.ylabel('Horsepower')
In [31]: sns.scatterplot(x='Weight',y='Horsepower',data=cars)
# as one variable increases, the other decreases (negative correlation)
# NOTE(review): confirm this against the plot - Weight and Horsepower would normally rise together, so this remark may have been meant for a different pair of variables (e.g. Mileage vs Horsepower)
In [32]: titanic=pd.read_excel("titanic.xlsx")
#reads the Titanic workbook via a relative path, so titanic.xlsx must be in the kernel's working directory
In [33]: titanic.head()
Out[33]: PassengerId Gender Gender_Category Age Fare Class Embarked_Town Accompany_Status Status
In [36]: titanic['Status'].value_counts(normalize=True).plot.bar()
#normalize=True plots the share of each Status value instead of raw counts
#about 60% of passengers died
In [37]: titanic['Age'].plot.hist()
#distribution of passenger Age
In [45]: cars.head()
Out[45]: Model Mileage Cylinders Displacement Horsepower Weight Acceleration Year Origin
In [47]: cars_num=cars[['Mileage','Displacement','Horsepower','Weight','Acceleration']]
#keeps five numeric measurement columns; Cylinders and Year (int64 in the info() output) are left out of this selection
In [48]: cars_num.corr()
#pairwise correlation matrix; corr() excludes missing values, which matters here because info() reports Horsepower with 392 non-null values out of 394
In [52]: sns.heatmap(cars_num.corr(),annot=True)
#same correlation matrix drawn as a heatmap; annot=True writes each coefficient inside its cell
In [ ]: