# Code - Chapter 3
import calendar
import numpy as np
import networkx as nx
import pandas as pd
from pandas.plotting import scatter_matrix, parallel_coordinates
import seaborn as sns
from sklearn import preprocessing
import matplotlib.pylab as plt
####### 3.3 Basic Charts: Bar Charts, Line Graphs, and Scatter Plots ########
#############################################################################
## Load the Amtrak data and convert them to be suitable for time series analysis
# The path is a raw string kept on a single line: in a regular string the
# backslashes form invalid escape sequences, and a backslash directly before a
# line break is a line continuation that silently removes the separator in
# front of "Amtrak.csv".
# `squeeze=True` is not passed: it was removed from `read_csv` in pandas 2.0 and
# had no effect on a frame with more than one column (Month and Ridership are
# both read below).
Amtrak_df = pd.read_csv(
    r"D:\__UNI_2022_HP\_CC 442_Mineria de Datos\_Semana 1_2_\Amtrak.csv"
)
# Parse the Month column (day/month/year text) into datetimes.
Amtrak_df["Date"] = pd.to_datetime(Amtrak_df.Month, format="%d/%m/%Y")
# Ridership values indexed by the parsed dates.
ridership_ts = pd.Series(Amtrak_df.Ridership.values, index=Amtrak_df.Date)
## Boston housing data
housing_df = pd.read_csv(r"C:\Python\BostonHousing.csv")
# Rename the column so its name contains neither a dot nor a space.
housing_df = housing_df.rename(columns={"CAT. MEDV": "CAT_MEDV"})
################# Rescaling
# Keep plain numbers (no scientific notation) on the log axes
plt.rcParams["axes.formatter.min_exponent"] = 4
## CRIM vs MEDV scatter plot, drawn twice: linear axes (left), log-log axes (right)
fig, axes = plt.subplots(1, 2, figsize=(7, 4))
linear_panel, log_panel = axes
# left panel: regular scale
housing_df.plot(kind="scatter", x="CRIM", y="MEDV", ax=linear_panel)
# right panel: both axes on a log scale
ax = housing_df.plot(
    kind="scatter", x="CRIM", y="MEDV", logx=True, logy=True, ax=log_panel
)
# explicit y ticks, labelled with the same values
y_ticks = [5, 10, 20, 50]
ax.set_yticks(y_ticks)
ax.set_yticklabels(y_ticks)
plt.tight_layout()
plt.show()
## CRIM boxplots grouped by CAT_MEDV: regular scale (left) and log scale (right)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7, 3))
# Same boxplot on each panel; only the second one gets a log y axis.
for panel, log_scale in zip(axes, (False, True)):
    ax = housing_df.boxplot(column="CRIM", by="CAT_MEDV", ax=panel)
    ax.set_xlabel("CAT.MEDV")
    ax.set_ylabel("CRIM")
    if log_scale:
        ax.set_yscale("log")
# blank out the figure-level title
fig.suptitle("")
plt.tight_layout()
plt.show()