# Exercises Part 2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose, STL
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Time-series exploration of the airline-passengers CSV.
# The address points straight at raw.githubusercontent.com; the previous value
# was wrapped in a third-party proxy prefix and split across two lines, which
# made the assignment a syntax error.
url = (
    "https://raw.githubusercontent.com/jbrownlee/Datasets/master/"
    "airline-passengers.csv"
)

# 'Month' is parsed as datetime and used as the index so plots get a time axis.
df = pd.read_csv(url, parse_dates=['Month'], index_col='Month')
print(df.head())

# Raw series.
plt.figure(figsize=(12, 6))
plt.plot(df.index, df['Passengers'], label="Passenger Count")
plt.title('Airline Passengers Over Time')
plt.xlabel('Year')
plt.ylabel('Number of Passengers')
plt.legend()
plt.show()

# 12-observation rolling mean and standard deviation; the first 11 values of
# each are NaN because the window is not yet full.
rolling_mean = df['Passengers'].rolling(window=12).mean()
rolling_std = df['Passengers'].rolling(window=12).std()

plt.figure(figsize=(12, 6))
plt.plot(df['Passengers'], label='Original')
plt.plot(rolling_mean, label='Rolling Mean (12 months)')
plt.plot(rolling_std, label='Rolling Std (12 months)')
plt.title('Rolling Mean & Standard Deviation')
plt.legend()
plt.show()

# plot_acf / plot_pacf open their own figure unless an Axes is supplied, so a
# preceding plt.figure(figsize=...) only produced an empty window and the
# requested size was ignored. Passing `ax` keeps everything on one figure.
acf_fig, acf_ax = plt.subplots(figsize=(12, 6))
plot_acf(df['Passengers'], lags=40, alpha=0.05, ax=acf_ax)
acf_ax.set_title('Autocorrelation Function (ACF)')
plt.show()

pacf_fig, pacf_ax = plt.subplots(figsize=(12, 6))
plot_pacf(df['Passengers'], lags=40, alpha=0.05, ax=pacf_ax)
pacf_ax.set_title('Partial Autocorrelation Function (PACF)')
plt.show()
# Exercise 6
import folium
import pandas as pd
# Sample city table: name, population and coordinates for each marker.
data = {
    'City': ['New York', 'San Francisco', 'Los Angeles'],
    'Population': [8175133, 884363, 3906772],
    'Latitude': [40.7128, 37.7749, 34.0522],
    'Longitude': [-74.0060, -122.4194, -118.2437],
}
df = pd.DataFrame(data)

# Initial view uses the San Francisco coordinates from the table above.
map_center = [37.7749, -122.4194]
map_obj = folium.Map(location=map_center, zoom_start=5)

# Place one marker per city. Previously the table was built but never used,
# so the saved map contained no data at all.
for city in df.itertuples(index=False):
    folium.Marker(
        # Cast to plain Python numbers so the values serialise cleanly.
        location=[float(city.Latitude), float(city.Longitude)],
        popup=f"{city.City}: {int(city.Population):,}",
        tooltip=city.City,
    ).add_to(map_obj)

# Write the map as a standalone HTML file in the working directory.
map_obj.save('interactive_map.html')
# Exercise 7
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import pandas as pd
# Tabular population figures, kept as plain pandas frames.
world_data = dict(
    Country=['USA', 'China', 'India'],
    Population=[331002651, 1444216107, 1380004385],
)
india_data = dict(
    State=['Maharashtra', 'Uttar Pradesh', 'Tamil Nadu'],
    Population=[123144223, 223897418, 77841267],
)
world_df = pd.DataFrame(world_data)
india_df = pd.DataFrame(india_data)

# Country outlines from the 'naturalearth_lowres' sample dataset.
# NOTE(review): gpd.datasets appears to have been removed in GeoPandas 1.0 —
# confirm the installed version still provides it.
world = gpd.read_file(
    gpd.datasets.get_path('naturalearth_lowres')
)

# A single rectangle (x 75-80, y 20-25) tagged as EPSG:4326, wrapped first in
# a GeoSeries and then in a one-row GeoDataFrame named `india`.
india_outline = Polygon([(75, 20), (80, 20), (80, 25), (75, 25)])
india_geometry = gpd.GeoSeries([india_outline], crs="EPSG:4326")
india = gpd.GeoDataFrame(geometry=india_geometry)
# Exploratory plots for the wine-quality CSV in the working directory.
data = pd.read_csv("winequalityN.csv")
print("First few rows of the dataset:")
print(data.head())

# Correlate numeric columns only. DataFrame.corr() raises on pandas >= 2.0 when
# the frame holds non-numeric columns (this file presumably carries a text
# wine-type label — confirm against the CSV); select_dtypes works on every
# pandas version and is a no-op when all columns are already numeric.
correlation_matrix = data.select_dtypes(include="number").corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap")
plt.show()

# One box per column of the wide-form frame; labels rotated to avoid overlap.
plt.figure(figsize=(12, 8))
sns.boxplot(data=data, width=0.5)
plt.xticks(rotation=45)
plt.title("Box Plots of Features")
plt.show()

# Histogram with a KDE overlay for the 'alcohol' column.
plt.figure(figsize=(12, 6))
sns.histplot(data["alcohol"], kde=True)
plt.title("Alcohol Content Distribution")
plt.show()
# Exercise 9
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
# Exploratory analysis of the scikit-learn iris dataset.
iris = load_iris()

# Stack the feature matrix and the target vector side by side so the class
# label travels with the measurements in one DataFrame.
data = pd.DataFrame(
    data=np.c_[iris['data'], iris['target']],
    columns=iris['feature_names'] + ['target'],
)

print("\nData Information:")
# DataFrame.info() writes its report itself and returns None; wrapping it in
# print() emitted a stray "None" line.
data.info()
print("\nSummary Statistics:")
print(data.describe())

sns.set(style="whitegrid")

# Side-by-side distributions of the two sepal measurements.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
sns.histplot(data['sepal length (cm)'], kde=True)
plt.title("Distribution of Sepal Length")
plt.subplot(1, 2, 2)
sns.histplot(data['sepal width (cm)'], kde=True)
plt.title("Distribution of Sepal Width")
plt.show()

# The report below refers to a pairplot, which was never drawn; produce it so
# the printed analysis matches the figures shown.
sns.pairplot(data, hue='target')
plt.show()

# Petal length grouped by class label.
plt.figure(figsize=(10, 6))
sns.boxplot(x='target', y='petal length (cm)', data=data)
plt.title("Petal Length Boxplot by Species")
plt.show()

# Pairwise correlations, including the numeric target column.
correlation_matrix = data.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()

# Each message was previously split across two physical lines, leaving the
# string literal unterminated; adjacent literals are concatenated instead.
print("\nAnalysis Report:")
print(
    "- The dataset contains three species of iris flowers: setosa, "
    "versicolor, and virginica."
)
print(
    "- The features vary in their distributions, with sepal length and "
    "sepal width showing different patterns."
)
print(
    "- The pairplot shows how the features are correlated and how they can "
    "be used to distinguish between species."
)
print(
    "- The petal length is a strong predictor for species differentiation, "
    "with setosa having the shortest petals and virginica the longest."
)
print(
    "- The correlation heatmap confirms that petal length is highly "
    "correlated with the target variable, making it an important feature "
    "for classification."
)