0% found this document useful (0 votes)
5 views · 7 pages

Exercises Part2

The document contains multiple Python scripts that perform data analysis and visualization using libraries such as pandas, matplotlib, seaborn, and statsmodels. Key analyses include time series decomposition of airline passenger data, interactive mapping of city populations, geographical plotting of world and Indian populations, and exploratory data analysis on wine quality and iris datasets. Each script demonstrates different techniques for data visualization, statistical analysis, and feature correlation.

Uploaded by

cse stud
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views · 7 pages

Exercises Part2

The document contains multiple Python scripts that perform data analysis and visualization using libraries such as pandas, matplotlib, seaborn, and statsmodels. Key analyses include time series decomposition of airline passenger data, interactive mapping of city populations, geographical plotting of world and Indian populations, and exploratory data analysis on wine quality and iris datasets. Each script demonstrates different techniques for data visualization, statistical analysis, and feature correlation.

Uploaded by

cse stud
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

EXE 5

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose, STL
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Load the airline-passengers CSV straight from GitHub.
# The `Month` column is parsed as dates and becomes the index, so the
# frame can be used directly as a time series.
url = (
    "https://raw.githubusercontent.com/jbrownlee/Datasets/master/"
    "airline-passengers.csv"
)
df = pd.read_csv(url, parse_dates=['Month'], index_col='Month')
print(df.head())

# Line chart of the raw passenger counts against the date index.
passenger_series = df['Passengers']
plt.figure(figsize=(12, 6))
plt.plot(passenger_series.index, passenger_series, label="Passenger Count")
plt.xlabel('Year')
plt.ylabel('Number of Passengers')
plt.title('Airline Passengers Over Time')
plt.legend()
plt.show()

# Classical multiplicative decomposition with a 12-observation period.
result = seasonal_decompose(df['Passengers'], model='multiplicative', period=12)

# One stacked panel per component, all sharing the same time axis.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(12, 8), sharex=True)
component_panels = (
    (ax1, result.trend, "Trend Component"),
    (ax2, result.seasonal, "Seasonal Component"),
    (ax3, result.resid, "Residual Component"),
    (ax4, result.observed, "Observed"),
)
for panel, component, heading in component_panels:
    panel.plot(component)
    panel.set_title(heading)
plt.tight_layout()
plt.show()

# 12-observation rolling window, reused for both the mean and the
# standard deviation.
yearly_window = df['Passengers'].rolling(window=12)
rolling_mean = yearly_window.mean()
rolling_std = yearly_window.std()

# Overlay the original series with its rolling statistics.
plt.figure(figsize=(12, 6))
overlay_curves = (
    (df['Passengers'], 'Original'),
    (rolling_mean, 'Rolling Mean (12 months)'),
    (rolling_std, 'Rolling Std (12 months)'),
)
for curve, curve_label in overlay_curves:
    plt.plot(curve, label=curve_label)
plt.title('Rolling Mean & Standard Deviation')
plt.legend()
plt.show()

# STL decomposition with a seasonal smoother length of 13.
stl = STL(df['Passengers'], seasonal=13)
result_stl = stl.fit()

# Draw the three fitted components on a single set of axes.
plt.figure(figsize=(12, 6))
stl_components = (
    (result_stl.trend, "Trend"),
    (result_stl.seasonal, 'Seasonal'),
    (result_stl.resid, 'Residual'),
)
for component, component_label in stl_components:
    plt.plot(component, label=component_label)
plt.title('Seasonal-Trend decomposition using LOESS (STL)')
plt.legend()
plt.show()

# Autocorrelation and partial autocorrelation up to lag 40, with 95%
# confidence bands (alpha=0.05).
#
# plot_acf / plot_pacf create their own figure when no axes are supplied,
# so calling plt.figure() first leaves an empty 12x6 window and draws the
# correlogram in a second, default-sized one. Creating the axes here and
# passing them via `ax=` keeps each plot in the intended figure.
acf_fig, acf_ax = plt.subplots(figsize=(12, 6))
plot_acf(df['Passengers'], lags=40, alpha=0.05, ax=acf_ax)
acf_ax.set_title('Autocorrelation Function (ACF)')
plt.show()

pacf_fig, pacf_ax = plt.subplots(figsize=(12, 6))
plot_pacf(df['Passengers'], lags=40, alpha=0.05, ax=pacf_ax)
pacf_ax.set_title('Partial Autocorrelation Function (PACF)')
plt.show()
EXE 6
import folium
import pandas as pd

# Sample cities with their population and coordinates.
data = {
    'City': ['New York', 'San Francisco', 'Los Angeles'],
    'Population': [8175133, 884363, 3906772],
    'Latitude': [40.7128, 37.7749, 34.0522],
    'Longitude': [-74.0060, -122.4194, -118.2437],
}

df = pd.DataFrame(data)

# The initial view is centred on the San Francisco coordinates above.
map_center = [37.7749, -122.4194]
map_obj = folium.Map(location=map_center, zoom_start=5)

# One marker per city. The marker call must be inside the loop body, and
# the popup text is rendered as HTML, so the line break is "<br>" rather
# than "\n" (which a browser collapses to a space).
for index, row in df.iterrows():
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=f"City: {row['City']}<br>Population: {row['Population']}",
        tooltip=row["City"],
    ).add_to(map_obj)

# Write the map to a standalone HTML file.
map_obj.save('interactive_map.html')
EXE 7
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import pandas as pd

# Country outlines from the Natural Earth low-resolution dataset.
# NOTE(review): `gpd.datasets` was deprecated in GeoPandas 0.14 and removed
# in 1.0. On newer versions, download the Natural Earth countries file and
# pass its path to gpd.read_file instead - confirm the installed version.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Country names must match the dataset's `name` column exactly, otherwise
# the left merge below leaves that country's population as NaN. The dataset
# spells the USA as 'United States of America'.
world_data = {
    'Country': ['United States of America', 'China', 'India'],
    'Population': [331002651, 1444216107, 1380004385]
}
world_df = pd.DataFrame(world_data)

india_data = {
    'State': ['Maharashtra', 'Uttar Pradesh', 'Tamil Nadu'],
    'Population': [123144223, 223897418, 77841267]
}
india_df = pd.DataFrame(india_data)

# One geometry per state, in the same order as india_data['State'], so each
# population lands on its own shape. With a single polygon, index alignment
# kept only the first state's value.
# NOTE(review): these are rough lon/lat rectangles used as placeholders, not
# real state boundaries - replace with an actual boundary file.
state_boxes = [
    Polygon([(72.6, 15.6), (80.9, 15.6), (80.9, 22.0), (72.6, 22.0)]),  # Maharashtra
    Polygon([(77.1, 23.9), (84.6, 23.9), (84.6, 30.4), (77.1, 30.4)]),  # Uttar Pradesh
    Polygon([(76.2, 8.1), (80.3, 8.1), (80.3, 13.6), (76.2, 13.6)]),  # Tamil Nadu
]
india_geometry = gpd.GeoSeries(state_boxes, crs="EPSG:4326")
india = gpd.GeoDataFrame(india_df, geometry=india_geometry)

# Attach populations to the country shapes; a left merge keeps every country.
world = world.merge(world_df, left_on='name', right_on='Country', how='left')

fig, ax = plt.subplots(1, 2, figsize=(15, 7))

# Countries without a population value are drawn in grey instead of being
# left out of the map.
world.plot(column='Population', cmap='OrRd', ax=ax[0], legend=True,
           legend_kwds={"label": "Population by Country"},
           missing_kwds={"color": "lightgrey"})
ax[0].set_title('World Population')

india.plot(column='Population', cmap='OrRd', ax=ax[1], legend=True,
           legend_kwds={"label": "Population by State"})
ax[1].set_title('India Population')
plt.show()
EXE 8
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the wine-quality CSV from the working directory.
data = pd.read_csv("winequalityN.csv")

# Preview the first rows, then the descriptive statistics.
print("First few rows of the dataset:", data.head(), sep="\n")
print("Summary statistics of the dataset:", data.describe(), sep="\n")

# Grid of histograms, one per column that DataFrame.hist can plot.
data.hist(figsize=(12, 8), bins=30)
plt.suptitle("Histograms of Features", y=1.02)
plt.show()

# Pairwise correlations between the numeric features.
# Since pandas 2.0, DataFrame.corr() raises on non-numeric columns instead
# of silently dropping them, so the frame is restricted to numeric columns
# first (this also works on older pandas).
correlation_matrix = data.select_dtypes(include="number").corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap")
plt.show()

# Box plots of the features side by side; the tick labels are rotated so
# the feature names do not overlap.
plt.figure(figsize=(12, 8))
box_axes = sns.boxplot(data=data, width=0.5)
plt.xticks(rotation=45)
box_axes.set_title("Box Plots of Features")
plt.show()

# Histogram of the alcohol column with a KDE curve overlaid.
plt.figure(figsize=(12, 6))
alcohol_axes = sns.histplot(data["alcohol"], kde=True)
alcohol_axes.set_title("Alcohol Content Distribution")
plt.show()
EXE 9
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Build one DataFrame holding the iris feature columns plus a `target`
# column (np.c_ stacks the feature matrix and the target vector
# column-wise).
iris = load_iris()
data = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                    columns=iris['feature_names'] + ['target'])

print("First few rows of the dataset:")
print(data.head())

print("\nData Information:")
# DataFrame.info() prints its report itself and returns None; wrapping it
# in print() would emit a stray "None" line after the report.
data.info()

print("\nSummary Statistics:")
print(data.describe())

sns.set(style="whitegrid")

# Side-by-side histograms (with KDE) of the two sepal measurements.
plt.figure(figsize=(12, 6))
sepal_panels = (
    ('sepal length (cm)', "Distribution of Sepal Length"),
    ('sepal width (cm)', "Distribution of Sepal Width"),
)
for slot, (column, heading) in enumerate(sepal_panels, start=1):
    plt.subplot(1, 2, slot)
    sns.histplot(data[column], kde=True)
    plt.title(heading)
plt.show()

# Scatter matrix of all features, coloured and marked by target value.
class_markers = ["o", "s", "D"]
sns.pairplot(data, hue='target', markers=class_markers)
plt.show()

# Petal length grouped by target value.
plt.figure(figsize=(10, 6))
petal_axes = sns.boxplot(x='target', y='petal length (cm)', data=data)
petal_axes.set_title("Petal Length Boxplot by Species")
plt.show()

# Correlations between every pair of columns, including the target.
correlation_matrix = data.corr()
plt.figure(figsize=(8, 6))
heatmap_axes = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
heatmap_axes.set_title("Correlation Heatmap")
plt.show()

# Print the written findings, one bullet per line.
# Each bullet is a single string; long ones use implicit concatenation of
# adjacent literals inside the parentheses, because a plain quoted string
# cannot span physical lines.
print("\nAnalysis Report:")
print(
    "- The dataset contains three species of iris flowers: setosa, "
    "versicolor, and virginica."
)
print(
    "- The features vary in their distributions, with sepal length and "
    "sepal width showing different patterns."
)
print(
    "- The pairplot shows how the features are correlated and how they can "
    "be used to distinguish between species."
)
print(
    "- The petal length is a strong predictor for species differentiation, "
    "with setosa having the shortest petals and virginica the longest."
)
print(
    "- The correlation heatmap confirms that petal length is highly "
    "correlated with the target variable, making it an important feature "
    "for classification."
)

You might also like