Daa External

The document outlines various data analysis and visualization techniques using Python's Pandas and Matplotlib libraries, focusing on earthquake data and stock prices. It includes calculations of earthquake magnitudes, percentages of tsunamis, and visualizations such as box plots, histograms, and line plots for stock price analysis. Additionally, it demonstrates how to create heat maps and support/resistance levels for stock prices, emphasizing the impact of after-hours trading.


EXPERIMENT 1: EARTHQUAKE DATA ANALYSIS USING PANDAS

import pandas as pd

# Load the earthquake dataset
df = pd.read_csv(r"DATA ANALYTICS\exp1.csv")

# 95th percentile of magnitude for earthquakes in Japan measured with the mb magnitude type
japan_mb_earthquakes = df[(df["parsed_place"].str.contains("Japan", na=False)) & (df["magType"] == "mb")]
percentile_95 = japan_mb_earthquakes["mag"].quantile(0.95)
print(f"95th percentile of earthquake magnitude in Japan (mb type): {percentile_95}")

# Percentage of earthquakes in Indonesia that were accompanied by tsunamis
indonesia_earthquakes = df[df["parsed_place"].str.contains("Indonesia", na=False)]
total_indonesia = len(indonesia_earthquakes)
tsunami_indonesia = len(indonesia_earthquakes[indonesia_earthquakes["tsunami"] == 1])
tsunami_percentage = (tsunami_indonesia / total_indonesia) * 100 if total_indonesia > 0 else 0
print(f"Percentage of earthquakes in Indonesia that caused tsunamis: {tsunami_percentage:.2f}%")

# Summary statistics for earthquakes in Nevada
nevada_earthquakes = df[df["parsed_place"].str.contains("Nevada", na=False)]
print("Summary statistics for earthquakes in Nevada:")
print(nevada_earthquakes.describe())

# Flag earthquakes that occurred in Ring of Fire locations
ring_of_fire_locations = [
    "Alaska", "Antarctic", "Bolivia", "California", "Canada", "Chile", "Costa Rica",
    "Ecuador", "Fiji", "Guatemala", "Indonesia", "Japan", "Kermadec Islands", "Mexico",
    "New Zealand", "Peru", "Philippines", "Russia", "Taiwan", "Tonga", "Washington"
]
df["parsed_place"] = df["parsed_place"].fillna("")  # handle NaN values
df["ring_of_fire"] = df["parsed_place"].apply(
    lambda x: "Yes" if any(loc in x for loc in ring_of_fire_locations) else "No"
)
ring_of_fire_count = df["ring_of_fire"].value_counts()
print(f"Number of earthquakes in Ring of Fire locations: {ring_of_fire_count.get('Yes', 0)}")
print(f"Number of earthquakes outside Ring of Fire locations: {ring_of_fire_count.get('No', 0)}")
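As a side note, the same Ring of Fire flag can be computed without a Python-level loop. The sketch below is an alternative, not the required solution: it joins the ring_of_fire_locations list above into a single regular expression and uses the vectorized str.contains.

import re
# Build one regex pattern that matches any Ring of Fire location name;
# re.escape guards names that contain spaces or other special characters.
pattern = "|".join(re.escape(loc) for loc in ring_of_fire_locations)
df["ring_of_fire"] = df["parsed_place"].str.contains(pattern, na=False).map({True: "Yes", False: "No"})
print(df["ring_of_fire"].value_counts())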
EXPERIMENT 2: DATA FRAMES

import pandas as pd

# Load each stock's CSV and tag it with its ticker symbol
aapl = pd.read_csv(r"DATA ANALYTICS\AAPL(Exp2).csv")
amzn = pd.read_csv(r"DATA ANALYTICS\amzn(Exp2).csv")
fb = pd.read_csv(r"DATA ANALYTICS\fb1(Exp2).csv")
goog = pd.read_csv(r"DATA ANALYTICS\goog(Exp2).csv")
nflx = pd.read_csv(r"DATA ANALYTICS\nflx(Exp2).csv")
aapl['ticker'] = 'AAPL'
amzn['ticker'] = 'AMZN'
fb['ticker'] = 'FB'
goog['ticker'] = 'GOOG'
nflx['ticker'] = 'NFLX'

# Stack the five DataFrames into one FAANG DataFrame and save it
faang = pd.concat([aapl, amzn, fb, goog, nflx], ignore_index=True)
faang.to_csv("faang.csv", index=False)
print(faang.head())
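A quick sanity check on the saved file confirms that all five tickers made it in. Only the ticker column is guaranteed below, since the remaining column names come from the source CSVs:

faang = pd.read_csv("faang.csv")
print(faang['ticker'].value_counts())  # row count per stock
print(faang.shape)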
Create the following visualizations using what you have learned up to this point in the book.

Plot the rolling 20-day minimum of the Facebook closing price using Pandas.

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

# Generate 50 days of synthetic Facebook OHLC prices
np.random.seed(42)
dates = [datetime(2023, 1, 1) + timedelta(days=i) for i in range(50)]
opens = np.random.uniform(150, 300, 50)
closes = opens + np.random.uniform(-10, 10, 50)
highs = closes + np.random.uniform(0, 15, 50)
lows = closes - np.random.uniform(0, 10, 50)
facebook_data = pd.DataFrame({
    'Date': dates,
    'Open': opens,
    'Close': closes,
    'High': highs,
    'Low': lows
})

# Rolling 20-day minimum of the closing price
facebook_data['Rolling_20_day_min'] = facebook_data['Close'].rolling(window=20).min()

plt.figure(figsize=(10, 6))
plt.plot(facebook_data['Date'], facebook_data['Rolling_20_day_min'],
         label='20-Day Rolling Minimum', color='red')
plt.title('Rolling 20-Day Minimum of Facebook Closing Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()
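The block above runs on synthetic prices. A minimal sketch of the same plot against the faang.csv file built in Experiment 2 would look roughly like this, assuming that file has date and close columns (the actual names depend on the source CSVs):

fb = pd.read_csv("faang.csv", parse_dates=['date'])   # 'date'/'close' column names are assumptions
fb = fb[fb['ticker'] == 'FB'].set_index('date').sort_index()
fb['close'].rolling(window=20).min().plot(title='Rolling 20-Day Minimum of FB Closing Price')
plt.show()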
Create a histogram and KDE of the change from open to close in the price of Facebook stock.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(42)
dates = pd.date_range(start='2023-01-01', periods=50, freq='B')
open_prices = np.random.uniform(150, 400, size=50)
close_prices = open_prices + np.random.uniform(-10, 10, size=50)
facebook_data = pd.DataFrame({
    'Date': dates,
    'Open': open_prices,
    'Close': close_prices
})
facebook_data['Change'] = facebook_data['Close'] - facebook_data['Open']

plt.figure(figsize=(10, 6))
sns.histplot(facebook_data['Change'], kde=True, bins=15, color='blue', stat='density')
plt.title('Histogram and KDE of Change from Open to Close in Facebook Stock Price')
plt.xlabel('Change in Price (Close - Open)')
plt.ylabel('Density')
plt.show()
Using the earthquake data, create box plots for the magnitudes of each magType used in Indonesia.

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

np.random.seed(42)
mag_types = ['ML', 'MS', 'Mw', 'mb', 'mwc']
data = {
    'magType': np.random.choice(mag_types, size=200),
    'magnitude': np.concatenate([
        np.random.normal(loc=5.0, scale=0.5, size=50),
        np.random.normal(loc=5.5, scale=0.7, size=50),
        np.random.normal(loc=6.0, scale=0.6, size=50),
        np.random.normal(loc=4.8, scale=0.4, size=25),
        np.random.normal(loc=5.2, scale=0.6, size=25)
    ])
}
earthquake_data = pd.DataFrame(data)

plt.figure(figsize=(10, 6))
sns.boxplot(x='magType', y='magnitude', data=earthquake_data, palette='Set2')
plt.title('Box Plot of Earthquake Magnitudes by magType in Indonesia')
plt.xlabel('magType')
plt.ylabel('Magnitude')
plt.show()
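The box plot above uses simulated magnitudes. Against the real dataset from Experiment 1, which exposes the parsed_place, magType, and mag columns, a minimal version of the same plot would be:

df = pd.read_csv(r"DATA ANALYTICS\exp1.csv")
indonesia = df[df['parsed_place'].str.contains('Indonesia', na=False)]
sns.boxplot(x='magType', y='mag', data=indonesia)
plt.title('Box Plot of Earthquake Magnitudes by magType in Indonesia')
plt.show()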
Make a line plot of the difference between the weekly maximum high price and the weekly minimum low price for Facebook. This should be a single line.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
dates = pd.date_range(start="2023-01-01", end="2023-06-30", freq="B")
high_prices = np.random.uniform(150, 300, size=len(dates))
low_prices = high_prices - np.random.uniform(5, 20, size=len(dates))
facebook_data = pd.DataFrame({
    'Date': dates,
    'High': high_prices,
    'Low': low_prices
})
facebook_data['Date'] = pd.to_datetime(facebook_data['Date'])
facebook_data.set_index('Date', inplace=True)

# Weekly max high and min low, then the spread between them
weekly_data = facebook_data.resample('W').agg({'High': 'max', 'Low': 'min'})
weekly_data['Price_Difference'] = weekly_data['High'] - weekly_data['Low']

plt.figure(figsize=(10, 6))
plt.plot(weekly_data.index, weekly_data['Price_Difference'], marker='o', linestyle='-', color='b')
plt.title('Weekly Difference Between Max High and Min Low Prices for Facebook')
plt.xlabel('Date')
plt.ylabel('Price Difference (Max High - Min Low)')
plt.grid(True)
plt.show()

EXPERIMENT 4: MATPLOTLIB

Using matplotlib and pandas, create two subplots side-by-side showing the effect that after-hours trading has had on Facebook's stock prices:
1. The first subplot will contain a line plot of the daily difference between that day's opening price and the prior day's closing price (be sure to review the Working with time series data section).
2. The second subplot will be a bar plot showing the net effect this had monthly, using resample().
3. Bonus #1: Color the bars according to whether there are gains in the stock price (green) or drops in the stock price (red).
4. Bonus #2: Modify the x-axis of the bar plot to show the three-letter abbreviation for the month.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Synthetic daily open/close prices for the 2024 business days
date_range = pd.date_range(start='2024-01-01', end='2024-12-31', freq='B')
np.random.seed(42)
open_prices = 250 + np.cumsum(np.random.randn(len(date_range)) * 2)
close_prices = open_prices + np.random.randn(len(date_range)) * 1.5
df = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'Close': close_prices
})
df.set_index('Date', inplace=True)

# Daily difference between today's open and the prior day's close
df['Prev_Close'] = df['Close'].shift(1)
df['Daily_Diff'] = df['Open'] - df['Prev_Close']

fig, ax = plt.subplots(1, 2, figsize=(14, 6))
ax[0].plot(df.index, df['Daily_Diff'], label='Daily Diff', color='blue')
ax[0].set_title("Daily Difference between Opening and Previous Closing Prices")
ax[0].set_xlabel('Date')
ax[0].set_ylabel('Price Difference')
ax[0].grid(True)

# Monthly net effect, colored green for gains and red for drops
monthly_net_effect = df['Daily_Diff'].resample('M').sum()
colors = ['green' if x > 0 else 'red' for x in monthly_net_effect]
monthly_net_effect.plot(kind='bar', ax=ax[1], color=colors)
ax[1].set_title("Net Effect of After-Hours Trading (Monthly)")
ax[1].set_xlabel('Month')
ax[1].set_ylabel('Net Price Change')
ax[1].grid(True)
# pandas bar plots use categorical tick positions, so label the ticks with
# three-letter month abbreviations rather than a matplotlib DateFormatter
ax[1].set_xticklabels(monthly_net_effect.index.strftime('%b'))
ax[1].tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()
Using seaborn, create a heat map to visualize the correlation coefficients between earthquake magnitude and whether there was a tsunami, for earthquakes measured with the mb magnitude type.

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Synthetic magnitude and tsunami-indicator data
np.random.seed(42)
magnitude = np.random.uniform(5.0, 9.0, 100)
tsunami = np.random.choice([0, 1], size=100, p=[0.7, 0.3])
data = pd.DataFrame({
    'magnitude': magnitude,
    'tsunami': tsunami
})

# Correlation matrix and heat map
corr_matrix = data.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', vmin=-1, vmax=1)
plt.title("Correlation between Earthquake Magnitude and Tsunami")
plt.show()
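Since the heat map above is built from random numbers, here is a minimal sketch of the same correlation using the Experiment 1 dataset, filtered to the mb magnitude type (the mag, tsunami, and magType columns are the ones already used in Experiment 1):

df = pd.read_csv(r"DATA ANALYTICS\exp1.csv")
mb_quakes = df[df['magType'] == 'mb']
sns.heatmap(mb_quakes[['mag', 'tsunami']].corr(), annot=True, cmap='coolwarm', fmt='.2f', vmin=-1, vmax=1)
plt.title('Correlation between Magnitude and Tsunami (mb earthquakes)')
plt.show()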
Box Plot for Facebook Volume Traded and Closing Prices

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

np.random.seed(42)
volume = np.random.randint(1000, 5000, size=100)
closing_price = np.random.randint(250, 350, size=100)
data = pd.DataFrame({
    'Volume': volume,
    'Closing Price': closing_price
})

# Tukey fences: Q1 - 1.5*IQR and Q3 + 1.5*IQR mark the outlier cutoffs
Q1_volume = data['Volume'].quantile(0.25)
Q3_volume = data['Volume'].quantile(0.75)
IQR_volume = Q3_volume - Q1_volume
lower_bound_volume = Q1_volume - 1.5 * IQR_volume
upper_bound_volume = Q3_volume + 1.5 * IQR_volume

Q1_price = data['Closing Price'].quantile(0.25)
Q3_price = data['Closing Price'].quantile(0.75)
IQR_price = Q3_price - Q1_price
lower_bound_price = Q1_price - 1.5 * IQR_price
upper_bound_price = Q3_price + 1.5 * IQR_price

fig, axs = plt.subplots(1, 2, figsize=(14, 6))
sns.boxplot(x=data['Volume'], ax=axs[0])
axs[0].axvline(lower_bound_volume, color='r', linestyle='--', label='Lower Tukey Fence')
axs[0].axvline(upper_bound_volume, color='g', linestyle='--', label='Upper Tukey Fence')
axs[0].set_title("Facebook Trading Volume with Tukey Fences")
axs[0].legend()

sns.boxplot(x=data['Closing Price'], ax=axs[1])
axs[1].axvline(lower_bound_price, color='r', linestyle='--', label='Lower Tukey Fence')
axs[1].axvline(upper_bound_price, color='g', linestyle='--', label='Upper Tukey Fence')
axs[1].set_title("Facebook Closing Price with Tukey Fences")
axs[1].legend()
plt.tight_layout()
plt.show()
Evolution of Cumulative COVID-19 Cases Worldwide

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Synthetic cumulative worldwide case counts over 200 days
np.random.seed(42)
dates = pd.date_range(start="2020-01-01", periods=200, freq='D')
cumulative_cases = np.cumsum(np.random.randint(10000, 50000, size=200))
data = pd.DataFrame({
    'date': dates,
    'cumulative_cases': cumulative_cases
})

# First date on which cumulative cases reach 1 million
surpass_date = data[data['cumulative_cases'] >= 1000000].iloc[0]['date']

plt.figure(figsize=(10, 6))
plt.plot(data['date'], data['cumulative_cases'], label='Cumulative COVID-19 Cases', color='b')
plt.axvline(surpass_date, color='r', linestyle='--', label='Surpassed 1 million cases')
plt.title("Cumulative COVID-19 Cases Worldwide")
plt.xlabel("Date")
plt.ylabel("Cumulative Cases")
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Calculate and Plot Three Levels of Support and Resistance for Netflix’s Closing Price

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

date_range = pd.date_range(start='2019-01-01', end='2020-12-31', freq='B')
np.random.seed(42)
open_prices = np.random.uniform(250, 400, size=len(date_range))
high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
close_prices = np.random.uniform(low_prices, high_prices)
volume = np.random.randint(500000, 5000000, size=len(date_range))
data = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'High': high_prices,
    'Low': low_prices,
    'Close': close_prices,
    'Volume': volume
})

low = data['Low'].min()
high = data['High'].max()
support = [low, low + 0.33 * (high - low), low + 0.66 * (high - low)]
resistance = [high, high - 0.33 * (high - low), high - 0.66 * (high - low)]

plt.figure(figsize=(10, 6))
plt.plot(data['Date'], data['Close'], label='Netflix Closing Price', color='blue')
plt.axhline(support[0], color='green', linestyle='--', label='Support Level 1')
plt.axhline(support[1], color='yellow', linestyle='--', label='Support Level 2')
plt.axhline(support[2], color='red', linestyle='--', label='Support Level 3')
plt.axhline(resistance[0], color='red', linestyle='--', label='Resistance Level 1')
plt.axhline(resistance[1], color='yellow', linestyle='--', label='Resistance Level 2')
plt.axhline(resistance[2], color='green', linestyle='--', label='Resistance Level 3')
plt.title('Netflix Stock: Support and Resistance Levels')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
With the StockVisualizer class, look at the effect of after-hours trading on the FAANG stocks:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Synthetic OHLCV data for each FAANG stock
np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']
faang_data = {}
for stock in stocks:
    open_prices = np.random.uniform(250, 400, size=len(date_range))
    high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
    low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
    close_prices = np.random.uniform(low_prices, high_prices)
    volume = np.random.randint(500000, 5000000, size=len(date_range))
    faang_data[stock] = pd.DataFrame({
        'Date': date_range, 'Open': open_prices, 'High': high_prices,
        'Low': low_prices, 'Close': close_prices, 'Volume': volume
    })
    faang_data[stock].set_index('Date', inplace=True)

# After-hours effect: today's open minus yesterday's close, one subplot per stock
plt.figure(figsize=(15, 10))
for i, stock in enumerate(stocks):
    data = faang_data[stock]
    data['After Hours Change'] = data['Open'] - data['Close'].shift(1)
    plt.subplot(3, 2, i + 1)
    plt.plot(data.index, data['After Hours Change'], label=f'After Hours Change - {stock}')
    plt.title(f'After Hours Trading Effect - {stock}')
    plt.xlabel('Date')
    plt.ylabel('Price Change (USD)')
    plt.xticks(rotation=45)
    plt.legend()
plt.tight_layout()
plt.show()
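As a follow-up sketch that reuses the faang_data dictionary built above, the five after-hours series can also be placed on a single set of axes, which makes the stocks easier to compare directly:

# Combine the per-stock after-hours changes into one DataFrame and plot together
after_hours = pd.DataFrame({
    stock: faang_data[stock]['Open'] - faang_data[stock]['Close'].shift(1)
    for stock in stocks
})
after_hours.plot(figsize=(12, 6), title='After-Hours Change Across the FAANG Stocks')
plt.ylabel('Price Change (USD)')
plt.show()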
As a portfolio, using the make_portfolio() function from the stock_analysis.utils module:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Synthetic OHLCV data for each FAANG stock
np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']
faang_data = {}
for stock in stocks:
    open_prices = np.random.uniform(250, 400, size=len(date_range))
    high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
    low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
    close_prices = np.random.uniform(low_prices, high_prices)
    volume = np.random.randint(500000, 5000000, size=len(date_range))
    faang_data[stock] = pd.DataFrame({
        'Date': date_range,
        'Open': open_prices,
        'High': high_prices,
        'Low': low_prices,
        'Close': close_prices,
        'Volume': volume
    })
    faang_data[stock].set_index('Date', inplace=True)

# Equal-weighted portfolio of the five stocks' daily returns
portfolio_data = pd.DataFrame({stock: faang_data[stock]['Close'] for stock in stocks})
portfolio_returns = portfolio_data.pct_change()
weights = np.ones(len(stocks)) / len(stocks)
portfolio_returns['Portfolio'] = portfolio_returns[stocks].dot(weights)
portfolio_returns['Portfolio Cumulative'] = (1 + portfolio_returns['Portfolio']).cumprod()

plt.figure(figsize=(14, 8))
for stock in stocks:
    portfolio_returns[stock] = (1 + portfolio_returns[stock]).cumprod()
    plt.plot(portfolio_returns.index, portfolio_returns[stock], label=f'{stock} Cumulative Return')
plt.plot(portfolio_returns.index, portfolio_returns['Portfolio Cumulative'],
         label='Portfolio Cumulative Return', color='black', linewidth=3)
plt.title('Cumulative Returns: FAANG Portfolio vs Individual Stocks')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
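The script above equal-weights the daily returns by hand rather than calling make_portfolio(). As a rough stand-in, and only as an assumption about what that helper does (the module itself is not shown here), the group can be treated as one asset by summing the closing prices of all five stocks on each date:

# Hypothetical stand-in for stock_analysis.utils.make_portfolio(); the summing
# behavior is an assumption, not the module's documented implementation.
portfolio_close = sum(faang_data[stock]['Close'] for stock in stocks)
portfolio_close.plot(figsize=(10, 6), title='FAANG Treated as a Single Portfolio (assumed aggregation)')
plt.ylabel('Summed Closing Price (USD)')
plt.show()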
