Daa External
Daa External
import pandas as pd
# Experiment 1: summary queries over the earthquake catalogue in exp1.csv.
df = pd.read_csv(r"DATA ANALYTICS\\exp1.csv")

# 95th percentile of magnitude for events whose place mentions Japan and
# whose magnitude was measured with the "mb" magType.
in_japan = df["parsed_place"].str.contains("Japan", na=False)
japan_mb_earthquakes = df[in_japan & (df["magType"] == "mb")]
percentile_95 = japan_mb_earthquakes["mag"].quantile(0.95)
print(f"95th percentile of earthquake magnitude in Japan (mb type): {percentile_95}")

# Share of Indonesian events flagged with tsunami == 1.
indonesia_earthquakes = df[df["parsed_place"].str.contains("Indonesia", na=False)]
total_indonesia = len(indonesia_earthquakes)
tsunami_indonesia = len(indonesia_earthquakes[indonesia_earthquakes["tsunami"] == 1])
# Fall back to 0 when there are no Indonesian rows, avoiding a division by zero.
tsunami_percentage = (
    (tsunami_indonesia / total_indonesia) * 100 if total_indonesia > 0 else 0
)
print(f"Percentage of earthquakes in Indonesia that caused tsunamis: {tsunami_percentage:.2f}%")

# Descriptive statistics for events whose place mentions Nevada.
nevada_earthquakes = df[df["parsed_place"].str.contains("Nevada", na=False)]
print("Summary statistics for earthquakes in Nevada:")
print(nevada_earthquakes.describe())

# Place names treated as Ring of Fire locations; matching is by substring.
ring_of_fire_locations = [
    "Alaska", "Antarctic", "Bolivia", "California", "Canada", "Chile", "Costa Rica",
    "Ecuador", "Fiji", "Guatemala", "Indonesia", "Japan", "Kermadec Islands",
    "Mexico",
    "New Zealand", "Peru", "Philippines", "Russia", "Taiwan", "Tonga",
    "Washington",
]
df["parsed_place"] = df["parsed_place"].fillna("")  # Handle NaN values
# Tag each row "Yes" if any listed location appears in its place, else "No".
df["ring_of_fire"] = df["parsed_place"].apply(
    lambda x: "Yes" if any(loc in x for loc in ring_of_fire_locations) else "No"
)
ring_of_fire_count = df["ring_of_fire"].value_counts()
# .get(..., 0) covers the case where one of the two labels never occurs.
print(f"Number of earthquakes in Ring of Fire locations: {ring_of_fire_count.get('Yes', 0)}")
print(f"Number of earthquakes outside Ring of Fire locations: {ring_of_fire_count.get('No', 0)}")
EXPERIMENT 2: DATA FRAMES
import pandas as pd
# Load each company's CSV and tag every row with that company's ticker symbol.
aapl = pd.read_csv(r"DATA ANALYTICS\\AAPL(Exp2).csv").assign(ticker="AAPL")
amzn = pd.read_csv(r"DATA ANALYTICS\\amzn(Exp2).csv").assign(ticker="AMZN")
fb = pd.read_csv(r"DATA ANALYTICS\\fb1(Exp2).csv").assign(ticker="FB")
goog = pd.read_csv(r"DATA ANALYTICS\\goog(Exp2).csv").assign(ticker="GOOG")
nflx = pd.read_csv(r"DATA ANALYTICS\\nflx(Exp2).csv").assign(ticker="NFLX")

# Stack the five frames into one table with a fresh 0..n-1 row index,
# save it without the index column, and show the first rows.
fang = pd.concat(
    [aapl, amzn, fb, goog, nflx],
    ignore_index=True,
)
fang.to_csv("faang.csv", index=False)
print(fang.head())
Create the following visualizations using what you have learned up to this
point in this book.
Plot the rolling 20-day minimum of the Facebook closing price using
Pandas
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import random
# Build a seeded, synthetic 50-day price table used in place of real
# Facebook data, then plot the rolling 20-day minimum of the close.
np.random.seed(42)
dates = [datetime(2023, 1, 1) + timedelta(days=i) for i in range(50)]
opens = np.random.uniform(150, 300, 50)
closes = opens + np.random.uniform(-10, 10, 50)
highs = closes + np.random.uniform(0, 15, 50)
lows = closes - np.random.uniform(0, 10, 50)
facebook_data = pd.DataFrame({
    'Date': dates,
    'Open': opens,
    'Close': closes,
    'High': highs,
    'Low': lows,
})

# Minimum close over a trailing window of 20 rows.
facebook_data['Rolling_20_day_min'] = facebook_data['Close'].rolling(window=20).min()

plt.figure(figsize=(10, 6))
# Plot against the Date column so the x-axis actually shows dates, matching
# the 'Date' axis label (plotting the Series alone would use row numbers).
plt.plot(
    facebook_data['Date'],
    facebook_data['Rolling_20_day_min'],
    label='20-Day Rolling Minimum',
    color='red',
)
plt.title('Rolling 20-Day Minimum of Facebook Closing Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()
Create a histogram and KDE of the change from open to close in the price
of Facebook stock.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Build a seeded, synthetic table of 50 business days of open/close prices,
# then draw a density histogram with a KDE overlay of (Close - Open).
np.random.seed(42)
dates = pd.date_range(start='2023-01-01', periods=50, freq='B')
open_prices = np.random.uniform(150, 400, size=50)
close_prices = open_prices + np.random.uniform(-10, 10, size=50)
facebook_data = pd.DataFrame({
    'Date': dates,
    'Open': open_prices,
    'Close': close_prices,
})

# Intraday change: positive when the close is above the open.
facebook_data['Change'] = facebook_data['Close'] - facebook_data['Open']

plt.figure(figsize=(10, 6))
# stat='density' normalizes the bars so they share a scale with the KDE curve.
sns.histplot(
    facebook_data['Change'],
    kde=True,
    bins=15,
    color='blue',
    stat='density',
)
plt.title('Histogram and KDE of Change from Open to Close in Facebook Stock Price')
plt.xlabel('Change in Price (Close - Open)')
plt.ylabel('Density')
plt.show()
Using the earthquake data, create box plots for the magnitudes of
each magType used in Indonesia.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Draw one box per magType from a seeded, synthetic table of 200 magnitudes.
# NOTE(review): the values are randomly generated rather than read from the
# earthquake CSV, and the magType labels are drawn independently of the five
# normal distributions below — confirm this stand-in data is intended.
np.random.seed(42)
mag_types = ['ML', 'MS', 'Mw', 'mb', 'mwc']
data = {
    'magType': np.random.choice(mag_types, size=200),
    # Five normal samples whose sizes (50 + 50 + 50 + 25 + 25) total 200,
    # matching the number of magType labels above.
    'magnitude': np.concatenate([
        np.random.normal(loc=5.0, scale=0.5, size=50),
        np.random.normal(loc=5.5, scale=0.7, size=50),
        np.random.normal(loc=6.0, scale=0.6, size=50),
        np.random.normal(loc=4.8, scale=0.4, size=25),
        np.random.normal(loc=5.2, scale=0.6, size=25),
    ]),
}
earthquake_data = pd.DataFrame(data)

plt.figure(figsize=(10, 6))
sns.boxplot(x='magType', y='magnitude', data=earthquake_data, palette='Set2')
plt.title('Box Plot of Earthquake Magnitudes by magType in Indonesia')
plt.xlabel('magType')
plt.ylabel('Magnitude')
plt.show()
Make a line plot of the difference between the weekly
maximum high price and the weekly minimum low price for
Facebook. This should be a single line.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build seeded, synthetic daily High/Low prices for the business days from
# 2023-01-01 to 2023-06-30, then plot the weekly (max High - min Low) range.
np.random.seed(42)
dates = pd.date_range(start="2023-01-01", end="2023-06-30", freq="B")
high_prices = np.random.uniform(150, 300, size=len(dates))
# Each low sits 5 to 20 below the same day's high.
low_prices = high_prices - np.random.uniform(5, 20, size=len(dates))
facebook_data = pd.DataFrame({
    'Date': dates,
    'High': high_prices,
    'Low': low_prices,
})

# resample() needs a datetime index, so convert the column and index by it.
facebook_data['Date'] = pd.to_datetime(facebook_data['Date'])
facebook_data.set_index('Date', inplace=True)

# Per week: the highest High and the lowest Low, then their difference.
weekly_data = facebook_data.resample('W').agg({'High': 'max', 'Low': 'min'})
weekly_data['Price_Difference'] = weekly_data['High'] - weekly_data['Low']

plt.figure(figsize=(10, 6))
plt.plot(
    weekly_data.index,
    weekly_data['Price_Difference'],
    marker='o',
    linestyle='-',
    color='b',
)
plt.title('Weekly Difference Between Max High and Min Low Prices for Facebook')
plt.xlabel('Date')
plt.ylabel('Price Difference (Max High - Min Low)')
plt.grid(True)
plt.show()
EXPERIMENT 4: MATPLOTLIB
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Build seeded, synthetic open/close prices for the 2024 business days, then
# show (left) the daily gap between each open and the previous close and
# (right) that gap summed per month.
date_range = pd.date_range(start='2024-01-01', end='2024-12-31', freq='B')
np.random.seed(42)
# Opens follow a random walk starting near 250; closes add independent noise.
open_prices = 250 + np.cumsum(np.random.randn(len(date_range)) * 2)
close_prices = open_prices + np.random.randn(len(date_range)) * 1.5
df = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'Close': close_prices,
})
df.set_index('Date', inplace=True)

# shift(1) aligns each row with the prior row's close; the first row has no
# predecessor, so its Prev_Close and Daily_Diff are NaN.
df['Prev_Close'] = df['Close'].shift(1)
df['Daily_Diff'] = df['Open'] - df['Prev_Close']

fig, ax = plt.subplots(1, 2, figsize=(14, 6))

ax[0].plot(df.index, df['Daily_Diff'], label='Daily Diff', color='blue')
ax[0].set_title("Daily Difference between Opening and Previous Closing Prices")
ax[0].set_xlabel('Date')
ax[0].set_ylabel('Price Difference')
ax[0].grid(True)

monthly_net_effect = df['Daily_Diff'].resample('M').sum()
# Green for a net positive month, red otherwise.
colors = ['green' if x > 0 else 'red' for x in monthly_net_effect]
# A pandas bar plot places bars at integer positions 0..n-1, so a matplotlib
# date formatter would misread those positions as dates. Relabel the index
# with month abbreviations and let pandas use them as the tick labels.
monthly_net_effect.index = monthly_net_effect.index.strftime('%b')
monthly_net_effect.plot(kind='bar', ax=ax[1], color=colors)
ax[1].set_title("Net Effect of After-Hours Trading (Monthly)")
ax[1].set_xlabel('Month')
ax[1].set_ylabel('Net Price Change')
ax[1].grid(True)
ax[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()
Using seaborn, create a heat map to visualize the correlation
coefficients between earthquake magnitude and whether there
was a tsunami for earthquakes measured with the mb
magnitude type.
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
# Plot a seeded, synthetic cumulative case curve over 200 days and mark the
# first date on which the running total reaches one million.
# NOTE(review): the task text preceding this script asks for a seaborn
# correlation heat map on earthquake data, while this script plots cumulative
# case counts — confirm which exercise this code belongs to.
np.random.seed(42)
dates = pd.date_range(start="2020-01-01", periods=200, freq='D')
# Daily increments drawn from [10000, 50000), accumulated into a running total.
cumulative_cases = np.random.randint(10000, 50000, size=200).cumsum()
data = pd.DataFrame({'date': dates, 'cumulative_cases': cumulative_cases})

# Date of the first row whose total is at least one million.
reached_million = data['cumulative_cases'] >= 1000000
surpass_date = data.loc[reached_million, 'date'].iloc[0]

plt.figure(figsize=(10, 6))
plt.plot(
    data['date'],
    data['cumulative_cases'],
    color='b',
    label='Cumulative COVID-19 Cases',
)
plt.axvline(
    surpass_date,
    linestyle='--',
    color='r',
    label='Surpassed 1 million cases',
)
plt.title("Cumulative COVID-19 Cases Worldwide")
plt.xlabel("Date")
plt.ylabel("Cumulative Cases")
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Calculate and Plot Three Levels of Support and Resistance for
Netflix’s Closing Price
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build seeded, synthetic OHLCV data for the 2019-2020 business days, then
# plot the close with three support and three resistance lines.
date_range = pd.date_range(start='2019-01-01', end='2020-12-31', freq='B')
np.random.seed(42)
open_prices = np.random.uniform(250, 400, size=len(date_range))
high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
# Each close is drawn uniformly between that day's low and high.
close_prices = np.random.uniform(low_prices, high_prices)
volume = np.random.randint(500000, 5000000, size=len(date_range))
data = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'High': high_prices,
    'Low': low_prices,
    'Close': close_prices,
    'Volume': volume,
})

# Levels are placed at 0%, 33% and 66% of the overall Low-to-High range,
# counted up from the low (support) and down from the high (resistance).
# NOTE(review): these are fixed fractions of the full range, so support[1]
# and resistance[2] nearly coincide (as do support[2] and resistance[1]) —
# confirm this is the intended definition of the levels.
low = data['Low'].min()
high = data['High'].max()
support = [low, low + 0.33 * (high - low), low + 0.66 * (high - low)]
resistance = [high, high - 0.33 * (high - low), high - 0.66 * (high - low)]

plt.figure(figsize=(10, 6))
plt.plot(data['Date'], data['Close'], label='Netflix Closing Price', color='blue')
plt.axhline(support[0], color='green', linestyle='--', label='Support Level 1')
plt.axhline(support[1], color='yellow', linestyle='--', label='Support Level 2')
plt.axhline(support[2], color='red', linestyle='--', label='Support Level 3')
plt.axhline(resistance[0], color='red', linestyle='--', label='Resistance Level 1')
plt.axhline(resistance[1], color='yellow', linestyle='--', label='Resistance Level 2')
plt.axhline(resistance[2], color='green', linestyle='--', label='Resistance Level 3')
plt.title('Netflix Stock: Support and Resistance Levels')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
With the StockVisualizer class, look at the effect of after-hours
trading on the FAANG stocks:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build seeded, synthetic OHLCV tables for five tickers over the 2019-2020
# business days, then plot each ticker's after-hours change in its own subplot.
np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']

# One date-indexed DataFrame per ticker, keyed by ticker symbol.
faang_data = {}
for stock in stocks:
    open_prices = np.random.uniform(250, 400, size=len(date_range))
    high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
    low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
    # Each close is drawn uniformly between that day's low and high.
    close_prices = np.random.uniform(low_prices, high_prices)
    volume = np.random.randint(500000, 5000000, size=len(date_range))
    faang_data[stock] = pd.DataFrame({
        'Date': date_range,
        'Open': open_prices,
        'High': high_prices,
        'Low': low_prices,
        'Close': close_prices,
        'Volume': volume,
    })
    faang_data[stock].set_index('Date', inplace=True)

plt.figure(figsize=(15, 10))
for i, stock in enumerate(stocks):
    data = faang_data[stock]
    # Gap between today's open and the previous row's close; NaN on row one.
    data['After Hours Change'] = data['Open'] - data['Close'].shift(1)
    # 3x2 grid: five tickers fill slots 1..5, the sixth slot stays empty.
    plt.subplot(3, 2, i + 1)
    plt.plot(
        data.index,
        data['After Hours Change'],
        label=f'After Hours Change - {stock}',
    )
    plt.title(f'After Hours Trading Effect - {stock}')
    plt.xlabel('Date')
    plt.ylabel('Price Change (USD)')
    plt.xticks(rotation=45)
    plt.legend()
plt.tight_layout()
plt.show()
As a portfolio using the make_portfolio() function from the
stock_analysis.utils module
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build seeded, synthetic OHLCV tables for five tickers over the 2019-2020
# business days, then compare each ticker's cumulative return with that of
# an equally weighted portfolio of all five.
np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']

# One date-indexed DataFrame per ticker, keyed by ticker symbol.
faang_data = {}
for stock in stocks:
    open_prices = np.random.uniform(250, 400, size=len(date_range))
    high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
    low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
    # Each close is drawn uniformly between that day's low and high.
    close_prices = np.random.uniform(low_prices, high_prices)
    volume = np.random.randint(500000, 5000000, size=len(date_range))
    faang_data[stock] = pd.DataFrame({
        'Date': date_range,
        'Open': open_prices,
        'High': high_prices,
        'Low': low_prices,
        'Close': close_prices,
        'Volume': volume,
    })
    faang_data[stock].set_index('Date', inplace=True)

# Closing prices side by side (one column per ticker), then daily returns.
portfolio_data = pd.DataFrame({stock: faang_data[stock]['Close'] for stock in stocks})
portfolio_returns = portfolio_data.pct_change()

# Equal weights; the dot product must run while only the five ticker columns
# exist, i.e. before the 'Portfolio' columns are added below.
weights = np.ones(len(stocks)) / len(stocks)
portfolio_returns['Portfolio'] = portfolio_returns.dot(weights)
portfolio_returns['Portfolio Cumulative'] = (1 + portfolio_returns['Portfolio']).cumprod()

plt.figure(figsize=(14, 8))
for stock in stocks:
    # Overwrites the ticker's daily-return column with its cumulative growth
    # factor; the portfolio columns were already computed above.
    portfolio_returns[stock] = (1 + portfolio_returns[stock]).cumprod()
    plt.plot(
        portfolio_returns.index,
        portfolio_returns[stock],
        label=f'{stock} Cumulative Return',
    )
plt.plot(
    portfolio_returns.index,
    portfolio_returns['Portfolio Cumulative'],
    label='Portfolio Cumulative Return',
    color='black',
    linewidth=3,
)
plt.title('Cumulative Returns: FAANG Portfolio vs Individual Stocks')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()