DA EXTERNAL
EXPERIMENT 1: EARTHQUAKE ANALYSIS
PROGRAM:
# Experiment 1: earthquake analysis on a USGS-style earthquake CSV.
import pandas as pd  # BUG FIX: the script used `pd` without ever importing it

df = pd.read_csv(r"C:\Users\this pc\Desktop\Earthquakes.csv")

# 95th percentile of magnitude for Japanese quakes measured on the 'mb' scale.
japan_mb_earthquakes = df[(df["parsed_place"].str.contains("Japan", na=False)) & (df["magType"] == "mb")]
percentile_95 = japan_mb_earthquakes["mag"].quantile(0.95)
print(f"95th percentile of earthquake magnitude in Japan (mb type): {percentile_95}")

# Share of Indonesian earthquakes flagged as tsunami-generating.
indonesia_earthquakes = df[df["parsed_place"].str.contains("Indonesia", na=False)]
total_indonesia = len(indonesia_earthquakes)
tsunami_indonesia = len(indonesia_earthquakes[indonesia_earthquakes["tsunami"] == 1])
# Guard against division by zero when no Indonesian rows are present.
tsunami_percentage = (tsunami_indonesia / total_indonesia) * 100 if total_indonesia > 0 else 0
print(f"Percentage of earthquakes in Indonesia that caused tsunamis: {tsunami_percentage:.2f}%")

# Summary statistics for Nevada earthquakes.
nevada_earthquakes = df[df["parsed_place"].str.contains("Nevada", na=False)]
print("Summary statistics for earthquakes in Nevada:")
print(nevada_earthquakes.describe())

# Classify each quake as inside/outside the Ring of Fire by place-name substring match.
ring_of_fire_locations = [
    "Alaska", "Antarctic", "Bolivia", "California", "Canada", "Chile", "Costa Rica",
    "Ecuador", "Fiji", "Guatemala", "Indonesia", "Japan", "Kermadec Islands", "Mexico",
    "New Zealand", "Peru", "Philippines", "Russia", "Taiwan", "Tonga", "Washington"
]
df["parsed_place"] = df["parsed_place"].fillna("")  # Handle NaN values
df["ring_of_fire"] = df["parsed_place"].apply(
    lambda x: "Yes" if any(loc in x for loc in ring_of_fire_locations) else "No"
)
ring_of_fire_count = df["ring_of_fire"].value_counts()
print(f"Number of earthquakes in Ring of Fire locations: {ring_of_fire_count.get('Yes', 0)}")
print(f"Number of earthquakes outside Ring of Fire locations: {ring_of_fire_count.get('No', 0)}")

# Tsunamis among Ring-of-Fire quakes.
tsunami_ring_of_fire = df[(df["ring_of_fire"] == "Yes") & (df["tsunami"] == 1)]
tsunami_count = len(tsunami_ring_of_fire)
print(f"Number of tsunamis along the Ring of Fire: {tsunami_count}")
EXPERIMENT 2: DATA FRAMES
PROGRAM:
import pandas as pd

# Experiment 2: combine the five FAANG stock histories into one DataFrame.
# Step 1: Load each stock's price history.
aapl = pd.read_csv("exercises/aapl.csv")
amzn = pd.read_csv("exercises/amzn.csv")
fb = pd.read_csv("exercises/fb.csv")
goog = pd.read_csv("exercises/goog.csv")
nflx = pd.read_csv("exercises/nflx.csv")

# Step 2: Add the ticker column to each DataFrame
aapl['ticker'] = 'AAPL'
amzn['ticker'] = 'AMZN'
fb['ticker'] = 'FB'
goog['ticker'] = 'GOOG'
nflx['ticker'] = 'NFLX'

# Step 3 (BUG FIX): `fang` was used below without ever being defined —
# concatenate the five frames into the combined DataFrame.
fang = pd.concat([aapl, amzn, fb, goog, nflx], ignore_index=True)

fang.to_csv("faang.csv", index=False)
# Optional: Display the first few rows of the combined DataFrame
print(fang.head())
EXPERIMENT 3: HISTOGRAM AND LINE PLOT
3(a) Plot the rolling 20-day minimum of the Facebook closing price using pandas.
PROGRAM:
import pandas as pd
import numpy as np
import random  # NOTE(review): unused here, kept from the original listing
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

# Generate a random sample of Facebook stock data for 50 rows.
# (The original listing split this comment mid-sentence, leaving "for 50 rows"
# as bare code — a SyntaxError.)
np.random.seed(42)
dates = [datetime(2023, 1, 1) + timedelta(days=i) for i in range(50)]
opens = np.random.uniform(150, 300, 50)
closes = opens + np.random.uniform(-10, 10, 50)
highs = closes + np.random.uniform(0, 15, 50)
lows = closes - np.random.uniform(0, 10, 50)
facebook_data = pd.DataFrame({
    'Date': dates,
    'Open': opens,
    'Close': closes,
    'High': highs,
    'Low': lows
})

# Rolling 20-day minimum of the closing price (first 19 rows are NaN).
facebook_data['Rolling_20_day_min'] = facebook_data['Close'].rolling(window=20).min()

# Plot the rolling 20-day minimum.
# (The original interleaved two columns of plotting statements on single lines
# — a SyntaxError — and ended the title string with a smart quote; both fixed.)
plt.figure(figsize=(10, 6))
plt.plot(facebook_data['Rolling_20_day_min'], label='20-Day Rolling Minimum', color='red')
plt.title('Rolling 20-Day Minimum of Facebook Closing Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()
3(b)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Simulate 50 business days of Facebook open/close prices.
np.random.seed(42)  # For reproducibility
dates = pd.date_range(start='2023-01-01', periods=50, freq='B')
open_prices = np.random.uniform(150, 400, size=50)
# BUG FIX: the original split this expression across two lines with a trailing
# '+' and no continuation — a SyntaxError.
close_prices = open_prices + np.random.uniform(-10, 10, size=50)
facebook_data = pd.DataFrame({
    'Date': dates,
    'Open': open_prices,
    'Close': close_prices
})

# Intraday change; positive means the stock closed above its open.
facebook_data['Change'] = facebook_data['Close'] - facebook_data['Open']

plt.figure(figsize=(10, 6))
sns.histplot(facebook_data['Change'], kde=True, bins=15, color='blue', stat='density')
plt.title('Histogram and KDE of Change from Open to Close in Facebook Stock Price')
plt.xlabel('Change in Price (Close - Open)')
plt.ylabel('Density')
plt.show()  # missing in the original; the figure was never displayed
3(C)
PROGRAM
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Simulate 200 earthquakes with a magnitude-type label and a magnitude drawn
# from a mixture of normal distributions.
np.random.seed(42)
mag_types = ['ML', 'MS', 'Mw', 'mb', 'mwc']
data = {
    'magType': np.random.choice(mag_types, size=200),
    'magnitude': np.concatenate([
        np.random.normal(loc=5.0, scale=0.5, size=50),
        np.random.normal(loc=5.5, scale=0.7, size=50),
        np.random.normal(loc=6.0, scale=0.6, size=50),
        np.random.normal(loc=4.8, scale=0.4, size=25),
        # BUG FIX: the original omitted the closing ')' here — a SyntaxError.
        np.random.normal(loc=5.2, scale=0.6, size=25),
    ]),
}
earthquake_data = pd.DataFrame(data)

# One box per magnitude type.
plt.figure(figsize=(10, 6))
sns.boxplot(x='magType', y='magnitude', data=earthquake_data, palette='Set2')
plt.title('Box Plot of Earthquake Magnitudes by magType in Indonesia')
plt.xlabel('magType')
plt.ylabel('Magnitude')
plt.show()
3(d)
PROGRAM:
import pandas as pd
import matplotlib.pyplot as plt

# Load the Facebook price history and index it by trading date.
fb = pd.read_csv('facebook_stock_data.csv')
fb['Date'] = pd.to_datetime(fb['Date'])
fb = fb.set_index('Date')

# Weekly extremes: highest high and lowest low within each calendar week,
# and the spread between them.
weekly = fb.resample('W').agg({'High': 'max', 'Low': 'min'})
weekly['Price_Difference'] = weekly['High'] - weekly['Low']

# Plot the weekly high-low spread over time.
plt.figure(figsize=(10, 6))
plt.plot(weekly.index, weekly['Price_Difference'], marker='o', linestyle='-', color='b')
plt.title('Difference Between Weekly Max High and Min Low Prices for Facebook')
plt.xlabel('Date')
plt.ylabel('Price Difference (Max High - Min Low)')
plt.grid(True)
plt.show()
EXPERIMENT 4
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simulate one year of business-day open/close prices as a random walk.
date_range = pd.date_range(start='2024-01-01', end='2024-12-31', freq='B')
np.random.seed(42)
open_prices = 250 + np.cumsum(np.random.randn(len(date_range)) * 2)
close_prices = open_prices + np.random.randn(len(date_range)) * 1.5
df = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'Close': close_prices
})
df.set_index('Date', inplace=True)

# After-hours effect: today's open minus yesterday's close.
df['Prev_Close'] = df['Close'].shift(1)
df['Daily_Diff'] = df['Open'] - df['Prev_Close']

fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: daily after-hours differences.
ax[0].plot(df.index, df['Daily_Diff'], label='Daily Diff', color='blue')
ax[0].set_title("Daily Difference between Opening and Previous Closing Prices")
ax[0].set_xlabel('Date')
ax[0].set_ylabel('Price Difference')
ax[0].grid(True)

# Right panel: monthly net effect, green for net gain, red for net loss.
monthly_net_effect = df['Daily_Diff'].resample('M').sum()
colors = ['green' if x > 0 else 'red' for x in monthly_net_effect]
monthly_net_effect.plot(kind='bar', ax=ax[1], color=colors)
ax[1].set_title("Net Effect of After-Hours Trading (Monthly)")
ax[1].set_xlabel('Month')
ax[1].set_ylabel('Net Price Change')
ax[1].grid(True)
# BUG FIX: a pandas bar plot places bars at ordinal positions 0..11, not at
# real dates, so mdates.DateFormatter('%b') produced wrong (epoch-based)
# labels. Label the categorical ticks with month abbreviations directly.
ax[1].set_xticklabels(monthly_net_effect.index.strftime('%b'))
ax[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()  # missing in the original; the figure was never displayed
EXPERIMENT 5 SEABORN AND BOXPLOTS
5(A)
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Simulate 100 quakes: uniform magnitudes in [5, 9] and a tsunami flag that is
# 1 with 30% probability.
np.random.seed(42)
quakes = pd.DataFrame({
    'magnitude': np.random.uniform(5.0, 9.0, 100),
    'tsunami': np.random.choice([0, 1], size=100, p=[0.7, 0.3]),
})

# Pearson correlation between the two columns, rendered as an annotated heatmap.
corr = quakes.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', vmin=-1, vmax=1)
plt.title("Correlation between Earthquake Magnitude and Tsunami")
plt.show()
5(B)
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Simulated daily trading volume and closing prices for 100 sessions.
np.random.seed(42)  # For reproducibility
data = pd.DataFrame({
    'Volume': np.random.randint(1000, 5000, size=100),
    'Closing Price': np.random.randint(250, 350, size=100),
})


def _tukey_fences(series):
    """Return (lower, upper) Tukey fences: Q1 - 1.5*IQR and Q3 + 1.5*IQR."""
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    return q1 - 1.5 * iqr, q3 + 1.5 * iqr


vol_lo, vol_hi = _tukey_fences(data['Volume'])
price_lo, price_hi = _tukey_fences(data['Closing Price'])

# One boxplot per column, with its Tukey fences drawn as dashed verticals.
fig, axs = plt.subplots(1, 2, figsize=(14, 6))
panels = (
    (axs[0], 'Volume', vol_lo, vol_hi, "Facebook Trading Volume with Tukey Fences"),
    (axs[1], 'Closing Price', price_lo, price_hi, "Facebook Closing Price with Tukey Fences"),
)
for ax, column, lo, hi, title in panels:
    sns.boxplot(x=data[column], ax=ax)
    ax.axvline(lo, color='r', linestyle='--', label='Lower Tukey Fence')
    ax.axvline(hi, color='g', linestyle='--', label='Upper Tukey Fence')
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
plt.show()
5(C)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Simulate 200 days of worldwide cumulative COVID-19 case counts.
np.random.seed(42)
dates = pd.date_range(start="2020-01-01", periods=200, freq='D')
cumulative_cases = np.cumsum(np.random.randint(10000, 50000, size=200))
data = pd.DataFrame({
    'date': dates,
    'cumulative_cases': cumulative_cases
})

# First date on which the running total reaches 1 million cases.
# BUG FIX: unconditionally taking .iloc[0] raises IndexError when the
# threshold is never reached; guard for the empty case instead.
surpassed = data[data['cumulative_cases'] >= 1000000]
surpass_date = surpassed.iloc[0]['date'] if not surpassed.empty else None

plt.figure(figsize=(10, 6))
plt.plot(data['date'], data['cumulative_cases'], label='Cumulative COVID-19 Cases', color='b')
if surpass_date is not None:
    plt.axvline(surpass_date, color='r', linestyle='--', label='Surpassed 1 million cases')
plt.title("Cumulative COVID-19 Cases Worldwide")
plt.xlabel("Date")
plt.ylabel("Cumulative Cases")
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()  # missing in the original; the figure was never displayed
EXPERIMENT 6 (A) STOCK ANALYSIS
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simulate two years of business-day OHLCV data for Netflix.
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
np.random.seed(42)
open_prices = np.random.uniform(250, 400, size=len(date_range))
high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
close_prices = np.random.uniform(low_prices, high_prices)  # Close between Low and High
volume = np.random.randint(500000, 5000000, size=len(date_range))  # Random volume
data = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'High': high_prices,
    'Low': low_prices,
    'Close': close_prices,
    'Volume': volume
})

# Three evenly spaced support/resistance levels between the period low and high.
low = data['Low'].min()    # Lowest price (support)
high = data['High'].max()  # Highest price (resistance)
support = [low, low + 0.33 * (high - low), low + 0.66 * (high - low)]
resistance = [high, high - 0.33 * (high - low), high - 0.66 * (high - low)]

plt.figure(figsize=(10, 6))
plt.plot(data['Date'], data['Close'], label='Netflix Closing Price', color='blue')
plt.axhline(support[0], color='green', linestyle='--', label='Support Level 1')
plt.axhline(support[1], color='yellow', linestyle='--', label='Support Level 2')
plt.axhline(support[2], color='red', linestyle='--', label='Support Level 3')
plt.axhline(resistance[0], color='red', linestyle='--', label='Resistance Level 1')
plt.axhline(resistance[1], color='yellow', linestyle='--', label='Resistance Level 2')
plt.axhline(resistance[2], color='green', linestyle='--', label='Resistance Level 3')
plt.title('Netflix Stock: Support and Resistance Levels')
# BUG FIX: the original listing interleaved two columns of statements on
# single lines (a SyntaxError); they are untangled here in reading order.
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
6(B)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')

# BUG FIX: `stocks` and `faang_data` were used without ever being defined
# (NameError). Define them exactly as Experiment 6(C) does.
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']
faang_data = {}

# Simulate business-day OHLCV data for each FAANG ticker.
# (Loop-body indentation, lost in the original listing, is restored.)
for stock in stocks:
    open_prices = np.random.uniform(250, 400, size=len(date_range))
    high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
    low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
    close_prices = np.random.uniform(low_prices, high_prices)  # Close between Low and High
    volume = np.random.randint(500000, 5000000, size=len(date_range))
    faang_data[stock] = pd.DataFrame({
        'Date': date_range,
        'Open': open_prices,
        'High': high_prices,
        'Low': low_prices,
        'Close': close_prices,
        'Volume': volume
    })
    faang_data[stock].set_index('Date', inplace=True)

# One subplot per ticker: today's open minus yesterday's close.
plt.figure(figsize=(15, 10))
for i, stock in enumerate(stocks):
    data = faang_data[stock]
    data['After Hours Change'] = data['Open'] - data['Close'].shift(1)
    plt.subplot(3, 2, i + 1)
    plt.plot(data.index, data['After Hours Change'], label=f'After Hours Change - {stock}')
    plt.title(f'After Hours Trading Effect - {stock}')
    plt.xlabel('Date')
    plt.ylabel('Price Change (USD)')
    plt.xticks(rotation=45)
    plt.legend()
plt.tight_layout()
plt.show()
6(C)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simulate business-day OHLCV histories for the five FAANG tickers.
np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']
faang_data = {}
for ticker in stocks:
    opens = np.random.uniform(250, 400, size=len(date_range))
    highs = opens + np.random.uniform(1, 10, size=len(date_range))
    lows = opens - np.random.uniform(1, 10, size=len(date_range))
    closes = np.random.uniform(lows, highs)  # Close between Low and High
    vols = np.random.randint(500000, 5000000, size=len(date_range))
    frame = pd.DataFrame({
        'Date': date_range,
        'Open': opens,
        'High': highs,
        'Low': lows,
        'Close': closes,
        'Volume': vols
    })
    faang_data[ticker] = frame.set_index('Date')

# Daily returns of each closing-price series, plus an equal-weighted portfolio.
portfolio_data = pd.DataFrame({t: faang_data[t]['Close'] for t in stocks})
portfolio_returns = portfolio_data.pct_change()
weights = np.full(len(stocks), 1.0 / len(stocks))
portfolio_returns['Portfolio'] = portfolio_returns.dot(weights)
portfolio_returns['Portfolio Cumulative'] = (1 + portfolio_returns['Portfolio']).cumprod()

# Plot each stock's cumulative return against the portfolio's.
plt.figure(figsize=(14, 8))
for t in stocks:
    # Convert the stock's daily returns to cumulative growth in place.
    portfolio_returns[t] = (1 + portfolio_returns[t]).cumprod()
    plt.plot(portfolio_returns.index, portfolio_returns[t], label=f'{t} Cumulative Return')
plt.plot(portfolio_returns.index, portfolio_returns['Portfolio Cumulative'],
         label='Portfolio Cumulative Return', color='black', linewidth=3)
plt.title('Cumulative Returns: FAANG Portfolio vs Individual Stocks')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()