0% found this document useful (0 votes)
6 views14 pages

DAEXTERNAL

The document outlines multiple experiments involving data analysis using Python and pandas, focusing on earthquake data, stock prices, and visualizations. Key analyses include calculating earthquake magnitudes, tsunami percentages, and generating stock price statistics and visualizations. The document also demonstrates the use of Seaborn for correlation heatmaps and boxplots, as well as plotting techniques for financial data.

Uploaded by

mukeshbalaga2003
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views14 pages

DAEXTERNAL

The document outlines multiple experiments involving data analysis using Python and pandas, focusing on earthquake data, stock prices, and visualizations. Key analyses include calculating earthquake magnitudes, tsunami percentages, and generating stock price statistics and visualizations. The document also demonstrates the use of Seaborn for correlation heatmaps and boxplots, as well as plotting techniques for financial data.

Uploaded by

mukeshbalaga2003
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 14

EXPERIMENT 1: ANALYSIS FUNDAMENTALS

PROGRAM :
# EXPERIMENT 1: earthquake-data fundamentals with pandas.
import pandas as pd  # fix: pandas was used below but never imported

df = pd.read_csv(r"C:\Users\this pc\Desktop\Earthquakes.csv")

# Q1: 95th percentile of magnitude for Japanese quakes measured on the "mb" scale.
japan_mb_earthquakes = df[(df["parsed_place"].str.contains("Japan", na=False)) & (df["magType"] == "mb")]
percentile_95 = japan_mb_earthquakes["mag"].quantile(0.95)
print(f"95th percentile of earthquake magnitude in Japan (mb type): {percentile_95}")

# Q2: share of Indonesian quakes that triggered a tsunami (guarded against /0).
indonesia_earthquakes = df[df["parsed_place"].str.contains("Indonesia", na=False)]
total_indonesia = len(indonesia_earthquakes)
tsunami_indonesia = len(indonesia_earthquakes[indonesia_earthquakes["tsunami"] == 1])
tsunami_percentage = (tsunami_indonesia / total_indonesia) * 100 if total_indonesia > 0 else 0
print(f"Percentage of earthquakes in Indonesia that caused tsunamis: {tsunami_percentage:.2f}%")

# Q3: summary statistics for Nevada quakes.
nevada_earthquakes = df[df["parsed_place"].str.contains("Nevada", na=False)]
print("Summary statistics for earthquakes in Nevada:")
print(nevada_earthquakes.describe())

# Q4: label each quake as inside/outside the Ring of Fire by substring match.
ring_of_fire_locations = [
    "Alaska", "Antarctic", "Bolivia", "California", "Canada", "Chile", "Costa Rica",
    "Ecuador", "Fiji", "Guatemala", "Indonesia", "Japan", "Kermadec Islands", "Mexico",
    "New Zealand", "Peru", "Philippines", "Russia", "Taiwan", "Tonga", "Washington"
]
df["parsed_place"] = df["parsed_place"].fillna("")  # Handle NaN values
df["ring_of_fire"] = df["parsed_place"].apply(
    lambda place: "Yes" if any(loc in place for loc in ring_of_fire_locations) else "No"
)
ring_of_fire_count = df["ring_of_fire"].value_counts()
print(f"Number of earthquakes in Ring of Fire locations: {ring_of_fire_count.get('Yes', 0)}")
print(f"Number of earthquakes outside Ring of Fire locations: {ring_of_fire_count.get('No', 0)}")

# Q5: tsunamis that occurred along the Ring of Fire.
tsunami_ring_of_fire = df[(df["ring_of_fire"] == "Yes") & (df["tsunami"] == 1)]
tsunami_count = len(tsunami_ring_of_fire)
print(f"Number of tsunamis along the Ring of Fire: {tsunami_count}")
EXPERIMENT 2: DATA FRAMES
PROGRAM:

# EXPERIMENT 2: combine per-ticker stock CSVs into one FAANG DataFrame.
import pandas as pd

# Step 1+2: map each ticker to its CSV, read it, and tag the rows with the ticker.
# (Replaces five copy-pasted read/assign pairs; dict order preserves the original
# AAPL, AMZN, FB, GOOG, NFLX concatenation order.)
ticker_files = {
    'AAPL': "exercises/aapl.csv",
    'AMZN': "exercises/amzn.csv",
    'FB': "exercises/fb.csv",
    'GOOG': "exercises/goog.csv",
    'NFLX': "exercises/nflx.csv",
}
frames = []
for ticker, path in ticker_files.items():
    frame = pd.read_csv(path)
    frame['ticker'] = ticker
    frames.append(frame)

# Step 3: combine the DataFrames into a single frame with a fresh index.
# (Renamed from `fang` to `faang` for consistency with the output file name.)
faang = pd.concat(frames, ignore_index=True)

# Step 4: save the combined data to a CSV file.
faang.to_csv("faang.csv", index=False)

# Optional: display the first few rows of the combined DataFrame.
print(faang.head())
EXPERIMENT 3 : HISTOGRAM AND LINE PLOT

3(a) Plot the rolling 20-day minimum of the Facebook closing price using
pandas.
PROGRAM :
# EXPERIMENT 3(a): rolling 20-day minimum of a simulated Facebook closing price.
# (Reconstructed: the PDF extraction interleaved the plotting calls across two
# columns and broke the comment and assignment lines.)
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

# Generate a random sample of Facebook stock data for 50 rows (seeded).
np.random.seed(42)
dates = [datetime(2023, 1, 1) + timedelta(days=i) for i in range(50)]
opens = np.random.uniform(150, 300, 50)
closes = opens + np.random.uniform(-10, 10, 50)
highs = closes + np.random.uniform(0, 15, 50)
lows = closes - np.random.uniform(0, 10, 50)
facebook_data = pd.DataFrame({
    'Date': dates,
    'Open': opens,
    'Close': closes,
    'High': highs,
    'Low': lows
})

# Rolling 20-day minimum of the close; the first 19 rows are NaN by definition.
facebook_data['Rolling_20_day_min'] = facebook_data['Close'].rolling(window=20).min()

# Plot the rolling 20-day minimum.
plt.figure(figsize=(10, 6))
plt.plot(facebook_data['Rolling_20_day_min'], label='20-Day Rolling Minimum', color='red')
plt.title('Rolling 20-Day Minimum of Facebook Closing Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()
3(b)
# EXPERIMENT 3(b): distribution of the daily open-to-close change.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Simulate 50 business days of open/close prices (seeded for reproducibility).
np.random.seed(42)  # For reproducibility
dates = pd.date_range(start='2023-01-01', periods=50, freq='B')
open_prices = np.random.uniform(150, 400, size=50)
close_prices = open_prices + np.random.uniform(-10, 10, size=50)

columns = {
    'Date': dates,
    'Open': open_prices,
    'Close': close_prices,
}
facebook_data = pd.DataFrame(columns)

# Daily move from open to close.
facebook_data['Change'] = facebook_data['Close'].sub(facebook_data['Open'])

# Histogram with a KDE overlay, normalised to a density.
plt.figure(figsize=(10, 6))
sns.histplot(facebook_data['Change'], kde=True, bins=15, color='blue', stat='density')
plt.title('Histogram and KDE of Change from Open to Close in Facebook Stock Price')
plt.xlabel('Change in Price (Close - Open)')
plt.ylabel('Density')
3(C)
PROGRAM
# EXPERIMENT 3(c): box plot of simulated earthquake magnitudes per magType.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

np.random.seed(42)
mag_types = ['ML', 'MS', 'Mw', 'mb', 'mwc']
data = {
    'magType': np.random.choice(mag_types, size=200),
    # 50+50+50+25+25 = 200 magnitudes drawn from five normal distributions.
    'magnitude': np.concatenate([
        np.random.normal(loc=5.0, scale=0.5, size=50),
        np.random.normal(loc=5.5, scale=0.7, size=50),
        np.random.normal(loc=6.0, scale=0.6, size=50),
        np.random.normal(loc=4.8, scale=0.4, size=25),
        np.random.normal(loc=5.2, scale=0.6, size=25),  # fix: closing ")" was missing here
    ]),
}
earthquake_data = pd.DataFrame(data)

# One box per magnitude type.
plt.figure(figsize=(10, 6))
sns.boxplot(x='magType', y='magnitude', data=earthquake_data, palette='Set2')
plt.title('Box Plot of Earthquake Magnitudes by magType in Indonesia')
plt.xlabel('magType')
plt.ylabel('Magnitude')
plt.show()
3(d)
PROGRAM:
# EXPERIMENT 3(d): weekly high-low spread for Facebook stock.
import pandas as pd
import matplotlib.pyplot as plt

# Load the stock history and index it by trading date.
facebook_data = pd.read_csv('facebook_stock_data.csv')
facebook_data['Date'] = pd.to_datetime(facebook_data['Date'])
facebook_data.set_index('Date', inplace=True)

# Weekly extremes: max High and min Low within each calendar week.
weekly_data = facebook_data.resample('W').agg({'High': 'max', 'Low': 'min'})
weekly_data['Price_Difference'] = weekly_data['High'].sub(weekly_data['Low'])

# Line plot of the weekly spread.
plt.figure(figsize=(10, 6))
plt.plot(weekly_data.index, weekly_data['Price_Difference'], marker='o', linestyle='-', color='b')
plt.title('Difference Between Weekly Max High and Min Low Prices for Facebook')
plt.xlabel('Date')
plt.ylabel('Price Difference (Max High - Min Low)')
plt.grid(True)
plt.show()
EXPERIMENT 4: AFTER-HOURS TRADING ANALYSIS
# EXPERIMENT 4: effect of after-hours trading on a simulated 2024 price series.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Business-day dates; open follows a random walk, close is open plus noise.
date_range = pd.date_range(start='2024-01-01', end='2024-12-31', freq='B')
np.random.seed(42)
open_prices = 250 + np.cumsum(np.random.randn(len(date_range)) * 2)
close_prices = open_prices + np.random.randn(len(date_range)) * 1.5
df = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'Close': close_prices
})
df.set_index('Date', inplace=True)

# After-hours move: today's open minus the previous session's close.
df['Prev_Close'] = df['Close'].shift(1)
df['Daily_Diff'] = df['Open'] - df['Prev_Close']

fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: daily open-vs-previous-close difference.
ax[0].plot(df.index, df['Daily_Diff'], label='Daily Diff', color='blue')
ax[0].set_title("Daily Difference between Opening and Previous Closing Prices")
ax[0].set_xlabel('Date')
ax[0].set_ylabel('Price Difference')
ax[0].grid(True)

# Right panel: monthly net after-hours effect, green for net gain, red for loss.
monthly_net_effect = df['Daily_Diff'].resample('M').sum()
colors = ['green' if x > 0 else 'red' for x in monthly_net_effect]
monthly_net_effect.plot(kind='bar', ax=ax[1], color=colors)
ax[1].set_title("Net Effect of After-Hours Trading (Monthly)")
ax[1].set_xlabel('Month')
ax[1].set_ylabel('Net Price Change')
ax[1].grid(True)
# fix: a pandas bar plot places bars at categorical positions, not dates, so
# mdates.DateFormatter could not render the month names; label the ticks directly.
ax[1].set_xticklabels(monthly_net_effect.index.strftime('%b'))
ax[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()
EXPERIMENT 5 SEABORN AND BOXPLOTS
5(A)
# EXPERIMENT 5(A): correlation between quake magnitude and tsunami occurrence.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Simulated sample: 100 magnitudes in [5, 9] with a 30% tsunami rate.
np.random.seed(42)
magnitude = np.random.uniform(5.0, 9.0, 100)
tsunami = np.random.choice([0, 1], size=100, p=[0.7, 0.3])
data = pd.DataFrame({'magnitude': magnitude, 'tsunami': tsunami})

# Pairwise correlations rendered as an annotated heatmap on a fixed [-1, 1] scale.
corr_matrix = data.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', vmin=-1, vmax=1)
plt.title("Correlation between Earthquake Magnitude and Tsunami")
plt.show()
5(B)
# EXPERIMENT 5(B): box plots with Tukey fences for volume and closing price.
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


def _tukey_fences(series):
    """Return (lower, upper) Tukey fences: Q1 - 1.5*IQR and Q3 + 1.5*IQR."""
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    return q1 - 1.5 * iqr, q3 + 1.5 * iqr


# Simulated daily trading volume and closing prices.
np.random.seed(42)  # For reproducibility
volume = np.random.randint(1000, 5000, size=100)
closing_price = np.random.randint(250, 350, size=100)
data = pd.DataFrame({
    'Volume': volume,
    'Closing Price': closing_price
})

# One pair of fences per column (previously computed twice with duplicated code).
lower_bound_volume, upper_bound_volume = _tukey_fences(data['Volume'])
lower_bound_price, upper_bound_price = _tukey_fences(data['Closing Price'])

fig, axs = plt.subplots(1, 2, figsize=(14, 6))

sns.boxplot(x=data['Volume'], ax=axs[0])
axs[0].axvline(lower_bound_volume, color='r', linestyle='--', label='Lower Tukey Fence')
axs[0].axvline(upper_bound_volume, color='g', linestyle='--', label='Upper Tukey Fence')
axs[0].set_title("Facebook Trading Volume with Tukey Fences")
axs[0].legend()

sns.boxplot(x=data['Closing Price'], ax=axs[1])
axs[1].axvline(lower_bound_price, color='r', linestyle='--', label='Lower Tukey Fence')
axs[1].axvline(upper_bound_price, color='g', linestyle='--', label='Upper Tukey Fence')
axs[1].set_title("Facebook Closing Price with Tukey Fences")
axs[1].legend()

plt.tight_layout()
plt.show()
5(C)
# EXPERIMENT 5(C): cumulative COVID-19 cases with the 1-million-case milestone.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Simulated daily case counts accumulated over 200 days.
np.random.seed(42)
dates = pd.date_range(start="2020-01-01", periods=200, freq='D')
cumulative_cases = np.cumsum(np.random.randint(10000, 50000, size=200))
data = pd.DataFrame({'date': dates, 'cumulative_cases': cumulative_cases})

# First date on which the running total reached one million cases.
surpass_date = data.loc[data['cumulative_cases'] >= 1000000, 'date'].iloc[0]

plt.figure(figsize=(10, 6))
plt.plot(data['date'], data['cumulative_cases'], label='Cumulative COVID-19 Cases', color='b')
plt.axvline(surpass_date, color='r', linestyle='--', label='Surpassed 1 million cases')
plt.title("Cumulative COVID-19 Cases Worldwide")
plt.xlabel("Date")
plt.ylabel("Cumulative Cases")
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
EXPERIMENT 6 (A) STOCK ANALYSIS
# EXPERIMENT 6(A): Netflix support/resistance levels over simulated OHLCV data.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simulated 2019-2020 business-day prices (seeded).
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
np.random.seed(42)
open_prices = np.random.uniform(250, 400, size=len(date_range))
high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
close_prices = np.random.uniform(low_prices, high_prices)  # Close between Low and High
volume = np.random.randint(500000, 5000000, size=len(date_range))  # Random volume
data = pd.DataFrame({
    'Date': date_range,
    'Open': open_prices,
    'High': high_prices,
    'Low': low_prices,
    'Close': close_prices,
    'Volume': volume
})

# Support/resistance levels at thirds of the overall low-to-high range.
low = data['Low'].min()  # Lowest price (support)
high = data['High'].max()  # Highest price (resistance)
support = [low, low + 0.33 * (high - low), low + 0.66 * (high - low)]
resistance = [high, high - 0.33 * (high - low), high - 0.66 * (high - low)]

plt.figure(figsize=(10, 6))
plt.plot(data['Date'], data['Close'], label='Netflix Closing Price', color='blue')
plt.axhline(support[0], color='green', linestyle='--', label='Support Level 1')
plt.axhline(support[1], color='yellow', linestyle='--', label='Support Level 2')
plt.axhline(support[2], color='red', linestyle='--', label='Support Level 3')
plt.axhline(resistance[0], color='red', linestyle='--', label='Resistance Level 1')
plt.axhline(resistance[1], color='yellow', linestyle='--', label='Resistance Level 2')
plt.axhline(resistance[2], color='green', linestyle='--', label='Resistance Level 3')
plt.title('Netflix Stock: Support and Resistance Levels')
# fix: the extraction fused the next four calls two-per-line (a SyntaxError);
# each statement is now on its own line.
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()
6(B)
# EXPERIMENT 6(B): after-hours change (open minus previous close) for each FAANG stock.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
# fix: `stocks` and `faang_data` were used below but never defined (NameError);
# initialise them the same way Experiment 6(C) does.
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']
faang_data = {}

# Simulate OHLCV data for each ticker (loop body indentation restored).
for stock in stocks:
    open_prices = np.random.uniform(250, 400, size=len(date_range))
    high_prices = open_prices + np.random.uniform(1, 10, size=len(date_range))
    low_prices = open_prices - np.random.uniform(1, 10, size=len(date_range))
    close_prices = np.random.uniform(low_prices, high_prices)  # Close between Low and High
    volume = np.random.randint(500000, 5000000, size=len(date_range))
    faang_data[stock] = pd.DataFrame({
        'Date': date_range,
        'Open': open_prices,
        'High': high_prices,
        'Low': low_prices,
        'Close': close_prices,
        'Volume': volume
    })
    faang_data[stock].set_index('Date', inplace=True)

# One subplot per ticker showing the after-hours change.
plt.figure(figsize=(15, 10))
for i, stock in enumerate(stocks):
    data = faang_data[stock]
    data['After Hours Change'] = data['Open'] - data['Close'].shift(1)
    plt.subplot(3, 2, i + 1)
    plt.plot(data.index, data['After Hours Change'], label=f'After Hours Change - {stock}')
    plt.title(f'After Hours Trading Effect - {stock}')
    plt.xlabel('Date')
    plt.ylabel('Price Change (USD)')
    plt.xticks(rotation=45)
    plt.legend()
plt.tight_layout()
plt.show()
6(C)
# EXPERIMENT 6(C): cumulative returns of an equal-weight FAANG portfolio vs its members.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simulate business-day OHLCV data per ticker (seeded; RNG draw order matters
# for reproducibility, so each ticker draws open, high, low, close, volume in turn).
np.random.seed(42)
date_range = pd.date_range(start="2019-01-01", end="2020-12-31", freq='B')
stocks = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG']
faang_data = {}
n_days = len(date_range)
for stock in stocks:
    open_prices = np.random.uniform(250, 400, size=n_days)
    high_prices = open_prices + np.random.uniform(1, 10, size=n_days)
    low_prices = open_prices - np.random.uniform(1, 10, size=n_days)
    close_prices = np.random.uniform(low_prices, high_prices)  # Close between Low and High
    volume = np.random.randint(500000, 5000000, size=n_days)
    frame = pd.DataFrame({
        'Date': date_range,
        'Open': open_prices,
        'High': high_prices,
        'Low': low_prices,
        'Close': close_prices,
        'Volume': volume
    })
    frame.set_index('Date', inplace=True)
    faang_data[stock] = frame

# Daily percentage returns per ticker, plus an equal-weight portfolio column.
portfolio_data = pd.DataFrame({stock: faang_data[stock]['Close'] for stock in stocks})
portfolio_returns = portfolio_data.pct_change()
weights = np.ones(len(stocks)) / len(stocks)
portfolio_returns['Portfolio'] = portfolio_returns.dot(weights)
portfolio_returns['Portfolio Cumulative'] = (1 + portfolio_returns['Portfolio']).cumprod()

# Plot each ticker's cumulative return, then the portfolio on top in bold black.
plt.figure(figsize=(14, 8))
for stock in stocks:
    portfolio_returns[stock] = (1 + portfolio_returns[stock]).cumprod()
    plt.plot(portfolio_returns.index, portfolio_returns[stock], label=f'{stock} Cumulative Return')
plt.plot(portfolio_returns.index, portfolio_returns['Portfolio Cumulative'],
         label='Portfolio Cumulative Return', color='black', linewidth=3)
plt.title('Cumulative Returns: FAANG Portfolio vs Individual Stocks')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

You might also like