08/06/2025, 23:39 unit 5 (python) - Colab
import matplotlib.pyplot as plt
import pandas as pd
# Monthly product sales for one year, keyed by month in calendar order.
monthly_sales = {
    'Jan': 120, 'Feb': 135, 'Mar': 150, 'Apr': 145,
    'May': 160, 'Jun': 175, 'Jul': 180, 'Aug': 170,
    'Sep': 165, 'Oct': 180, 'Nov': 195, 'Dec': 210,
}
months = list(monthly_sales)
sales = list(monthly_sales.values())

# Line chart of the 12-month trend, one circular marker per month.
trend_fig, trend_ax = plt.subplots(figsize=(10, 6))
trend_ax.plot(months, sales, marker='o', linestyle='-')
trend_ax.set_xlabel('Month')
trend_ax.set_ylabel('Sales Amount')
trend_ax.set_title('Product Sales Trend Over the Last 12 Months')
trend_ax.grid(True)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
import numpy as np
# Build a reproducible synthetic table of 100 customers.
np.random.seed(42)
data = {
    'Age': np.random.randint(18, 65, 100),
    # Income is stored as float from the start: the adjustments below multiply
    # by fractional factors, and writing float results into an int64 column
    # triggers pandas' "incompatible dtype" FutureWarning (an error in future
    # versions). The cast does not change the random draws.
    'Income': np.random.randint(30000, 120000, 100).astype(float),
    'CustomerID': range(1, 101),
}
df = pd.DataFrame(data)

# Skew income by age band: customers under 30 earn 20% less,
# customers over 50 earn 20% more.
under_30 = df['Age'] < 30
over_50 = df['Age'] > 50
df.loc[under_30, 'Income'] *= 0.8
df.loc[over_50, 'Income'] *= 1.2
df.loc[(df['Age'] >= 30) & (df['Age'] <= 50) & (df['Income'] > 80000), 'Income'] = df.loc[(df['Age'] >= 30) & (df['Age'] <= 50) & (df['I
# Show the first rows of the customer table under a heading.
print("Sample Data:", df.head(), sep="\n")

# Histogram of customer ages (10 bins) with a kernel-density curve overlaid.
age_fig, age_ax = plt.subplots(figsize=(8, 6))
sns.histplot(df['Age'], bins=10, kde=True, ax=age_ax)
age_ax.set_title('Distribution of Customer Age')
age_ax.set_xlabel('Age')
age_ax.set_ylabel('Frequency')
plt.show()
# Histogram of customer income (10 bins) with a kernel-density curve overlaid.
# The page-footer URL and page-header text that the notebook print-out had
# inserted between these statements were removed: they are not Python and
# prevented the cell from running.
plt.figure(figsize=(8, 6))
sns.histplot(df['Income'], bins=10, kde=True)
plt.title('Distribution of Customer Income')
plt.xlabel('Income')
plt.ylabel('Frequency')
plt.show()
# Scatter plot of income against age, one point per customer.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(data=df, x='Age', y='Income', ax=scatter_ax)
scatter_ax.set_title('Customer Age vs. Income')
scatter_ax.set_xlabel('Age')
scatter_ax.set_ylabel('Income')
plt.show()
# Segment customers into three groups with K-Means on age and income.
from sklearn.preprocessing import StandardScaler

X = df[['Age', 'Income']]

# K-Means uses Euclidean distance. Income is generated in the tens of
# thousands while Age only spans 18-64, so on the raw values the clusters
# would be decided almost entirely by Income. Standardizing each feature to
# zero mean / unit variance lets both contribute comparably.
X_scaled = StandardScaler().fit_transform(X)

kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(X_scaled)  # note: kmeans.cluster_centers_ are in standardized units
df['Cluster'] = kmeans.labels_

# Plot the segments in the original (unscaled) units, coloured by cluster.
plt.figure(figsize=(10, 8))
sns.scatterplot(x='Age', y='Income', hue='Cluster', data=df, palette='viridis', s=50)
plt.title('Customer Segmentation: Age vs. Income with Clusters')
plt.xlabel('Age')
plt.ylabel('Income')
plt.legend(title='Cluster')
plt.show()
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 2/11
08/06/2025, 23:39 unit 5 (python) - Colab
<ipython-input-2-c3ba236435c0>:17: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in
67414.4 88284.8 77771.2 68656. 41839.2 75235.2 58352.8 41467.2 38437.6
84612.8 77473.6 70442.4 76254.4 42931.2 77737.6 45388.8]' has dtype incompatible with int64, please explicitly cast to a compati
df.loc[df['Age'] < 30, 'Income'] = df.loc[df['Age'] < 30, 'Income'] * 0.8
Sample Data:
Age Income CustomerID
0 56 39234.0 1
1 46 78190.0 2
2 32 35258.0 3
3 60 141045.6 4
4 25 55603.2 5
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 3/11
08/06/2025, 23:39 unit 5 (python) - Colab
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 4/11
08/06/2025, 23:39 unit 5 (python) - Colab
!pip install mplfinance==0.12.10b0
import matplotlib.pyplot as plt
import pandas as pd
import mplfinance as mpf
import numpy as np
# Synthetic daily OHLCV quotes for 50 consecutive days starting 2023-01-01.
N_DAYS = 50
dates = pd.date_range(start='2023-01-01', periods=N_DAYS, freq='D')

# Fixed seed so the generated prices are reproducible. The order of the
# random draws below (open, high offset, low offset, close, volume) determines
# the resulting values.
np.random.seed(42)
open_prices = np.random.uniform(150, 160, N_DAYS)
high_prices = open_prices + np.random.uniform(0, 5, N_DAYS)   # high is at or above open
low_prices = open_prices - np.random.uniform(0, 5, N_DAYS)    # low is at or below open
close_prices = np.random.uniform(low_prices, high_prices, N_DAYS)  # close lies in the day's range
volume = np.random.randint(10000, 50000, N_DAYS)

# Assemble the date-indexed frame with the column names mplfinance expects.
stock_data = pd.DataFrame(
    dict(
        Open=open_prices,
        High=high_prices,
        Low=low_prices,
        Close=close_prices,
        Volume=volume,
    ),
    index=dates,
)
# Line chart of the daily closing price.
close_fig, close_ax = plt.subplots(figsize=(12, 6))
close_ax.plot(stock_data.index, stock_data['Close'])
close_ax.set_title('Stock Closing Price Over Time (Line Chart)')
close_ax.set_xlabel('Date')
close_ax.set_ylabel('Closing Price')
close_ax.grid(True)
plt.show()

# Candlestick chart of the same data with a volume panel underneath.
candle_options = dict(
    type='candle',
    style='yahoo',
    title='Stock Price Movements (Candlestick Chart)',
    ylabel='Price',
    volume=True,
    figscale=1.5,
)
mpf.plot(stock_data, **candle_options)
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 5/11
08/06/2025, 23:39 unit 5 (python) - Colab
Requirement already satisfied: mplfinance==0.12.10b0 in /usr/local/lib/python3.11/dist-packages (0.12.10b0)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from mplfinance==0.12.10b0) (3.10.0)
Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from mplfinance==0.12.10b0) (2.2.2)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10b
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10b0) (
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10
Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10
Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10b0) (2
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10b0
Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10b0) (11.
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12.10b
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->mplfinance==0.12
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->mplfinance==0.12.10b0) (2025
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->mplfinance==0.12.10b0) (20
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.7->matplotlib->mplfin
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 6/11
08/06/2025, 23:39 unit 5 (python) - Colab
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Total sales per region, plus the rows holding the largest and smallest totals.
region_names = ['North', 'South', 'East', 'West', 'Central']
region_totals = [45000, 60000, 52000, 75000, 48000]
data = {'Region': region_names, 'Sales': region_totals}
sales_df = pd.DataFrame(data)

sales_column = sales_df['Sales']
# idxmax/idxmin give the row label of the extreme value; .loc fetches that row.
highest_sales_region = sales_df.loc[sales_column.idxmax()]
lowest_sales_region = sales_df.loc[sales_column.idxmin()]
# One colour per region: the top seller is green, the bottom seller is salmon,
# every other region is sky blue. The same colours are reused in both charts.
highest_index = sales_df['Sales'].idxmax()
lowest_index = sales_df['Sales'].idxmin()
highlight = {highest_index: 'lightgreen', lowest_index: 'salmon'}
colors = [highlight.get(position, 'skyblue') for position in range(len(sales_df))]

# Bar chart of total sales by region.
bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
bar_ax.bar(sales_df['Region'], sales_df['Sales'], color=colors)
bar_ax.set_xlabel('Region')
bar_ax.set_ylabel('Total Sales')
bar_ax.set_title('Sales Distribution by Region')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Pie chart of each region's share of total sales.
pie_fig, pie_ax = plt.subplots(figsize=(8, 8))
pie_ax.pie(
    sales_df['Sales'],
    labels=sales_df['Region'],
    autopct='%1.1f%%',
    startangle=90,
    colors=colors,
)
pie_ax.set_title('Sales Proportion by Region')
pie_ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

# Text summary of the best and worst performing regions.
print(f"Region with Highest Sales: {highest_sales_region['Region']} ({highest_sales_region['Sales']})")
print(f"Region with Lowest Sales: {lowest_sales_region['Region']} ({lowest_sales_region['Sales']})")
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 7/11
08/06/2025, 23:39 unit 5 (python) - Colab
Region with Highest Sales: West (75000)
Region with Lowest Sales: North (45000)
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Radar-chart inputs: one score per performance metric, and one evenly spaced
# angle (in radians) per metric around the circle.
# The page-footer URL and page-header text that the notebook print-out had
# inserted in the middle of this cell were removed; they are not Python.
metrics = ['Productivity', 'Collaboration', 'Leadership', 'Problem-Solving', 'Communication']
employee_scores = [4, 3, 5, 4, 5]
num_metrics = len(metrics)
angles = [n / float(num_metrics) * 2 * np.pi for n in range(num_metrics)]

# Repeat the first entry at the end of each list so the plotted outline
# closes back on its starting point.
metrics = metrics + [metrics[0]]
employee_scores = employee_scores + [employee_scores[0]]
angles = angles + [angles[0]]
# Draw the employee's scores as a filled polygon on polar axes.
plt.figure(figsize=(8, 8))
ax = plt.subplot(111, polar=True)
ax.plot(angles, employee_scores, 'o-', linewidth=2)
ax.fill(angles, employee_scores, alpha=0.25)
# `angles` is a plain Python list, so `angles * 180/np.pi` raised
# "TypeError: unsupported operand type(s) for /: 'list' and 'float'".
# np.degrees converts the list element-wise. The last entry of `angles` and
# `metrics` only repeats the first one to close the polygon, so it is dropped
# here to give exactly one grid label per metric.
ax.set_thetagrids(np.degrees(angles[:-1]), metrics[:-1])
ax.set_ylim(0, 5)
plt.title('Employee Performance Radar Chart', va='bottom')
plt.show()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-8-96f9bd57a8bf> in <cell line: 0>()
22
23
---> 24 ax.set_thetagrids(angles * 180/np.pi, metrics)
25
26
TypeError: unsupported operand type(s) for /: 'list' and 'float'
Next steps: Explain error
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
# Reproducible synthetic financial table: 50 rows of independently drawn
# random integers per column, followed by the pairwise correlation matrix.
# The page-footer URL and page-header text that the notebook print-out had
# inserted inside the dict literal were removed; they made it a syntax error.
np.random.seed(42)
data = {
    'Revenue': np.random.randint(100000, 500000, 50),
    'Expenses': np.random.randint(50000, 300000, 50),
    'Profit': np.random.randint(20000, 250000, 50),
    'Marketing Spend': np.random.randint(5000, 50000, 50),
    'Employee Count': np.random.randint(10, 100, 50),
}
financial_df = pd.DataFrame(data)

# Correlation between every pair of columns (DataFrame.corr defaults).
correlation_matrix = financial_df.corr()
# Annotated heatmap of the correlation matrix; each cell shows its
# coefficient to two decimals on a diverging blue-red colour scale.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    linewidths=.5,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Heatmap of Financial Variables')
plt.show()
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Synthetic daily sales for 30 days before and 30 days after a marketing
# campaign; the "after" values are drawn from a range shifted 50 units higher.
dates_before = pd.date_range('2023-01-01', periods=30, freq='D')
sales_before = np.random.randint(100, 200, 30)
dates_after = pd.date_range('2023-02-01', periods=30, freq='D')
sales_after = np.random.randint(150, 250, 30)

df_before = pd.DataFrame({'Date': dates_before, 'Sales': sales_before})
df_after = pd.DataFrame({'Date': dates_after, 'Sales': sales_after})
df_before['Period'] = 'Before Campaign'
df_after['Period'] = 'After Campaign'

# ignore_index gives the stacked frame unique row labels 0..59 instead of
# two copies of 0..29.
combined_df = pd.concat([df_before, df_after], ignore_index=True)

# sort=False keeps the groups in order of first appearance. The default
# alphabetical sort would put "After Campaign" ahead of "Before Campaign",
# so the before/after comparison would read backwards.
average_sales = (
    combined_df.groupby('Period', sort=False)['Sales'].mean().reset_index()
)
# Bar chart comparing the mean daily sales of the two periods.
avg_fig, avg_ax = plt.subplots(figsize=(8, 6))
avg_ax.bar(
    average_sales['Period'],
    average_sales['Sales'],
    color=['skyblue', 'lightgreen'],
)
avg_ax.set_ylabel('Average Daily Sales')
avg_ax.set_title('Average Sales Performance Before and After Marketing Campaign')
# Leave 20% headroom above the taller bar.
avg_ax.set_ylim(0, average_sales['Sales'].max() * 1.2)
plt.show()
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 10/11
08/06/2025, 23:39 unit 5 (python) - Colab
# Daily sales over time, drawn as one labelled line per campaign period.
daily_fig, daily_ax = plt.subplots(figsize=(12, 6))
for period_df, period_label in (
    (df_before, 'Before Campaign'),
    (df_after, 'After Campaign'),
):
    daily_ax.plot(period_df['Date'], period_df['Sales'], label=period_label, marker='o')
daily_ax.set_xlabel('Date')
daily_ax.set_ylabel('Daily Sales')
daily_ax.set_title('Daily Sales Trend Before and After Marketing Campaign')
daily_ax.legend()
daily_ax.grid(True)
plt.show()
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Synthetic hourly traffic counts for 72 hours starting 2023-10-26 00:00.
# The lowercase 'h' alias is used because the uppercase 'H' frequency alias
# is deprecated in pandas 2.2 and emits a FutureWarning.
dates = pd.date_range('2023-10-26', periods=72, freq='h')
traffic = np.random.randint(50, 200, 72)

# Add a five-hour surge at the same hours (10:00-14:00) of each of the
# three days, with a different size per day.
traffic[10:15] += 50
traffic[34:39] += 70
traffic[58:63] += 60
https://colab.research.google.com/drive/1T6zPX0dM3l7uZw5HfQdL4f06_DzkQwrX#scrollTo=y-K1EGx5o4fT 11/11