# Superstores Dataset Documentation
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
# --- First look at the DataFrame `df` (created outside this section) ---
# NOTE: the bare expressions below only render output in a notebook/REPL;
# run as a plain script they are evaluated and discarded.
df.describe()             # summary statistics for the numeric columns
df.describe(include='O')  # summary statistics for the object (string) columns
df.columns
df.info()
df.isnull().sum()         # missing-value count per column
df.duplicated()           # boolean mask marking repeated rows
# drop_duplicates() returns a new DataFrame rather than modifying `df`;
# assign it back so the duplicates are actually removed before re-summarising.
df = df.drop_duplicates()
df.describe()
# Box plot of the 'Sales' column to visualise its spread and outliers.
plt.figure(figsize=(8, 6))
# Pass the column by keyword: positional data arguments are deprecated in
# seaborn 0.11 and bind to `data` (not `x`) from 0.12 on, so the explicit
# `x=` keeps the horizontal orientation on every version.
sns.boxplot(x=df['Sales'])
plt.title('Box Plot of Sales')
plt.show()
# Display the DataFrame (renders only in a notebook/REPL).
df
# NOTE(review): no plotting call is visible before the title below, so as
# written this titles and shows whatever pyplot's current axes happen to be.
# Presumably a "Sales by Category" chart was drawn here originally -- TODO
# confirm and restore the missing plot call.
plt.title('Sales by Category')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.show()
# Count the distinct product names (missing values are not counted).
products = len(df['Product Name'].dropna().unique())
products
# Total 'Sales' per 'Region', flattened back into a two-column DataFrame.
store_sales = (
    df.groupby('Region')['Sales']
    .sum()
    .reset_index()
)
print(store_sales)

# Grab the current figure/axes once and draw through the Axes API.
p = plt.gcf()
donut_ax = p.gca()
donut_ax.pie(
    store_sales['Sales'],
    labels=store_sales['Region'],
    autopct='%1.1f%%',
    startangle=160,
    colors=['#008a72', '#dc6a5d', '#9474b4', '#006cb2', '#4eace7'],
)

# A white disc over the centre turns the pie into a donut chart.
my_circle = plt.Circle((0, 0), 0.7, color='white')
donut_ax.add_artist(my_circle)

donut_ax.set_title('Sales Distribution by Store Location ')
donut_ax.axis('equal')  # equal aspect ratio keeps the pie circular
donut_ax.set_xticks([])
donut_ax.set_yticks([])
plt.show()
# Annotated heatmap of `pivot_df` (built outside this section).
plt.figure(figsize=(10, 6))
heatmap_ax = sns.heatmap(
    pivot_df,
    annot=True,
    fmt=".2f",
    cmap="YlGnBu",
)
heatmap_ax.set_title(
    'Relationship between Customer Segment, Product Category, and Sales'
)
heatmap_ax.set_xlabel('Product Category')
heatmap_ax.set_ylabel('Customer Segment')
plt.show()
# Summary statistics for the 'Delivery Days' column (renders only in a
# notebook/REPL). The original repeated this call twice; once is enough.
df['Delivery Days'].describe()

# Kernel density estimate of the 'Delivery Days' distribution.
plt.figure(figsize=(10, 6))
# `shade=` was deprecated in seaborn 0.11 in favour of `fill=` and is
# scheduled for removal; `fill=True` produces the same filled curve.
sns.kdeplot(df['Delivery Days'], fill=True)
plt.title('Distribution of Delivery days')
plt.xlabel('delivery days')
plt.ylabel('Density')
plt.show()
# Render the figure object `scatter_fig` (created outside this section).
scatter_fig.show()
# Peek at five randomly chosen rows (renders only in a notebook/REPL).
df.sample(n=5)
# Bar chart of `frequents` (computed outside this section).
plt.figure(figsize=(14, 6))
orders_ax = frequents.plot.bar()
orders_ax.set_title('Number of Frequent Customers with More Than 10 Orders')
orders_ax.set_xlabel('Customer Name')
orders_ax.set_ylabel('Number of Orders')
plt.xticks(rotation=90)  # vertical tick labels
plt.show()
# Pie chart comparing the two counts `num_frequents` and `num_nonfrequents`
# (both computed outside this section).
plt.figure(figsize=(6, 6))
share_ax = plt.gca()
share_ax.pie(
    [num_frequents, num_nonfrequents],
    labels=['Frequent', 'Not Frequent'],
    autopct='%1.1f%%',
    colors=['#8cb5db', '#8c8c8c'],
)
share_ax.set_title('Relationship Between Frequent and Not Frequent Clients')
plt.show()