Task 1
import numpy as np
import pandas as pd
import seaborn as sns
# Read the transaction records from the Excel workbook, then preview the
# first rows to sanity-check the columns that were loaded.
transaction_data = pd.read_excel("/content/QVI_transaction_data.xlsx")
transaction_data.head()
{"type":"dataframe","variable_name":"transaction_data"}
# Read the customer purchase-behaviour records from CSV, then preview the
# first rows.
customer_data = pd.read_csv("/content/QVI_purchase_behaviour.csv")
customer_data.head()
# Summary statistics for the numeric columns of the transaction table.
transaction_data.describe()
# Number of missing values in each column (isna is the same check as isnull).
transaction_data.isna().sum()
DATE 0
STORE_NBR 0
LYLTY_CARD_NBR 0
TXN_ID 0
PROD_NBR 0
PROD_NAME 0
PROD_QTY 0
TOT_SALES 0
dtype: int64
# Capture and print the dtype of every transaction column.
# NOTE(review): the printed output reports DATE as int64 -- presumably an
# Excel serial date that still needs converting to a datetime; confirm.
data_type = transaction_data.dtypes
print(data_type)
DATE int64
STORE_NBR int64
LYLTY_CARD_NBR int64
TXN_ID int64
PROD_NBR int64
PROD_NAME object
PROD_QTY int64
TOT_SALES float64
dtype: object
# Distribution of TOT_SALES across all transactions, with a KDE curve overlaid.
sns.displot(transaction_data["TOT_SALES"], kde=True)
<seaborn.axisgrid.FacetGrid at 0x7cbba5d46440>
# Keep only the float and integer columns of the transaction table, then
# preview the first rows of the result.
numericdata = transaction_data.select_dtypes(include=["float", "int"])
numericdata.head()
{"type":"dataframe","variable_name":"numericdata"}
# Restrict to rows whose TOT_SALES is strictly below 8 and re-plot the
# distribution of the remaining values.
# NOTE(review): the 8.0 cut-off is hard-coded -- presumably chosen by eye
# from the full distribution plot; confirm.
x = numericdata.loc[numericdata["TOT_SALES"] < 8.0]
sns.displot(x["TOT_SALES"], kde=True)
<seaborn.axisgrid.FacetGrid at 0x7cbbe4e08730>
# Box plot of the filtered TOT_SALES values.
# NOTE(review): the series is passed positionally; how seaborn interprets a
# lone positional argument may depend on the installed version -- confirm the
# orientation is as intended.
sns.boxplot(x["TOT_SALES"])
<Axes: ylabel='TOT_SALES'>