0% found this document useful (0 votes)
13 views5 pages

Task 1

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views5 pages

Task 1

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

import pandas as pd

import numpy as np
import seaborn as sns

# Load the transaction records from the Excel workbook, then preview the
# first rows.
# NOTE(review): the dtypes output further down reports DATE as int64 --
# presumably Excel serial day numbers; confirm before any date-based analysis.
transaction_data = pd.read_excel(io='/content/QVI_transaction_data.xlsx')

transaction_data.head(n=5)

{"type":"dataframe","variable_name":"transaction_data"}

# Load the per-customer attributes (LYLTY_CARD_NBR, LIFESTAGE,
# PREMIUM_CUSTOMER) from CSV, then preview the first rows.
customer_data = pd.read_csv(
    filepath_or_buffer='/content/QVI_purchase_behaviour.csv',
)

customer_data.head(n=5)

{"summary":"{\n \"name\": \"customer_data\",\n \"rows\": 72637,\n


\"fields\": [\n {\n \"column\": \"LYLTY_CARD_NBR\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
89892,\n \"min\": 1000,\n \"max\": 2373711,\n
\"num_unique_values\": 72637,\n \"samples\": [\n
34250,\n 224159,\n 107092\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"LIFESTAGE\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 7,\n \"samples\": [\n \"YOUNG
SINGLES/COUPLES\",\n \"YOUNG FAMILIES\",\n \"OLDER
FAMILIES\"\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"PREMIUM_CUSTOMER\",\n \"properties\": {\n \"dtype\":
\"category\",\n \"num_unique_values\": 3,\n \"samples\":
[\n \"Premium\",\n \"Mainstream\",\n
\"Budget\"\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n }\n ]\
n}","type":"dataframe","variable_name":"customer_data"}

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the transaction table; the text column PROD_NAME is not included.
transaction_data.describe()

{"summary":"{\n \"name\": \"transaction_data\",\n \"rows\": 8,\n


\"fields\": [\n {\n \"column\": \"DATE\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
81874.91928740985,\n \"min\": 105.38928199808275,\n
\"max\": 264836.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 43464.03626017611,\n 43464.0,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"STORE_NBR\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 93588.85886517387,\n \"min\":
1.0,\n \"max\": 264836.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 135.08010995483997,\n 130.0,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"LYLTY_CARD_NBR\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 798650.3268791955,\n \"min\":
1000.0,\n \"max\": 2373711.0,\n \"num_unique_values\":
8,\n \"samples\": [\n 135549.47640426527,\n
130357.5,\n 264836.0\n ],\n \"semantic_type\":
\"\",\n \"description\": \"\"\n }\n },\n {\n
\"column\": \"TXN_ID\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 813629.9030235903,\n \"min\":
1.0,\n \"max\": 2415841.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 135158.31081499494,\n 135137.5,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"PROD_NBR\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 93614.81219548377,\n \"min\":
1.0,\n \"max\": 264836.0,\n \"num_unique_values\": 8,\n
\"samples\": [\n 56.58315712365388,\n 56.0,\n
264836.0\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"PROD_QTY\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 93623.10757379017,\n \"min\":
0.6436539890116252,\n \"max\": 264836.0,\n
\"num_unique_values\": 6,\n \"samples\": [\n
264836.0,\n 1.907308674047335,\n 200.0\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"TOT_SALES\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
93599.39583319426,\n \"min\": 1.5,\n \"max\": 264836.0,\
n \"num_unique_values\": 8,\n \"samples\": [\n
7.3041995801175075,\n 7.4,\n 264836.0\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe"}

# Count missing values per column (isna is the same method as isnull).
transaction_data.isna().sum()

DATE 0
STORE_NBR 0
LYLTY_CARD_NBR 0
TXN_ID 0
PROD_NBR 0
PROD_NAME 0
PROD_QTY 0
TOT_SALES 0
dtype: int64

# Keep the per-column dtypes in a variable and print them for inspection.
data_type = transaction_data.dtypes
print(data_type)

DATE int64
STORE_NBR int64
LYLTY_CARD_NBR int64
TXN_ID int64
PROD_NBR int64
PROD_NAME object
PROD_QTY int64
TOT_SALES float64
dtype: object

import matplotlib.pyplot as plt


import seaborn as sns

# Histogram of TOT_SALES over all transactions, with a KDE curve overlaid.
sns.displot(transaction_data['TOT_SALES'], kde=True)

<seaborn.axisgrid.FacetGrid at 0x7cbba5d46440>

# Keep only the float and integer columns of the transaction table
# (this drops the text column PROD_NAME), then preview the first rows.
numericdata = transaction_data.select_dtypes(include=['float', 'int'])
numericdata.head(n=5)

{"type":"dataframe","variable_name":"numericdata"}

# Restrict to rows whose TOT_SALES is below 8 -- presumably to zoom in on the
# bulk of the distribution (TODO confirm the choice of threshold) -- and
# redraw the histogram with a KDE curve on that subset.
below_threshold = numericdata['TOT_SALES'] < 8.000
x = numericdata.loc[below_threshold]

sns.displot(x['TOT_SALES'], kde=True)
<seaborn.axisgrid.FacetGrid at 0x7cbbe4e08730>

# Box plot of TOT_SALES for the filtered subset `x`.
sns.boxplot(x['TOT_SALES'])

<Axes: ylabel='TOT_SALES'>

You might also like