0% found this document useful (0 votes)
3 views

utils.py

This module provides helper functions for exploratory data analysis (EDA) and modeling exercises, including functions to read and clean demand and promotion data. It also includes functionality to merge demand with promotions and extend promotions over multiple days. Additionally, there is a function to aggregate data to a weekly format.

Uploaded by

salimshaik045
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views

utils.py

This module provides helper functions for exploratory data analysis (EDA) and modeling exercises, including functions to read and clean demand and promotion data. It also includes functionality to merge demand with promotions and extend promotions over multiple days. Additionally, there is a function to aggregate data to a weekly format.

Uploaded by

salimshaik045
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 2

"""

This module contains helper functions for the EDA and modelling
exercises. Feel free to use them to get you started more quickly.

"""
import pandas as pd
import numpy as np
from datetime import datetime

def parse_time(s: str):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Args:
        s: Date string in ``%Y-%m-%d`` format, e.g. ``"2020-03-05"``.

    Returns:
        The corresponding ``datetime.date`` (the time component is dropped).

    Raises:
        ValueError: If ``s`` does not match the ``%Y-%m-%d`` format.
    """
    return datetime.strptime(s, "%Y-%m-%d").date()

def read_demand(path) -> pd.DataFrame:
    """Read the demand CSV and index it by date.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or file-like
            object). The file must contain a ``date`` column formatted as
            ``YYYY-MM-DD``.

    Returns:
        The CSV contents with the ``date`` column moved into a
        ``DatetimeIndex`` named ``"date"``; all other columns are returned
        as read.

    Raises:
        ValueError: If a date string does not match ``%Y-%m-%d``.
    """
    df = pd.read_csv(path)
    # Parse the whole column in one vectorised call with an explicit format,
    # instead of calling strptime row by row and rebuilding the index after.
    # NOTE: missing dates become NaT here rather than raising.
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    return df.set_index("date")

def read_promotions(path) -> pd.DataFrame:
    """Read the promotions CSV and index it by promotion start date.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or file-like
            object). The first CSV column is treated as a row index and is
            discarded; the file must contain a ``promotion_date`` column
            formatted as ``YYYY-MM-DD``.

    Returns:
        The CSV contents with ``promotion_date`` moved into a
        ``DatetimeIndex`` named ``"promotion_date"``.

    Raises:
        ValueError: If a date string does not match ``%Y-%m-%d``.
    """
    df = pd.read_csv(path, index_col=0)
    # Parse the whole column in one vectorised call with an explicit format,
    # instead of calling strptime row by row and rebuilding the index after.
    # NOTE: missing dates become NaT here rather than raising.
    df["promotion_date"] = pd.to_datetime(df["promotion_date"], format="%Y-%m-%d")
    return df.set_index("promotion_date")

def clean(ts: pd.Series) -> pd.Series:
    """Replace missing values in a series.

    Each missing value is first back-filled from the next valid observation.
    Values that are still missing afterwards (trailing gaps with nothing
    after them) are filled with the mean of the original series.

    Args:
        ts: Series that may contain missing values.

    Returns:
        A new series with the gaps filled. A series with no valid values at
        all is returned unchanged, because its mean is itself missing.
    """
    return ts.bfill().fillna(ts.mean())

def clean_demand_per_group(demand: pd.DataFrame) -> pd.DataFrame:
    """Fill missing ``demand`` values separately per (supermarket, sku) pair.

    ``clean`` is applied to the ``demand`` column of each group, so gaps are
    filled only from observations of the same supermarket and SKU.

    Args:
        demand: Frame with ``supermarket``, ``sku`` and ``demand`` columns.
            It is modified in place.

    Returns:
        The same ``demand`` frame, with its ``demand`` column cleaned.
    """
    keys = ["supermarket", "sku"]
    # One pass over the groups that actually exist, instead of scanning the
    # whole frame for every combination of supermarket and SKU.
    cleaned = demand.groupby(keys, sort=False)["demand"].transform(clean)
    # Rows with a missing key belong to no group; leave them untouched.
    has_keys = demand[keys].notna().all(axis=1)
    # Assign by position: the index may repeat the same date across groups.
    demand.loc[has_keys, "demand"] = cleaned[has_keys].to_numpy()
    return demand

def merge(demand: pd.DataFrame, promotions: pd.DataFrame) -> pd.DataFrame:
    """Outer-join demand with promotions and add a boolean ``promotion`` flag.

    The promotions index is renamed to ``"date"`` and the frames are joined
    on ``supermarket``, ``sku`` and ``date``. Each of these keys must be
    resolvable as a column or index level name in both frames.

    Args:
        demand: Demand observations.
        promotions: Promotion rows, indexed by the promotion date.

    Returns:
        The outer join of both frames with a boolean ``promotion`` column:
        ``True`` for rows that come from ``promotions``, ``False`` otherwise.
        Because the join is an outer join, promotions without a matching
        demand row are kept, with missing demand values.
    """
    flagged = promotions.rename_axis("date").assign(promotion=True)
    merged = demand.merge(
        flagged,
        on=["supermarket", "sku", "date"],
        how="outer",
    )
    # Every promotions row carries promotion=True, so after the join the flag
    # is missing exactly where no promotion applied. notna() gives a real
    # bool column without relying on fillna's deprecated object downcasting.
    return merged.assign(promotion=merged["promotion"].notna())

def extend_promotions_days(promotions, n_days):
    """Extend promotions so that each one has a row for every day it runs.

    The input is assumed to hold one row per promotion, indexed by its
    starting date. The output repeats every promotion for ``n_days``
    consecutive days, one row per day, and adds a ``promotion_id`` column
    (the row position in the input) that ties the daily rows together.

    Args:
        promotions: Frame with one row per promotion and a datetime index
            holding the start date.
        n_days: Number of days each promotion lasts. Values below 1 behave
            like 1: only the start-day rows are returned.

    Returns:
        A new frame with the start-day rows first, followed by the rows for
        each subsequent day offset. The input frame is not modified.
    """
    tagged = promotions.assign(promotion_id=np.arange(len(promotions)))
    daily_frames = [tagged]
    for days_to_add in range(1, n_days):
        shifted = tagged.copy()
        shifted.index = tagged.index + pd.Timedelta(days_to_add, "d")
        daily_frames.append(shifted)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(daily_frames)

def aggregate_to_weekly(df):
    """Aggregate daily demand and promotions to weekly rows per group.

    Rows are grouped by ``sku`` and ``supermarket`` and each group is
    resampled to weekly bins (``"W"``). Demand is summed. Promotion is
    aggregated simplistically with ``max``: it is true if a promotion
    occurred on any day of that week.

    Args:
        df: Frame with a datetime index named ``"date"`` and ``sku``,
            ``supermarket``, ``demand`` and ``promotion`` columns.

    Returns:
        A frame indexed by ``date`` (the weekly bin label) with ``sku``,
        ``supermarket``, ``demand`` and ``promotion`` columns.
    """

    def _to_weekly(group):
        return group.resample("W").agg({"demand": "sum", "promotion": "max"})

    # Select only the aggregated columns so apply does not operate on the
    # grouping columns. group_keys=True keeps sku/supermarket in the result
    # index even if a group's weekly index equals its original index.
    grouped = df.groupby(["sku", "supermarket"], group_keys=True)
    weekly = grouped[["demand", "promotion"]].apply(_to_weekly)
    return weekly.reset_index().set_index("date")

You might also like