0% found this document useful (0 votes)
16 views

Code

This Python code aggregates household electricity consumption data from multiple files into 15-minute intervals and averages the values within each interval. It loops through files for each household, extracts timestamp and power values from each row, calculates the average when data falls within the same 15-minute period, and writes the aggregated values to a new output file. Key variables tracked include the number of files processed, total rows read, and new rows written to the output file.

Uploaded by

itsack
Copyright
© © All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views

Code

This Python code aggregates household electricity consumption data from multiple files into 15-minute intervals and averages the values within each interval. It loops through files for each household, extracts timestamp and power values from each row, calculates the average when data falls within the same 15-minute period, and writes the aggregated values to a new output file. Key variables tracked include the number of files processed, total rows read, and new rows written to the output file.

Uploaded by

itsack
Copyright
© © All Rights Reserved
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 2

from datetime import datetime

from decimal import *


import glob
import os
import pandas as pd #pip install this first before executing

#4-significant decimal digits precision and rounding


getcontext().rounding = ROUND_HALF_EVEN
DECIMAL_DIGITS = "1.0000"

MAX_FILES = 205 #1611 original for 22 houses


H_BASE_DIR = "D:/DATASETS/Disaggregation/BY HOUSEHOLD/"
TC_BASE_DIR = "D:/DATASETS/Disaggregation/TOTAL CONSUMPTIONS/"
NEW_FILE = "D:/DATASETS/Disaggregation/TOTAL CONSUMPTIONS/tc_temp_dataset.csv"

def listdirs(rootdir):
for file in os.listdir(rootdir):
d = os.path.join(rootdir, file)
if os.path.isdir(d):
listdirs(d)

#MAIN
#vars
totalFiles = 0
totalRows = 0
newRows = 0
dt_min = -1
dt_hr = -1
dt_day = ""
old_dt = datetime.now()
ap_sum = 0.0
rp_sum = 0.0
min_ctr = 0

#prepare new dataset file


ds = open(NEW_FILE, "a")

#loop by house
for h in os.scandir(TC_BASE_DIR):
if h.is_dir():
HDir = h.path + "/"
HNo = h.path[-2:]

#loop by dates
for d in os.scandir(HDir):
if d.is_dir():
DateDir = d.path
path = DateDir + "/"
dt_min = -1
dt_hr = -1
dt_day = ""

#get csv file


print(path)
csv_files = glob.glob(path + "*.csv")
for file in csv_files:
#read content into data frame

df = pd.read_csv(file)

#iterate every row of csv file, skip first row (header row)
for i in range(0, len(df)):
ap = df.iloc[i, 0] #Active Power
rp = df.iloc[i, 1] #Reactive Power
tse = str(df.iloc[i, 2]) #timestamp in scientific notation
#change big E to small e
tse = tse.replace("E", "e")
#convert tse
sts = str(int(float(tse)))
ts10 = sts[0:10]
ts = int(ts10)
dt = datetime.fromtimestamp(ts)
#print(dt, "\nMIN: ", dt.minute, " | HR: ", dt.hour, " |
DATE: ", dt.day)
totalRows += 1
if dt_min == dt.minute and dt_hr == dt.hour and dt_day ==
dt.day:
#add for average
ap_sum += float(ap)
rp_sum = float(rp)
min_ctr += 1
else:
if ap_sum != 0.0 and rp_sum != 0.0:
ap_avg = ap_sum/min_ctr
rp_avg = rp_sum/min_ctr
#sdate = str(dt.year) + "-" +
str(dt.month).zfill(2) + "-" + str(dt.day).zfill(2) + " " + str(dt.hour).zfill(2) +
":" + str(dt.minute).zfill(2) + ":00"
sdate = old_dt.strftime("%Y-%m-%d %H:%M:00")
#append to new file
ds.writelines(sdate + ",H" + HNo + "," +
str(Decimal(ap_avg).quantize(Decimal(DECIMAL_DIGITS))) + "," +
str(Decimal(rp_avg).quantize(Decimal(DECIMAL_DIGITS))) + "," + old_dt.strftime("%w,
%H") + "\n") #DATETIME, HOUSE_ID, AVG_ACTIVE_POWER, AVG_REACTIVE_POWER, WEEKDAY,
HOUR
newRows += 1

# set as new vars


dt_min = dt.minute
dt_hr = dt.hour
dt_day = dt.day
min_ctr = 1
ap_sum = float(ap)
rp_sum = float(rp)
old_dt = dt

totalFiles += 1
print("Total Files Processed = ", totalFiles, "/", MAX_FILES, "
: {:.2f}".format((totalFiles/MAX_FILES)*100), "%")

ds.close()
print("Total Rows Processed = {:,}".format(totalRows))
print("Total New Rows Created = {:,}".format(newRows))

You might also like