Code
Code
def listdirs(rootdir):
for file in os.listdir(rootdir):
d = os.path.join(rootdir, file)
if os.path.isdir(d):
listdirs(d)
#MAIN
#vars
totalFiles = 0
totalRows = 0
newRows = 0
dt_min = -1
dt_hr = -1
dt_day = ""
old_dt = datetime.now()
ap_sum = 0.0
rp_sum = 0.0
min_ctr = 0
#loop by house
for h in os.scandir(TC_BASE_DIR):
if h.is_dir():
HDir = h.path + "/"
HNo = h.path[-2:]
#loop by dates
for d in os.scandir(HDir):
if d.is_dir():
DateDir = d.path
path = DateDir + "/"
dt_min = -1
dt_hr = -1
dt_day = ""
df = pd.read_csv(file)
#iterate every row of csv file, skip first row (header row)
for i in range(0, len(df)):
ap = df.iloc[i, 0] #Active Power
rp = df.iloc[i, 1] #Reactive Power
tse = str(df.iloc[i, 2]) #timestamp in scientific notation
#change big E to small e
tse = tse.replace("E", "e")
#convert tse
sts = str(int(float(tse)))
ts10 = sts[0:10]
ts = int(ts10)
dt = datetime.fromtimestamp(ts)
#print(dt, "\nMIN: ", dt.minute, " | HR: ", dt.hour, " |
DATE: ", dt.day)
totalRows += 1
if dt_min == dt.minute and dt_hr == dt.hour and dt_day ==
dt.day:
#add for average
ap_sum += float(ap)
rp_sum = float(rp)
min_ctr += 1
else:
if ap_sum != 0.0 and rp_sum != 0.0:
ap_avg = ap_sum/min_ctr
rp_avg = rp_sum/min_ctr
#sdate = str(dt.year) + "-" +
str(dt.month).zfill(2) + "-" + str(dt.day).zfill(2) + " " + str(dt.hour).zfill(2) +
":" + str(dt.minute).zfill(2) + ":00"
sdate = old_dt.strftime("%Y-%m-%d %H:%M:00")
#append to new file
ds.writelines(sdate + ",H" + HNo + "," +
str(Decimal(ap_avg).quantize(Decimal(DECIMAL_DIGITS))) + "," +
str(Decimal(rp_avg).quantize(Decimal(DECIMAL_DIGITS))) + "," + old_dt.strftime("%w,
%H") + "\n") #DATETIME, HOUSE_ID, AVG_ACTIVE_POWER, AVG_REACTIVE_POWER, WEEKDAY,
HOUR
newRows += 1
totalFiles += 1
print("Total Files Processed = ", totalFiles, "/", MAX_FILES, "
: {:.2f}".format((totalFiles/MAX_FILES)*100), "%")
ds.close()
print("Total Rows Processed = {:,}".format(totalRows))
print("Total New Rows Created = {:,}".format(newRows))