- A diurnal plot, which shows the date and time each email was sent (or received), with years running along the x axis and times of day on the y axis.
- And a daily distribution histogram, which represents the distribution of emails sent by time of day.
from imaplib import IMAP4_SSL
from datetime import date, timedelta, datetime
from time import mktime
from email.utils import parsedate
from pylab import plot_date, show, xticks, date2num
from pylab import figure, hist, num2date
from matplotlib.dates import DateFormatter


def getHeaders(address, password, folder, d):
    """
    Retrieve the Date headers of the emails sent from d days ago until now.

    Opens an SSL IMAP connection to imap.gmail.com, logs in with
    `address`/`password`, selects `folder`, searches for the messages
    matching SENTSINCE <today - d days> and fetches only their DATE
    header field.

    Returns the data list of the IMAP fetch response, or an empty list
    when the search matches no message.

    Raises the imaplib error exception when the folder cannot be selected
    or when the search/fetch command is refused by the server.
    """
    # Python 3's imaplib does not quote mailbox names, so a name containing
    # a space (e.g. '[Gmail]/Sent Mail') must be quoted by hand. Python 2's
    # imaplib leaves an already-quoted argument untouched.
    if ' ' in folder and not (folder.startswith('"') and folder.endswith('"')):
        folder = '"%s"' % folder

    # imap connection
    mail = IMAP4_SSL('imap.gmail.com')
    try:
        mail.login(address, password)
        status, _ = mail.select(folder)
        if status != 'OK':
            raise mail.error('cannot select folder %s' % folder)
        try:
            # retrieving the uids
            interval = (date.today() - timedelta(d)).strftime("%d-%b-%Y")
            status, data = mail.uid('search', None,
                                    '(SENTSINCE {date})'.format(date=interval))
            if status != 'OK':
                raise mail.error('search failed: %r' % (data,))
            uids = data[0].split() if data and data[0] else []
            if not uids:
                # An empty message set is not a valid FETCH argument.
                return []

            # retrieving the headers
            # b','.join works on both Python 2 (str) and Python 3 (bytes).
            status, data = mail.uid('fetch', b','.join(uids),
                                    '(BODY[HEADER.FIELDS (DATE)])')
            if status != 'OK':
                raise mail.error('fetch failed: %r' % (data,))
            return data
        finally:
            # close() is only legal once a mailbox has been selected.
            mail.close()
    finally:
        # Always end the session, even when a command above failed.
        mail.logout()

# The second function lets us draw the diurnal plot:
def diurnalPlot(headers):
    """
    Diurnal plot of the emails, with years running along the x axis
    and times of day on the y axis.

    `headers` is iterated item by item; only tuple/list items with at least
    two elements are used, and their second element is read as the raw
    "Date: ..." header. Items whose date cannot be parsed or converted are
    skipped instead of aborting the whole plot.

    Returns (xday, ytime): the datetimes of the emails and, for each one,
    a datetime carrying the same hour/minute/second on a fixed dummy day.
    """
    xday = []
    ytime = []
    for h in headers:
        # Skip bare strings (such as a closing ')') that are not
        # (envelope, header) pairs.
        if not isinstance(h, (tuple, list)) or len(h) < 2:
            continue
        raw = h[1]
        # On Python 3 the header may arrive as bytes; on Python 2 str is
        # already bytes and this branch is never taken.
        if isinstance(raw, bytes) and not isinstance(raw, str):
            raw = raw.decode('ascii', 'replace')
        # Drop the 5-character "Date:" prefix; some clients write the time
        # with dots, which parsedate only understands as colons.
        parsed = parsedate(raw[5:].replace('.', ':'))
        if parsed is None:
            # Unparseable date: mktime(None) would raise TypeError.
            continue
        try:
            mailstamp = datetime.fromtimestamp(mktime(parsed))
        except (ValueError, OverflowError, OSError):
            # Date outside the range the platform can represent.
            continue
        xday.append(mailstamp)
        # Time the email is arrived
        # Note that years, month and day are not important here.
        y = datetime(2010, 10, 14,
                     mailstamp.hour, mailstamp.minute, mailstamp.second)
        ytime.append(y)

    plot_date(xday, ytime, '.', alpha=.7)
    xticks(rotation=30)
    return xday, ytime

# And this is the function for the daily distribution histogram:
def dailyDistributioPlot(ytime):
    """
    Draw the histogram of the daily distribution.

    `ytime` is a sequence of datetimes; each one is converted to a
    matplotlib date number and the numbers are binned into 24 bins on a
    new figure. The left edge of every bar becomes an x tick, labelled
    as HH:MM.
    """
    # dates -> matplotlib date numbers
    as_numbers = list(map(date2num, ytime))

    # new figure, then the histogram itself (third return value = the bars)
    axes = figure().gca()
    bars = hist(as_numbers, bins=24, alpha=.5)[2]

    # one tick per bar, placed on the bar's left edge
    tick_dates = []
    for bar in bars:
        tick_dates.append(num2date(bar.get_x()))
    xticks(tick_dates, rotation=75)

    # show the ticks as hours and minutes
    axes.xaxis.set_major_formatter(DateFormatter('%H:%M'))

# Now we have everything we need to make the graphs. Let's analyze the incoming mail (the inbox) of the last 5 years:
# NOTE(review): credentials are hard-coded here for the sake of the example;
# the address appears to be a redacted placeholder. Substitute your own and
# avoid committing real passwords.
# The print calls below produce the same output on Python 2 and Python 3.
print('Fetching emails...')
headers = getHeaders('[email protected]',
                     'ofcourseiamsupersexy', 'inbox', 365*5)

print('Plotting some statistics...')
xday, ytime = diurnalPlot(headers)
dailyDistributioPlot(ytime)

print('%d Emails analysed.' % len(xday))
show()

# The result would appear as follows
We can analyze the outgoing mail simply by selecting the folder '[Gmail]/Sent Mail':
# NOTE(review): credentials are hard-coded here for the sake of the example;
# the address appears to be a redacted placeholder. Substitute your own and
# avoid committing real passwords.
# The print calls below produce the same output on Python 2 and Python 3.
print('Fetching emails...')
headers = getHeaders('[email protected]',
                     'ofcourseiamsupersexy', '[Gmail]/Sent Mail', 365*5)

print('Plotting some statistics...')
xday, ytime = diurnalPlot(headers)
dailyDistributioPlot(ytime)

print('%d Emails analysed.' % len(xday))
show()

# And this is the result: