"""Record of bike trips for data analysis.
Demonstration app for dataplot.
"""
import datetime,pdb,os
#from django.db import models
from dataplot import plotmodels as models
from django.db import connection
from django.db.models import Q
from dataplot.bike.R import ThereAndBackPlot,lattice_bikeplot
from dataplot import R
from dataplot import matplotlib
def tup_to_minutes(h, m, s):
    """Convert an hours/minutes/seconds triple into minutes.

    h, m: hours and minutes, used arithmetically as given.
    s: seconds; passed through float() so the result is fractional.
    """
    whole_minutes = h*60 + m
    leftover = float(s)/60
    return whole_minutes + leftover
# Self-join of bike_ride on date, pairing two rides made on the same day.
# Takes four positional parameters, in this order: x.origin_id,
# x.destination_id, y.origin_id, y.destination_id.
# Each result row is
# (date, x.hours, x.minutes, x.seconds, y.hours, y.minutes, y.seconds),
# ordered by x.date.
PAIRED_RIDES_SQL="""
select x.date,x.hours,x.minutes,x.seconds,y.hours,y.minutes,y.seconds
from bike_ride as x join bike_ride as y on x.date=y.date where
x.origin_id=%s and x.destination_id=%s and
y.origin_id=%s and y.destination_id=%s order by x.date;
"""
class Location(models.Model):
    """A place where a bike ride starts or ends.

    Besides the stored fields, a Location can be compared against
    another Location via compare(), which sets self.o, self.to_o and
    self.from_o and binds the result of instantiating each entry of
    COMPARE_PLOTS onto the instance. The *_args methods read those
    attributes, so they are only usable after compare() has been called.
    """
    short_name = models.CharField(
        max_length=100,
        blank=True,
        null=False,
        default='',
    )
    address = models.TextField(blank=True, null=False, default='')

    class Admin:
        pass

    def __str__(self):
        name = self.short_name
        return name.capitalize()

    def CorrScatter_args(self):
        """Arguments for the scatter plot of same-day ride times.

        Runs PAIRED_RIDES_SQL with (self, self.o) as the first ride's
        origin/destination and (self.o, self) as the second ride's, then
        converts each ride's hours/minutes/seconds columns to minutes.
        """
        cursor = connection.cursor()
        here, there = self.id, self.o.id
        cursor.execute(PAIRED_RIDES_SQL, [here, there, there, here])
        rows = cursor.fetchall()
        # Columns 1-3 hold the self -> o ride time, columns 4-6 the
        # o -> self ride time (column 0 is the date).
        minutes_there = [tup_to_minutes(*row[1:4]) for row in rows]
        minutes_back = [tup_to_minutes(*row[4:7]) for row in rows]
        return {
            'x': minutes_there,
            'xlab': '%s->%s' % (self.short_name, self.o.short_name),
            'y': minutes_back,
            'ylab': '%s->%s' % (self.o.short_name, self.short_name),
            'main': "Correlation in ride times for rides on same day?",
        }

    def related_locations(self):
        """Return the distinct other locations sharing a ride with self.

        A location qualifies if it is the origin of a ride ending here
        or the destination of a ride starting here; self is excluded.
        """
        origin_of_ride_to_here = Q(
            ride_origins__destination__id__exact=self.id)
        destination_of_ride_from_here = Q(
            ride_destinations__origin__id__exact=self.id)
        linked = Location.objects.filter(
            origin_of_ride_to_here | destination_of_ride_from_here)
        return linked.exclude(id=self.id).distinct()

    def related_locations_dict(self):
        """Summarize ride counts between self and each related location.

        Each related location has compare(self) called on it, so its
        comparison plots are bound before it is returned under key 'l'.
        Returns a list with one dict per related location.
        """
        summaries = []
        for other in self.related_locations():
            other.compare(self)
            outbound = Ride.objects.filter(
                origin__id__exact=self.id,
                destination__id__exact=other.id,
            )
            inbound = Ride.objects.filter(
                origin__id__exact=other.id,
                destination__id__exact=self.id,
            )
            summaries.append({
                'from_here_to_there': outbound.count(),
                'from_there_to_here': inbound.count(),
                'l': other,
            })
        return summaries

    # Values passed as the 'h' and 'w' init args of the Histogram and
    # NormalQQPlot entries below.
    h = 300
    w = 400
    # (plot class, init kwargs) pairs instantiated by compare(). For a
    # plot class named N the plot arguments come from the method N_args.
    COMPARE_PLOTS = (
        (R.Histogram, {'h': h, 'w': w}),
        (R.NormalQQPlot, {'h': h, 'w': w}),
        (ThereAndBackPlot, {}),
        (lattice_bikeplot, {}),
        (R.CorrScatter, {}),
    )

    def compare(self, o):
        """Set up the plots that summarize rides to another location.

        o: the other location.

        Stores o as self.o, the rides from self to o as self.to_o and
        the rides from o to self as self.from_o, then binds one plot per
        COMPARE_PLOTS entry as an attribute named after the plot class.
        """
        self.o = o
        self.to_o = self.ride_origins.filter(destination__id__exact=o.id)
        self.from_o = self.ride_destinations.filter(origin__id__exact=o.id)
        pre = self.get_compare_description()
        # The plots are bound by hand rather than through the simple
        # DATAPLOTS declaration because the file name depends on both
        # self and self.o, and self.o is only set after instantiation.
        #
        # IDEA: modify the DATAPLOTS parser to encompass this case, or
        # at least export a function we can use to bind plots, i.e.
        #   self.DATAPLOTS.add(R.Histogram,init_args={'w':self.w,'h':self.h})
        #   self.DATAPLOTS.add(p,init_args=kwargs,description=pre)
        # or a declaration such as
        #   DATAPLOTS=[
        #       (R.Histogram,{
        #           'init_args':{'h':h,'w':w},
        #           'description':'get_compare_description',
        #       })]
        #
        # IDEA: take the description arg and use the result of the
        # method call if such a method exists, otherwise just use the
        # string. That does not work here because, as the API currently
        # stands, the base filename must be declared upon plot
        # instantiation, and the DATAPLOTS are instantiated when the
        # Model is instantiated. The API could change so that basename
        # is an optional arg that is looked up only when needed (that
        # is, when a plot is made).
        for plot_class, init_kwargs in self.COMPARE_PLOTS:
            plot_name = plot_class.__name__
            args_getter = getattr(self, plot_name + '_args')
            plot = plot_class(pre + plot_name, args_getter, **init_kwargs)
            setattr(self, plot_name, plot)

    def get_compare_description(self):
        """Return the name prefix used for this comparison's plots."""
        names = (self.short_name, self.o.short_name)
        return '%s-to-%s-' % names

    def ThereAndBackPlot_args(self):
        """kwargs to pass to the R function for plotting.

        Collects there_and_back_dict() for every ride in both
        directions and wraps the result with R.values_to_df.
        """
        both_ways = list(self.to_o)
        both_ways.extend(list(self.from_o))
        records = [ride.there_and_back_dict() for ride in both_ways]
        # shortcut for making a dataframe for R
        return {'d': R.values_to_df(records)}

    # The lattice plot takes the same arguments.
    lattice_bikeplot_args = ThereAndBackPlot_args

    def Histogram_args(self):
        """Arguments for the histogram of ride times from self to self.o.

        The x limits come from get_hist_xlim() (rides in both
        directions) and the breaks step through them 3 at a time.
        """
        xlim = self.get_hist_xlim()
        lower, upper = xlim[0], xlim[1]
        bin_edges = range(lower, upper, 3)
        ride_minutes = [ride.get_minutes() for ride in self.to_o]
        return {
            'x': ride_minutes,
            'main': 'Rides from %s to %s' % (self, self.o),
            'xlab': 'Ride length (minutes)',
            'xlim': xlim,
            'breaks': bin_edges,
            'freq': False,
            'ylim': [0, 0.2],
        }

    def NormalQQPlot_args(self):
        """Arguments for the normal Q-Q plot of self -> self.o times."""
        ride_minutes = [ride.get_minutes() for ride in self.to_o]
        return {
            'y': ride_minutes,
            'ylim': self.get_hist_xlim(),
        }

    def get_hist_xlim(self):
        """Return [low, high] integer limits spanning both directions.

        low is int(shortest ride - 1) and high is int(longest ride + 3),
        taken over the rides in self.to_o and self.from_o together.
        """
        minutes = [ride.get_minutes() for ride in self.to_o]
        minutes = minutes + [ride.get_minutes() for ride in self.from_o]
        shortest = min(minutes)
        longest = max(minutes)
        return [int(shortest - 1), int(longest + 3)]
class Bike(models.Model):
    """A bicycle that rides are recorded against.

    Kept as its own model because different bikes may be ridden at
    different speeds.
    """
    short_name = models.CharField(
        max_length=100, blank=True, null=False, default='')
    description = models.TextField(blank=True, null=False, default='')

    class Admin:
        pass

    def __str__(self):
        name = self.short_name
        return name.capitalize()
class RideManager(models.Manager):
    """Used for making global ride plots and importing data.

    On construction the manager binds a cumulative-distance time series
    (odoplot) and same-day speed scatter plots (corr, corrr) whose
    arguments are produced lazily by the get_*_args methods.
    """

    def __init__(self):
        super(RideManager, self).__init__()
        self.odoplot = R.TimeSeries(
            'allrides',
            self.get_odoplot_args,
        )
        if matplotlib:
            self.corr = matplotlib.Scatter(
                'samedaycorr-matplotlib',
                self.get_corr_args,
            )
        # The R-based scatter only uses R, so it is created whether or
        # not matplotlib is available.
        self.corrr = R.SquareScatter(
            'samedaycorr-R',
            self.get_corr_args,
            default_kwargs={'fit_lty': 1, 'lty.x.y': 2, 'one.to.one': True}
        )
        #self.corrr.SAVE_DATA_ON_MAKEFILE=True

    def create_from_file_default(self):
        """Create new bike rides based on default dataset.

        The default dataset is bikelog.txt in this module's directory.
        """
        here = os.path.dirname(__file__)
        self.create_from_file(os.path.join(here, 'bikelog.txt'))

    def create_from_file(self, filename):
        """Create new bike rides based on records in a text file.

        filename: path of a whitespace-delimited text file whose first
        line is a header and whose remaining lines each describe one
        ride (see Ride.get_data_from_line).

        Each data line is echoed to stdout and saved as a new Ride.
        Blank lines are skipped.
        """
        # Single-argument parenthesized print gives the same output
        # under the Python 2 print statement.
        print("Loading data from %s" % filename)
        datafile = open(filename)
        try:
            lines = datafile.readlines()
        finally:
            # Close the handle even if reading fails, instead of leaving
            # it to the garbage collector.
            datafile.close()
        for line in lines[1:]:  # assume header line
            if not line.strip():
                # A blank line has no columns, so importing it would
                # fail when the ride's date is parsed.
                continue
            print(line)
            ride = Ride()
            ride.get_data_from_line(line)
            ride.save()

    def get_odoplot_args(self):
        """Get arguments for the time series plot.

        If no data present, create some from the default dataset.
        """
        qs = self.all()
        if not qs:
            self.create_from_file_default()
            qs = self.all()
        return {
            #'d':R.dtdict(qs),
            'd': [ride.date.strftime("%Y-%m-%d") for ride in qs],
            'y': [ride.distance for ride in qs],
            'transform': 'cumulative',
            'label.interval': 'day',
            'main': 'Cumulative distance rode by Toby Dylan Hocking over time',
            'ylab': 'Distance (miles)',
            'xlab': 'Day of ride',
        }

    def get_date_dict(self):
        """Group rides' average speeds by date.

        Returns a dict mapping each ride date to the list of
        average_speed values of the rides on that date.
        """
        di = {}
        for ride in self.all():
            di.setdefault(ride.date, []).append(ride.average_speed)
        return di

    def get_corr_args(self):
        """Arguments for making the daily correlation scatterplot.

        Only dates with exactly two rides contribute a point; the first
        ride's average speed is x and the second's is y.
        """
        pairs = [
            speeds for speeds in self.get_date_dict().values()
            if len(speeds) == 2
        ]
        return {
            'x': [pair[0] for pair in pairs],
            'y': [pair[1] for pair in pairs],
            'main': "2 speeds (miles/hour) for rides on the same day",
            'xlab': '',
            'ylab': '',
        }

    def from_to(self, org, dest):
        """Shortcut for searching from and to by short_name.

        org, dest: short_name of the origin and destination locations.
        Returns the filtered queryset.
        """
        return self.filter(
            origin__short_name__exact=org,
            destination__short_name__exact=dest,
        )
class Ride(models.Model):
    """One bike ride and the statistics recorded for it.

    The values are easy to type into Django's admin interface (they
    come from the bike's navigational computer). The order in which the
    fields are declared matters: get_data_from_line relies on it
    matching the column order of the import text file.
    """
    date = models.DateField(blank=False, null=False)
    bike = models.ForeignKey(Bike, blank=False, null=False)
    origin = models.ForeignKey(
        Location,
        blank=False,
        null=False,
        related_name='ride_origins',
    )
    destination = models.ForeignKey(
        Location,
        blank=False,
        null=False,
        related_name='ride_destinations',
    )
    distance = models.FloatField(blank=False, null=False)
    fastest_speed = models.FloatField(blank=False, null=False)
    average_speed = models.FloatField(blank=False, null=False)
    hours = models.IntegerField(blank=False, null=False)
    minutes = models.IntegerField(blank=False, null=False)
    seconds = models.IntegerField(blank=False, null=False)
    back_flats = models.IntegerField(blank=False, null=False)
    front_flats = models.IntegerField(blank=False, null=False)

    objects = RideManager()
    DO_NOT_CACHE_MANAGER_PLOTS = False
    MANAGER_DATAPLOTS = (
        (R.CorrScatter, {
            'get_plot_args': {'x': 'fastest_speed', 'y': 'average_speed'},
        }),
    )

    class Admin:
        list_display = [
            'date',
            'bike',
            'origin',
            'destination',
            'distance',
            'average_speed',
            'fastest_speed',
        ]

    def __str__(self):
        parts = (self.date, self.origin, self.destination, self.distance)
        return "%s(%s->%s)=%s" % parts

    def get_minutes(self):
        """Return the ride time in minutes.

        Combines the hours, minutes and seconds columns.
        """
        hms = (self.hours, self.minutes, self.seconds)
        return tup_to_minutes(*hms)

    def there_and_back_dict(self):
        """Return this ride as a dict for the there-and-back R plot.

        Keys: 'date' (YYYY-MM-DD string), 'from' and 'to' (location
        short names) and 'm' (ride time in minutes).
        """
        day = self.date.strftime("%Y-%m-%d")
        start = self.origin.short_name
        end = self.destination.short_name
        return {
            'date': day,
            'from': start,
            'to': end,
            'm': self.get_minutes(),
        }

    def get_data_from_line(self, line):
        """Fill this ride's fields from one line of a text file.

        The line looks something like:
        2007-06-06 road sangamo home 9.783 30 15.1 0 39 7
        """
        # Skip the first model field; the remaining fields are declared
        # in the same order as the columns of the text file.
        data_fields = self._meta.fields[1:]
        columns = line.split()
        for field, text in zip(data_fields, columns):
            value = text
            if field.rel:
                # Bikes and locations are looked up by short_name and
                # created on first sight; the column stores their id.
                related, _created = field.rel.to.objects.get_or_create(
                    short_name=text)
                value = related.id
            setattr(self, field.attname, value)
        # The date column was stored as text above; turn it into a
        # real date object.
        ymd = [int(part) for part in self.date.split("-")]
        self.date = datetime.date(*ymd)