"""Plots using R base graphics code and the rpy backend.
"""
import os,pdb,dataplot
# rpy is optional: when it cannot be imported the module must still load, so
# an explanatory message is recorded and a stand-in RException is supplied.
try:
    from rpy import r, RException
except ImportError:
    ERROR_MESSAGE = 'rpy module unavailable. To view this plot, install R and Rpy'

    class RException(Exception):
        """Stand-in for rpy's RException, used when rpy is not installed."""
else:
    ERROR_MESSAGE = ''


class RFunctionDoesNotExist(RException):
    """Raised when the requested R plotting function cannot be found."""
def values_to_df(values):
    """Turn a Django values list (a sequence of dicts) into an R data.frame.

    Querysets and data.frames are the native table representations on the
    Django and R sides respectively, so this is the bridge between them.

    The keys of the first dict name the columns.  Each column is the list
    of that key's value taken from every dict, in sequence order.  The
    columns are handed to R's data.frame as keyword arguments through rpy.

    Raises IndexError when values is empty and KeyError when a later dict
    lacks one of the first dict's keys.
    """
    columns = dict(
        (name, [row[name] for row in values])
        for name in values[0].keys()
    )
    return r.data_frame(**columns)
def convert_unicode(s):
    """Return the unicode string s encoded as a UTF-8 byte string."""
    utf8_bytes = s.encode("UTF-8")
    return utf8_bytes
def encode_utf8(D):
    """Encode all unicode strs in a dict to normal (UTF-8 byte) strs.

    The idea is that you are going to call an R function with RPy, and
    you have a dict D that will be the kwargs you use in that function
    call.  Pass D to this function and the unicode strings in it are
    replaced by normal python strings, encoded so that they work in R and
    represent the correct character when you use print() or plot().
    Finally call your R function using something like

        from rpy import r
        r.your_fun(**encode_utf8(kwargs))

    Only values of these kinds are converted; anything else is left alone:
     - unicode string             -> encoded string
     - list of unicode strings    -> list of encoded strings
     - list of dicts              -> each dict processed recursively

    Only the first element of a list is inspected to decide how the whole
    list is treated.  An empty list is left untouched.

    D is modified in place and also returned.

    RPy should do this for us, but it doesn't so I wrote this hack.
    Works with:
      rpy r736
      R version 2.9.0 (2009-04-17)
      Python 2.5.2 (r252:60911, Jul 31 2008, 17:28:52)
    """
    # Iterate over a snapshot so that reassigning D[k] below never touches
    # a dict that is being iterated.
    for k, v in list(D.items()):
        if isinstance(v, list):
            if not v:
                # Nothing to encode; indexing v[0] here would raise
                # IndexError.
                continue
            first = v[0]
            if isinstance(first, dict):  # list of dicts -> call recursively
                D[k] = [encode_utf8(x) for x in v]
            elif isinstance(first, unicode):  # string list -> character vector
                D[k] = [convert_unicode(x) for x in v]
        elif isinstance(v, unicode):  # string scalar -> character scalar
            D[k] = convert_unicode(v)
    return D
class Plot(dataplot.GenericPlot):
    """R plot for the web.

    Uses the rpy package and base R graphics.  The idea is separation
    of R and python code, so each subclass needs to map to an R
    function (r_fun_name) in a .R code file (r_code_filename) that can
    be sourced and used for plotting by rpy.

    Subclass hooks read by this class:
      r_fun_name: name of the R function; defaults to the lowercased class
        name with "_" replaced by ".".
      r_code_filename: name of the .R file; defaults to r_fun_name + ".R".
      save_nice: if true, save_data does not overwrite existing files.
      SAVE_DATA_ON_MAKEFILE: if true, makefile also calls save_data.

    get_full_base, get_filenames and get_plot_args are not defined here;
    they are expected to come from dataplot.GenericPlot.
    """

    # Output formats and their filename suffixes / conversion arguments.
    # NOTE(review): presumably consumed by dataplot.GenericPlot -- confirm.
    convert_to = {
        'png': {'suffix': '.png'},
        'thumb': {'suffix': '-thumb.png', 'convert_args': '-resize 65x90'},
        'pdf': {'suffix': '.pdf'},
    }
    convert_from = 'pdf'
    # Width and height handed to the R pdf() device in makefile.
    w = 9
    h = 6.5
    view_program = 'xpdf'
    # Module-level message: empty when rpy imported, explanatory otherwise.
    ERROR_MESSAGE = ERROR_MESSAGE

    def get_data_file(self):
        """Return the path of the .Rdata file used by save_data."""
        return self.get_full_base() + '.Rdata'

    def get_test_file(self):
        """Return the path of the .test.R script used by save_data."""
        return self.get_full_base() + '.test.R'

    def get_r_fun(self, e=None):
        """Try to get the R function from the r environment.

        On success the function is stored in self.plot_fun and True is
        returned.  If rpy raises RException, e is raised when given,
        otherwise False is returned.
        """
        try:
            self.plot_fun = getattr(r, self.r_fun_name)
        except RException:
            if e is not None:
                raise e
            return False
        return True

    def __init__(self, *args, **kwargs):
        """Infer default values at init, then defer to the base class."""
        # infer default r_fun_name from class name, e.g. a class named
        # multi_time_series maps to the R function multi.time.series
        if not hasattr(self, 'r_fun_name'):
            self.r_fun_name = self.__class__.__name__.lower().replace("_", ".")
        super(Plot, self).__init__(*args, **kwargs)

    def check_files_for_function(self):
        """Go through files looking for the plot function.

        Candidate paths are stored in self.r_fullpaths.  Each existing
        candidate is sourced; as soon as the function is available its
        path is stored in self.r_code_filename_fullpath.

        Returns True if the function was found this way, False otherwise.
        """
        # infer default r_code_filename from r_fun_name
        filename = getattr(self, 'r_code_filename', self.r_fun_name + ".R")
        # look for the .R file next to the module that defines this class
        mn = self.__module__
        # A non-empty fromlist makes __import__ return the leaf module
        # instead of the top-level package.
        mod = __import__(mn, {}, {}, [mn.split('.')[-1]])
        files = [os.path.dirname(mod.__file__)]
        self.r_fullpaths = [os.path.join(d, filename) for d in files]
        actual_files = [f for f in self.r_fullpaths if os.path.exists(f)]
        # test each one by sourcing it and checking if fun exists after
        for r_code_fullpath in actual_files:
            try:
                r.source(r_code_fullpath)
            except RException:
                # Sourcing failed (e.g. R syntax error); still check below,
                # the function may have been defined regardless.
                pass
            # if it worked, return now
            if self.get_r_fun():
                self.r_code_filename_fullpath = r_code_fullpath
                return True
        return False

    def source_for_function(self):
        """Source R code files looking for fun_name.

        Raises RFunctionDoesNotExist if fun_name is never found.
        """
        # if it already exists, return now
        if self.get_r_fun():
            return
        # if we can find it in a file, return now
        if self.check_files_for_function():
            return
        # if it didn't work by now, raise error
        message = "Could not find R fun %s in %s" % (
            self.r_fun_name, self.r_fullpaths)
        self.get_r_fun(RFunctionDoesNotExist(message))

    def get_kwargs(self):
        """Return R's formals() of the plot function, without '...'."""
        self.source_for_function()
        D = r.formals(self.plot_fun)
        # '...' won't actually work as a kwarg if supplied
        D.pop('...', None)
        return D

    def save_data(self):
        """Save the R arguments as .Rdata plus a .test.R replay script.

        The test script loads the data file, sources the R code file when
        one is known, and calls the plot function with every saved
        argument.  When save_nice is set, existing files are kept and a
        warning is printed instead.
        """
        data_file = self.get_data_file()
        test_file = self.get_test_file()
        kwargs = self.set_r_args()
        keep_existing = getattr(self, 'save_nice', None)
        if os.path.exists(test_file) and keep_existing:
            print("Warning: %s already exists, not saving test file" % test_file)
        else:
            self.check_files_for_function()
            lines = ['load("%s")' % data_file]
            # Functions found without a .R file (e.g. R base functions)
            # never set r_code_filename_fullpath, so the source() line is
            # emitted only when a path is known.
            source_path = getattr(self, 'r_code_filename_fullpath', None)
            if source_path is not None:
                lines.append('source("%s")' % source_path)
            lines.append('%s(%s)' % (
                self.r_fun_name,
                ',\n'.join(['%s=%s' % (k, k) for k in kwargs])))
            f = open(test_file, 'w')
            try:
                f.write('\n'.join(lines) + '\n')
            finally:
                f.close()
        if os.path.exists(data_file) and keep_existing:
            print("Warning: %s already exists, not saving data file" % data_file)
        else:
            for k in kwargs:
                r.assign(k, kwargs[k])
            r.save(list=list(kwargs.keys()), file=data_file)

    def set_r_args(self):
        """Compute, UTF-8 encode and cache (self.r_args) the R arguments."""
        r_args = self.get_plot_args()
        ## rpy doesn't work with unicode accents hack
        r_args = encode_utf8(r_args)
        self.r_args = r_args
        return r_args

    def makefile(self):
        """Start a PDF device and execute R plotting code.

        Raises dataplot.PlotError if the PDF device cannot be started or
        if R fails while drawing.  In the latter case the data is saved
        (best effort) for debugging first.
        """
        # Local import; sys.exc_info() is used instead of exception-binding
        # syntax so this parses on Python 2.5 as well as newer versions.
        import sys

        filename = self.get_filenames()['pdf']
        try:
            # Can't pass unicode strings here
            r.pdf(filename, h=self.h, w=self.w)
        except RException:
            raise dataplot.PlotError('\n'.join([
                "Error in starting the R PDF graphics device.",
                "Does the webserver have permission to write %s?" % filename]))
        # Get r_args from defaults and database
        r_args = self.set_r_args()
        print(r_args)  # debug output
        # Look for function first -- get from r code if specified
        self.source_for_function()
        # Then actually draw the figure -- may fail if bad data
        try:
            oldwarn = r.getOption('warn')
            r.options(warn=-1)
            try:
                # for characters with accent marks
                r.Sys_setlocale(loc='en_US.UTF-8')
                # weird bug for french , decimal separator
                r.Sys_setlocale("LC_NUMERIC", "C")
            finally:
                # restore the warning level even if a locale call failed
                r.options(warn=oldwarn)
            self.plot_fun_return_val = self.plot_fun(**self.r_args)
            r.dev_off()
        except RException:
            # Capture first: the nested handlers below may replace the
            # "current exception" on Python 2.
            err = sys.exc_info()[1]
            # Close the PDF device so failed plots do not leave it open.
            try:
                r.dev_off()
            except RException:
                pass
            # Best effort: keep the inputs around for debugging.
            try:
                self.save_data()
            except Exception:
                pass
            raise dataplot.PlotError('\n'.join([
                'Error in generating the plots.',
                'Is all the required data present?\nR said: %s' % err]))
        r.warnings()
        # Also save the data if explicitly requested
        if getattr(self, 'SAVE_DATA_ON_MAKEFILE', None):
            self.save_data()
class Scatter(Plot):
    """Plain x-y scatterplot, mapped to the R function generic.scatter.plot.

    Required arguments:
      x: list of ints or floats: horizontal values.
      y: list of ints or floats: vertical values.

    Optional arguments:
      ann: list of strings: labels for each data point.
      pch: plotting symbol to use; see R>example(points).
      fit.lty: lty of least squares fit line, default: 0 => no line.
      axis.round: decimal points for rounding axis labels.
      lty.x.y: lty of line at x=y, default: 0 => no line.
      one.to.one: Force axes to be same?
    """

    r_fun_name = 'generic.scatter.plot'
    default_args_map = dict(xlab='x', ylab='y')
class SquareScatter(Scatter):
    """Scatter plot with equal width and height (w = h = 6.5)."""

    # arbitrary but good for US paper
    w = 6.5
    h = 6.5
class CorrScatter(SquareScatter):
    """Square scatterplot for judging the correlation of two variables.

    Identical to Scatter except for its default keyword arguments:
    one.to.one=T and lty.x.y=2.
    """

    default_kwargs = {
        'one.to.one': True,
        'lty.x.y': 2,
    }
class TimeSeries(Plot):
    """Simple cumulative time series, mapped to R's generic.time.series.

    Required arguments:
      d: list of time data produced with strftime('%s')

    Optional arguments:
      y: values at time points. Will assume 1 for each as default.
      transform: how to transform the data before plotting, one of:
        'cumulative', 'monthly', 'daily'
    """

    r_fun_name = 'generic.time.series'
    default_args_map = dict(xlab='d', ylab='y')
class Histogram(Plot):
    """Generic histogram for showing a univariate distribution.

    Maps directly to the R function hist (see r_fun_name); arguments are
    passed verbatim to it.
    """
    r_fun_name='hist'
class NormalQQPlot(Plot):
    """Use to see if univariate data are approximately normal.

    Maps to the R function generic.qqnorm (see r_fun_name).
    NOTE(review): the earlier note here said all arguments are passed
    verbatim to the R base function qqnorm -- confirm that against the
    generic.qqnorm R code, which is not visible from this file.
    """
    r_fun_name='generic.qqnorm'
class barplot(Plot):
    """Standard boring barplot.

    No r_fun_name is set here, so Plot.__init__ derives it from the class
    name: the R function used is "barplot".
    """
class multi_time_series(Plot):
    """Multiple time series superimposed for comparison.

    No r_fun_name is set here, so Plot.__init__ derives it from the class
    name (lowercased, "_" replaced by "."): the R function used is
    "multi.time.series".
    """