Menu

[r174]: / trunk / R / __init__.py  Maximize  Restore  History

Download this file

324 lines (263 with data), 10.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
"""Plots using R base graphics code and the rpy backend.
"""
import os
import pdb
import dataplot

# rpy is optional at import time.  When it is missing the module still
# imports: ERROR_MESSAGE carries the explanation and a local RException is
# defined so that "except RException" clauses elsewhere remain valid.
# Note that the name ``r`` is NOT bound in that case.
try:
    from rpy import r, RException
except ImportError:
    ERROR_MESSAGE = 'rpy module unavailable. To view this plot, install R and Rpy'

    class RException(Exception):
        """Stand-in for rpy.RException when rpy cannot be imported."""
else:
    ERROR_MESSAGE = ''
class RFunctionDoesNotExist(RException):
    """Error for an R plotting function that could not be found."""
def values_to_df(values):
    """Turn a Django values list (a sequence of dicts) into an R data.frame.

    Querysets and data.frames are the native table representations on the
    Django and R sides respectively, so this is the bridge between them.

    The column names are the keys of the first row; every row must provide
    all of those keys.  Each column is the list of that key's values, in
    row order.  The columns are passed as keyword arguments to
    ``r.data_frame`` and its result is returned.
    """
    columns = dict(
        (name, [row[name] for row in values])
        for name in values[0].keys()
    )
    return r.data_frame(**columns)
def convert_unicode(s):
    """Return the unicode string ``s`` encoded as UTF-8."""
    utf8_encoded = s.encode("UTF-8")
    return utf8_encoded
def encode_utf8(D):
    """Encode all unicode strs in a dict to normal strs.

    The idea is that you are going to call an R function with RPy, and
    you have a dict D that will be the kwargs you use in that function
    call. Pass D to this function and every unicode string in it is
    replaced by a normal python string, UTF-8 encoded so that it works in
    R and represents the correct character when you use print() or
    plot(). Finally call your R function using something like

        from rpy import r
        r.your_fun(**encode_utf8(kwargs))

    D is modified IN PLACE and the same dict object is returned.

    Only values of a few special data types are converted:
    - unicode string
    - list of unicode strings
    - list of dicts (each dict is processed recursively)
    The type of a list is decided from its first element only.  Empty
    lists and values of any other type are left untouched.

    RPy should do this for us, but it doesn't so I wrote this hack.
    Works with:
    rpy r736
    R version 2.9.0 (2009-04-17)
    Python 2.5.2 (r252:60911, Jul 31 2008, 17:28:52)
    """
    # Iterate over a snapshot of the items because values are reassigned
    # inside the loop.
    for key, value in list(D.items()):
        if isinstance(value, list):
            if not value:
                # Nothing to inspect or encode; indexing [0] would fail.
                continue
            first = value[0]
            if isinstance(first, dict):
                # list of dicts -> call recursively
                D[key] = [encode_utf8(item) for item in value]
            elif isinstance(first, unicode):
                # string lists -> character vectors
                D[key] = [convert_unicode(item) for item in value]
        elif isinstance(value, unicode):
            # string scalar -> character scalar
            D[key] = convert_unicode(value)
    return D
class Plot(dataplot.GenericPlot):
    """R plot for the web.

    Uses the rpy package and base R graphics. The idea is separation
    of R and python code, so each subclass needs to map to an R
    function (r_fun_name) in a .R code file (r_code_filename) that can
    be sourced and used for plotting by rpy.

    Defaults inferred when a subclass does not set them:
    - r_fun_name: the lowercased class name with "_" replaced by ".".
    - r_code_filename: r_fun_name + ".R", looked up in the directory of
      the module that defines the subclass.
    """

    # Output format table and source format; presumably consumed by
    # dataplot.GenericPlot -- this class only reads the 'pdf' file name.
    convert_to = {
        'png': {'suffix': '.png'},
        'thumb': {'suffix': '-thumb.png', 'convert_args': '-resize 65x90'},
        'pdf': {'suffix': '.pdf'},
    }
    convert_from = 'pdf'
    # Width and height handed to R's pdf() device in makefile().
    w = 9
    h = 6.5
    view_program = 'xpdf'
    # Module-level message; non-empty when rpy could not be imported.
    ERROR_MESSAGE = ERROR_MESSAGE

    def get_data_file(self):
        """Return the path of the .Rdata file written by save_data."""
        return self.get_full_base() + '.Rdata'

    def get_test_file(self):
        """Return the path of the .test.R script written by save_data."""
        return self.get_full_base() + '.test.R'

    def get_r_fun(self, e=None):
        """Try to get the R function from the r environment.

        On success the function is stored in self.plot_fun and True is
        returned.  If the lookup raises RException, ``e`` is raised when
        given; otherwise None is returned.
        """
        try:
            self.plot_fun = getattr(r, self.r_fun_name)
        except RException:
            if e:
                raise e
            return None
        return True

    def __init__(self, *args, **kwargs):
        """Infer default values at init, then defer to the base class."""
        # infer default r_fun_name from class name
        if not hasattr(self, 'r_fun_name'):
            class_name = self.__class__.__name__
            self.r_fun_name = class_name.lower().replace("_", ".")
        super(Plot, self).__init__(*args, **kwargs)

    def check_files_for_function(self):
        """Go through files looking for the plot function.

        Candidate paths are stored in self.r_fullpaths.  Each existing
        candidate is sourced; as soon as the R function can be fetched,
        the file is recorded in self.r_code_filename_fullpath and True
        is returned.  Returns None when no candidate provides it.
        """
        # infer default r_code_filename from r_fun_name
        filename = getattr(self, 'r_code_filename', self.r_fun_name + ".R")
        # The .R file is expected next to the module defining this class.
        # A non-empty fromlist makes __import__ return that (leaf) module
        # instead of its top-level package.
        module_name = self.__module__
        leaf = module_name.split('.')[-1]
        mod = __import__(module_name, {}, {}, [leaf])
        directories = [os.path.dirname(mod.__file__)]
        self.r_fullpaths = [os.path.join(d, filename) for d in directories]
        existing = [f for f in self.r_fullpaths if os.path.exists(f)]
        # test each one by sourcing it and checking if fun exists after
        for r_code_fullpath in existing:
            try:
                r.source(r_code_fullpath)
            except RException:
                # Sourcing failed (e.g. syntax error); the lookup below
                # decides whether the function is available anyway.
                pass
            if self.get_r_fun():
                self.r_code_filename_fullpath = r_code_fullpath
                return True

    def source_for_function(self):
        """Source R code files looking for fun_name.

        Raises RFunctionDoesNotExist if the function is never found.
        """
        # if it already exists, return now
        if self.get_r_fun():
            return
        # if we can find it in a file, return now
        if self.check_files_for_function():
            return
        # if it didn't work by now, raise error
        message = "Could not find R fun %s in %s" % (
            self.r_fun_name, self.r_fullpaths)
        self.get_r_fun(RFunctionDoesNotExist(message))

    def get_kwargs(self):
        """Return the formal arguments of the R plot function, minus '...'."""
        self.source_for_function()
        D = r.formals(self.plot_fun)
        # '...' won't actually work as a kwarg if supplied
        D.pop('...', None)
        return D

    def save_data(self):
        """Save result of call to get_plot_args in Rdata.

        Two files are written: a .test.R script that loads the data,
        sources the R code file (when one was found) and calls the plot
        function, and the .Rdata file holding the arguments.  When the
        instance has a true ``save_nice`` attribute, an existing file is
        kept and a warning is printed instead.
        """
        data_file = self.get_data_file()
        test_file = self.get_test_file()
        kwargs = self.set_r_args()
        if os.path.exists(test_file) and getattr(self, 'save_nice', None):
            print("Warning: %s already exists, not saving test file"
                  % test_file)
        else:
            self.check_files_for_function()
            # No code file is recorded when the function did not come
            # from a sourced file (e.g. an R built-in); then the script
            # simply omits the source() line.
            code_file = getattr(self, 'r_code_filename_fullpath', None)
            script_lines = ['load("%s")' % data_file]
            if code_file is not None:
                script_lines.append('source("%s")' % code_file)
            call_args = ',\n'.join(['%s=%s' % (k, k) for k in kwargs])
            script_lines.append('%s(%s)' % (self.r_fun_name, call_args))
            Rcode = '\n'.join(script_lines) + '\n'
            f = open(test_file, 'w')
            try:
                f.write(Rcode)
            finally:
                f.close()
        if os.path.exists(data_file) and getattr(self, 'save_nice', None):
            print("Warning: %s already exists, not saving data file"
                  % data_file)
        else:
            # Bind every argument to an R variable of the same name, then
            # save exactly those variables.
            for k in kwargs:
                r.assign(k, kwargs[k])
            r.save(list=kwargs.keys(), file=data_file)

    def set_r_args(self):
        """Compute the plot arguments, store them in self.r_args, return them."""
        r_args = self.get_plot_args()
        ## rpy doesn't work with unicode accents hack
        r_args = encode_utf8(r_args)
        self.r_args = r_args
        return r_args

    def makefile(self):
        """Start a PDF device and execute R plotting code.

        Raises dataplot.PlotError if the PDF device cannot be started or
        if R fails while drawing.  After a drawing failure save_data is
        attempted (best effort).  The device is always closed again, even
        when an error interrupts the plot.
        """
        # Local import: sys.exc_info() is used instead of a named except
        # clause so the code also runs on Python 2.5.
        import sys

        filename = self.get_filenames()['pdf']
        try:
            # Can't pass unicode strings here
            r.pdf(filename, h=self.h, w=self.w)
        except RException:
            raise dataplot.PlotError('\n'.join([
                "Error in starting the R PDF graphics device.",
                "Does the webserver have permission to write %s?" % filename]))

        device_open = True
        try:
            # Collect the arguments for the R call (kept in self.r_args).
            r_args = self.set_r_args()
            # Debug output of the arguments.
            print(r_args)
            # Look for function first -- get from r code if specified
            self.source_for_function()
            # Then actually draw the figure -- may fail if bad data
            try:
                oldwarn = r.getOption('warn')
                r.options(warn=-1)
                # for characters with accent marks
                r.Sys_setlocale(loc='en_US.UTF-8')
                # weird bug for french , decimal separator
                r.Sys_setlocale("LC_NUMERIC", "C")
                r.options(warn=oldwarn)
                self.plot_fun_return_val = self.plot_fun(**self.r_args)
                r.dev_off()
                device_open = False
            except RException:
                e = sys.exc_info()[1]
                try:
                    self.save_data()
                except Exception:
                    # Best effort only: failing to save the debugging
                    # data must not hide the plotting error.
                    pass
                raise dataplot.PlotError('\n'.join([
                    'Error in generating the plots.',
                    'Is all the required data present?\nR said: %s' % e]))
        finally:
            if device_open:
                # Any failure after pdf() succeeded would otherwise leave
                # the graphics device open in the embedded R session.
                try:
                    r.dev_off()
                except RException:
                    pass
        r.warnings()
        # Also save the data if explicitly requested
        if getattr(self, 'SAVE_DATA_ON_MAKEFILE', None):
            self.save_data()
class Scatter(Plot):
    """Simple x-y scatterplot.

    Required:
      x: list of ints or floats: horizontal values.
      y: list of ints or floats: vertical values.

    Optional:
      ann: list of strings: labels for each data point.
      pch: plotting symbol to use; see R>example(points).
      fit.lty: lty of least squares fit line, default: 0 => no line.
      axis.round: decimal points for rounding axis labels.
      lty.x.y: lty of line at x=y, default: 0 => no line.
      one.to.one: Force axes to be same?
    """

    # Name of the R function that draws this plot.
    r_fun_name = 'generic.scatter.plot'
    # NOTE(review): presumably maps label arguments to the data arguments
    # they describe; interpreted by the base classes -- confirm there.
    default_args_map = dict(xlab='x', ylab='y')
class SquareScatter(Scatter):
    """Scatter plot with equal device width and height."""

    # 6.5 is arbitrary but good for us paper
    w = 6.5
    h = 6.5
class CorrScatter(SquareScatter):
    """Scatterplot for judging the correlation between 2 variables.

    Identical to Scatter except for the defaults one.to.one=T and
    lty.x.y=2.
    """

    default_kwargs = {
        'one.to.one': True,
        'lty.x.y': 2,
    }
class TimeSeries(Plot):
    """Simple cumulative time series.

    Required:
      d: list of time data produced with strftime('%s')

    Optional:
      y: values at time points. Will assume 1 for each as default.
      transform: how to transform the data before plotting, one of:
        'cumulative', 'monthly', 'daily'
    """

    # Name of the R function that draws this plot.
    r_fun_name = 'generic.time.series'
    default_args_map = dict(xlab='d', ylab='y')
class Histogram(Plot):
    """Generic histogram showing a univariate distribution.

    The arguments go verbatim to the R base function hist.
    """

    r_fun_name = 'hist'
class NormalQQPlot(Plot):
    """Check whether univariate data are approximately normal.

    All arguments are passed verbatim to R base function qqnorm.
    """

    # Name of the R function that draws this plot.
    r_fun_name = 'generic.qqnorm'
class barplot(Plot):
    """Standard boring barplot.

    No r_fun_name is set here, so Plot.__init__ derives it from the
    class name: "barplot".
    """
class multi_time_series(Plot):
    """Several time series superimposed for comparison.

    No r_fun_name is set here, so Plot.__init__ derives it from the
    class name: "multi.time.series".
    """
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.