Menu

[r8989]: / trunk / py4science / examples / stats_descriptives.py  Maximize  Restore  History

Download this file

138 lines (112 with data), 4.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import scipy.stats as stats
from matplotlib.mlab import detrend_linear, load
import numpy
import pylab
class Descriptives:
    """
    A helper class for basic descriptive statistics and time series plots.

    The statistics are computed once, in the constructor, and stored as
    the attributes samples, N, median, min, max, mean, std, var, skew,
    kurtosis and range.
    """

    def __init__(self, samples):
        """
        Compute and store the descriptive statistics of *samples*.

        samples : a sequence of numbers; it is converted with
                  numpy.asarray and kept as self.samples
        """
        self.samples = numpy.asarray(samples)
        self.N = len(self.samples)

        # stats.median, stats.mean and stats.std are no longer provided
        # by scipy.stats, so use the numpy functions instead.  axis=0
        # and ddof=1 (the unbiased, N-1 estimate) reproduce the defaults
        # of the scipy functions this code was written against.
        self.median = numpy.median(self.samples, axis=0)
        self.min = numpy.amin(self.samples)
        self.max = numpy.amax(self.samples)
        self.mean = numpy.mean(self.samples, axis=0)
        self.std = numpy.std(self.samples, axis=0, ddof=1)
        self.var = self.std**2.
        self.skew = stats.skew(self.samples)
        self.kurtosis = stats.kurtosis(self.samples)
        self.range = self.max - self.min

    def __repr__(self):
        """
        Create a string representation of self; pretty print the
        attributes, one per line, in this order:

          N, mean, median, min, max, range, std, skew, kurtosis

        (the variance is stored as self.var but is not printed)
        """
        descriptives = (
            'N = %d' % self.N,
            'Mean = %1.4f' % self.mean,
            'Median = %1.4f' % self.median,
            'Min = %1.4f' % self.min,
            'Max = %1.4f' % self.max,
            'Range = %1.4f' % self.range,
            'Std = %1.4f' % self.std,
            'Skew = %1.4f' % self.skew,
            'Kurtosis = %1.4f' % self.kurtosis,
            )
        return '\n'.join(descriptives)

    def plots(self, figfunc, maxlags=20, Fs=1, detrend=detrend_linear,
              fmt='bo', bins=100,
              ):
        """
        plots the time series, histogram, autocorrelation, power
        spectral density and spectrogram, stacked in five rows of one
        figure

        figfunc is a figure generating function, eg pylab.figure; it is
        called with no arguments

        return an object which stores plot axes and their return
        values from the plots.  Attributes of the return object are
        'plot', 'hist', 'acorr', 'psd', 'specgram' and these are the
        return values from the corresponding plots.  Additionally, the
        axes instances are attached as c.ax1...c.ax5 and the figure is
        c.fig

        keyword args:

          Fs      : the sampling frequency of the data

          maxlags : max number of lags for the autocorr

          detrend : a function used to detrend the data for the
                    correlation and spectral functions

          fmt     : the plot format string

          bins    : the bins argument to hist
        """
        data = self.samples

        # Here we use a rather strange idiom: we create an empty do
        # nothing class C and simply attach attributes to it for
        # return value (which we carefully describe in the docstring).
        # The alternative is either to return a tuple a,b,c,d or a
        # dictionary {'a':someval, 'b':someotherval} but both of these
        # methods have problems.  If you return a tuple, and later
        # want to return something new, you have to change all the
        # code that calls this function.  Dictionaries work fine, but
        # I find the client code harder to use d['a'] versus d.a.  The
        # final alternative, which is most suitable for production
        # code, is to define a custom class to store (and pretty
        # print) your return object
        class C: pass
        c = C()

        N = 5  # number of subplot rows
        fig = c.fig = figfunc()
        fig.subplots_adjust(hspace=0.3)

        ax = c.ax1 = fig.add_subplot(N,1,1)
        c.plot = ax.plot(data, fmt)
        ax.set_ylabel('data')

        ax = c.ax2 = fig.add_subplot(N,1,2)
        c.hist = ax.hist(data, bins)
        ax.set_ylabel('hist')

        ax = c.ax3 = fig.add_subplot(N,1,3)
        c.acorr = ax.acorr(data, detrend=detrend, usevlines=True,
                           maxlags=maxlags, normed=True)
        ax.set_ylabel('acorr')

        ax = c.ax4 = fig.add_subplot(N,1,4)
        c.psd = ax.psd(data, Fs=Fs, detrend=detrend)
        ax.set_ylabel('psd')

        ax = c.ax5 = fig.add_subplot(N,1,5)
        c.specgram = ax.specgram(data, Fs=Fs, detrend=detrend)
        # The result used to be stored only under the misspelled name
        # 'specgtram'; keep that name as an alias so code written
        # against the old attribute keeps working.
        c.specgtram = c.specgram
        ax.set_ylabel('specgram')
        return c
if __name__=='__main__':

    # Pick the data set to analyze.  Both assignments are kept so it is
    # easy to switch between them; the last one is the one that is used.
    fname = 'data/nm560.dat'  # tree rings in New Mexico 837-1987
    fname = 'data/hsales.dat'  # home sales

    # load the data in filename fname into the list data, which is a
    # list of floating point values, one value per line.  Blank lines
    # are skipped and only the first whitespace separated field of each
    # line is used.
    data = []
    # the with block closes the file as soon as it has been read
    with open(fname) as fh:
        for line in fh:
            line = line.strip()
            if not line: continue
            vals = line.split()
            data.append(float(vals[0]))

    desc = Descriptives(data)
    # parenthesized so the line is valid in both Python 2 and Python 3
    print(desc)

    c = desc.plots(pylab.figure, Fs=12, fmt='-')
    c.ax1.set_title(fname)

    c.fig.savefig('stats_descriptives.png', dpi=150)
    c.fig.savefig('stats_descriptives.eps')
    pylab.show()
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.