author     Mark Wong    2018-02-10 01:59:44 +0000
committer  Mark Wong    2018-02-10 01:59:44 +0000
commit     4d397f905511a0076841c0546173310c32f1d057 (patch)
tree       82546fd896403e18ba8224d8c9f65fc7c395e52a
parent     1f13bc07dabb555f5f8f5680c7e59cb74ef0639b (diff)
Refactor pgbench results output
The idea is that one results document represents a test of one particular commit. It then contains the results of all the tests run against that commit. I think it makes more sense to restructure the pgbench results to be like:

{
  "pgbench": {
    "ro": {
      "results at a scale factor": {
        "number of clients": {
          "std": 4389.727756305762,
          "metric": 41390.590287,
          "median": 43137.716637,
          "results": [
            { ... pgbench results of a test ... },
            ...
          ]
        },
        ...
      },
      ...
    },
    "rw": { ... read write test results ... },
    ...
  },
  ... other test results ...
}
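For illustration, a consumer of the restructured document can walk test mode -> scale -> clients and read the aggregated numbers. A minimal Python sketch, where the sample document and its figures are just the placeholders from the message above (not real benchmark output):

# Hypothetical results document following the layout sketched above.
doc = {
    'pgbench': {
        'ro': {
            100: {                           # scale factor
                16: {                        # number of clients
                    'std': 4389.727756305762,
                    'metric': 41390.590287,  # mean tps across runs
                    'median': 43137.716637,
                    'results': [],           # per-run pgbench results
                },
            },
        },
        'rw': {},
    },
}

# Walk test mode -> scale -> clients and print the aggregate tps stats.
for tag, scales in doc['pgbench'].items():
    for scale, per_clients in scales.items():
        for clients, stats in per_clients.items():
            print('%s scale=%d clients=%d mean=%.2f median=%.2f std=%.2f' % (
                tag, scale, clients, stats['metric'],
                stats['median'], stats['std']))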
-rw-r--r--  client/benchmarks/pgbench.py | 93
 1 file changed, 61 insertions(+), 32 deletions(-)
diff --git a/client/benchmarks/pgbench.py b/client/benchmarks/pgbench.py
index d11d23c..ab4238f 100644
--- a/client/benchmarks/pgbench.py
+++ b/client/benchmarks/pgbench.py
@@ -4,6 +4,8 @@ import os.path
 import re
 import time
+from numpy import mean, median, std
+
 from multiprocessing import cpu_count
 from utils.logging import log
 from utils.misc import available_ram, run_cmd
@@ -61,7 +63,11 @@ class PgBench(object):
"""
# initialize results for this dataset scale
- self._results[scale] = {'init': None, 'warmup': None, 'runs': []}
+ self._results['results'] = {
+ 'init': None,
+ 'runs': [],
+ 'warmup': None,
+ }
log("recreating '%s' database" % (self._dbname,))
run_cmd(['dropdb', '--if-exists', self._dbname], env=self._env)
@@ -72,7 +78,7 @@ class PgBench(object):
                     env=self._env, cwd=self._outdir)
         # remember the init duration
-        self._results[scale]['init'] = r[2]
+        self._results['results']['init'] = r[2]
     @staticmethod
     def _parse_results(data):
@@ -151,10 +157,19 @@ class PgBench(object):
         return issues
-    def _run(self, duration, nclients=1, njobs=1, read_only=False,
+    def _run(self, run, scale, duration, nclients=1, njobs=1, read_only=False,
              aggregate=True, csv_queue=None):
         'run pgbench on the database (either a warmup or actual benchmark run)'
+        # Create a separate directory for each pgbench run
+        if read_only:
+            rtag = "ro"
+        else:
+            rtag = "rw"
+        rdir = "%s/pgbench-%s-%d-%d-%s" % (self._outdir, rtag, scale, nclients,
+                                           str(run))
+        os.mkdir(rdir)
+
         args = ['pgbench', '-c', str(nclients), '-j', str(njobs), '-T',
                 str(duration)]
@@ -174,7 +189,7 @@ class PgBench(object):
"duration=%d" % (nclients, njobs, aggregate, read_only, duration))
start = time.time()
- r = run_cmd(args, env=self._env, cwd=self._outdir)
+ r = run_cmd(args, env=self._env, cwd=rdir)
end = time.time()
r = PgBench._parse_results(r[1])
@@ -197,35 +212,49 @@ class PgBench(object):
         # derive configuration for the CPU count / RAM size
         configs = PgBench._configure(cpu_count(), available_ram())
+        results = {'ro': {}, 'rw': {}}
+        j = 0
         for config in configs:
+            scale = config['scale']
-            # init for the dataset scale and warmup
-            self._init(config['scale'])
-
-            warmup = self._run(self._duration, cpu_count(), cpu_count())
-            results = []
-
-            for run in range(self._runs):
-
-                log("pgbench : run=%d" % (run,))
-
-                for clients in config['clients']:
-
-                    # read-only
-                    r = self._run(self._duration, clients, clients, True, True,
-                                  csv_queue)
-                    r.update({'run': run})
-                    results.append(r)
-
-                    # read-write
-                    r = self._run(self._duration, clients, clients, False,
-                                  True, csv_queue)
-                    r.update({'run': run})
-                    results.append(r)
-
-            self._results[config['scale']] = {
-                'warmup': warmup,
-                'runs': results
-            }
+            if scale not in results['ro']:
+                results['ro'][scale] = {}
+            if scale not in results['rw']:
+                results['rw'][scale] = {}
+            # init for the dataset scale and warmup
+            self._init(scale)
+
+            warmup = self._run('w%d' % j, scale, self._duration, cpu_count(),
+                               cpu_count())
+            j += 1
+
+            # read-only & read-write
+            for ro in [True, False]:
+                if ro:
+                    tag = 'ro'
+                else:
+                    tag = 'rw'
+
+                for i in range(self._runs):
+                    log("pgbench : %s run=%d" % (tag, i))
+
+                    for clients in config['clients']:
+                        if clients not in results[tag][scale]:
+                            results[tag][scale][clients] = {}
+                            results[tag][scale][clients]['results'] = []
+
+                        r = self._run(i, scale, self._duration, clients,
+                                      clients, ro, True, csv_queue)
+                        r.update({'run': i})
+                        results[tag][scale][clients]['results'].append(r)
+
+                        tps = []
+                        for result in results[tag][scale][clients]['results']:
+                            tps.append(float(result['tps']))
+                        results[tag][scale][clients]['metric'] = mean(tps)
+                        results[tag][scale][clients]['median'] = median(tps)
+                        results[tag][scale][clients]['std'] = std(tps)
+
+        self._results['pgbench'] = results
         return self._results
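As a side note, the per-client aggregation added by this patch is just numpy's mean/median/std over the per-run tps values collected so far. A standalone sketch with invented numbers; only the numpy calls and the 'metric'/'median'/'std'/'results' keys mirror the patched code:

from numpy import mean, median, std

# Invented per-run pgbench results for one (scale, clients) combination.
runs = [{'tps': 38795.21, 'run': 0},
        {'tps': 43137.72, 'run': 1},
        {'tps': 42238.85, 'run': 2}]

tps = [float(r['tps']) for r in runs]
summary = {
    'metric': mean(tps),    # stored as 'metric' (mean tps) by the patch
    'median': median(tps),
    'std': std(tps),
    'results': runs,
}
print(summary['metric'], summary['median'], summary['std'])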