author     Mark Wong  2018-02-10 01:59:44 +0000
committer  Mark Wong  2018-02-10 01:59:44 +0000
commit     4d397f905511a0076841c0546173310c32f1d057
tree       82546fd896403e18ba8224d8c9f65fc7c395e52a
parent     1f13bc07dabb555f5f8f5680c7e59cb74ef0639b
Refactor pgbench results output
The idea is that one results document represents a test of one
particular commit, and contains the results of all the tests run
against that commit. I think it makes more sense to restructure the
pgbench results like this:
{
    "pgbench": {
        "ro": {
            "results at a scale factor": {
                "number of clients": {
                    "std": 4389.727756305762,
                    "metric": 41390.590287,
                    "median": 43137.716637,
                    "results": [
                        {
                            ... pgbench results of a test ...
                        }
                        ...
                    ]
                },
                ....
            },
            ...
        },
        "rw": {
            ... read write test results ...
        }
    },
    ... other tests' results ...
}
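As a reading aid, here is a minimal sketch of walking one results
document in this layout (the file name 'results.json' and loading via
the json module are assumptions for illustration; only the nesting
order comes from the structure above):

    import json

    # Assumption: one results document per tested commit, stored as JSON.
    with open('results.json') as f:
        doc = json.load(f)

    # Nesting: benchmark -> mode ('ro'/'rw') -> scale -> clients -> cell
    for mode, scales in doc['pgbench'].items():
        for scale, by_clients in scales.items():
            for nclients, cell in by_clients.items():
                print('%s scale=%s clients=%s mean=%.1f median=%.1f '
                      'std=%.1f runs=%d'
                      % (mode, scale, nclients, cell['metric'],
                         cell['median'], cell['std'],
                         len(cell['results'])))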
 client/benchmarks/pgbench.py | 93
 1 file changed, 61 insertions(+), 32 deletions(-)
diff --git a/client/benchmarks/pgbench.py b/client/benchmarks/pgbench.py
index d11d23c..ab4238f 100644
--- a/client/benchmarks/pgbench.py
+++ b/client/benchmarks/pgbench.py
@@ -4,6 +4,8 @@
 import os.path
 import re
 import time
+from numpy import mean, median, std
+
 from multiprocessing import cpu_count
 from utils.logging import log
 from utils.misc import available_ram, run_cmd
@@ -61,7 +63,11 @@ class PgBench(object):
         """
 
         # initialize results for this dataset scale
-        self._results[scale] = {'init': None, 'warmup': None, 'runs': []}
+        self._results['results'] = {
+            'init': None,
+            'runs': [],
+            'warmup': None,
+        }
 
         log("recreating '%s' database" % (self._dbname,))
         run_cmd(['dropdb', '--if-exists', self._dbname], env=self._env)
@@ -72,7 +78,7 @@ class PgBench(object):
                 env=self._env, cwd=self._outdir)
 
         # remember the init duration
-        self._results[scale]['init'] = r[2]
+        self._results['results']['init'] = r[2]
 
     @staticmethod
     def _parse_results(data):
@@ -151,10 +157,19 @@ class PgBench(object):
 
         return issues
 
-    def _run(self, duration, nclients=1, njobs=1, read_only=False,
+    def _run(self, run, scale, duration, nclients=1, njobs=1, read_only=False,
              aggregate=True, csv_queue=None):
         'run pgbench on the database (either a warmup or actual benchmark run)'
 
+        # Create a separate directory for each pgbench run
+        if read_only:
+            rtag = "ro"
+        else:
+            rtag = "rw"
+        rdir = "%s/pgbench-%s-%d-%d-%s" % (self._outdir, rtag, scale, nclients,
+                                           str(run))
+        os.mkdir(rdir)
+
         args = ['pgbench',
                 '-c', str(nclients),
                 '-j', str(njobs),
                 '-T', str(duration)]
@@ -174,7 +189,7 @@ class PgBench(object):
             "duration=%d" % (nclients, njobs, aggregate, read_only, duration))
 
         start = time.time()
-        r = run_cmd(args, env=self._env, cwd=self._outdir)
+        r = run_cmd(args, env=self._env, cwd=rdir)
         end = time.time()
 
         r = PgBench._parse_results(r[1])
@@ -197,35 +212,49 @@ class PgBench(object):
         # derive configuration for the CPU count / RAM size
         configs = PgBench._configure(cpu_count(), available_ram())
 
+        results = {'ro': {}, 'rw': {}}
+        j = 0
         for config in configs:
+            scale = config['scale']
 
-            # init for the dataset scale and warmup
-            self._init(config['scale'])
-
-            warmup = self._run(self._duration, cpu_count(), cpu_count())
-            results = []
-
-            for run in range(self._runs):
-
-                log("pgbench : run=%d" % (run,))
-
-                for clients in config['clients']:
-
-                    # read-only
-                    r = self._run(self._duration, clients, clients, True, True,
-                                  csv_queue)
-                    r.update({'run': run})
-                    results.append(r)
-
-                    # read-write
-                    r = self._run(self._duration, clients, clients, False,
-                                  True, csv_queue)
-                    r.update({'run': run})
-                    results.append(r)
-
-            self._results[config['scale']] = {
-                'warmup': warmup,
-                'runs': results
-            }
+            if scale not in results['ro']:
+                results['ro'][scale] = {}
+            if scale not in results['rw']:
+                results['rw'][scale] = {}
+            # init for the dataset scale and warmup
+            self._init(scale)
+
+            warmup = self._run('w%d' % j, scale, self._duration, cpu_count(),
+                               cpu_count())
+            j += 1
+
+            # read-only & read-write
+            for ro in [True, False]:
+                if ro:
+                    tag = 'ro'
+                else:
+                    tag = 'rw'
+
+                for i in range(self._runs):
+                    log("pgbench : %s run=%d" % (tag, i))
+
+                    for clients in config['clients']:
+                        if clients not in results[tag][scale]:
+                            results[tag][scale][clients] = {}
+                            results[tag][scale][clients]['results'] = []
+
+                        r = self._run(i, scale, self._duration, clients,
+                                      clients, ro, True, csv_queue)
+                        r.update({'run': i})
+                        results[tag][scale][clients]['results'].append(r)
+
+                        tps = []
+                        for result in results[tag][scale][clients]['results']:
+                            tps.append(float(result['tps']))
+                        results[tag][scale][clients]['metric'] = mean(tps)
+                        results[tag][scale][clients]['median'] = median(tps)
+                        results[tag][scale][clients]['std'] = std(tps)
+
+        self._results['pgbench'] = results
 
         return self._results
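The aggregation at the end of the innermost loop recomputes a cell's
summary statistics after each appended run. A standalone sketch of just
that numpy reduction, with made-up tps values for illustration:

    from numpy import mean, median, std

    # Made-up per-run results for one (mode, scale, clients) cell.
    cell = {'results': [{'tps': 41390.59}, {'tps': 43137.72},
                        {'tps': 35872.04}]}

    tps = [float(r['tps']) for r in cell['results']]
    cell['metric'] = mean(tps)    # headline number: mean tps across runs
    cell['median'] = median(tps)  # less sensitive to one outlier run
    cell['std'] = std(tps)        # run-to-run variability

    print(cell['metric'], cell['median'], cell['std'])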