path: root/client/benchmarks/runner.py
blob: 05bbe57a1e4a04c22b2eafa6d32efbf8e26edfbf
import errno
import json
import os
import urllib2

from multiprocessing import Process, Queue
from time import gmtime, strftime
from subprocess import check_output

from utils.logging import log


class BenchmarkRunner(object):
    'manages runs of all the benchmarks, including cluster restarts etc.'

    def __init__(self, out_dir, url, secret, cluster, collector):
        'initialize the runner with output dir, upload URL/secret and helpers'

        self._output = out_dir  # where to store output files
        self._benchmarks = {}  # bench name => class implementing the benchmark
        self._configs = {}  # config name => (bench name, config)
        self._cluster = cluster
        self._collector = collector
        self._url = url
        self._secret = secret

    def register_benchmark(self, benchmark_name, benchmark_class):
        'register a class implementing the benchmark of the given name'

        # FIXME check if a mapping for the same name already exists
        self._benchmarks[benchmark_name] = benchmark_class

    def register_config(self, config_name, benchmark_name, branch, commit,
                        postgres_config, **kwargs):
        'register a configuration (benchmark + options) under the given name'

        # FIXME check if a mapping for the same name already exists
        # FIXME check that the benchmark mapping already exists
        self._configs[config_name] = {'benchmark': benchmark_name,
                                      'config': kwargs,
                                      'postgres': postgres_config,
                                      'branch': branch,
                                      'commit': commit}
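
        # Illustrative call (the benchmark name and keyword options below are
        # example assumptions, not values defined by this module):
        #
        #   runner.register_config('pgbench-scale-100', 'pgbench',
        #                          branch='master', commit='a1b2c3d',
        #                          postgres_config={'shared_buffers': '1GB'},
        #                          scale=100, duration=60, csv=True)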

    def _check_config(self, config_name):
        'sanity-check the configuration with the given name, return issues'

        log("checking benchmark configuration '%s'" % (config_name,))

        # construct the benchmark class for the given config name
        config = self._configs[config_name]
        bench = self._benchmarks[config['benchmark']]

        # expand the attribute names
        bench = bench(**config['config'])

        # run the tests
        return bench.check_config()

    def check(self):
        'check configurations for all benchmarks'

        issues = {}

        for config_name in self._configs:
            t = self._check_config(config_name)
            if t:
                issues[config_name] = t

        return issues

    def _run_config(self, config_name):
        'run the benchmark configuration with the given name'

        log("running benchmark configuration '%s'" % (config_name,))

        # construct the benchmark class for the given config name
        config = self._configs[config_name]
        bench = self._benchmarks[config['benchmark']]

        # expand the attribute names
        bench = bench(**config['config'])

        self._cluster.start(config=config['postgres'])

        # start collector(s) of additional info
        self._collector.start()

        # if requested output to CSV, create a queue and collector process
        csv_queue = None
        csv_collector = None
        if config['config'].get('csv'):
            csv_queue = Queue()
            csv_collector = Process(target=csv_collect_results,
                                    args=(config_name, csv_queue))
            csv_collector.start()
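            # the benchmark is expected to push one list per result row onto
            # the queue; csv_collect_results (below) writes the rows out and
            # stops once it receives a plain string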

        # run the tests
        r = bench.run_tests(csv_queue)

        # notify the result collector to end and wait for it to terminate
        if csv_queue:
            csv_queue.put("STOP")
            csv_collector.join()

        # stop the cluster and collector
        log("terminating collectors")
        self._collector.stop()
        self._cluster.stop()

        # merge data from the collectors into the JSON document with results
        r.update(self._collector.result())

        uname = check_output(['uname', '-a']).strip()

        r['meta'] = {
                'benchmark': config['benchmark'],
                'date': strftime("%Y-%m-%d %H:%M:%S.000000+00", gmtime()),
                'name': config_name,
                'uname': uname,
        }

        r['postgres'] = {
                'branch': config['branch'],
                'commit': config['commit'],
                'settings': config['postgres'],
        }

        with open('%s/results.json' % self._output, 'w') as f:
            f.write(json.dumps(r, indent=4))

        try:
            self._upload_results(r)
        except Exception as e:
            log("WARNING: uploading results failed: %s" % (e,))

    def _upload_results(self, results):
        'upload the results document to the configured URL'

        # the endpoint expects a JSON array of documents, so wrap the single
        # results document in a list
        post = [results]

        req = urllib2.Request(self._url, json.dumps(post))
        req.add_header('Authorization', self._secret)  # auth token in header
        req.add_header('Content-Type', 'application/json')
        urllib2.urlopen(req)

    def run(self):
        'run all the configured benchmarks'

        # It's ok if the output directory already exists.  One of the other
        # collector modules may have started before the benchmark.
        try:
            os.mkdir(self._output)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            log("WARNING: output directory already exists: %s" % self._output)

        for config_name in self._configs:
            self._run_config(config_name)


def csv_collect_results(bench_name, queue):
    'collect results into a CSV file (through a queue)'

    with open("%s.csv" % (bench_name,), 'w') as results_file:

        # collect data from the queue - once we get a plain string (instead of
        # a list), it's a sign to terminate the collector
        while True:

            v = queue.get()

            # if we got a string, it means 'terminate'
            if isinstance(v, str):
                log("terminating CSV result collector")
                return

            # otherwise we expect the value to be a list of values forming one
            # row - convert the items to strings and write them out as a
            # tab-separated line
            v = [str(x) for x in v]
            results_file.write(bench_name + "\t" + "\t".join(v) + "\n")
            results_file.flush()
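

# Example wiring of the runner (a minimal sketch - the PgBench class and the
# cluster/collector objects are assumptions, provided elsewhere in the client,
# not defined in this module):
#
#   runner = BenchmarkRunner('/tmp/perf-output', 'https://example.com/upload',
#                            'secret-token', cluster, collector)
#   runner.register_benchmark('pgbench', PgBench)
#   runner.register_config('pgbench-basic', 'pgbench', branch='master',
#                          commit='a1b2c3d', postgres_config={}, csv=True)
#
#   issues = runner.check()
#   if not issues:
#       runner.run()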