author | Tomas Vondra | 2016-08-10 21:23:55 +0000
---|---|---
committer | Tomas Vondra | 2017-02-27 00:21:09 +0000
commit | 72e6220f64a89cd215660311a5680f07f543b150 |
tree | c76f2df22da3819a28cad200f4b2a45642dfacdf |
parent | cbac00d3965ad4f27f1e812668b5732c1c50b1dd |
Import initial version of the client
-rw-r--r-- | client/README.md | 38
-rw-r--r-- | client/benchmarks/__init__.py | 0
-rw-r--r-- | client/benchmarks/pgbench.py | 221
-rw-r--r-- | client/benchmarks/runner.py | 82
-rw-r--r-- | client/collectors/__init__.py | 0
-rw-r--r-- | client/collectors/collector.py | 28
-rw-r--r-- | client/collectors/linux.py | 93
-rw-r--r-- | client/collectors/postgres.py | 157
-rwxr-xr-x | client/perffarm-client.py | 74
-rw-r--r-- | client/utils/__init__.py | 0
-rw-r--r-- | client/utils/cluster.py | 72
-rw-r--r-- | client/utils/git.py | 82
-rw-r--r-- | client/utils/locking.py | 21
-rw-r--r-- | client/utils/logging.py | 9
-rw-r--r-- | client/utils/misc.py | 23
15 files changed, 900 insertions, 0 deletions
diff --git a/client/README.md b/client/README.md
new file mode 100644
index 0000000..95e1bbf
--- /dev/null
+++ b/client/README.md
@@ -0,0 +1,38 @@
+PostgreSQL performance farm client
+==================================
+
+A client integrating several database benchmarks intended for regular testing
+of PostgreSQL during development, and reporting the results back to a server.
+You can think of this as another buildfarm, but running performance tests
+instead of regression tests. Also, this is written in Python and not Perl.
+
+Currently there are three benchmarks available:
+
+* pgbench (TPC-B-like, testing OLTP workload)
+
+
+Limitations
+-----------
+
+The current client has a number of limitations - firstly, it only works on
+Linux (and perhaps other unix-like operating systems - Solaris, BSD, ...).
+If you're interested in making it work on Windows, let me know.
+
+
+pgbench
+-------
+
+Requires no extra setup, everything is handled by the code (including data
+generation etc.).
+
+
+Statistics
+----------
+
+The client also collects various system-level statistics, useful when analyzing
+the results and investigating performance regressions or differences between
+systems. This includes:
+
+* various data from /proc (cpuinfo, meminfo, ...)
+* PostgreSQL statistics (bgwriter, databases, tables and indexes)
+* sar statistics
diff --git a/client/benchmarks/__init__.py b/client/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/client/benchmarks/__init__.py
diff --git a/client/benchmarks/pgbench.py b/client/benchmarks/pgbench.py
new file mode 100644
index 0000000..da95064
--- /dev/null
+++ b/client/benchmarks/pgbench.py
@@ -0,0 +1,221 @@
+import math
+import os
+import re
+import time
+
+from multiprocessing import cpu_count
+from utils.logging import log
+from utils.misc import available_ram, run_cmd
+
+
+class PgBench(object):
+    'a simple wrapper around pgbench, running TPC-B-like workload by default'
+
+    # TODO allow running custom scripts, not just the default read-write/read-only tests
+    # TODO allow running 'prepared' mode
+
+    def __init__(self, bin_path, dbname):
+        '''
+        bin_path - path to PostgreSQL binaries (dropdb, createdb, psql commands)
+        dbname   - name of the database to use
+        '''
+
+        self._bin = bin_path
+        self._dbname = dbname
+        self._results = {}
+
+
+    @staticmethod
+    def _configure(cpu_count, ram_mbs):
+        'derive the configurations to benchmark from CPU count and RAM size'
+
+        config = []
+
+        # TODO allow overriding this from a global config
+
+        # scales: 10 (small), 50% of RAM, 200% of RAM
+        #for s in [10, ram_mbs/15/2, ram_mbs*2/15]:
+        for s in [10]:
+            config.append({'scale' : int(math.ceil(s/10)*10),
+                           'clients' : [1, cpu_count, 2*cpu_count]})
+
+        return config
+
+
+    def _init(self, scale):
+        'recreate the database (drop + create) and populate it with given scale'
+
+        # initialize results for this dataset scale
+        self._results[scale] = {'init' : None, 'warmup' : None, 'runs' : []}
+
+        log("recreating '%s' database" % (self._dbname,))
+        run_cmd(['dropdb', '--if-exists', self._dbname], env={'PATH' : self._bin})
+        run_cmd(['createdb', self._dbname], env={'PATH' : self._bin})
+
+        log("initializing pgbench '%s' with scale %s" % (self._dbname, scale))
+        r = run_cmd(['pgbench', '-i', '-s', str(scale), self._dbname], env={'PATH' : self._bin})
+
+        # remember the init duration
+        self._results[scale]['init'] = r[2]
+
+
+    @staticmethod
+    def _parse_results(data):
+        'extract results (including parameters) from the pgbench output'
+
+        scale = -1
+        r = re.search('scaling factor: ([0-9]+)', data)
+        if r:
+            scale = r.group(1)
+
+        mode = -1
+        r = re.search('query mode: (.+)', data)
+        if r:
+            mode = r.group(1)
+
+        clients = -1
+        r = re.search('number of clients: ([0-9]+)', data)
+        if r:
+            clients = r.group(1)
+
+        threads = -1
+        r = re.search('number of threads: ([0-9]+)', data)
+        if r:
+            threads = r.group(1)
+
+        duration = -1
+        r = re.search('duration: ([0-9]+) s', data)
+        if r:
+            duration = r.group(1)
+
+        latency = -1
+        r = re.search('latency average: ([0-9\.]+) ms', data)
+        if r:
+            latency = r.group(1)
+
+        tps = -1
+        r = re.search('tps = ([0-9]+\.[0-9]+) \(excluding connections establishing\)', data)
+        if r:
+            tps = r.group(1)
+
+        return {'scale' : scale,
+                'mode' : mode,
+                'clients' : clients,
+                'threads' : threads,
+                'duration' : duration,
+                'latency' : latency,
+                'tps' : tps}
+
+
+    @staticmethod
+    def _merge_logs():
+        'merge log files produced by pgbench threads (aggregated per second)'
+
+        r = {}
+
+        # find pgbench transaction logs in current directory
+        logs = [v for v in os.listdir(os.getcwd()) if re.match('pgbench_log.[0-9]+(\.[0-9]+)?', v)]
+
+        # parse each transaction log, and merge it into the existing results
+        for l in logs:
+            worker_log = open(l, 'r')
+            for row in worker_log:
+                values = row.split(' ')
+
+                timestamp = values[0]
+                tps = int(values[1])
+                lat_sum = long(values[2])
+                lat_sum2 = long(values[3])
+                lat_min = int(values[4])
+                lat_max = int(values[5])
+
+                # if first record for the timestamp, store it, otherwise merge
+                if timestamp not in r:
+                    r[timestamp] = {'tps' : tps,
+                                    'lat_sum' : lat_sum, 'lat_sum2' : lat_sum2,
+                                    'lat_min' : lat_min, 'lat_max' : lat_max}
+                else:
+                    r[timestamp]['tps'] += int(tps)
+                    r[timestamp]['lat_sum'] += long(lat_sum)
+                    r[timestamp]['lat_sum2'] += long(lat_sum2)
+                    r[timestamp]['lat_min'] = min(r[timestamp]['lat_min'], int(lat_min))
+                    r[timestamp]['lat_max'] = max(r[timestamp]['lat_max'], int(lat_max))
+
+            os.remove(l)
+
+        # now produce a simple text log sorted by the timestamp
+        o = []
+        for t in sorted(r.keys()):
+            o.append('%s %d %d %d %d %d' % (t, r[t]['tps'], r[t]['lat_sum'], r[t]['lat_sum2'], r[t]['lat_min'], r[t]['lat_max']))
+
+        return '\n'.join(o)
+
+
+    def _run(self, duration, nclients=1, njobs=1, read_only=False, aggregate=True):
+        'run pgbench on the database (either a warmup or actual benchmark run)'
+
+        args = ['pgbench', '-c', str(nclients), '-j', str(njobs), '-T', str(duration)]
+
+        # aggregate on per second resolution
+        if aggregate:
+            args.extend(['-l', '--aggregate-interval', '1'])
+
+        if read_only:
+            args.extend(['-S'])
+
+        args.extend([self._dbname])
+
+        # do an explicit checkpoint before each run
+        run_cmd(['psql', self._dbname, '-c', 'checkpoint'], env={'PATH' : self._bin})
+
+        log("pgbench : clients=%d, jobs=%d, aggregate=%s, read-only=%s, duration=%d" % (nclients, njobs, aggregate, read_only, duration))
+
+        start = time.time()
+        r = run_cmd(args, env={'PATH' : self._bin})
+        end = time.time()
+
+        r = PgBench._parse_results(r[1])
+        r.update({'read-only' : read_only})
+
+        if aggregate:
+            r.update({'transaction-log' : PgBench._merge_logs()})
+
+        r.update({'start' : start, 'end' : end})
+
+        return r
+
+
+    def run_tests(self, duration=10, runs=3):
+        'execute the whole benchmark, including initialization, warmup and benchmark runs'
+
+        # derive configuration for the CPU count / RAM size
+        configs = PgBench._configure(cpu_count(), available_ram())
+
+        for config in configs:
+
+            # init for the dataset scale and warmup
+            self._init(config['scale'])
+
+            warmup = self._run(duration, cpu_count(), cpu_count())
+            results = []
+
+            for run in range(runs):
+
+                for clients in config['clients']:
+
+                    # read-only
+                    r = self._run(duration, clients, clients, True)
+                    r.update({'run' : run})
+                    results.append(r)
+
+                    # read-write
+                    r = self._run(duration, clients, clients, False)
+                    r.update({'run' : run})
+                    results.append(r)
+
+            self._results[config['scale']] = {
+                'warmup' : warmup,
+                'runs' : results
+            }
+
+        return self._results
diff --git a/client/benchmarks/runner.py b/client/benchmarks/runner.py
new file mode 100644
index 0000000..3a891dc
--- /dev/null
+++ b/client/benchmarks/runner.py
@@ -0,0 +1,82 @@
+import json
+import os
+
+from utils.logging import log
+
+
+class BenchmarkRunner(object):
+    'manages runs of all the benchmarks, including cluster restarts etc.'
+
+    def __init__(self, out_dir, cluster, collector):
+        ''
+
+        self._output = out_dir      # where to store output files
+        self._benchmarks = {}       # bench name => class implementing the benchmark
+        self._configs = {}          # config name => (bench name, config)
+        self._cluster = cluster
+        self._collector = collector
+
+
+    def register_benchmark(self, benchmark_name, benchmark_class):
+        ''
+
+        # FIXME check if a mapping for the same name already exists
+        self._benchmarks.update({benchmark_name : benchmark_class})
+
+
+    def register_config(self, config_name, benchmark_name, postgres_config, **kwargs):
+        ''
+
+        # FIXME check if a mapping for the same name already exists
+        # FIXME check that the benchmark mapping already exists
+        self._configs.update({config_name : {'benchmark' : benchmark_name, 'config' : kwargs, 'postgres' : postgres_config}})
+
+
+    def _run_config(self, config_name):
+        ''
+
+        log("running benchmark configuration '%s'" % (config_name,))
+
+        # construct the benchmark class for the given config name
+        config = self._configs[config_name]
+        bench = self._benchmarks[config['benchmark']]
+
+        # expand the attribute names
+        bench = bench(**config['config'])
+
+        self._cluster.start(config = config['postgres'])
+
+        # start collector(s) of additional info
+        self._collector.start()
+
+        # run the tests
+        r = bench.run_tests()
+
+        # stop the cluster and collector
+        self._collector.stop()
+        self._cluster.stop()
+
+        # merge data from the collectors into the JSON document with results
+        r.update(self._collector.result())
+
+        # read the postgres log
+        with open('pg.log', 'r') as f:
+            r['postgres-log'] = f.read()
+
+        r['meta'] = {'benchmark' : config['benchmark'],
+                     'name' : config_name}
+
+        os.remove('pg.log')
+
+        with open('%s/%s.json' % (self._output, config_name), 'w') as f:
+            f.write(json.dumps(r, indent=4))
+
+
+    def run(self):
+        'run all the configured benchmarks'
+
+        # FIXME check that the directory does not exist
+        os.mkdir(self._output)
+
+        for config_name in self._configs:
+            self._run_config(config_name)
diff --git a/client/collectors/__init__.py b/client/collectors/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/client/collectors/__init__.py
diff --git a/client/collectors/collector.py b/client/collectors/collector.py
new file mode 100644
index 0000000..d73ef5b
--- /dev/null
+++ b/client/collectors/collector.py
@@ -0,0 +1,28 @@
+
+class MultiCollector(object):
+    'a collector combining multiple other collectors'
+
+    def __init__(self):
+        self._collectors = {}
+
+
+    def register(self, name, collector):
+        self._collectors[name] = collector
+
+
+    def start(self):
+        for name in self._collectors:
+            self._collectors[name].start()
+
+
+    def stop(self):
+        for name in self._collectors:
+            self._collectors[name].stop()
+
+
+    def result(self):
+        r = {}
+        for name in self._collectors:
+            r.update({name : self._collectors[name].result()})
+
+        return r
diff --git a/client/collectors/linux.py b/client/collectors/linux.py
new file mode 100644
index 0000000..9112a10
--- /dev/null
+++ b/client/collectors/linux.py
@@ -0,0 +1,93 @@
+from datetime import datetime, timedelta, time
+from utils.logging import log
+from utils.misc import run_cmd
+
+
+class LinuxCollector(object):
+    'collects various Linux-specific statistics (cpuinfo, mounts, sar)'
+
+    def __init__(self, sar_path = '/var/log/sa'):
+        self._start_ts = None
+        self._end_ts = None
+        self._sar = sar_path
+
+
+    def start(self):
+        self._start_ts = datetime.now()
+
+
+    def stop(self):
+        self._end_ts = datetime.now()
+
+
+    def result(self):
+        'build the results'
+
+        r = {
+            'sar' : self._collect_sar_stats(),
+            'sysctl' : self._collect_sysctl()
+        }
+        r.update(self._collect_system_info())
+
+        return r
+
+
+    def _collect_sar_stats(self):
+        'extracts all data available in sar, filters by timestamp range'
+
+        sar = {}
+        log("collecting sar stats")
+
+        d = self._start_ts.date()
+        while d <= self._end_ts.date():
+
+            # FIXME maybe skip if the file does not exist
+            filename = '%(path)s/sa%(day)s' % {'path' : self._sar, 'day' : d.strftime('%d')}
+
+            log("extracting sar data from '%s'" % (filename,))
+
+            # need to use the right combination of start/end timestamps
+            s = self._start_ts.strftime('%H:%M:%S')
+            e = self._end_ts.strftime('%H:%M:%S')
+
+            if d == self._start_ts.date() and d == self._end_ts.date():
+                r = run_cmd(['sar', '-A', '-p', '-s', s, '-e', e, '-f', filename])
+            elif d == self._start_ts.date():
+                r = run_cmd(['sar', '-A', '-p', '-s', s, '-f', filename])
+            elif d == self._end_ts.date():
+                r = run_cmd(['sar', '-A', '-p', '-e', e, '-f', filename])
+            else:
+                r = run_cmd(['sar', '-A', '-p', '-f', filename])
+
+            sar[str(d)] = r[1]
+
+            # proceed to the next day
+            d += timedelta(days=1)
+
+        return sar
+
+
+    def _collect_sysctl(self):
+        'collect kernel configuration'
+
+        log("collecting sysctl")
+        r = run_cmd(['/usr/sbin/sysctl', '-a'])
+
+        return r[1]
+
+
+    def _collect_system_info(self):
+        'collect cpuinfo, meminfo, mounts'
+
+        system = {}
+
+        with open('/proc/cpuinfo', 'r') as f:
+            system['cpuinfo'] = f.read()
+
+        with open('/proc/meminfo', 'r') as f:
+            system['meminfo'] = f.read()
+
+        with open('/proc/mounts', 'r') as f:
+            system['mounts'] = f.read()
+
+        return system
diff --git a/client/collectors/postgres.py b/client/collectors/postgres.py
new file mode 100644
index 0000000..306c2b7
--- /dev/null
+++ b/client/collectors/postgres.py
@@ -0,0 +1,157 @@
+import csv
+import multiprocessing
+import os
+import psycopg2
+import psycopg2.extras
+import Queue
+import time
+
+from multiprocessing import Process, Queue
+
+
+class PostgresCollector(object):
+    'collects basic PostgreSQL-level statistics (bgwriter, databases, tables, indexes)'
+
+    def __init__(self, dbname):
+        self._dbname = dbname
+
+
+    def start(self):
+        self._in_queue = multiprocessing.Queue()
+        self._out_queue = multiprocessing.Queue()
+        self._worker = Process(target=run_collector, args=(self._in_queue, self._out_queue, self._dbname))
+        self._worker.start()
+
+
+    def stop(self):
+        # signal the worker process to stop by writing a value into the queue
+        self._in_queue.put(True)
+
+        # FIXME this gets stuck for some reason (but we'll wait for queue anyway)
+        # self._worker.join()
+
+        # and then read the result
+        self._result = self._out_queue.get()
+
+        self._worker = None
+        self._in_queue = None
+        self._out_queue = None
+
+
+    def result(self):
+        return self._result
+
+
+def run_collector(in_queue, out_queue, dbname, interval=1.0):
+    'collector code for a separate process, communicating through a pair of queues'
+
+    bgwriter_log = None
+    tables_log = None
+    indexes_log = None
+    database_log = None
+
+    # get current timestamp
+    ts = time.time()
+
+    while True:
+
+        # wait until the next tick
+        ts += interval
+
+        # if we're behind, skip forward
+        if ts < time.time():
+            continue
+
+        # sleep (but only for the remaining time, to prevent drift)
+        time.sleep(ts - time.time())
+
+        # if we've received message in the input queue (not empty), terminate
+        if not in_queue.empty():
+            break
+
+        # open connection to the benchmark database (if can't open, continue)
+        # notice this is intentionally after the wait, so we'll wait before
+        # next connection attempt
+        try:
+            conn = psycopg2.connect('host=localhost dbname=%s' % (dbname,))
+            cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+        except Exception as ex:
+            continue
+
+        # background writer stats
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_bgwriter')
+
+        # on the first iteration, construct the CSV files
+        if not bgwriter_log:
+            fields = [desc[0] for desc in cur.description]
+            bgwriter_log = csv.DictWriter(open('bgwriter.csv', 'w'), fields)
+            bgwriter_log.writeheader()
+
+        bgwriter_log.writerows(cur.fetchall())
+
+        # TODO we can assume statistics for most objects (tables, indexes) won't
+        # change every second, so we can optimize the amount of data by detecting
+        # changes and only keeping the two rows next to it
+
+        # table statistics
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_all_tables JOIN pg_statio_all_tables USING (relid, schemaname, relname)')
+
+        # on the first iteration, construct the CSV files
+        if not tables_log:
+            fields = [desc[0] for desc in cur.description]
+            tables_log = csv.DictWriter(open('tables.csv', 'w'), fields)
+            tables_log.writeheader()
+
+        tables_log.writerows(cur.fetchall())
+
+        # index statistics
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_all_indexes JOIN pg_statio_all_indexes USING (relid, indexrelid, schemaname, relname, indexrelname)')
+
+        # on the first iteration, construct the CSV files
+        if not indexes_log:
+            fields = [desc[0] for desc in cur.description]
+            indexes_log = csv.DictWriter(open('indexes.csv', 'w'), fields)
+            indexes_log.writeheader()
+
+        indexes_log.writerows(cur.fetchall())
+
+        # database statistics
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_database')
+
+        # on the first iteration, construct the CSV files
+        if not database_log:
+            fields = [desc[0] for desc in cur.description]
+            database_log = csv.DictWriter(open('database.csv', 'w'), fields)
+            database_log.writeheader()
+
+        database_log.writerows(cur.fetchall())
+
+        conn.close()
+
+    # close the CSV writers
+    bgwriter_log = None
+    tables_log = None
+    indexes_log = None
+    database_log = None
+
+    result = {}
+
+    with open('bgwriter.csv', 'r') as f:
+        result.update({'bgwriter' : f.read()})
+
+    with open('tables.csv', 'r') as f:
+        result.update({'tables' : f.read()})
+
+    with open('indexes.csv', 'r') as f:
+        result.update({'indexes' : f.read()})
+
+    with open('database.csv', 'r') as f:
+        result.update({'database' : f.read()})
+
+    # remove the files
+    os.remove('bgwriter.csv')
+    os.remove('tables.csv')
+    os.remove('indexes.csv')
+    os.remove('database.csv')
+
+    out_queue.put(result)
diff --git a/client/perffarm-client.py b/client/perffarm-client.py
new file mode 100755
index 0000000..0d97677
--- /dev/null
+++ b/client/perffarm-client.py
@@ -0,0 +1,74 @@
+import argparse
+import json
+import os
+
+from benchmarks.pgbench import PgBench
+from benchmarks.runner import BenchmarkRunner
+
+from collectors.linux import LinuxCollector
+from collectors.postgres import PostgresCollector
+from collectors.collector import MultiCollector
+
+from utils.locking import FileLock
+from utils.git import GitRepository
+from utils.cluster import PgCluster
+from utils import logging
+
+GIT_URL = '[email protected]:postgres/postgres.git'
+REPOSITORY_PATH = '/home/user/tmp/git-postgres'
+BUILD_PATH = '/home/user/tmp/bin-postgres'
+BIN_PATH = os.path.join(BUILD_PATH, 'bin')
+DATADIR_PATH = '/home/user/tmp/data-postgres'
+
+POSTGRES_CONFIG = {'shared_buffers' : '1GB',
+                   'work_mem' : '64MB',
+                   'maintenance_work_mem' : '128MB',
+                   'min_wal_size' : '2GB',
+                   'max_wal_size' : '4GB',
+                   'log_line_prefix' : '%n %t ',
+                   'log_checkpoints' : 'on',
+                   'log_autovacuum_min_duration' : '0',
+                   'log_temp_files' : '32',
+                   'checkpoint_timeout' : '15min',
+                   'checkpoint_completion_target' : '0.9'}
+
+DATABASE_NAME = 'perf'
+
+OUTPUT_DIR = '/home/user/perf-output'
+
+
+if __name__ == '__main__':
+
+    with FileLock('.lock') as lock:
+
+        # clone repository and build the sources
+
+        repository = GitRepository(url = GIT_URL, path = REPOSITORY_PATH)
+
+        repository.clone_or_update()
+        repository.build_and_install(path = BUILD_PATH)
+
+        # build and start a postgres cluster
+
+        cluster = PgCluster(bin_path = BIN_PATH, data_path = DATADIR_PATH)
+
+        # create collectors
+
+        collectors = MultiCollector()
+
+        collectors.register('system', LinuxCollector())
+        collectors.register('postgres', PostgresCollector(dbname=DATABASE_NAME))
+
+        runner = BenchmarkRunner(OUTPUT_DIR, cluster, collectors)
+
+        # register the three tests we currently have
+
+        runner.register_benchmark('pgbench', PgBench)
+
+        # register one config for each benchmark (should be moved to a config file)
+
+        runner.register_config('pgbench-basic', 'pgbench', dbname = DATABASE_NAME,
+                               bin_path = ('%s/bin' % (BUILD_PATH,)),
+                               postgres_config = POSTGRES_CONFIG)
+
+        runner.run()
diff --git a/client/utils/__init__.py b/client/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/client/utils/__init__.py
diff --git a/client/utils/cluster.py b/client/utils/cluster.py
new file mode 100644
index 0000000..6e48970
--- /dev/null
+++ b/client/utils/cluster.py
@@ -0,0 +1,72 @@
+import os
+import shutil
+import time
+
+from multiprocessing import cpu_count, Process, Queue
+from subprocess import call, STDOUT
+from tempfile import TemporaryFile
+from utils.logging import log
+
+
+class PgCluster(object):
+    'basic manipulation of postgres cluster (init, start, stop, destroy)'
+
+    def __init__(self, bin_path, data_path):
+        self._bin = bin_path
+        self._data = data_path
+
+
+    def _initdb(self):
+        'initialize the data directory'
+
+        with TemporaryFile() as strout:
+            log("initializing cluster into '%s'" % (self._data,))
+            call(['pg_ctl', '-D', self._data, 'init'], env={'PATH' : self._bin}, stdout=strout, stderr=STDOUT)
+
+
+    def _configure(self, config):
+        'update configuration of a cluster (using postgresql.auto.conf)'
+
+        log("configuring cluster in '%s'" % (self._data,))
+        with open('%s/postgresql.auto.conf' % (self._data,), 'a+') as f:
+            for k in config:
+                f.write("%(name)s = '%(value)s'\n" % {'name' : k, 'value' : config[k]})
+
+
+    def _destroy(self):
+        'forced cleanup of possibly existing cluster processes and data directory'
+
+        with TemporaryFile() as strout:
+            log("killing all existing postgres processes")
+            call(['killall', 'postgres'], stdout=strout, stderr=STDOUT)
+
+        # remove the data directory
+        if os.path.exists(self._data):
+            shutil.rmtree(self._data)
+
+
+    def start(self, config, destroy=True):
+        'init, configure and start the cluster'
+
+        # cleanup any previous cluster running, remove data dir if it exists
+        if destroy:
+            self._destroy()
+
+        self._initdb()
+        self._configure(config)
+
+        with TemporaryFile() as strout:
+            log("starting cluster in '%s' using '%s' binaries" % (self._data, self._bin))
+            call(['pg_ctl', '-D', self._data, '-l', 'pg.log', '-w', 'start'], env={'PATH' : self._bin}, stdout=strout, stderr=STDOUT)
+
+
+    def stop(self, destroy=True):
+        'stop the cluster'
+
+        with TemporaryFile() as strout:
+            log("stopping cluster in '%s' using '%s' binaries" % (self._data, self._bin))
+            call(['pg_ctl', '-D', self._data, '-w', '-t', '60', 'stop'], env={'PATH' : self._bin}, stdout=strout, stderr=STDOUT)
+
+        # kill any remaining processes, remove the data dir
+        if destroy:
+            self._destroy()
diff --git a/client/utils/git.py b/client/utils/git.py
new file mode 100644
index 0000000..dcd74af
--- /dev/null
+++ b/client/utils/git.py
@@ -0,0 +1,82 @@
+import os
+import shutil
+
+from multiprocessing import cpu_count
+from subprocess import call, STDOUT
+from tempfile import TemporaryFile
+from utils.logging import log
+
+
+class GitRepository(object):
+    'a simple management of a git repository / source building'
+
+    def __init__(self, url, path):
+        'url - repository URL, path - local directory for the clone'
+
+        self._url = url
+        self._path = path
+
+
+    def _exists(self):
+        'check that a local repository clone exists'
+
+        # TODO verify that the repository uses the proper upstream url
+        return os.path.exists(self._path)
+
+
+    def _clone(self):
+        ''
+        log("cloning repository '%s' to '%s'" % (url, path))
+
+        with TemporaryFile() as strout:
+            call(['git', 'clone', url, path], stdout=strout, stderr=STDOUT)
+
+
+    def _update(self):
+        'update an existing repository clone'
+
+        log("updating repository '%s' from '%s'" % (self._path, self._url))
+
+        # simply call git-pull and redirect stdout/stderr
+        # FIXME should verify that the repository uses the proper upstream url
+        with TemporaryFile() as strout:
+            call(['git', 'pull', self._path], stdout=strout, stderr=STDOUT)
+
+
+    def current_commit(self):
+        'returns current commit hash'
+
+        with TemporaryFile() as strout:
+            call(['git', 'rev-parse', 'HEAD'], cwd=self._path, stdout=strout, stderr=STDOUT)
+            strout.seek(0)
+            return strout.read().strip()
+
+
+    def clone_or_update(self):
+        'refreshes the repository (either clone from scratch or refresh)'
+
+        if self._exists():
+            self._update()
+        else:
+            self._clone()
+
+        log("current commit '%s'" % (self.current_commit(),))
+
+
+    def build_and_install(self, path, remove=True):
+        'builds and installs the sources'
+
+        # TODO collect output of configure and make commands
+        if os.path.exists(path):
+            shutil.rmtree(path)
+
+        with TemporaryFile() as strout:
+            log("configuring sources in '%s' with prefix '%s'" % (self._path, path))
+            call(['./configure', '--prefix', path], cwd=self._path, stdout=strout, stderr=STDOUT)
+
+        with TemporaryFile() as strout:
+            log("building sources and installing into '%s'" % (path,))
+
+            # cleanup and build using multiple cpus
+            call(['make', '-s', 'clean'], cwd=self._path, stdout=strout, stderr=STDOUT)
+            call(['make', '-s', '-j', str(cpu_count()), 'install'], cwd=self._path, stdout=strout, stderr=STDOUT)
diff --git a/client/utils/locking.py b/client/utils/locking.py
new file mode 100644
index 0000000..dfc8f63
--- /dev/null
+++ b/client/utils/locking.py
@@ -0,0 +1,21 @@
+import fcntl
+import os
+
+
+class FileLock():
+    'a simple wrapper around file lock'
+
+    def __init__(self, filename):
+        self._file = open(filename, 'w')
+
+    def __enter__(self):
+        'locks the file and writes the PID of the current process into it'
+        fcntl.flock(self._file, fcntl.LOCK_EX)
+        self._file.write(str(os.getpid()))
+        self._file.flush()
+
+        return self._file
+
+    def __exit__(self, type, value, traceback):
+        'unlock the file'
+        fcntl.flock(self._file, fcntl.LOCK_UN)
diff --git a/client/utils/logging.py b/client/utils/logging.py
new file mode 100644
index 0000000..9e12c88
--- /dev/null
+++ b/client/utils/logging.py
@@ -0,0 +1,9 @@
+import time
+
+def log(message):
+    ''
+
+    print '%(epoch)s %(date)s %(message)s' % {
+        'epoch' : time.time(),
+        'date' : time.strftime('%Y-%m-%d %H:%M:%S'),
+        'message' : message}
diff --git a/client/utils/misc.py b/client/utils/misc.py
new file mode 100644
index 0000000..1d5436e
--- /dev/null
+++ b/client/utils/misc.py
@@ -0,0 +1,23 @@
+import os
+import time
+
+from subprocess import call, STDOUT
+from tempfile import TemporaryFile
+
+
+def available_ram():
+    'determine amount of RAM in the system (in megabytes)'
+
+    return int(os.popen("free -m").readlines()[1].split()[1])
+
+
+def run_cmd(args, env=None, cwd=None):
+    'run command (a subprocess.call wrapper)'
+
+    with TemporaryFile() as strout:
+
+        start = time.time()
+        retcode = call(args, env=env, cwd=cwd, stdout=strout, stderr=STDOUT)
+
+        strout.seek(0)
+        return (retcode, strout.read(), (time.time() - start))
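
For illustration only (not part of the commit), here is a minimal sketch of how a second benchmark configuration could be registered with the `BenchmarkRunner` introduced above. The classes and the `register_benchmark`/`register_config` calls are the ones added in this diff; the config name `pgbench-small-buffers`, the paths, the output directory, and the reduced `shared_buffers` value are hypothetical placeholders.

```python
# Hypothetical sketch: wiring up two pgbench configurations that differ only
# in the PostgreSQL configuration. Paths and config names are made up.
import os

from benchmarks.pgbench import PgBench
from benchmarks.runner import BenchmarkRunner
from collectors.collector import MultiCollector
from collectors.linux import LinuxCollector
from collectors.postgres import PostgresCollector
from utils.cluster import PgCluster

BUILD_PATH = '/home/user/tmp/bin-postgres'       # assumed build prefix
DATADIR_PATH = '/home/user/tmp/data-postgres'    # assumed data directory
OUTPUT_DIR = '/home/user/perf-output'            # assumed output directory
DATABASE_NAME = 'perf'

# collectors gather system-level and PostgreSQL-level statistics during runs
collectors = MultiCollector()
collectors.register('system', LinuxCollector())
collectors.register('postgres', PostgresCollector(dbname=DATABASE_NAME))

cluster = PgCluster(bin_path=os.path.join(BUILD_PATH, 'bin'),
                    data_path=DATADIR_PATH)
runner = BenchmarkRunner(OUTPUT_DIR, cluster, collectors)

runner.register_benchmark('pgbench', PgBench)

# same benchmark, two configs with different postgres settings
runner.register_config('pgbench-basic', 'pgbench',
                       dbname=DATABASE_NAME,
                       bin_path='%s/bin' % (BUILD_PATH,),
                       postgres_config={'shared_buffers': '1GB'})

runner.register_config('pgbench-small-buffers', 'pgbench',
                       dbname=DATABASE_NAME,
                       bin_path='%s/bin' % (BUILD_PATH,),
                       postgres_config={'shared_buffers': '128MB'})

runner.run()
```

As in the committed `perffarm-client.py`, the extra keyword arguments (`dbname`, `bin_path`) are forwarded to the `PgBench` constructor, and each configuration is written out as its own `<config-name>.json` document in the output directory with the collector results merged in.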