author | Tomas Vondra | 2016-08-10 21:23:55 +0000
---|---|---
committer | Tomas Vondra | 2017-02-27 00:21:09 +0000
commit | 72e6220f64a89cd215660311a5680f07f543b150 |
tree | c76f2df22da3819a28cad200f4b2a45642dfacdf |
parent | cbac00d3965ad4f27f1e812668b5732c1c50b1dd |
Import initial version of the client
-rw-r--r-- | client/README.md | 38
-rw-r--r-- | client/benchmarks/__init__.py | 0
-rw-r--r-- | client/benchmarks/pgbench.py | 221
-rw-r--r-- | client/benchmarks/runner.py | 82
-rw-r--r-- | client/collectors/__init__.py | 0
-rw-r--r-- | client/collectors/collector.py | 28
-rw-r--r-- | client/collectors/linux.py | 93
-rw-r--r-- | client/collectors/postgres.py | 157
-rwxr-xr-x | client/perffarm-client.py | 74
-rw-r--r-- | client/utils/__init__.py | 0
-rw-r--r-- | client/utils/cluster.py | 72
-rw-r--r-- | client/utils/git.py | 82
-rw-r--r-- | client/utils/locking.py | 21
-rw-r--r-- | client/utils/logging.py | 9
-rw-r--r-- | client/utils/misc.py | 23
15 files changed, 900 insertions, 0 deletions
diff --git a/client/README.md b/client/README.md
new file mode 100644
index 0000000..95e1bbf
--- /dev/null
+++ b/client/README.md
@@ -0,0 +1,38 @@
+PostgreSQL performance farm client
+==================================
+
+A client integrating several database benchmarks intended for regular testing
+of PostgreSQL during development, and reporting the results back to a server.
+You can think of this as another buildfarm, but running performance tests
+instead of regression tests. Also, this is written in Python and not Perl.
+
+Currently there are three benchmarks available:
+
+* pgbench (TPC-B-like, testing OLTP workload)
+
+
+Limitations
+-----------
+
+The current client has a number of limitations - firstly, it only works on
+Linux (and perhaps other unix-like operating systems - Solaris, BSD, ...).
+If you're interested in making it work on Windows, let me know.
+
+
+pgbench
+-------
+
+Requires no extra setup, everything is handled by the code (including data
+generation etc.).
+
+
+Statistics
+----------
+
+The client also collects various system-level statistics, useful when analyzing
+the results and investigating performance regressions or differences between
+systems. This includes:
+
+* various data from /proc (cpuinfo, meminfo, ...)
+* PostgreSQL statistics (bgwriter, databases, tables and indexes)
+* sar statistics
diff --git a/client/benchmarks/__init__.py b/client/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/client/benchmarks/__init__.py
diff --git a/client/benchmarks/pgbench.py b/client/benchmarks/pgbench.py
new file mode 100644
index 0000000..da95064
--- /dev/null
+++ b/client/benchmarks/pgbench.py
@@ -0,0 +1,221 @@
+import math
+import os
+import re
+import time
+
+from multiprocessing import cpu_count
+from utils.logging import log
+from utils.misc import available_ram, run_cmd
+
+
+class PgBench(object):
+    'a simple wrapper around pgbench, running TPC-B-like workload by default'
+
+    # TODO allow running custom scripts, not just the default read-write/read-only tests
+    # TODO allow running 'prepared' mode
+
+    def __init__(self, bin_path, dbname):
+        '''
+        bin_path - path to PostgreSQL binaries (dropdb, createdb, psql commands)
+        dbname   - name of the database to use
+        '''
+
+        self._bin = bin_path
+        self._dbname = dbname
+        self._results = {}
+
+
+    @staticmethod
+    def _configure(cpu_count, ram_mbs):
+        'derive the configurations to benchmark from CPU count and RAM size'
+
+        config = []
+
+        # TODO allow overriding this from a global config
+
+        # scales: 10 (small), 50% of RAM, 200% of RAM
+        #for s in [10, ram_mbs/15/2, ram_mbs*2/15]:
+        for s in [10]:
+            config.append({'scale' : int(math.ceil(s/10)*10),
+                           'clients' : [1, cpu_count, 2*cpu_count]})
+
+        return config
+
+
+    def _init(self, scale):
+        'recreate the database (drop + create) and populate it with given scale'
+
+        # initialize results for this dataset scale
+        self._results[scale] = {'init' : None, 'warmup' : None, 'runs' : []}
+
+        log("recreating '%s' database" % (self._dbname,))
+        run_cmd(['dropdb', '--if-exists', self._dbname], env={'PATH' : self._bin})
+        run_cmd(['createdb', self._dbname], env={'PATH' : self._bin})
+
+        log("initializing pgbench '%s' with scale %s" % (self._dbname, scale))
+        r = run_cmd(['pgbench', '-i', '-s', str(scale), self._dbname], env={'PATH' : self._bin})
+
+        # remember the init duration
+        self._results[scale]['init'] = r[2]
+
+
+    @staticmethod
+    def _parse_results(data):
+        'extract results (including parameters) from the pgbench output'
+
+        scale = -1
+        r = re.search('scaling factor: ([0-9]+)', data)
+        if r:
+            scale = r.group(1)
+
+        mode = -1
+        r = re.search('query mode: (.+)', data)
+        if r:
+            mode = r.group(1)
+
+        clients = -1
+        r = re.search('number of clients: ([0-9]+)', data)
+        if r:
+            clients = r.group(1)
+
+        threads = -1
+        r = re.search('number of threads: ([0-9]+)', data)
+        if r:
+            threads = r.group(1)
+
+        duration = -1
+        r = re.search('duration: ([0-9]+) s', data)
+        if r:
+            duration = r.group(1)
+
+        latency = -1
+        r = re.search('latency average: ([0-9\.]+) ms', data)
+        if r:
+            latency = r.group(1)
+
+        tps = -1
+        r = re.search('tps = ([0-9]+\.[0-9]+) \(excluding connections establishing\)', data)
+        if r:
+            tps = r.group(1)
+
+        return {'scale' : scale,
+                'mode' : mode,
+                'clients' : clients,
+                'threads' : threads,
+                'duration' : duration,
+                'latency' : latency,
+                'tps' : tps}
+
+
+    @staticmethod
+    def _merge_logs():
+        'merge log files produced by pgbench threads (aggregated per second)'
+
+        r = {}
+
+        # find pgbench transaction logs in current directory
+        logs = [v for v in os.listdir(os.getcwd()) if re.match('pgbench_log.[0-9]+(\.[0-9]+)?', v)]
+
+        # parse each transaction log, and merge it into the existing results
+        for l in logs:
+            worker_log = open(l, 'r')
+            for row in worker_log:
+                values = row.split(' ')
+
+                timestamp = values[0]
+                tps = int(values[1])
+                lat_sum = long(values[2])
+                lat_sum2 = long(values[3])
+                lat_min = int(values[4])
+                lat_max = int(values[5])
+
+                # if first record for the timestamp, store it, otherwise merge
+                if timestamp not in r:
+                    r[timestamp] = {'tps' : tps,
+                                    'lat_sum' : lat_sum, 'lat_sum2' : lat_sum2,
+                                    'lat_min' : lat_min, 'lat_max' : lat_max}
+                else:
+                    r[timestamp]['tps'] += int(tps)
+                    r[timestamp]['lat_sum'] += long(lat_sum)
+                    r[timestamp]['lat_sum2'] += long(lat_sum2)
+                    r[timestamp]['lat_min'] = min(r[timestamp]['lat_min'], int(lat_min))
+                    r[timestamp]['lat_max'] = max(r[timestamp]['lat_max'], int(lat_max))
+
+            os.remove(l)
+
+        # now produce a simple text log sorted by the timestamp
+        o = []
+        for t in sorted(r.keys()):
+            o.append('%s %d %d %d %d %d' % (t, r[t]['tps'], r[t]['lat_sum'], r[t]['lat_sum2'], r[t]['lat_min'], r[t]['lat_max']))
+
+        return '\n'.join(o)
+
+
+    def _run(self, duration, nclients=1, njobs=1, read_only=False, aggregate=True):
+        'run pgbench on the database (either a warmup or actual benchmark run)'
+
+        args = ['pgbench', '-c', str(nclients), '-j', str(njobs), '-T', str(duration)]
+
+        # aggregate on per second resolution
+        if aggregate:
+            args.extend(['-l', '--aggregate-interval', '1'])
+
+        if read_only:
+            args.extend(['-S'])
+
+        args.extend([self._dbname])
+
+        # do an explicit checkpoint before each run
+        run_cmd(['psql', self._dbname, '-c', 'checkpoint'], env={'PATH' : self._bin})
+
+        log("pgbench : clients=%d, jobs=%d, aggregate=%s, read-only=%s, duration=%d" % (nclients, njobs, aggregate, read_only, duration))
+
+        start = time.time()
+        r = run_cmd(args, env={'PATH' : self._bin})
+        end = time.time()
+
+        r = PgBench._parse_results(r[1])
+        r.update({'read-only' : read_only})
+
+        if aggregate:
+            r.update({'transaction-log' : PgBench._merge_logs()})
+
+        r.update({'start' : start, 'end' : end})
+
+        return r
+
+
+    def run_tests(self, duration=10, runs=3):
+        'execute the whole benchmark, including initialization, warmup and benchmark runs'
+
+        # derive configuration for the CPU count / RAM size
+        configs = PgBench._configure(cpu_count(), available_ram())
+
+        for config in configs:
+
+            # init for the dataset scale and warmup
+            self._init(config['scale'])
+
+            warmup = self._run(duration, cpu_count(), cpu_count())
+            results = []
+
+            for run in range(runs):
+
+                for clients in config['clients']:
+
+                    # read-only
+                    r = self._run(duration, clients, clients, True)
+                    r.update({'run' : run})
+                    results.append(r)
+
+                    # read-write
+                    r = self._run(duration, clients, clients, False)
+                    r.update({'run' : run})
+                    results.append(r)
+
+            self._results[config['scale']] = {
+                'warmup' : warmup,
+                'runs' : results
+            }
+
+        return self._results
diff --git a/client/benchmarks/runner.py b/client/benchmarks/runner.py
new file mode 100644
index 0000000..3a891dc
--- /dev/null
+++ b/client/benchmarks/runner.py
@@ -0,0 +1,82 @@
+import json
+import os
+
+from utils.logging import log
+
+
+class BenchmarkRunner(object):
+    'manages runs of all the benchmarks, including cluster restarts etc.'
+
+    def __init__(self, out_dir, cluster, collector):
+        ''
+
+        self._output = out_dir      # where to store output files
+        self._benchmarks = {}       # bench name => class implementing the benchmark
+        self._configs = {}          # config name => (bench name, config)
+        self._cluster = cluster
+        self._collector = collector
+
+
+    def register_benchmark(self, benchmark_name, benchmark_class):
+        ''
+
+        # FIXME check if a mapping for the same name already exists
+        self._benchmarks.update({benchmark_name : benchmark_class})
+
+
+    def register_config(self, config_name, benchmark_name, postgres_config, **kwargs):
+        ''
+
+        # FIXME check if a mapping for the same name already exists
+        # FIXME check that the benchmark mapping already exists
+        self._configs.update({config_name : {'benchmark' : benchmark_name, 'config' : kwargs, 'postgres' : postgres_config}})
+
+
+    def _run_config(self, config_name):
+        ''
+
+        log("running benchmark configuration '%s'" % (config_name,))
+
+        # construct the benchmark class for the given config name
+        config = self._configs[config_name]
+        bench = self._benchmarks[config['benchmark']]
+
+        # expand the attribute names
+        bench = bench(**config['config'])
+
+        self._cluster.start(config = config['postgres'])
+
+        # start collector(s) of additional info
+        self._collector.start()
+
+        # run the tests
+        r = bench.run_tests()
+
+        # stop the cluster and collector
+        self._collector.stop()
+        self._cluster.stop()
+
+        # merge data from the collectors into the JSON document with results
+        r.update(self._collector.result())
+
+        # read the postgres log
+        with open('pg.log', 'r') as f:
+            r['postgres-log'] = f.read()
+
+        r['meta'] = {'benchmark' : config['benchmark'],
+                     'name' : config_name}
+
+        os.remove('pg.log')
+
+        with open('%s/%s.json' % (self._output, config_name), 'w') as f:
+            f.write(json.dumps(r, indent=4))
+
+
+    def run(self):
+        'run all the configured benchmarks'
+
+        # FIXME check that the directory does not exist
+        os.mkdir(self._output)
+
+        for config_name in self._configs:
+            self._run_config(config_name)
diff --git a/client/collectors/__init__.py b/client/collectors/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/client/collectors/__init__.py
diff --git a/client/collectors/collector.py b/client/collectors/collector.py
new file mode 100644
index 0000000..d73ef5b
--- /dev/null
+++ b/client/collectors/collector.py
@@ -0,0 +1,28 @@
+
+class MultiCollector(object):
+    'a collector combining multiple other collectors'
+
+    def __init__(self):
+        self._collectors = {}
+
+
+    def register(self, name, collector):
+        self._collectors[name] = collector
+
+
+    def start(self):
+        for name in self._collectors:
+            self._collectors[name].start()
+
+
+    def stop(self):
+        for name in self._collectors:
+            self._collectors[name].stop()
+
+
+    def result(self):
+        r = {}
+        for name in self._collectors:
+            r.update({name : self._collectors[name].result()})
+
+        return r
diff --git a/client/collectors/linux.py b/client/collectors/linux.py
new file mode 100644
index 0000000..9112a10
--- /dev/null
+++ b/client/collectors/linux.py
@@ -0,0 +1,93 @@
+from datetime import datetime, timedelta, time
+from utils.logging import log
+from utils.misc import run_cmd
+
+
+class LinuxCollector(object):
+    'collects various Linux-specific statistics (cpuinfo, mounts, sar)'
+
+    def __init__(self, sar_path = '/var/log/sa'):
+        self._start_ts = None
+        self._end_ts = None
+        self._sar = sar_path
+
+
+    def start(self):
+        self._start_ts = datetime.now()
+
+
+    def stop(self):
+        self._end_ts = datetime.now()
+
+
+    def result(self):
+        'build the results'
+
+        r = {
+            'sar' : self._collect_sar_stats(),
+            'sysctl' : self._collect_sysctl()
+        }
+        r.update(self._collect_system_info())
+
+        return r
+
+
+    def _collect_sar_stats(self):
+        'extracts all data available in sar, filters by timestamp range'
+
+        sar = {}
+        log("collecting sar stats")
+
+        d = self._start_ts.date()
+        while d <= self._end_ts.date():
+
+            # FIXME maybe skip if the file does not exist
+            filename = '%(path)s/sa%(day)s' % {'path' : self._sar, 'day' : d.strftime('%d')}
+
+            log("extracting sar data from '%s'" % (filename,))
+
+            # need to use the right combination of start/end timestamps
+            s = self._start_ts.strftime('%H:%M:%S')
+            e = self._end_ts.strftime('%H:%M:%S')
+
+            if d == self._start_ts.date() and d == self._end_ts.date():
+                r = run_cmd(['sar', '-A', '-p', '-s', s, '-e', e, '-f', filename])
+            elif d == self._start_ts.date():
+                r = run_cmd(['sar', '-A', '-p', '-s', s, '-f', filename])
+            elif d == self._end_ts.date():
+                r = run_cmd(['sar', '-A', '-p', '-e', e, '-f', filename])
+            else:
+                r = run_cmd(['sar', '-A', '-p', '-f', filename])
+
+            sar[str(d)] = r[1]
+
+            # proceed to the next day
+            d += timedelta(days=1)
+
+        return sar
+
+
+    def _collect_sysctl(self):
+        'collect kernel configuration'
+
+        log("collecting sysctl")
+        r = run_cmd(['/usr/sbin/sysctl', '-a'])
+
+        return r[1]
+
+
+    def _collect_system_info(self):
+        'collect cpuinfo, meminfo, mounts'
+
+        system = {}
+
+        with open('/proc/cpuinfo', 'r') as f:
+            system['cpuinfo'] = f.read()
+
+        with open('/proc/meminfo', 'r') as f:
+            system['meminfo'] = f.read()
+
+        with open('/proc/mounts', 'r') as f:
+            system['mounts'] = f.read()
+
+        return system
diff --git a/client/collectors/postgres.py b/client/collectors/postgres.py
new file mode 100644
index 0000000..306c2b7
--- /dev/null
+++ b/client/collectors/postgres.py
@@ -0,0 +1,157 @@
+import csv
+import multiprocessing
+import os
+import psycopg2
+import psycopg2.extras
+import Queue
+import time
+
+from multiprocessing import Process, Queue
+
+
+class PostgresCollector(object):
+    'collects basic PostgreSQL-level statistics (bgwriter, databases, tables, indexes)'
+
+    def __init__(self, dbname):
+        self._dbname = dbname
+
+
+    def start(self):
+        self._in_queue = multiprocessing.Queue()
+        self._out_queue = multiprocessing.Queue()
+        self._worker = Process(target=run_collector, args=(self._in_queue, self._out_queue, self._dbname))
+        self._worker.start()
+
+
+    def stop(self):
+        # signal the worker process to stop by writing a value into the queue
+        self._in_queue.put(True)
+
+        # FIXME this gets stuck for some reason (but we'll wait for queue anyway)
+        # self._worker.join()
+
+        # and then read the result
+        self._result = self._out_queue.get()
+
+        self._worker = None
+        self._in_queue = None
+        self._out_queue = None
+
+
+    def result(self):
+        return self._result
+
+
+def run_collector(in_queue, out_queue, dbname, interval=1.0):
+    'collector code for a separate process, communicating through a pair of queues'
+
+    bgwriter_log = None
+    tables_log = None
+    indexes_log = None
+    database_log = None
+
+    # get current timestamp
+    ts = time.time()
+
+    while True:
+
+        # wait until the next tick
+        ts += interval
+
+        # if we're behind, skip forward
+        if ts < time.time():
+            continue
+
+        # sleep (but only for the remaining time, to prevent drift)
+        time.sleep(ts - time.time())
+
+        # if we've received message in the input queue (not empty), terminate
+        if not in_queue.empty():
+            break
+
+        # open connection to the benchmark database (if can't open, continue)
+        # notice this is intentionally after the wait, so we'll wait before
+        # next connection attempt
+        try:
+            conn = psycopg2.connect('host=localhost dbname=%s' % (dbname,))
+            cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+        except Exception as ex:
+            continue
+
+        # background writer stats
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_bgwriter')
+
+        # on the first iteration, construct the CSV files
+        if not bgwriter_log:
+            fields = [desc[0] for desc in cur.description]
+            bgwriter_log = csv.DictWriter(open('bgwriter.csv', 'w'), fields)
+            bgwriter_log.writeheader()
+
+        bgwriter_log.writerows(cur.fetchall())
+
+        # TODO we can assume statistics for most objects (tables, indexes) won't
+        # change every second, so we can optimize the amount of data by detecting
+        # changes and only keeping the two rows next to it
+
+        # table statistics
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_all_tables JOIN pg_statio_all_tables USING (relid, schemaname, relname)')
+
+        # on the first iteration, construct the CSV files
+        if not tables_log:
+            fields = [desc[0] for desc in cur.description]
+            tables_log = csv.DictWriter(open('tables.csv', 'w'), fields)
+            tables_log.writeheader()
+
+        tables_log.writerows(cur.fetchall())
+
+        # index statistics
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_all_indexes JOIN pg_statio_all_indexes USING (relid, indexrelid, schemaname, relname, indexrelname)')
+
+        # on the first iteration, construct the CSV files
+        if not indexes_log:
+            fields = [desc[0] for desc in cur.description]
+            indexes_log = csv.DictWriter(open('indexes.csv', 'w'), fields)
+            indexes_log.writeheader()
+
+        indexes_log.writerows(cur.fetchall())
+
+        # database statistics
+        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * FROM pg_stat_database')
+
+        # on the first iteration, construct the CSV files
+        if not database_log:
+            fields = [desc[0] for desc in cur.description]
+            database_log = csv.DictWriter(open('database.csv', 'w'), fields)
+            database_log.writeheader()
+
+        database_log.writerows(cur.fetchall())
+
+        conn.close()
+
+    # close the CSV writers
+    bgwriter_log = None
+    tables_log = None
+    indexes_log = None
+    database_log = None
+
+    result = {}
+
+    with open('bgwriter.csv', 'r') as f:
+        result.update({'bgwriter' : f.read()})
+
+    with open('tables.csv', 'r') as f:
+        result.update({'tables' : f.read()})
+
+    with open('indexes.csv', 'r') as f:
+        result.update({'indexes' : f.read()})
+
+    with open('database.csv', 'r') as f:
+        result.update({'database' : f.read()})
+
+    # remove the files
+    os.remove('bgwriter.csv')
+    os.remove('tables.csv')
+    os.remove('indexes.csv')
+    os.remove('database.csv')
+
+    out_queue.put(result)
diff --git a/client/perffarm-client.py b/client/perffarm-client.py
new file mode 100755
index 0000000..0d97677
--- /dev/null
+++ b/client/perffarm-client.py
@@ -0,0 +1,74 @@
+import argparse
+import json
+import os
+
+from benchmarks.pgbench import PgBench
+from benchmarks.runner import BenchmarkRunner
+
+from collectors.linux import LinuxCollector
+from collectors.postgres import PostgresCollector
+from collectors.collector import MultiCollector
+
+from utils.locking import FileLock
+from utils.git import GitRepository
+from utils.cluster import PgCluster
+from utils import logging
+
+GIT_URL = '[email protected]:postgres/postgres.git'
+REPOSITORY_PATH = '/home/user/tmp/git-postgres'
+BUILD_PATH = '/home/user/tmp/bin-postgres'
+BIN_PATH = os.path.join(BUILD_PATH, 'bin')
+DATADIR_PATH = '/home/user/tmp/data-postgres'
+
+POSTGRES_CONFIG = {'shared_buffers' : '1GB',
+                   'work_mem' : '64MB',
+                   'maintenance_work_mem' : '128MB',
+                   'min_wal_size' : '2GB',
+                   'max_wal_size' : '4GB',
+                   'log_line_prefix' : '%n %t ',
+                   'log_checkpoints' : 'on',
+                   'log_autovacuum_min_duration' : '0',
+                   'log_temp_files' : '32',
+                   'checkpoint_timeout' : '15min',
+                   'checkpoint_completion_target' : '0.9'}
+
+DATABASE_NAME = 'perf'
+
+OUTPUT_DIR = '/home/user/perf-output'
+
+
+if __name__ == '__main__':
+
+    with FileLock('.lock') as lock:
+
+        # clone repository and build the sources
+
+        repository = GitRepository(url = GIT_URL, path = REPOSITORY_PATH)
+
+        repository.clone_or_update()
+        repository.build_and_install(path = BUILD_PATH)
+
+        # build and start a postgres cluster
+
+        cluster = PgCluster(bin_path = BIN_PATH, data_path = DATADIR_PATH)
+
+        # create collectors
+
+        collectors = MultiCollector()
+
+        collectors.register('system', LinuxCollector())
+        collectors.register('postgres', PostgresCollector(dbname=DATABASE_NAME))
+
+        runner = BenchmarkRunner(OUTPUT_DIR, cluster, collectors)
+
+        # register the three tests we currently have
+
+        runner.register_benchmark('pgbench', PgBench)
+
+        # register one config for each benchmark (should be moved to a config file)
+
+        runner.register_config('pgbench-basic', 'pgbench', dbname = DATABASE_NAME,
+                               bin_path = ('%s/bin' % (BUILD_PATH,)),
+                               postgres_config = POSTGRES_CONFIG)
+
+        runner.run()
diff --git a/client/utils/__init__.py b/client/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/client/utils/__init__.py
diff --git a/client/utils/cluster.py b/client/utils/cluster.py
new file mode 100644
index 0000000..6e48970
--- /dev/null
+++ b/client/utils/cluster.py
@@ -0,0 +1,72 @@
+import os
+import shutil
+import time
+
+from multiprocessing import cpu_count, Process, Queue
+from subprocess import call, STDOUT
+from tempfile import TemporaryFile
+from utils.logging import log
+
+
+class PgCluster(object):
+    'basic manipulation of postgres cluster (init, start, stop, destroy)'
+
+    def __init__(self, bin_path, data_path):
+        self._bin = bin_path
+        self._data = data_path
+
+
+    def _initdb(self):
+        'initialize the data directory'
+
+        with TemporaryFile() as strout:
+            log("initializing cluster into '%s'" % (self._data,))
+            call(['pg_ctl', '-D', self._data, 'init'], env={'PATH' : self._bin}, stdout=strout, stderr=STDOUT)
+
+
+    def _configure(self, config):
+        'update configuration of a cluster (using postgresql.auto.conf)'
+
+        log("configuring cluster in '%s'" % (self._data,))
+        with open('%s/postgresql.auto.conf' % (self._data,), 'a+') as f:
+            for k in config:
+                f.write("%(name)s = '%(value)s'\n" % {'name' : k, 'value' : config[k]})
+
+
+    def _destroy(self):
+        'forced cleanup of possibly existing cluster processes and data directory'
+
+        with TemporaryFile() as strout:
+            log("killing all existing postgres processes")
+            call(['killall', 'postgres'], stdout=strout, stderr=STDOUT)
+
+        # remove the data directory
+        if os.path.exists(self._data):
+            shutil.rmtree(self._data)
+
+
+    def start(self, config, destroy=True):
+        'init, configure and start the cluster'
+
+        # cleanup any previous cluster running, remove data dir if it exists
+        if destroy:
+            self._destroy()
+
+        self._initdb()
+        self._configure(config)
+
+        with TemporaryFile() as strout:
+            log("starting cluster in '%s' using '%s' binaries" % (self._data, self._bin))
+            call(['pg_ctl', '-D', self._data, '-l', 'pg.log', '-w', 'start'], env={'PATH' : self._bin}, stdout=strout, stderr=STDOUT)
+
+
+    def stop(self, destroy=True):
+        'stop the cluster'
+
+        with TemporaryFile() as strout:
+            log("stopping cluster in '%s' using '%s' binaries" % (self._data, self._bin))
+            call(['pg_ctl', '-D', self._data, '-w', '-t', '60', 'stop'], env={'PATH' : self._bin}, stdout=strout, stderr=STDOUT)
+
+        # kill any remaining processes, remove the data dir
+        if destroy:
+            self._destroy()
diff --git a/client/utils/git.py b/client/utils/git.py
new file mode 100644
index 0000000..dcd74af
--- /dev/null
+++ b/client/utils/git.py
@@ -0,0 +1,82 @@
+import os
+import shutil
+
+from multiprocessing import cpu_count
+from subprocess import call, STDOUT
+from tempfile import TemporaryFile
+from utils.logging import log
+
+
+class GitRepository(object):
+    'a simple management of a git repository / source building'
+
+    def __init__(self, url, path):
+        'url - repository URL, path - local directory for the clone'
+
+        self._url = url
+        self._path = path
+
+
+    def _exists(self):
+        'check that a local repository clone exists'
+
+        # TODO verify that the repository uses the proper upstream url
+        return os.path.exists(self._path)
+
+
+    def _clone(self):
+        ''
+        log("cloning repository '%s' to '%s'" % (url, path))
+
+        with TemporaryFile() as strout:
+            call(['git', 'clone', url, path], stdout=strout, stderr=STDOUT)
+
+
+    def _update(self):
+        'update an existing repository clone'
+
+        log("updating repository '%s' from '%s'" % (self._path, self._url))
+
+        # simply call git-pull and redirect stdout/stderr
+        # FIXME should verify that the repository uses the proper upstream url
+        with TemporaryFile() as strout:
+            call(['git', 'pull', self._path], stdout=strout, stderr=STDOUT)
+
+
+    def current_commit(self):
+        'returns current commit hash'
+
+        with TemporaryFile() as strout:
+            call(['git', 'rev-parse', 'HEAD'], cwd=self._path, stdout=strout, stderr=STDOUT)
+            strout.seek(0)
+            return strout.read().strip()
+
+
+    def clone_or_update(self):
+        'refreshes the repository (either clone from scratch or refresh)'
+
+        if self._exists():
+            self._update()
+        else:
+            self._clone()
+
+        log("current commit '%s'" % (self.current_commit(),))
+
+
+    def build_and_install(self, path, remove=True):
+        'builds and installs the sources'
+
+        # TODO collect output of configure and make commands
+        if os.path.exists(path):
+            shutil.rmtree(path)
+
+        with TemporaryFile() as strout:
+            log("configuring sources in '%s' with prefix '%s'" % (self._path, path))
+            call(['./configure', '--prefix', path], cwd=self._path, stdout=strout, stderr=STDOUT)
+
+        with TemporaryFile() as strout:
+            log("building sources and installing into '%s'" % (path,))
+
+            # cleanup and build using multiple cpus
+            call(['make', '-s', 'clean'], cwd=self._path, stdout=strout, stderr=STDOUT)
+            call(['make', '-s', '-j', str(cpu_count()), 'install'], cwd=self._path, stdout=strout, stderr=STDOUT)
diff --git a/client/utils/locking.py b/client/utils/locking.py
new file mode 100644
index 0000000..dfc8f63
--- /dev/null
+++ b/client/utils/locking.py
@@ -0,0 +1,21 @@
+import fcntl
+import os
+
+
+class FileLock():
+    'a simple wrapper around file lock'
+
+    def __init__(self, filename):
+        self._file = open(filename, 'w')
+
+    def __enter__(self):
+        'locks the file and writes the PID of the current process into it'
+        fcntl.flock(self._file, fcntl.LOCK_EX)
+        self._file.write(str(os.getpid()))
+        self._file.flush()
+
+        return self._file
+
+    def __exit__(self, type, value, traceback):
+        'unlock the file'
+        fcntl.flock(self._file, fcntl.LOCK_UN)
diff --git a/client/utils/logging.py b/client/utils/logging.py
new file mode 100644
index 0000000..9e12c88
--- /dev/null
+++ b/client/utils/logging.py
@@ -0,0 +1,9 @@
+import time
+
+def log(message):
+    ''
+
+    print '%(epoch)s %(date)s %(message)s' % {
+        'epoch' : time.time(),
+        'date' : time.strftime('%Y-%m-%d %H:%M:%S'),
+        'message' : message}
diff --git a/client/utils/misc.py b/client/utils/misc.py
new file mode 100644
index 0000000..1d5436e
--- /dev/null
+++ b/client/utils/misc.py
@@ -0,0 +1,23 @@
+import os
+import time
+
+from subprocess import call, STDOUT
+from tempfile import TemporaryFile
+
+
+def available_ram():
+    'determine amount of RAM in the system (in megabytes)'
+
+    return int(os.popen("free -m").readlines()[1].split()[1])
+
+
+def run_cmd(args, env=None, cwd=None):
+    'run command (a subprocess.call wrapper)'
+
+    with TemporaryFile() as strout:
+
+        start = time.time()
+        retcode = call(args, env=env, cwd=cwd, stdout=strout, stderr=STDOUT)
+
+        strout.seek(0)
+        return (retcode, strout.read(), (time.time() - start))
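
For illustration only (not part of the commit), here is a minimal sketch of how a second benchmark configuration could be registered with the `BenchmarkRunner` introduced above. The classes and the `register_benchmark`/`register_config` calls are the ones added in this diff; the config name `pgbench-small-buffers`, the paths, the output directory, and the reduced `shared_buffers` value are hypothetical placeholders.

```python
# Hypothetical sketch: wiring up two pgbench configurations that differ only
# in the PostgreSQL configuration. Paths and config names are made up.
import os

from benchmarks.pgbench import PgBench
from benchmarks.runner import BenchmarkRunner
from collectors.collector import MultiCollector
from collectors.linux import LinuxCollector
from collectors.postgres import PostgresCollector
from utils.cluster import PgCluster

BUILD_PATH = '/home/user/tmp/bin-postgres'       # assumed build prefix
DATADIR_PATH = '/home/user/tmp/data-postgres'    # assumed data directory
OUTPUT_DIR = '/home/user/perf-output'            # assumed output directory
DATABASE_NAME = 'perf'

# collectors gather system-level and PostgreSQL-level statistics during runs
collectors = MultiCollector()
collectors.register('system', LinuxCollector())
collectors.register('postgres', PostgresCollector(dbname=DATABASE_NAME))

cluster = PgCluster(bin_path=os.path.join(BUILD_PATH, 'bin'),
                    data_path=DATADIR_PATH)
runner = BenchmarkRunner(OUTPUT_DIR, cluster, collectors)

runner.register_benchmark('pgbench', PgBench)

# same benchmark, two configs with different postgres settings
runner.register_config('pgbench-basic', 'pgbench',
                       dbname=DATABASE_NAME,
                       bin_path='%s/bin' % (BUILD_PATH,),
                       postgres_config={'shared_buffers': '1GB'})

runner.register_config('pgbench-small-buffers', 'pgbench',
                       dbname=DATABASE_NAME,
                       bin_path='%s/bin' % (BUILD_PATH,),
                       postgres_config={'shared_buffers': '128MB'})

runner.run()
```

As in the committed `perffarm-client.py`, the extra keyword arguments (`dbname`, `bin_path`) are forwarded to the `PgBench` constructor, and each configuration is written out as its own `<config-name>.json` document in the output directory with the collector results merged in.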