diff options
author | Tomas Vondra | 2016-10-13 11:22:49 +0000 |
---|---|---|
committer | Tomas Vondra | 2017-02-27 00:31:05 +0000 |
commit | 128ad6eb0c757c34b9332cd41e79efe6b73e887a (patch) | |
tree | 2463b8e67dc78ade6c8bb5548adc2fd191558f44 | |
parent | afafb8549915da7b1dddb3e355efef70586a26aa (diff) |
open connections in a way resilient to temporary failures
In case of connection failure (e.g. a backend getting terminated by
OOM killer and taking down the whole server), the workers failed
with an exception without putting a result into the queue, leading
to hangs.
This fixes that by making the workers resilient to connection issues:
they now catch the exceptions and terminate cleanly. We also retry the
connection several times, so that benchmarking can continue even after
an unexpected failure such as the OOM killer taking down the server.
-rw-r--r-- | client/utils/misc.py | 46 |
1 files changed, 46 insertions, 0 deletions
# Connection helpers added to client/utils/misc.py by this commit
# (reconstructed from the commit's diff, which was collapsed onto one line).
import time


def connect(dbname, conn, cursor, nretries=60, delay=1.0):
    '''Open a connection and a cursor, retrying on connection failures.

    If both `conn` and `cursor` are already set they are returned unchanged.
    Otherwise a new connection to `dbname` on localhost is attempted up to
    `nretries` times, sleeping `delay` seconds between attempts (e.g. to
    wait while the database is performing recovery after a crash).

    Returns a `(conn, cursor)` tuple on success, where the cursor is a
    RealDictCursor and the connection has autocommit enabled, or
    `(None, None)` when every attempt failed.
    '''

    # if we already have a connection and a cursor, return them
    if conn and cursor:
        return (conn, cursor)

    # Imported here so the early-return path above and disconnect() do not
    # require the database driver to be importable.
    import psycopg2
    import psycopg2.extras

    # we'll try repeatedly, with delays between the attempts
    for attempt in range(nretries):

        new_conn = None

        try:
            new_conn = psycopg2.connect('host=localhost dbname=%s' % (dbname,))
            # TODO do we actually need autocommit?
            new_conn.autocommit = True
            new_cursor = new_conn.cursor(
                cursor_factory=psycopg2.extras.RealDictCursor)

            return (new_conn, new_cursor)
        except psycopg2.Error:
            # Only database errors are retried; KeyboardInterrupt and
            # SystemExit must still be able to stop the worker.

            # The connection may have been opened before the failure; close
            # it so that retries do not leak server connections.
            if new_conn is not None:
                try:
                    new_conn.close()
                except psycopg2.Error:
                    pass

            # sleep for a while before the next attempt, but do not waste
            # time sleeping after the last one
            if attempt + 1 < nretries:
                time.sleep(delay)

    return (None, None)


def disconnect(conn, cursor):
    '''Make sure we're disconnected (but prevent exceptions).

    Closes the cursor first and then the connection. Either argument may
    be None. Errors raised while closing are ignored, because the server
    may already be gone by the time this is called.
    '''

    for resource in (cursor, conn):
        if resource is None:
            continue

        try:
            resource.close()
        except Exception:
            # best-effort cleanup: a failed close() must not take the
            # caller down, but Ctrl-C / SystemExit still propagate
            pass