summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomas Vondra2016-10-13 11:22:49 +0000
committerTomas Vondra2017-02-27 00:31:05 +0000
commit128ad6eb0c757c34b9332cd41e79efe6b73e887a (patch)
tree2463b8e67dc78ade6c8bb5548adc2fd191558f44
parentafafb8549915da7b1dddb3e355efef70586a26aa (diff)
open connections in a way resilient to temporary failures
In case of a connection failure (e.g. a backend getting terminated by the OOM killer and taking down the whole server), the workers failed with an exception without putting a result into the queue, leading to hangs. This commit fixes that by making the workers resilient to connection issues: they catch the exceptions and terminate cleanly. We also retry the connection several times, so that benchmarking can continue even after an unexpected OOM kill or a similar failure.
-rw-r--r--client/utils/misc.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/client/utils/misc.py b/client/utils/misc.py
index 1d5436e..6f73998 100644
--- a/client/utils/misc.py
+++ b/client/utils/misc.py
@@ -1,4 +1,6 @@
import os
+import psycopg2
+import psycopg2.extras
import time
from subprocess import call, STDOUT
@@ -21,3 +23,47 @@ def run_cmd(args, env=None, cwd=None):
strout.seek(0)
return (retcode, strout.read(), (time.time() - start))
+
+
def connect(dbname, conn, cursor, nretries=60, delay=1.0):
    '''Return an open (connection, cursor) pair for a database on localhost.

    If both conn and cursor are already set, they are returned unchanged.
    Otherwise a new connection is opened, and the attempt is repeated when
    it does not succeed (e.g. when the database is performing recovery
    after a crash).

    dbname   - name of the database to connect to
    conn     - existing connection to reuse, or None
    cursor   - existing cursor to reuse, or None
    nretries - maximum number of connection attempts
    delay    - number of seconds to sleep between two attempts

    Returns (conn, cursor) on success, or (None, None) when all attempts
    failed. Failures raised as Exception subclasses are handled here and
    not propagated; KeyboardInterrupt and SystemExit are left alone so the
    retry loop can still be interrupted.
    '''

    # if we already have a connection and a cursor, return them
    if conn and cursor:
        return (conn, cursor)

    # we'll try repeatedly, with delays between the attempts
    attempt = 0
    while attempt < nretries:

        attempt += 1
        new_conn = None

        try:
            # keyword arguments let the driver quote the values, so database
            # names with spaces or quotes do not corrupt the connection string
            new_conn = psycopg2.connect(host='localhost', dbname=dbname)
            # TODO do we actually need autocommit?
            new_conn.autocommit = True
            new_cursor = new_conn.cursor(
                cursor_factory=psycopg2.extras.RealDictCursor)

            return (new_conn, new_cursor)
        except Exception:
            # the connection may have been opened before the failure; close
            # it so repeated attempts do not accumulate open connections
            if new_conn is not None:
                try:
                    new_conn.close()
                except Exception:
                    # best effort only - the connection is unusable anyway
                    pass

        # sleep before the next attempt (pointless after the last one)
        if attempt < nretries:
            time.sleep(delay)

    return (None, None)
+
+
def disconnect(conn, cursor):
    '''Make sure we're disconnected (but prevent exceptions).

    Closes the cursor first and the connection second. Either argument may
    be None, in which case it is skipped. A failure while closing one of
    them is ignored, so the other one still gets closed and the caller
    never has to handle errors from an already broken connection.

    conn   - connection to close, or None
    cursor - cursor to close, or None

    Returns None. Only Exception subclasses are suppressed;
    KeyboardInterrupt and SystemExit still propagate.
    '''

    # the cursor goes first, then the connection it belongs to
    for resource in (cursor, conn):

        if resource is None:
            continue

        try:
            resource.close()
        except Exception:
            # deliberately best effort - the resource may already be closed
            # or the server may be gone
            pass