summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Kreen2012-07-09 11:46:48 +0000
committerMarko Kreen2012-07-13 19:44:24 +0000
commit65ab2946e8f1a52ac47ff830cd4e335af3df6daa (patch)
treeb91ae7e2373891cd8a46af37d68d9fda1544ec91
parentce8c2ebdb17c63c0ef64664dae674188cf686424 (diff)
pgq.LocalConsumer: consumer that tracks processed ticks in local file
-rw-r--r--python/pgq/__init__.py2
-rw-r--r--python/pgq/localconsumer.py211
-rwxr-xr-xtests/localconsumer/init.sh9
-rwxr-xr-xtests/localconsumer/regen.sh47
-rwxr-xr-xtests/localconsumer/testconsumer.py12
5 files changed, 281 insertions, 0 deletions
diff --git a/python/pgq/__init__.py b/python/pgq/__init__.py
index 638ee372..dc6ece29 100644
--- a/python/pgq/__init__.py
+++ b/python/pgq/__init__.py
@@ -21,6 +21,7 @@ from pgq.event import *
from pgq.consumer import *
from pgq.coopconsumer import *
from pgq.remoteconsumer import *
+from pgq.localconsumer import *
from pgq.producer import *
from pgq.ticker import *
@@ -37,6 +38,7 @@ __all__ = (
pgq.consumer.__all__ +
pgq.coopconsumer.__all__ +
pgq.remoteconsumer.__all__ +
+ pgq.localconsumer.__all__ +
pgq.cascade.nodeinfo.__all__ +
pgq.cascade.admin.__all__ +
pgq.cascade.consumer.__all__ +
diff --git a/python/pgq/localconsumer.py b/python/pgq/localconsumer.py
new file mode 100644
index 00000000..780f9219
--- /dev/null
+++ b/python/pgq/localconsumer.py
@@ -0,0 +1,211 @@
+
+"""
+Consumer that stores last applied position in local file.
+
+For cases where the consumer cannot use single database for remote tracking.
+
+"""
+
+import sys
+import os
+import errno
+import skytools
+import pgq
+
+__all__ = ['LocalConsumer']
+
+class LocalConsumer(pgq.Consumer):
+    """Consumer that tracks the last applied tick in a local file.
+
+    After each batch the tick id of that batch is written to
+    `local_tracking_file`.  On startup the stored tick is compared with
+    the consumer position on the queue and the two are brought back in
+    sync (see check_queue()).
+
+    Subclasses implement process_local_event() or override
+    process_local_batch().
+
+    Requirements:
+     - Whole batch in one TX.
+     - Must not use retry queue.
+
+    Features:
+     - Can detect if the current batch is already applied and skip it.
+     - If some ticks are lost, allows seeking back on the queue.
+       Whether it succeeds depends on pgq configuration.
+
+    Config options::
+
+        ## Parameters for LocalConsumer ##
+
+        # file location where last applied tick is tracked
+        local_tracking_file = ~/state/%(job_name)s.tick
+    """
+
+    def reload(self):
+        """Reload config and pick up the tracking file location."""
+        super(LocalConsumer, self).reload()
+
+        self.local_tracking_file = self.cf.getfile('local_tracking_file')
+
+    def init_optparse(self, parser = None):
+        """Add the --rewind and --reset switches to the base option parser."""
+        p = super(LocalConsumer, self).init_optparse(parser)
+        p.add_option("--rewind", action = "store_true",
+                help = "change queue position according to local tick")
+        p.add_option("--reset", action = "store_true",
+                help = "reset local tick based on queue position")
+        return p
+
+    def startup(self):
+        """Handle one-shot switches, then start normally and sync positions.
+
+        --rewind and --reset do their work and exit with status 0
+        before the base class startup is run.
+        """
+        if self.options.rewind:
+            self.rewind()
+            sys.exit(0)
+        if self.options.reset:
+            self.dst_reset()
+            sys.exit(0)
+        super(LocalConsumer, self).startup()
+
+        self.check_queue()
+
+    def check_queue(self):
+        """Bring the queue position and the local tick file in sync.
+
+        Cases, with -1 meaning "not present":
+         - consumer not on queue, local tick known: register at local tick
+         - consumer not on queue, no local tick: register at queue top
+         - consumer on queue, no local tick: store the queue tick locally
+         - both present but different: the local file wins, the consumer
+           is repositioned on the queue to the local tick
+         - both equal: nothing to do
+        """
+        queue_tick = -1
+        local_tick = self.load_local_tick()
+
+        db = self.get_database(self.db_name)
+        curs = db.cursor()
+        q = "select last_tick from pgq.get_consumer_info(%s, %s)"
+        curs.execute(q, [self.queue_name, self.consumer_name])
+        rows = curs.fetchall()
+        # exactly one row means the consumer is registered on the queue
+        if len(rows) == 1:
+            queue_tick = rows[0]['last_tick']
+        db.commit()
+
+        if queue_tick < 0:
+            if local_tick >= 0:
+                self.log.info("Registering consumer at tick %d", local_tick)
+                self._register_at(curs, local_tick)
+            else:
+                self.log.info("Registering consumer at queue top")
+                q = "select * from pgq.register_consumer(%s, %s)"
+                curs.execute(q, [self.queue_name, self.consumer_name])
+        elif local_tick < 0:
+            self.log.info("Local tick missing, storing queueu tick %d", queue_tick)
+            self.save_local_tick(queue_tick)
+        elif local_tick > queue_tick:
+            self.log.warning("Tracking out of sync: queue=%d local=%d.  Repositioning on queue.  [Database failure?]",
+                             queue_tick, local_tick)
+            self._register_at(curs, local_tick)
+        elif local_tick < queue_tick:
+            self.log.warning("Tracking out of sync: queue=%d local=%d.  Rewinding queue.  [Lost file data?]",
+                             queue_tick, local_tick)
+            self._register_at(curs, local_tick)
+        else:
+            self.log.info("Ticks match: Queue=%d Local=%d", queue_tick, local_tick)
+
+        # Make the (re-)registration durable right away instead of leaving
+        # it in an open transaction; rewind() commits the same way.
+        db.commit()
+
+    def _register_at(self, curs, tick_id):
+        """Position this consumer on the queue at the given tick."""
+        q = "select * from pgq.register_consumer_at(%s, %s, %s)"
+        curs.execute(q, [self.queue_name, self.consumer_name, tick_id])
+
+    def work(self):
+        """Run one work cycle, re-syncing positions first if needed."""
+        # NOTE(review): a negative work_state presumably signals a failed
+        # previous cycle in the base class -- confirm.
+        if self.work_state < 0:
+            self.check_queue()
+        return super(LocalConsumer, self).work()
+
+    def process_batch(self, db, batch_id, event_list):
+        """Process all events in batch, unless it was already applied.
+
+        The local tick is stored only after process_local_batch()
+        returns without raising.
+        """
+
+        # check if done
+        if self.is_batch_done():
+            return
+
+        # actual work
+        self.process_local_batch(db, batch_id, event_list)
+
+        # finish work
+        self.set_batch_done()
+
+    def process_local_batch(self, db, batch_id, event_list):
+        """Hand every event of the batch to process_local_event()."""
+        for ev in event_list:
+            self.process_local_event(db, batch_id, ev)
+
+    def process_local_event(self, db, batch_id, ev):
+        """Handle a single event.  Must be implemented by subclass."""
+        # NotImplementedError is still an Exception, so existing handlers
+        # keep working; the message now names the right method.
+        raise NotImplementedError('process_local_event not implemented')
+
+    def is_batch_done(self):
+        """Tell whether the current batch is already recorded as applied.
+
+        Compares the tick stored in the local file with the tick range
+        of the current batch.  Returns False when the batch needs to be
+        processed, True when it can be skipped.  Raises Exception when
+        the local tick matches neither end of the batch.
+        """
+
+        local_tick = self.load_local_tick()
+
+        cur_tick = self.batch_info['tick_id']
+        prev_tick = self.batch_info['prev_tick_id']
+
+        if local_tick < 0:
+            # seems this consumer has not run yet?
+            return False
+
+        if prev_tick == local_tick:
+            # on track
+            return False
+
+        if cur_tick == local_tick:
+            # current batch is already applied, skip it
+            return True
+
+        # anything else means problems
+        raise Exception('Lost position: batch %d..%d, dst has %d' % (
+                        prev_tick, cur_tick, local_tick))
+
+    def set_batch_done(self):
+        """Record the tick of the current batch in the local file."""
+        tick_id = self.batch_info['tick_id']
+        self.save_local_tick(tick_id)
+
+    def register_consumer(self):
+        """Register on the queue; drop the local tick on fresh registration.
+
+        Returns whatever the base class returned, so callers that look
+        at the result keep working.
+        """
+        new = super(LocalConsumer, self).register_consumer()
+        if new: # fixme
+            self.dst_reset()
+        return new
+
+    def unregister_consumer(self):
+        """If unregistering, also remove the local tracking file."""
+
+        super(LocalConsumer, self).unregister_consumer()
+        self.dst_reset()
+
+    def rewind(self):
+        """Move the consumer position on the queue to the local tick.
+
+        Only logs an error when no tick is stored locally.
+        """
+        dst_tick = self.load_local_tick()
+        if dst_tick >= 0:
+            src_db = self.get_database(self.db_name)
+            src_curs = src_db.cursor()
+
+            self.log.info("Rewinding queue to tick local tick %d", dst_tick)
+            q = "select pgq.register_consumer_at(%s, %s, %s)"
+            src_curs.execute(q, [self.queue_name, self.consumer_name, dst_tick])
+
+            src_db.commit()
+        else:
+            self.log.error('Cannot rewind, no tick found in local file')
+
+    def dst_reset(self):
+        """Remove the local tracking file (best effort, never raises OSError)."""
+        self.log.info("Removing local tracking file")
+        try:
+            os.remove(self.local_tracking_file)
+        except OSError, ex:
+            # A missing file is already the desired state.  Anything else
+            # is reported but not fatal, keeping this a best-effort call.
+            if ex.errno != errno.ENOENT:
+                self.log.warning("Could not remove local tracking file %s: %s",
+                                 self.local_tracking_file, ex)
+
+    def load_local_tick(self):
+        """Reads stored tick or -1.
+
+        Returns -1 when the file does not exist or is empty.  Other
+        I/O errors are re-raised; non-numeric content raises ValueError.
+        """
+        try:
+            f = open(self.local_tracking_file, 'r')
+            try:
+                # close the file even if the read fails
+                data = f.read().strip()
+            finally:
+                f.close()
+        except IOError, ex:
+            if ex.errno == errno.ENOENT:
+                return -1
+            raise
+
+        if data:
+            return int(data)
+        return -1
+
+    def save_local_tick(self, tick_id):
+        """Store tick in local file."""
+        data = str(tick_id)
+        skytools.write_atomic(self.local_tracking_file, data)
+
diff --git a/tests/localconsumer/init.sh b/tests/localconsumer/init.sh
new file mode 100755
index 00000000..0b884158
--- /dev/null
+++ b/tests/localconsumer/init.sh
@@ -0,0 +1,9 @@
+#! /bin/sh
+
+# One-time setup for the LocalConsumer test: creates working
+# directories and a fresh, empty test database.
+
+# shared test environment settings
+. ../env.sh
+
+# directories for log and pid files
+mkdir -p log pid
+
+# Recreate the test database from scratch.  There is no `set -e` here,
+# so a failing dropdb (e.g. database not there yet) does not stop the script.
+dropdb qdb
+createdb qdb
+
diff --git a/tests/localconsumer/regen.sh b/tests/localconsumer/regen.sh
new file mode 100755
index 00000000..f0e6cf87
--- /dev/null
+++ b/tests/localconsumer/regen.sh
@@ -0,0 +1,47 @@
+#! /bin/sh
+
+# LocalConsumer test run: installs PgQ into qdb, starts the ticker and
+# then starts the test consumer.
+# NOTE(review): cleardb, title, title2, msg, run_qadmin, cat_file and run
+# are not defined in this script; presumably they come from testlib.sh.
+
+. ../testlib.sh
+
+# reset every database used by the test
+for db in qdb; do
+ cleardb $db
+done
+
+# start with empty logs and no stored tick files
+rm -f log/*.log
+mkdir -p state
+rm -f state/*
+
+# from here on, stop at the first failing command
+set -e
+
+title LocalConsumer test
+
+title2 Initialization
+
+msg Install PgQ
+
+run_qadmin qdb "install pgq;"
+run_qadmin qdb "create queue test_queue;"
+
+msg Run ticker
+
+# ticker config is taken from the heredoc below
+cat_file conf/pgqd.ini <<EOF
+[pgqd]
+database_list = qdb
+logfile = log/pgqd.log
+pidfile = pid/pgqd.pid
+EOF
+
+run pgqd -d conf/pgqd.ini
+
+msg Run consumer
+
+# consumer config; local_tracking_file points into the state/ directory
+# that was emptied above
+cat_file conf/testconsumer_qdb.ini <<EOF
+[testconsumer]
+queue_name = test_queue
+db = dbname=qdb
+logfile = log/%(job_name)s.log
+pidfile = pid/%(job_name)s.pid
+local_tracking_file = state/%(job_name)s.tick
+EOF
+
+run ./testconsumer.py -v conf/testconsumer_qdb.ini
+
diff --git a/tests/localconsumer/testconsumer.py b/tests/localconsumer/testconsumer.py
new file mode 100755
index 00000000..bf4e8366
--- /dev/null
+++ b/tests/localconsumer/testconsumer.py
@@ -0,0 +1,12 @@
+#! /usr/bin/env python
+
+import sys, time, skytools, pgq
+
+class TestLocalConsumer(pgq.LocalConsumer):
+    """Minimal LocalConsumer for the test run: it only logs each event."""
+
+    def process_local_event(self, src_db, batch_id, ev):
+        """Write the type and data of one event to the log."""
+        ev_type, ev_data = ev.type, ev.data
+        self.log.info("event: type=%s data=%s", ev_type, ev_data)
+
+if __name__ == '__main__':
+    # service name 'testconsumer', database config key 'db',
+    # remaining command line passed through unchanged
+    args = sys.argv[1:]
+    script = TestLocalConsumer('testconsumer', 'db', args)
+    script.start()
+