author    Marko Kreen    2009-06-01 13:32:08 +0000
committer Marko Kreen    2009-06-01 13:39:04 +0000
commit    be82460b0778e282336e0a73eec5b069cd59bb53 (patch)
tree      59f36e8063218cebff6be4be8ab00b805d1e2c74
parent    52fa34d45d19fe4843b77e7ff21a4f9e93832800 (diff)
python/pgq: relaxed event handling
.tag_done() call is no longer required: events are in 'done' state by default now. In 2.x events were in 'retry' state by default, which in retrospect was a very bad idea. Changing the default to 'untagged' while still requiring tag_done() did not seem much better either. The original reasoning was to detect and survive bugs in scripts, but the result was only confusion for everybody. So instead of assuming that a script may be buggy, we now assume the script knows what it is doing; events go to 'retry' only by explicit action.
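For illustration, a minimal sketch of a consumer written against the new behavior. The MyConsumer class, the handle_event() helper and the 'my_consumer'/'db' names are hypothetical; only pgq.Consumer and the tag_retry()/tag_done() event methods come from pgq itself.

    import sys
    import pgq

    class MyConsumer(pgq.Consumer):
        def process_batch(self, db, batch_id, event_list):
            for ev in event_list:
                try:
                    self.handle_event(ev)
                    # no ev.tag_done() needed: 'done' is now the default state
                except Exception:
                    # retry happens only on explicit request
                    ev.tag_retry()

        def handle_event(self, ev):
            self.log.info("event: id=%s type=%s" % (ev.id, ev.type))

    if __name__ == '__main__':
        script = MyConsumer('my_consumer', 'db', sys.argv[1:])
        script.start()

Under 2.x the same loop would have needed an explicit ev.tag_done() after each handled event; forgetting it silently left the event in 'retry' state.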
-rw-r--r--  python/londiste/playback.py        | 8
-rw-r--r--  python/pgq/cascade/consumer.py     | 4
-rw-r--r--  python/pgq/cascade/worker.py       | 8
-rw-r--r--  python/pgq/consumer.py             | 9
-rw-r--r--  python/pgq/event.py                | 4
-rw-r--r--  python/pgq/remoteconsumer.py       | 4
-rwxr-xr-x  scripts/bulk_loader.py             | 1
-rwxr-xr-x  scripts/cube_dispatcher.py         | 2
-rwxr-xr-x  scripts/queue_loader.py            | 1
-rwxr-xr-x  scripts/queue_mover.py             | 1
-rwxr-xr-x  scripts/queue_splitter.py          | 1
-rwxr-xr-x  scripts/simple_serial_consumer.py  | 1
-rwxr-xr-x  scripts/table_dispatcher.py        | 2
13 files changed, 13 insertions(+), 33 deletions(-)
diff --git a/python/londiste/playback.py b/python/londiste/playback.py
index 5618da4a..d9db6fd1 100644
--- a/python/londiste/playback.py
+++ b/python/londiste/playback.py
@@ -434,34 +434,26 @@ class Replicator(CascadedWorker):
         self.log.debug("New event: id=%s / type=%s / data=%s / extra1=%s" % (ev.id, ev.type, ev.data, ev.extra1))
         if ev.type in ('I', 'U', 'D'):
             self.handle_data_event(ev, dst_curs)
-            ev.tag_done()
         elif ev.type[:2] in ('I:', 'U:', 'D:'):
             self.handle_urlenc_event(ev, dst_curs)
-            ev.tag_done()
         elif ev.type == "TRUNCATE":
             self.flush_sql(dst_curs)
             self.handle_truncate_event(ev, dst_curs)
-            ev.tag_done()
         elif ev.type == 'EXECUTE':
             self.flush_sql(dst_curs)
             self.handle_execute_event(ev, dst_curs)
-            ev.tag_done()
         elif ev.type == 'londiste.add-table':
             self.flush_sql(dst_curs)
             self.add_set_table(dst_curs, ev.data)
-            ev.tag_done()
         elif ev.type == 'londiste.remove-table':
             self.flush_sql(dst_curs)
             self.remove_set_table(dst_curs, ev.data)
-            ev.tag_done()
         elif ev.type == 'londiste.remove-seq':
             self.flush_sql(dst_curs)
             self.remove_set_seq(dst_curs, ev.data)
-            ev.tag_done()
         elif ev.type == 'londiste.update-seq':
             self.flush_sql(dst_curs)
             self.update_seq(dst_curs, ev)
-            ev.tag_done()
         else:
             CascadedWorker.process_remote_event(self, src_curs, dst_curs, ev)
diff --git a/python/pgq/cascade/consumer.py b/python/pgq/cascade/consumer.py
index a3b0fc7c..8676e5bd 100644
--- a/python/pgq/cascade/consumer.py
+++ b/python/pgq/cascade/consumer.py
@@ -102,8 +102,6 @@ class CascadedConsumer(Consumer):
         state = self._consumer_state
         if self.is_batch_done(state, self._batch_info):
-            for ev in event_list:
-                ev.tag_done()
             return
 
         dst_db = self.get_database(self.target_db)
@@ -201,7 +199,7 @@ class CascadedConsumer(Consumer):
         """
         if ev.ev_type[:4] == "pgq.":
             # ignore cascading events
-            ev.tag_done()
+            pass
         else:
             raise Exception('Unhandled event type in queue: %s' % ev.ev_type)
diff --git a/python/pgq/cascade/worker.py b/python/pgq/cascade/worker.py
index 5e17e358..c697156a 100644
--- a/python/pgq/cascade/worker.py
+++ b/python/pgq/cascade/worker.py
@@ -124,8 +124,6 @@ class CascadedWorker(CascadedConsumer):
             else:
                 if st.process_events:
                     self.process_remote_event(src_curs, dst_curs, ev)
-                else:
-                    ev.tag_done()
             if ev.ev_id > max_id:
                 max_id = ev.ev_id
         if st.local_wm_publish:
@@ -163,6 +161,10 @@ class CascadedWorker(CascadedConsumer):
     def process_remote_event(self, src_curs, dst_curs, ev):
         """Handle cascading events.
         """
+
+        if ev.retry:
+            raise Exception('CascadedWorker must not get retry events')
+
         # non cascade events send to CascadedConsumer to error out
         if ev.ev_type[:4] != 'pgq.':
             CascadedConsumer.process_remote_event(self, src_curs, dst_curs, ev)
@@ -170,7 +172,6 @@ class CascadedWorker(CascadedConsumer):
         # ignore cascade events if not main worker
         if not self.main_worker:
-            ev.tag_done()
             return
 
         # check if for right queue
@@ -199,7 +200,6 @@ class CascadedWorker(CascadedConsumer):
             dst_curs.execute(q, [self.pgq_queue_name, ev.ev_extra1, tick_id])
         else:
             raise Exception("unknown cascade event: %s" % t)
-        ev.tag_done()
 
     def finish_remote_batch(self, src_db, dst_db, tick_id):
         """Worker-specific cleanup on target node.
diff --git a/python/pgq/consumer.py b/python/pgq/consumer.py
index 01f16abd..a23b882e 100644
--- a/python/pgq/consumer.py
+++ b/python/pgq/consumer.py
@@ -68,7 +68,6 @@ class _BatchWalker(object):
             self.length += len(rows)
             for row in rows:
                 ev = _WalkerEvent(self, self.queue_name, row)
-                self.status_map[ev.id] = (EV_UNTAGGED, None)
                 yield ev
 
         self.curs.execute("close %s" % self.sql_cursor)
@@ -77,17 +76,19 @@ class _BatchWalker(object):
     def __len__(self):
         if self.fetch_status != 2:
-            raise Exception("BatchWalker: len() for incomplete result. (%d)" % self.fetch_status)
+            return -1
+            #raise Exception("BatchWalker: len() for incomplete result. (%d)" % self.fetch_status)
         return self.length
 
     def tag_event_done(self, event):
-        del self.status_map[event.id]
+        if event.id in self.status_map:
+            del self.status_map[event.id]
 
     def tag_event_retry(self, event, retry_time):
         self.status_map[event.id] = (EV_RETRY, retry_time)
 
     def get_status(self, event):
-        return self.status_map[event.id][0]
+        return self.status_map.get(event.id, (EV_DONE, 0))[0]
 
     def iter_status(self):
         for res in self.status_map.iteritems():
diff --git a/python/pgq/event.py b/python/pgq/event.py
index 93745035..39735507 100644
--- a/python/pgq/event.py
+++ b/python/pgq/event.py
@@ -19,6 +19,7 @@ _fldmap = {
     'ev_extra2': 'ev_extra2',
     'ev_extra3': 'ev_extra3',
     'ev_extra4': 'ev_extra4',
+    'ev_retry': 'ev_retry',
 
     'id': 'ev_id',
     'txid': 'ev_txid',
@@ -29,6 +30,7 @@ _fldmap = {
     'extra2': 'ev_extra2',
     'extra3': 'ev_extra3',
     'extra4': 'ev_extra4',
+    'retry': 'ev_retry',
 }
 
 class Event(object):
@@ -42,7 +44,7 @@ class Event(object):
     def __init__(self, queue_name, row):
         self._event_row = row
-        self._status = EV_UNTAGGED
+        self._status = EV_DONE
         self.retry_time = 60
         self.queue_name = queue_name
diff --git a/python/pgq/remoteconsumer.py b/python/pgq/remoteconsumer.py
index f5c2ced5..cc8b73d9 100644
--- a/python/pgq/remoteconsumer.py
+++ b/python/pgq/remoteconsumer.py
@@ -29,8 +29,6 @@ class RemoteConsumer(Consumer):
         curs = dst_db.cursor()
         if self.is_last_batch(curs, batch_id):
-            for ev in event_list:
-                ev.tag_done()
             return
 
         self.process_remote_batch(db, batch_id, event_list, dst_db)
@@ -103,8 +101,6 @@ class SerialConsumer(Consumer):
         # check if done
         if self.is_batch_done(curs):
-            for ev in event_list:
-                ev.tag_done()
             return
 
         # actual work
diff --git a/scripts/bulk_loader.py b/scripts/bulk_loader.py
index 60852bea..c9a67496 100755
--- a/scripts/bulk_loader.py
+++ b/scripts/bulk_loader.py
@@ -253,7 +253,6 @@ class BulkLoader(pgq.SerialConsumer):
                 tables[tbl] = TableCache(tbl)
             cache = tables[tbl]
             cache.add_event(ev)
-            ev.tag_done()
 
         # then process them
         for tbl, cache in tables.items():
diff --git a/scripts/cube_dispatcher.py b/scripts/cube_dispatcher.py
index 9bd8fc3e..76a3ab3f 100755
--- a/scripts/cube_dispatcher.py
+++ b/scripts/cube_dispatcher.py
@@ -90,8 +90,6 @@ class CubeDispatcher(pgq.SerialConsumer):
             if not tbl in tables:
                 tables[tbl] = self.get_table_info(ev, tbl)
-            ev.tag_done()
-
         # create tables if needed
         self.check_tables(dst_db, tables)
diff --git a/scripts/queue_loader.py b/scripts/queue_loader.py
index c71b20e9..1b3090df 100755
--- a/scripts/queue_loader.py
+++ b/scripts/queue_loader.py
@@ -593,7 +593,6 @@ class QueueLoader(CascadedWorker):
             self.init_state(tbl)
         st = self.table_state[tbl]
         st.add(dst_curs, ev, self._batch_info)
-        ev.tag_done()
 
     def finish_remote_batch(self, src_db, dst_db, tick_id):
         curs = dst_db.cursor()
diff --git a/scripts/queue_mover.py b/scripts/queue_mover.py
index 6c0556d6..92fe5370 100755
--- a/scripts/queue_mover.py
+++ b/scripts/queue_mover.py
@@ -35,7 +35,6 @@ class QueueMover(pgq.SerialConsumer):
         for ev in ev_list:
             data = [ev.type, ev.data, ev.extra1, ev.extra2, ev.extra3, ev.extra4, ev.time]
             rows.append(data)
-            ev.tag_done()
 
         fields = ['type', 'data', 'extra1', 'extra2', 'extra3', 'extra4', 'time']
         # insert data
diff --git a/scripts/queue_splitter.py b/scripts/queue_splitter.py
index 29ee0db0..dab23982 100755
--- a/scripts/queue_splitter.py
+++ b/scripts/queue_splitter.py
@@ -37,7 +37,6 @@ class QueueSplitter(pgq.SerialConsumer):
             if queue not in cache:
                 cache[queue] = []
             cache[queue].append(row)
-            ev.tag_done()
 
         # should match the composed row
         fields = ['type', 'data', 'extra1', 'extra2', 'extra3', 'extra4', 'time']
diff --git a/scripts/simple_serial_consumer.py b/scripts/simple_serial_consumer.py
index 0c771954..2bd06be7 100755
--- a/scripts/simple_serial_consumer.py
+++ b/scripts/simple_serial_consumer.py
@@ -87,7 +87,6 @@ class SimpleSerialConsumer(pgq.SerialConsumer):
                 self.log.debug(res)
             except:
                 pass
-            ev.tag_done()
 
 if __name__ == '__main__':
     script = SimpleSerialConsumer(sys.argv[1:])
diff --git a/scripts/table_dispatcher.py b/scripts/table_dispatcher.py
index b4fd2d10..52cd2b7b 100755
--- a/scripts/table_dispatcher.py
+++ b/scripts/table_dispatcher.py
@@ -104,8 +104,6 @@ class TableDispatcher(pgq.SerialConsumer):
             else:
                 tables[tbl].append(dstrow)
-            ev.tag_done()
-
         # create tables if needed
         self.check_tables(dst_db, tables)