author | Marko Kreen | 2009-02-13 12:16:59 +0000
---|---|---
committer | Marko Kreen | 2009-02-13 13:20:31 +0000
commit | 411f237ebfdce9532bbf01f51d46cdb91f7fe283 (patch) |
tree | 851a378f1234629cf2f15d903ecea4ae88df126f |
parent | c8198c637b777296ec49b845395487242d730394 (diff) |
python/pgq update
- Remove the obsolete setconsumer code
- Add new CascadedConsumer / CascadedWorker classes that are based on the
  regular pgq.Consumer (usage sketch after the file list below)
- Move RemoteConsumer / SerialConsumer out of pgq/consumer.py

pgq.Consumer:
- Rename conf params and instance variables:
    pgq_queue_name -> queue_name
    pgq_consumer_id -> consumer_name
- Disable automatic registration on the queue; the script now has to be
  called with the --register / --unregister switches (example below)
- Drop the code for reading from multiple queues at once

pgq.ticker:
- Drop the event seq tracking code; this is now done in the db
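
For reference, a minimal sketch of what a consumer setup looks like after these changes; the job name, connect string and queue below are placeholders, not part of this commit:

    [my_consumer]
    job_name = my_consumer
    db = dbname=somedb host=127.0.0.1
    # new-style parameter names; the old pgq_queue_name / pgq_consumer_id
    # are still read as a compatibility fallback, and consumer_name
    # defaults to job_name when left out
    queue_name = myqueue
    consumer_name = my_consumer

With automatic registration gone, the consumer is registered on the queue once before its first run, e.g. `my_consumer.py my_consumer.ini --register`, and later removed with `--unregister`.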
-rw-r--r-- | python/conf/pgqadm.ini | 1
-rw-r--r-- | python/pgq/__init__.py | 37
-rw-r--r-- | python/pgq/cascade/__init__.py | 2
-rw-r--r-- | python/pgq/cascade/admin.py | 701
-rw-r--r-- | python/pgq/cascade/consumer.py | 216
-rw-r--r-- | python/pgq/cascade/nodeinfo.py (renamed from python/pgq/setinfo.py) | 127
-rw-r--r-- | python/pgq/cascade/worker.py | 260
-rw-r--r-- | python/pgq/consumer.py | 344
-rw-r--r-- | python/pgq/event.py | 16
-rw-r--r-- | python/pgq/maint.py | 2
-rw-r--r-- | python/pgq/rawconsumer.py | 52
-rw-r--r-- | python/pgq/remoteconsumer.py | 197
-rw-r--r-- | python/pgq/setadmin.py | 463
-rw-r--r-- | python/pgq/setconsumer.py | 247
-rw-r--r-- | python/pgq/status.py | 40
-rw-r--r-- | python/pgq/ticker.py | 139
-rwxr-xr-x | python/pgqadm.py | 53
-rwxr-xr-x | python/setadm.py | 7
18 files changed, 1635 insertions, 1269 deletions
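
The new cascaded classes are meant to be used by subclassing, much like the plain pgq.Consumer whose code follows below. A rough sketch, assuming a hypothetical event type and target table (none of the names here come from this commit):

    import sys
    from pgq.cascade.consumer import CascadedConsumer

    class MyCascadedConsumer(CascadedConsumer):
        """Apply events from a cascaded queue to the target database."""

        def process_remote_event(self, src_curs, dst_curs, ev):
            if ev.ev_type == 'my.insert':
                # apply the payload on the target side
                dst_curs.execute("insert into my_log (data) values (%s)",
                                 [ev.ev_data])
                ev.tag_done()
            else:
                # the base class finishes internal pgq.* cascade events
                # and errors out on anything unknown
                CascadedConsumer.process_remote_event(self, src_curs, dst_curs, ev)

    if __name__ == '__main__':
        # args: service name, target database name in the config, cmdline args
        script = MyCascadedConsumer('my_consumer', 'dst_db', sys.argv[1:])
        script.start()

CascadedWorker is set up the same way but additionally maintains the node (watermark and tick propagation), which is what the per-node worker jobs use; plain event consumers derive from CascadedConsumer.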
diff --git a/python/conf/pgqadm.ini b/python/conf/pgqadm.ini index a2e92f6b..24ac4fc4 100644 --- a/python/conf/pgqadm.ini +++ b/python/conf/pgqadm.ini @@ -1,6 +1,7 @@ [pgqadm] +# should be globally unique job_name = pgqadm_somedb db = dbname=provider port=6000 host=127.0.0.1 diff --git a/python/pgq/__init__.py b/python/pgq/__init__.py index bb78abc0..b34ef625 100644 --- a/python/pgq/__init__.py +++ b/python/pgq/__init__.py @@ -1,18 +1,47 @@ """PgQ framework for Python.""" +__pychecker__ = 'no-miximport' + import pgq.event import pgq.consumer -import pgq.setconsumer +import pgq.remoteconsumer import pgq.producer +import pgq.ticker +import pgq.maint +import pgq.status + +import pgq.cascade +import pgq.cascade.nodeinfo +import pgq.cascade.admin +import pgq.cascade.consumer +import pgq.cascade.worker + from pgq.event import * from pgq.consumer import * -from pgq.setconsumer import * +from pgq.remoteconsumer import * from pgq.producer import * +from pgq.ticker import * +from pgq.maint import * +from pgq.status import * + +from pgq.cascade.nodeinfo import * +from pgq.cascade.admin import * +from pgq.cascade.consumer import * +from pgq.cascade.worker import * + __all__ = ( pgq.event.__all__ + pgq.consumer.__all__ + - pgq.setconsumer.__all__ + - pgq.producer.__all__ ) + pgq.remoteconsumer.__all__ + + pgq.cascade.nodeinfo.__all__ + + pgq.cascade.admin.__all__ + + pgq.cascade.consumer.__all__ + + pgq.cascade.worker.__all__ + + pgq.producer.__all__ + + pgq.ticker.__all__ + + pgq.maint.__all__ + + pgq.status.__all__ ) + diff --git a/python/pgq/cascade/__init__.py b/python/pgq/cascade/__init__.py new file mode 100644 index 00000000..051c4620 --- /dev/null +++ b/python/pgq/cascade/__init__.py @@ -0,0 +1,2 @@ +"""Cascaded Queue support.""" + diff --git a/python/pgq/cascade/admin.py b/python/pgq/cascade/admin.py new file mode 100644 index 00000000..e1cb4822 --- /dev/null +++ b/python/pgq/cascade/admin.py @@ -0,0 +1,701 @@ +#! /usr/bin/env python + +## NB: not all commands work ## + +"""cascaded queue administration. + +londiste.py INI pause [NODE [CONS]] + +setadm.py INI pause NODE [CONS] + + +""" + +import sys, time, optparse, skytools + +from pgq.cascade.nodeinfo import * + +__all__ = ['CascadeAdmin'] + +command_usage = """\ +%prog [options] INI CMD [subcmd args] + +Node Initialization: + create-root NAME CONNSTR + create-branch NAME CONNSTR --provider=<public_connstr> + create-leaf NAME CONNSTR --provider=<public_connstr> + Initializes node. + + setadm: give worker name with switch --worker. + +Node Administration: + pause Pause a consumer. + resume Resume a consumer. + change-provider --provider NEW Change where consumer reads from + + setadm: --node and/or --consumer switches to specify + either node or consumer. + +Works, naming problems: + + status Show set state [set-status] + members Show members in set [nodes] + switchover --target NODE [--all] + +Broken: + + rename-node OLD NEW Rename a node + show-consumers [--node] + failover NEWROOT + tag-dead NODE .. Tag node as dead + tag-alive NODE .. 
Tag node as alive +""" + +class CascadeAdmin(skytools.AdminScript): + """Cascaded pgq administration.""" + queue_name = None + queue_info = None + extra_objs = [] + local_node = None + + def __init__(self, svc_name, dbname, args, worker_setup = False): + skytools.AdminScript.__init__(self, svc_name, args) + self.initial_db_name = dbname + if worker_setup: + self.options.worker = self.job_name + self.options.consumer = self.job_name + + def init_optparse(self, parser = None): + """Add SetAdmin switches to parser.""" + p = skytools.AdminScript.init_optparse(self, parser) + p.set_usage(command_usage.strip()) + + g = optparse.OptionGroup(p, "actual queue admin options") + g.add_option("--connstr", action="store_true", + help = "initial connect string") + g.add_option("--provider", + help = "init: connect string for provider") + g.add_option("--queue", + help = "specify queue name") + g.add_option("--worker", + help = "create: specify worker name") + g.add_option("--node", + help = "specify node name") + g.add_option("--consumer", + help = "specify consumer name") + g.add_option("--target", + help = "switchover: specify replacement node") + g.add_option("--merge", + help = "create-node: combined queue name") + p.add_option_group(g) + return p + + def reload(self): + """Reload config.""" + skytools.AdminScript.reload(self) + if self.options.queue: + self.queue_name = self.options.queue + else: + self.queue_name = self.cf.get('queue_name', '') + if not self.queue_name: + self.queue_name = self.cf.get('pgq_queue_name', '') + if not self.queue_name: + raise Exception('"queue_name" not specified in config') + + # + # Node initialization. + # + + def cmd_create_root(self, node_name, node_location): + return self.create_node('root', node_name, node_location) + + def cmd_create_branch(self, node_name, node_location): + return self.create_node('branch', node_name, node_location) + + def cmd_create_leaf(self, node_name, node_location): + return self.create_node('leaf', node_name, node_location) + + def create_node(self, node_type, node_name, node_location): + """Generic node init.""" + provider_loc = self.options.provider + + if node_type not in ('root', 'branch', 'leaf'): + raise Exception('unknown node type') + + # connect to database + db = self.get_database("new_node", connstr = node_location) + + # check if code is installed + self.install_code(db) + + # query current status + res = self.exec_query(db, "select * from pgq_node.get_node_info(%s)", [self.queue_name]) + info = res[0] + if info['node_type'] is not None: + self.log.info("Node is already initialized as %s" % info['node_type']) + return + + self.log.info("Initializing node") + + worker_name = self.options.worker + if not worker_name: + raise Exception('--worker required') + combined_queue = self.options.merge + if combined_queue and node_type != 'leaf': + raise Exception('--merge can be used only for leafs') + + # register member + if node_type == 'root': + global_watermark = None + combined_queue = None + provider_name = None + self.exec_cmd(db, "select * from pgq_node.register_location(%s, %s, %s, false)", + [self.queue_name, node_name, node_location]) + self.exec_cmd(db, "select * from pgq_node.create_node(%s, %s, %s, %s, %s, %s, %s)", + [self.queue_name, node_type, node_name, worker_name, provider_name, global_watermark, combined_queue]) + provider_db = None + else: + root_db = self.find_root_db(provider_loc) + queue_info = self.load_queue_info(root_db) + + # check if member already exists + if queue_info.get_member(node_name) is not 
None: + self.log.error("Node '%s' already exists" % node_name) + sys.exit(1) + + combined_set = None + + provider_db = self.get_database('provider_db', connstr = provider_loc) + q = "select node_type, node_name from pgq_node.get_node_info(%s)" + res = self.exec_query(provider_db, q, [self.queue_name]) + row = res[0] + if not row['node_name']: + raise Exception("provider node not found") + provider_name = row['node_name'] + + # register member on root + self.exec_cmd(root_db, "select * from pgq_node.register_location(%s, %s, %s, false)", + [self.queue_name, node_name, node_location]) + + # lookup provider + provider = queue_info.get_member(provider_name) + if not provider: + self.log.error("Node %s does not exist" % provider_name) + sys.exit(1) + + # register on provider + self.exec_cmd(provider_db, "select * from pgq_node.register_location(%s, %s, %s, false)", + [self.queue_name, node_name, node_location]) + rows = self.exec_cmd(provider_db, "select * from pgq_node.register_subscriber(%s, %s, %s, null)", + [self.queue_name, node_name, worker_name]) + global_watermark = rows[0]['global_watermark'] + + # initialize node itself + + # insert members + self.exec_cmd(db, "select * from pgq_node.register_location(%s, %s, %s, false)", + [self.queue_name, node_name, node_location]) + for m in queue_info.member_map.values(): + self.exec_cmd(db, "select * from pgq_node.register_location(%s, %s, %s, %s)", + [self.queue_name, m.name, m.location, m.dead]) + + # real init + self.exec_cmd(db, "select * from pgq_node.create_node(%s, %s, %s, %s, %s, %s, %s)", + [ self.queue_name, node_type, node_name, worker_name, + provider_name, global_watermark, combined_queue ]) + + + self.extra_init(node_type, db, provider_db) + + self.log.info("Done") + + def extra_init(self, node_type, node_db, provider_db): + """Callback to do specific init.""" + pass + + def find_root_db(self, initial_loc = None): + """Find root node, having start point.""" + if initial_loc: + loc = initial_loc + else: + loc = self.cf.get(self.initial_db_name) + + while self.looping: + db = self.get_database('root_db', connstr = loc) + + + # query current status + res = self.exec_query(db, "select * from pgq_node.get_node_info(%s)", [self.queue_name]) + info = res[0] + node_type = info['node_type'] + if node_type is None: + self.log.info("Root node not initialized?") + sys.exit(1) + + self.log.debug("db='%s' -- type='%s' provider='%s'" % (loc, node_type, info['provider_location'])) + # configured db may not be root anymore, walk upwards then + if node_type in ('root', 'combined-root'): + db.commit() + return db + + self.close_database('root_db') + if loc == info['provider_location']: + raise Exception("find_root_db: got loop: %s" % loc) + loc = info['provider_location'] + if loc is None: + self.log.error("Sub node provider not initialized?") + sys.exit(1) + raise Exception('process canceled') + + def find_consumer_check(self, node, consumer): + cmap = self.get_node_consumer_map(node) + return (consumer in cmap) + + def find_consumer(self, node = None, consumer = None): + if not node and not consumer: + node = self.options.node + consumer = self.options.consumer + if not node and not consumer: + raise Exception('Need either --node or --consumer') + + # specific node given + if node: + if consumer: + if not self.find_consumer_check(node, consumer): + raise Exception('Consumer not found') + else: + state = self.get_node_info(node) + consumer = state.worker_name + return (node, consumer) + + # global consumer search + if 
self.find_consumer_check(self.local_node, consumer): + return (self.local_node, consumer) + + # fixme: dead node handling? + nodelist = self.queue_info.member_map.keys() + for node in nodelist: + if node == self.local_node: + continue + if self.find_consumer_check(node, consumer): + return (node, consumer) + + raise Exception('Consumer not found') + + def install_code(self, db): + """Install cascading code to db.""" + objs = [ + skytools.DBLanguage("plpgsql"), + skytools.DBFunction("txid_current_snapshot", 0, sql_file="txid.sql"), + skytools.DBSchema("pgq", sql_file="pgq.sql"), + skytools.DBSchema("pgq_ext", sql_file="pgq_ext.sql"), # not needed actually + skytools.DBSchema("pgq_node", sql_file="pgq_node.sql"), + ] + objs += self.extra_objs + skytools.db_install(db.cursor(), objs, self.log) + db.commit() + + # + # Print status of whole set. + # + + def cmd_status(self): + """Show set status.""" + root_db = self.find_root_db() + sinf = self.load_queue_info(root_db) + + for mname, minf in sinf.member_map.iteritems(): + db = self.get_database('look_db', connstr = minf.location, autocommit = 1) + curs = db.cursor() + curs.execute("select * from pgq_node.get_node_info(%s)", [self.queue_name]) + node = NodeInfo(self.queue_name, curs.fetchone()) + node.load_status(curs) + self.load_extra_status(curs, node) + sinf.add_node(node) + self.close_database('look_db') + + sinf.print_tree() + + def load_extra_status(self, curs, node): + """Fetch extra info.""" + pass + + # + # Normal commands. + # + + def cmd_change_provider(self): + """Change node provider.""" + + self.load_local_info() + self.change_provider( + node = self.options.node, + consumer = self.options.consumer, + new_provider = self.options.provider) + + def node_change_provider(self, node, new_provider): + self.change_provider(node, new_provider = new_provider) + + def change_provider(self, node = None, consumer = None, new_provider = None): + old_provider = None + if not new_provider: + raise Exception('Please give --provider') + + if not node or not consumer: + node, consumer = self.find_consumer(node = node, consumer = consumer) + + cmap = self.get_node_consumer_map(node) + cinfo = cmap[consumer] + old_provider = cinfo['provider_node'] + + if old_provider == new_provider: + self.log.info("Consumer '%s' at node '%s' has already '%s' as provider" % ( + consumer, node, new_provider)) + return + + # pause target node + self.pause_consumer(node, consumer) + + # reload node info + node_db = self.get_node_database(node) + qinfo = self.load_queue_info(node_db) + ninfo = qinfo.local_node + + # reload consumer info + cmap = self.get_node_consumer_map(node) + cinfo = cmap[consumer] + + # is it node worker or plain consumer? 
+ is_worker = (ninfo.worker_name == consumer) + + # fixme: expect the node to be described already + #q = "select * from pgq_node.add_member(%s, %s, %s, false)" + #self.node_cmd(new_provider, q, [self.queue_name, node_name, node_location]) + + # subscribe on new provider + if is_worker: + q = 'select * from pgq_node.register_subscriber(%s, %s, %s, %s)' + self.node_cmd(new_provider, q, [self.queue_name, node, consumer, cinfo['last_tick_id']]) + else: + q = 'select * from pgq.register_consumer_at(%s, %s, %s)' + self.node_cmd(new_provider, q, [self.queue_name, consumer, cinfo['last_tick_id']]) + + # change provider on target node + q = 'select * from pgq_node.change_consumer_provider(%s, %s, %s)' + self.node_cmd(node, q, [self.queue_name, consumer, new_provider]) + + # unsubscribe from old provider + if is_worker: + q = "select * from pgq_node.unregister_subscriber(%s, %s)" + self.node_cmd(old_provider, q, [self.queue_name, node]) + else: + q = "select * from pgq.unregister_consumer(%s, %s)" + self.node_cmd(old_provider, q, [self.queue_name, consumer]) + + # done + self.resume_consumer(node, consumer) + + def cmd_rename_node(self, old_name, new_name): + """Rename node.""" + + self.load_local_info() + + root_db = self.find_root_db() + + # pause target node + self.pause_node(old_name) + node = self.load_node_info(old_name) + provider_node = node.provider_node + subscriber_list = self.get_node_subscriber_list(old_name) + + + # create copy of member info / subscriber+queue info + step1 = 'select * from pgq_node.rename_node_step1(%s, %s, %s)' + # rename node itself, drop copies + step2 = 'select * from pgq_node.rename_node_step2(%s, %s, %s)' + + # step1 + self.exec_cmd(root_db, step1, [self.queue_name, old_name, new_name]) + self.node_cmd(provider_node, step1, [self.queue_name, old_name, new_name]) + self.node_cmd(old_name, step1, [self.queue_name, old_name, new_name]) + for child in subscriber_list: + self.node_cmd(child, step1, [self.queue_name, old_name, new_name]) + + # step1 + self.node_cmd(old_name, step2, [self.queue_name, old_name, new_name]) + self.node_cmd(provider_node, step1, [self.queue_name, old_name, new_name]) + for child in subscriber_list: + self.node_cmd(child, step2, [self.queue_name, old_name, new_name]) + self.exec_cmd(root_db, step2, [self.queue_name, old_name, new_name]) + + # resume node + self.resume_node(old_name) + + def node_depends(self, sub_node, top_node): + cur_node = sub_node + # walk upstream + while 1: + info = self.get_node_info(cur_node) + if cur_node == top_node: + # yes, top_node is sub_node's provider + return True + if info.type == 'root': + # found root, no dependancy + return False + # step upwards + cur_node = info.provider_node + + def demote_node(self, oldnode, step, newnode): + """Downgrade old root?""" + q = "select * from pgq_node.demote_root(%s, %s, %s)" + res = self.node_cmd(oldnode, q, [self.queue_name, step, newnode]) + return res[0]['last_tick'] + + def promote_branch(self, node): + """Promote old branch as root.""" + q = "select * from pgq_node.promote_branch(%s)" + self.node_cmd(node, q, [self.queue_name]) + + def wait_for_catchup(self, new, last_tick): + """Wait until new_node catches up to old_node.""" + # wait for it on subscriber + info = self.load_node_info(new) + if info.completed_tick >= last_tick: + self.log.info('tick already exists') + return info + if info.paused: + self.log.info('new node seems paused, resuming') + self.resume_node(new) + while 1: + self.log.debug('waiting for catchup: need=%d, cur=%d' % (last_tick, 
info.completed_tick)) + time.sleep(1) + info = self.load_node_info(new) + if info.completed_tick >= last_tick: + return info + + + def switchover_root(self, old_info, new_info): + """Root switchover.""" + old = old_info.name + new = new_info.name + + self.pause_node(old) + + self.demote_node(old, 1, new) + + last_tick = self.demote_node(old, 2, new) + + self.wait_for_catchup(new, last_tick) + + self.pause_node(new) + self.promote_branch(new) + + #self.subscribe_node(new, old, old_info.completed_tick) + q = 'select * from pgq_node.register_subscriber(%s, %s, %s, %s)' + self.node_cmd(new, q, [self.queue_name, old, old_info.worker_name, last_tick]) + + #self.unsubscribe_node(new_node.parent_node, new_node.name) + q = "select * from pgq_node.unregister_subscriber(%s, %s)" + self.node_cmd(new_info.provider_node, q, [self.queue_name, new]) + + self.resume_node(new) + + self.demote_node(old, 3, new) + + self.resume_node(old) + + def switchover_nonroot(self, old_node, new_node): + """Non-root switchover.""" + if self.node_depends(new_node.name, old_node.name): + # yes, old_node is new_nodes provider, + # switch it around + self.node_change_provider(new_node.name, old_node.provider_node) + + self.node_change_provider(old_node.name, new_node.name) + + def cmd_switchover(self): + """Generic node switchover.""" + self.load_local_info() + old_node_name = self.options.node + new_node_name = self.options.target + if not old_node_name: + worker = self.options.consumer + if not worker: + raise Exception('old node not given') + if self.queue_info.local_node.worker_name != worker: + raise Exception('old node not given') + old_node_name = self.local_node + if not new_node_name: + raise Exception('new node not given') + old_node = self.get_node_info(old_node_name) + new_node = self.get_node_info(new_node_name) + + if old_node.name == new_node.name: + self.log.info("same node?") + return + + if old_node.type == 'root': + self.switchover_root(old_node, new_node) + else: + self.switchover_nonroot(old_node, new_node) + + # switch subscribers around + if self.options.all: + for n in self.get_node_subscriber_list(old_node.name): + self.node_change_provider(n, new_node.name) + + def cmd_pause(self): + """Pause a node""" + self.load_local_info() + node, consumer = self.find_consumer() + self.pause_consumer(node, consumer) + + def cmd_resume(self): + """Resume a node from pause.""" + self.load_local_info() + node, consumer = self.find_consumer() + self.resume_consumer(node, consumer) + + def cmd_members(self): + """Show member list.""" + db = self.get_database(self.initial_db_name) + desc = 'Member info on %s:' % self.local_node + q = "select node_name, dead, node_location"\ + " from pgq_node.get_queue_locations(%s) order by 1" + self.display_table(db, desc, q, [self.queue_name]) + + # + # Shortcuts for operating on nodes. + # + + def load_local_info(self): + """fetch set info from local node.""" + db = self.get_database(self.initial_db_name) + self.queue_info = self.load_queue_info(db) + self.local_node = self.queue_info.local_node.name + + def get_node_database(self, node_name): + """Connect to node.""" + if node_name == self.queue_info.local_node.name: + db = self.get_database(self.initial_db_name) + else: + m = self.queue_info.get_member(node_name) + if not m: + self.log.error("get_node_database: cannot resolve %s" % node_name) + sys.exit(1) + loc = m.location + db = self.get_database('node.' 
+ node_name, connstr = loc) + return db + + def close_node_database(self, node_name): + """Disconnect node's connection.""" + if node_name == self.queue_info.local_node.name: + self.close_database(self.initial_db_name) + else: + self.close_database("node." + node_name) + + def node_cmd(self, node_name, sql, args, quiet = False): + """Execute SQL command on particular node.""" + db = self.get_node_database(node_name) + return self.exec_cmd(db, sql, args, quiet = quiet) + + # + # Various operation on nodes. + # + + def set_paused(self, node, consumer, pause_flag): + """Set node pause flag and wait for confirmation.""" + + q = "select * from pgq_node.set_consumer_paused(%s, %s, %s)" + self.node_cmd(node, q, [self.queue_name, consumer, pause_flag]) + + self.log.info('Waiting for worker to accept') + while self.looping: + q = "select * from pgq_node.get_consumer_state(%s, %s)" + stat = self.node_cmd(node, q, [self.queue_name, consumer], quiet = 1)[0] + if stat['paused'] != pause_flag: + raise Exception('operation canceled? %s <> %s' % (repr(stat['paused']), repr(pause_flag))) + + if stat['uptodate']: + op = pause_flag and "paused" or "resumed" + self.log.info("Consumer '%s' on node '%s' %s" % (consumer, node, op)) + return + time.sleep(1) + raise Exception('process canceled') + + + def pause_consumer(self, node, consumer): + """Shortcut for pausing by name.""" + self.set_paused(node, consumer, True) + + def resume_consumer(self, node, consumer): + """Shortcut for resuming by name.""" + self.set_paused(node, consumer, False) + + def pause_node(self, node): + """Shortcut for pausing by name.""" + state = self.get_node_info(node) + self.pause_consumer(node, state.worker_name) + + def resume_node(self, node): + """Shortcut for resuming by name.""" + state = self.get_node_info(node) + self.resume_consumer(node, state.worker_name) + + def subscribe_node(self, target_node, subscriber_node, tick_pos): + """Subscribing one node to another.""" + q = "select * from pgq_node.subscribe_node(%s, %s, %s)" + self.node_cmd(target_node, q, [self.queue_name, subscriber_node, tick_pos]) + + def unsubscribe_node(self, target_node, subscriber_node): + """Unsubscribing one node from another.""" + q = "select * from pgq_node.unsubscribe_node(%s, %s)" + self.node_cmd(target_node, q, [self.queue_name, subscriber_node]) + + _node_cache = {} + def get_node_info(self, node_name): + """Cached node info lookup.""" + if node_name in self._node_cache: + return self._node_cache[node_name] + inf = self.load_node_info(node_name) + self._node_cache[node_name] = inf + return inf + + def load_node_info(self, node_name): + """Non-cached node info lookup.""" + db = self.get_node_database(node_name) + q = "select * from pgq_node.get_node_info(%s)" + rows = self.exec_query(db, q, [self.queue_name]) + return NodeInfo(self.queue_name, rows[0]) + + def load_queue_info(self, db): + """Non-cached set info lookup.""" + res = self.exec_query(db, "select * from pgq_node.get_node_info(%s)", [self.queue_name]) + info = res[0] + + q = "select * from pgq_node.get_queue_locations(%s)" + member_list = self.exec_query(db, q, [self.queue_name]) + + return QueueInfo(self.queue_name, info, member_list) + + def get_node_subscriber_list(self, node_name): + """Fetch subscriber list from a node.""" + q = "select node_name, local_watermark from pgq_node.get_subscriber_info(%s)" + db = self.get_node_database(node_name) + rows = self.exec_query(db, q, [self.queue_name]) + return [r['node_name'] for r in rows] + + def get_node_consumer_map(self, node_name): + 
"""Fetch consumer list from a node.""" + q = "select consumer_name, provider_node, last_tick_id from pgq_node.get_consumer_info(%s)" + db = self.get_node_database(node_name) + rows = self.exec_query(db, q, [self.queue_name]) + res = {} + for r in rows: + res[r['consumer_name']] = r + return res + +if __name__ == '__main__': + script = CascadeAdmin('setadm', 'node_db', sys.argv[1:], worker_setup = False) + script.start() + diff --git a/python/pgq/cascade/consumer.py b/python/pgq/cascade/consumer.py new file mode 100644 index 00000000..97b90c24 --- /dev/null +++ b/python/pgq/cascade/consumer.py @@ -0,0 +1,216 @@ +"""Cascaded consumer. + + +Does not maintain node, but is able to pause, resume and switch provider. +""" + +import sys, time, skytools + +from pgq.consumer import Consumer + +PDB = '_provider_db' + +__all__ = ['CascadedConsumer'] + +class CascadedConsumer(Consumer): + """CascadedConsumer base class. + + Loads provider from target node, accepts pause/resume commands. + """ + + _batch_info = None + _consumer_state = None + + def __init__(self, service_name, db_name, args): + """Initialize new consumer. + + @param service_name: service_name for DBScript + @param db_name: target database name for get_database() + @param args: cmdline args for DBScript + """ + + Consumer.__init__(self, service_name, PDB, args) + + self.log.debug("__init__") + + self.target_db = db_name + self.provider_connstr = None + + def register_consumer(self, provider_loc = None): + """Register consumer on source node first, then target node.""" + + if not provider_loc: + provider_loc = self.options.provider + if not provider_loc: + self.log.error('Please give provider location with --provider=') + sys.exit(1) + + dst_db = self.get_database(self.target_db) + dst_curs = dst_db.cursor() + src_db = self.get_database(PDB, connstr = provider_loc) + src_curs = src_db.cursor() + + # check target info + q = "select * from pgq_node.get_node_info(%s)" + res = self.exec_cmd(src_db, q, [ self.queue_name ]) + pnode = res[0]['node_name'] + if not pnode: + raise Exception('parent node not initialized?') + + # source queue + Consumer.register_consumer(self) + + # fetch pos + q = "select last_tick from pgq.get_consumer_info(%s, %s)" + src_curs.execute(q, [self.queue_name, self.consumer_name]) + last_tick = src_curs.fetchone()['last_tick'] + if not last_tick: + raise Exception('registration failed?') + src_db.commit() + + # target node + q = "select * from pgq_node.register_consumer(%s, %s, %s, %s)" + self.exec_cmd(dst_db, q, [self.queue_name, self.consumer_name, pnode, last_tick]) + + def unregister_consumer(self): + dst_db = self.get_database(self.target_db) + dst_curs = dst_db.cursor() + + # fetch provider loc + q = "select * from pgq_node.get_consumer_state(%s, %s)" + rows = self.exec_cmd(dst_db, q, [ self.queue_name, self.consumer_name ]) + state = rows[0] + provider_loc = state['provider_location'] + + # unregister on provider + src_db = self.get_database(PDB, connstr = provider_loc) + src_curs = src_db.cursor() + Consumer.unregister_consumer(self) + + # unregister on subscriber + q = "select * from pgq_node.unregister_consumer(%s, %s)" + self.exec_cmd(dst_db, q, [ self.queue_name, self.consumer_name ]) + + def init_optparse(self, parser = None): + p = Consumer.init_optparse(self, parser) + p.add_option("--provider", help = "provider location for --register") + return p + + def process_batch(self, src_db, batch_id, event_list): + self._batch_info = self.get_batch_info(batch_id) + + state = self._consumer_state + + if 
self.is_batch_done(state, self._batch_info): + for ev in event_list: + ev.tag_done() + return + + dst_db = self.get_database(self.target_db) + tick_id = self._batch_info['tick_id'] + self.process_remote_batch(src_db, tick_id, event_list, dst_db) + + # this also commits + self.finish_remote_batch(src_db, dst_db, tick_id) + + def process_root_node(self, dst_db): + """This is called on root node, where no processing should happen. + """ + # extra sleep + time.sleep(10*self.loop_delay) + + def work(self): + """Refresh state before calling Consumer.work().""" + + dst_db = self.get_database(self.target_db) + self._consumer_state = self.refresh_state(dst_db) + + if self._consumer_state['node_type'] == 'root': + self.log.info("target is root") + self.process_root_node(dst_db) + return + + if not self.provider_connstr: + raise Exception('provider_connstr not set') + src_db = self.get_database('_provider_db', connstr = self.provider_connstr) + + return Consumer.work(self) + + def refresh_state(self, dst_db, full_logic = True): + """Fetch consumer state from target node. + + This also sleeps if pause is set and updates + "uptodate" flag to notify that data is refreshed. + """ + + while 1: + q = "select * from pgq_node.get_consumer_state(%s, %s)" + rows = self.exec_cmd(dst_db, q, [ self.queue_name, self.consumer_name ]) + state = rows[0] + + # tag refreshed + if not state['uptodate'] and full_logic: + q = "select * from pgq_node.set_consumer_uptodate(%s, %s, true)" + self.exec_cmd(dst_db, q, [ self.queue_name, self.consumer_name ]) + + if not state['paused'] or not full_logic: + break + time.sleep(self.loop_delay) + + # update connection + loc = state['provider_location'] + if self.provider_connstr != loc: + self.close_database('_provider_db') + self.provider_connstr = loc + + return state + + def is_batch_done(self, state, batch_info): + cur_tick = batch_info['tick_id'] + prev_tick = batch_info['prev_tick_id'] + dst_tick = state['completed_tick'] + + if not dst_tick: + raise Exception('dst_tick NULL?') + + if prev_tick == dst_tick: + # on track + return False + + if cur_tick == dst_tick: + # current batch is already applied, skip it + return True + + # anything else means problems + raise Exception('Lost position: batch %d..%d, dst has %d' % ( + prev_tick, cur_tick, dst_tick)) + + def process_remote_batch(self, src_db, tick_id, event_list, dst_db): + """Per-batch callback. + + By default just calls process_remote_event() in loop.""" + src_curs = src_db.cursor() + dst_curs = dst_db.cursor() + for ev in event_list: + self.process_remote_event(src_curs, dst_curs, ev) + + def process_remote_event(self, src_curs, dst_curs, ev): + """Per-event callback. + + By default ignores cascading events and gives error on others. + Can be called from user handler to finish unprocessed events. + """ + if ev.ev_type[:4] == "pgq.": + # ignore cascading events + ev.tag_done() + else: + raise Exception('Unhandled event type in queue: %s' % ev.ev_type) + + def finish_remote_batch(self, src_db, dst_db, tick_id): + """Called after event processing. This should finish + work on remote db and commit there. + """ + # this also commits + q = "select * from pgq_node.set_consumer_completed(%s, %s, %s)" + self.exec_cmd(dst_db, q, [ self.queue_name, self.consumer_name, tick_id ]) + diff --git a/python/pgq/setinfo.py b/python/pgq/cascade/nodeinfo.py index 25a7f3c3..6d401792 100644 --- a/python/pgq/setinfo.py +++ b/python/pgq/cascade/nodeinfo.py @@ -1,86 +1,58 @@ #! 
/usr/bin/env python -__all__ = ['MemberInfo', 'NodeInfo', 'SetInfo', - 'ROOT', 'BRANCH', 'LEAF', 'COMBINED_ROOT', - 'COMBINED_BRANCH', 'MERGE_LEAF'] +"""Info about node/set/members. For admin tool. +""" + +__all__ = ['MemberInfo', 'NodeInfo', 'QueueInfo'] # node types ROOT = 'root' BRANCH = 'branch' LEAF = 'leaf' -COMBINED_ROOT = 'combined-root' -COMBINED_BRANCH = 'combined-branch' -MERGE_LEAF = 'merge-leaf' - -# which nodes need to do what actions -action_map = { -'process-batch': {'root':0, 'branch':1, 'leaf':1, 'combined-root':0, 'combined-branch':1, 'merge-leaf-to-root':1, 'merge-leaf-to-branch':1}, -'process-events': {'root':0, 'branch':1, 'leaf':1, 'combined-root':0, 'combined-branch':1, 'merge-leaf-to-root':1, 'merge-leaf-to-branch':0}, -'update-event-seq':{'root':0, 'branch':1, 'leaf':0, 'combined-root':0, 'combined-branch':1, 'merge-leaf-to-root':0, 'merge-leaf-to-branch':0}, -'copy-events': {'root':0, 'branch':1, 'leaf':0, 'combined-root':0, 'combined-branch':1, 'merge-leaf-to-root':0, 'merge-leaf-to-branch':0}, -'tick-event': {'root':0, 'branch':0, 'leaf':0, 'combined-root':0, 'combined-branch':0, 'merge-leaf-to-root':1, 'merge-leaf-to-branch':0}, -'global-wm-event': {'root':1, 'branch':0, 'leaf':0, 'combined-root':1, 'combined-branch':0, 'merge-leaf-to-root':0, 'merge-leaf-to-branch':0}, -'wait-behind': {'root':0, 'branch':0, 'leaf':0, 'combined-root':0, 'combined-branch':0, 'merge-leaf-to-root':0, 'merge-leaf-to-branch':1}, -'sync-part-pos': {'root':0, 'branch':0, 'leaf':0, 'combined-root':0, 'combined-branch':1, 'merge-leaf-to-root':0, 'merge-leaf-to-branch':0}, -'local-wm-publish':{'root':0, 'branch':1, 'leaf':1, 'combined-root':0, 'combined-branch':1, 'merge-leaf-to-root':1, 'merge-leaf-to-branch':1}, -} class MemberInfo: + """Info about set member.""" def __init__(self, row): self.name = row['node_name'] self.location = row['node_location'] self.dead = row['dead'] class NodeInfo: - def __init__(self, set_name, row, main_worker = True): - self.set_name = set_name + """Detailed info about set node.""" + def __init__(self, queue_name, row, main_worker = True): + self.queue_name = queue_name self.member_map = {} self.main_worker = main_worker self.name = row['node_name'] self.type = row['node_type'] - self.queue_name = row['queue_name'] self.global_watermark = row['global_watermark'] self.local_watermark = row['local_watermark'] - self.completed_tick = row['completed_tick'] + self.completed_tick = row['worker_last_tick'] self.provider_node = row['provider_node'] self.provider_location = row['provider_location'] - self.paused = row['paused'] - self.resync = row['resync'] - self.uptodate = row['uptodate'] - self.combined_set = row['combined_set'] - self.combined_type = row['combined_type'] + self.consumer_name = row['worker_name'] + self.worker_name = row['worker_name'] + self.paused = row['worker_paused'] + self.uptodate = row['worker_uptodate'] self.combined_queue = row['combined_queue'] + self.combined_type = row['combined_type'] + + self.parent = None + self.consumer_map = {} + self.queue_info = {} self._row = row self._info_lines = [] - def need_action(self, action_name): - """Returns True if worker for this node needs - to do specified action. 
- """ - if not self.main_worker: - return action_name in ('process-batch', 'process-events') - - typ = self.type - if type == MERGE_LEAF: - if self.target_type == COMBINED_BRANCH: - typ = "merge-leaf-to-branch" - elif self.target_type == COMBINED_ROOT: - typ = "merge-leaf-to-root" - else: - raise Exception('bad target type') - - try: - return action_map[action_name][typ] - except KeyError, d: - raise Exception('need_action(name=%s, type=%s) unknown' % (action_name, typ)) - - def get_target_queue(self): + def __get_target_queue(self): qname = None - if self.type == 'merge-leaf': - qname = self.combined_queue + if self.type == LEAF: + if self.combined_queue: + qname = self.combined_queue + else: + return None else: qname = self.queue_name if qname is None: @@ -89,11 +61,12 @@ class NodeInfo: def get_infolines(self): lst = self._info_lines + if self.parent: root = self.parent while root.parent: root = root.parent - tick_time = self.parent.consumer_map[self.name]['tick_time'] + tick_time = self.parent.consumer_map[self.consumer_name]['tick_time'] root_time = root.queue_info['now'] lag = root_time - tick_time else: @@ -116,20 +89,25 @@ class NodeInfo: q = "select consumer_name, current_timestamp - lag as tick_time,"\ " lag, last_seen, last_tick "\ "from pgq.get_consumer_info(%s)" - curs.execute(q, [self.set_name]) + curs.execute(q, [self.queue_name]) for row in curs.fetchall(): cname = row['consumer_name'] self.consumer_map[cname] = row q = "select current_timestamp - ticker_lag as tick_time,"\ " ticker_lag, current_timestamp as now "\ "from pgq.get_queue_info(%s)" - curs.execute(q, [self.set_name]) + curs.execute(q, [self.queue_name]) self.queue_info = curs.fetchone() -class SetInfo: - def __init__(self, set_name, info_row, member_rows): - self.local_node = NodeInfo(set_name, info_row) - self.set_name = set_name +class QueueInfo: + """Info about cascaded queue. + + Slightly broken, as all info is per-node. + """ + + def __init__(self, queue_name, info_row, member_rows): + self.local_node = NodeInfo(queue_name, info_row) + self.queue_name = queue_name self.member_map = {} self.node_map = {} self.add_node(self.local_node) @@ -147,6 +125,10 @@ class SetInfo: def add_node(self, node): self.node_map[node.name] = node + # + # Rest is about printing the tree + # + _DATAFMT = "%-30s%s" def print_tree(self): """Print ascii-tree for set. 
@@ -156,15 +138,15 @@ class SetInfo: self._tree_calc(root) datalines = self._print_node(root, '', []) for ln in datalines: - print self._DATAFMT % (' ', ln) + print(self._DATAFMT % (' ', ln)) def _print_node(self, node, pfx, datalines): # print a tree fragment for node and info # returns list of unprinted data rows for ln in datalines: - print self._DATAFMT % (_setpfx(pfx, '|'), ln) + print(self._DATAFMT % (_setpfx(pfx, '|'), ln)) datalines = node.get_infolines() - print "%s%s" % (_setpfx(pfx, '+--'), node.name) + print("%s%s" % (_setpfx(pfx, '+--'), node.name)) for i, n in enumerate(node.child_list): sfx = ((i < len(node.child_list) - 1) and ' |' or ' ') @@ -176,14 +158,14 @@ class SetInfo: # reset vars, fill parent and child_list for each node # returns root root = None - for node in self.node_map.itervalues(): + for node in self.node_map.values(): node.total_childs = 0 node.levels = 0 node.child_list = [] - if node.type in (ROOT, COMBINED_ROOT): + if node.type == ROOT: root = node - for node in self.node_map.itervalues(): - if node.provider_node: + for node in self.node_map.values(): + if node.provider_node and node.provider_node != node.name: p = self.node_map[node.provider_node] p.child_list.append(node) node.parent = p @@ -191,7 +173,7 @@ class SetInfo: node.parent = None if root is None: - raise Exception("root nod enot found") + raise Exception("root node not found") return root def _tree_calc(self, node): @@ -206,18 +188,13 @@ class SetInfo: levels = subnode.levels + 1 node.total_childs = total node.levels = levels - node.child_list.sort(_cmp_node) + node.child_list.sort(key = _node_key) def _setpfx(pfx, sfx): if pfx: pfx = pfx[:-1] + sfx return pfx - -def _cmp_node(n1, n2): - # returns neg if n1 smaller - cmp = n1.levels - n2.levels - if cmp == 0: - cmp = n1.total_childs - n2.total_childs - return cmp +def _node_key(n): + return (n.levels, n.total_childs) diff --git a/python/pgq/cascade/worker.py b/python/pgq/cascade/worker.py new file mode 100644 index 00000000..caaa7747 --- /dev/null +++ b/python/pgq/cascade/worker.py @@ -0,0 +1,260 @@ +"""Cascaded worker. + +CascadedConsumer that also maintains node. 
+ +""" + +import sys, time, skytools + +from pgq.cascade.consumer import CascadedConsumer +from pgq.producer import bulk_insert_events + +__all__ = ['CascadedWorker'] + +class WorkerState: + """Depending on node state decides on actions worker needs to do.""" + # node_type, + # node_name, provider_node, + # global_watermark, local_watermark + # combined_queue, combined_type + process_batch = 0 # handled in CascadedConsumer + copy_events = 0 # ok + global_wm_event = 0 # ok + local_wm_publish = 0 # ok + + process_events = 0 # ok + send_tick_event = 0 # ok + wait_behind = 0 # ok + process_tick_event = 0 # ok + target_queue = '' # ok + keep_event_ids = 0 # ok + create_tick = 0 # ok + def __init__(self, queue_name, nst): + ntype = nst['node_type'] + ctype = nst['combined_type'] + if ntype == 'root': + self.global_wm_event = 1 + elif ntype == 'branch': + self.target_queue = queue_name + self.process_batch = 1 + self.process_events = 1 + self.copy_events = 1 + self.process_tick_event = 1 + self.local_wm_publish = 1 + self.keep_event_ids = 1 + self.create_tick = 1 + elif ntype == 'leaf' and not ctype: + self.process_batch = 1 + self.process_events = 1 + elif ntype == 'leaf' and ctype: + self.target_queue = nst['combined_queue'] + if ctype == 'root': + self.process_batch = 1 + self.process_events = 1 + self.copy_events = 1 + self.send_tick_event = 1 + elif ctype == 'branch': + self.process_batch = 1 + self.wait_behind = 1 + else: + raise Exception('invalid state 1') + else: + raise Exception('invalid state 2') + if ctype and ntype != 'leaf': + raise Exception('invalid state 3') + +class CascadedWorker(CascadedConsumer): + """CascadedWorker base class. + """ + + global_wm_publish_time = 0 + global_wm_publish_period = 5 * 60 + + local_wm_publish_time = 0 + local_wm_publish_period = 5 * 60 + + max_evbuf = 500 + cur_event_seq = 0 + cur_max_id = 0 + seq_buffer = 10000 + + main_worker = True + + _worker_state = None + ev_buf = None + + def __init__(self, service_name, db_name, args): + """Initialize new consumer. + + @param service_name: service_name for DBScript + @param db_name: target database name for get_database() + @param args: cmdline args for DBScript + """ + + CascadedConsumer.__init__(self, service_name, db_name, args) + + def process_remote_batch(self, src_db, tick_id, event_list, dst_db): + """Worker-specific event processing.""" + self.ev_buf = [] + max_id = 0 + st = self._worker_state + + if st.wait_behind: + self.wait_for_tick(dst_db, tick_id) + + src_curs = src_db.cursor() + dst_curs = dst_db.cursor() + for ev in event_list: + if st.copy_events: + self.copy_event(dst_curs, ev) + if ev.ev_type[:4] == "pgq.": + # process cascade events even on waiting leaf node + self.process_remote_event(src_curs, dst_curs, ev) + else: + if st.process_events: + self.process_remote_event(src_curs, dst_curs, ev) + else: + ev.tag_done() + if ev.ev_id > max_id: + max_id = ev.ev_id + if st.local_wm_publish: + self.publish_local_wm(src_db) + if max_id > self.cur_max_id: + self.cur_max_id = max_id + + def wait_for_tick(self, dst_db, tick_id): + """On combined-branch leaf needs to wait from tick + to appear from combined-root. + """ + while 1: + cst = self._consumer_state + if cst['completed_tick'] >= tick_id: + return + time.sleep(10 * self.loop_delay) + self._consumer_state = self.refresh_state(dst_db) + + def publish_local_wm(self, src_db): + """Send local watermark to provider. 
+ """ + if not self.main_worker: + return + t = time.time() + if t - self.local_wm_publish_time >= self.local_wm_publish_period: + return + + st = self._worker_state + src_curs = src_db.cursor() + q = "select * from pgq_node.set_subscriber_watermark(%s, %s, %s)" + src_curs.execute(q, [self.pgq_queue_name, st.node_name, st.local_watermark]) + self.local_wm_publish_time = t + + def process_remote_event(self, src_curs, dst_curs, ev): + """Handle cascading events. + """ + # non cascade events send to CascadedConsumer to error out + if ev.ev_type[:4] != 'pgq.': + CascadedConsumer.process_remote_event(self, src_curs, dst_curs, ev) + return + + # ignore cascade events if not main worker + if not self.main_worker: + ev.tag_done() + return + + # check if for right queue + t = ev.ev_type + if ev.ev_extra1 != self.pgq_queue_name and t != "pgq.tick-id": + raise Exception("bad event in queue: "+str(ev)) + + self.log.info("got cascade event: %s" % t) + if t == "pgq.location-info": + node = ev.ev_data + loc = ev.ev_extra2 + dead = ev.ev_extra3 + q = "select * from pgq_node.register_location(%s, %s, %s, %s)" + dst_curs.execute(q, [self.pgq_queue_name, node, loc, dead]) + elif t == "pgq.global-watermark": + tick_id = int(ev.ev_data) + q = "select * from pgq_node.set_global_watermark(%s, %s)" + dst_curs.execute(q, [self.pgq_queue_name, tick_id]) + elif t == "pgq.tick-id": + tick_id = int(ev.ev_data) + if ev.ev_extra1 == self.pgq_queue_name: + raise Exception('tick-id event for own queue?') + st = self._worker_state + if st.process_tick_event: + q = "select * from pgq_node.set_partition_watermark(%s, %s, %s)" + dst_curs.execute(q, [self.pgq_queue_name, ev.ev_extra1, tick_id]) + else: + raise Exception("unknown cascade event: %s" % t) + ev.tag_done() + + def finish_remote_batch(self, src_db, dst_db, tick_id): + """Worker-specific cleanup on target node. + """ + + if self.main_worker: + st = self._worker_state + dst_curs = dst_db.cursor() + + self.flush_events(dst_curs) + + # send tick event into queue + if st.send_tick_event: + q = "select pgq.insert_event(%s, 'pgq.tick-id', %s, %s, null, null, null)" + dst_curs.execute(q, [st.target_queue, str(tick_id), self.pgq_queue_name]) + if st.create_tick: + # create actual tick + tick_id = self._batch_info['tick_id'] + tick_time = self._batch_info['batch_end'] + q = "select pgq.ticker(%s, %s, %s, %s)" + dst_curs.execute(q, [self.pgq_queue_name, tick_id, tick_time, self.cur_max_id]) + + CascadedConsumer.finish_remote_batch(self, src_db, dst_db, tick_id) + + def copy_event(self, dst_curs, ev): + """Add event to copy buffer. + """ + if not self.main_worker: + return + if ev.type[:4] == "pgq.": + return + if len(self.ev_buf) >= self.max_evbuf: + self.flush_events(dst_curs) + self.ev_buf.append(ev) + + def flush_events(self, dst_curs): + """Send copy buffer to target queue. + """ + if len(self.ev_buf) == 0: + return + flds = ['ev_time', 'ev_type', 'ev_data', 'ev_extra1', + 'ev_extra2', 'ev_extra3', 'ev_extra4'] + st = self._worker_state + if st.keep_event_ids: + flds.append('ev_id') + bulk_insert_events(dst_curs, self.ev_buf, flds, st.target_queue) + self.ev_buf = [] + + def refresh_state(self, dst_db, full_logic = True): + """Load also node state from target node. 
+ """ + res = CascadedConsumer.refresh_state(self, dst_db, full_logic) + q = "select * from pgq_node.get_node_info(%s)" + st = self.exec_cmd(dst_db, q, [ self.pgq_queue_name ]) + self._worker_state = WorkerState(self.pgq_queue_name, st[0]) + return res + + def process_root_node(self, dst_db): + """On root node send global watermark downstream. + """ + t = time.time() + if t - self.global_wm_publish_time < self.global_wm_publish_period: + return + + dst_curs = dst_db.cursor() + q = "select * from pgq_node.set_global_watermark(%s, NULL)" + dst_curs.execute(q, [self.pgq_queue_name]) + dst_db.commit() + self.global_wm_publish_time = t + diff --git a/python/pgq/consumer.py b/python/pgq/consumer.py index 3b823031..8c001cbe 100644 --- a/python/pgq/consumer.py +++ b/python/pgq/consumer.py @@ -1,9 +1,9 @@ """PgQ consumer framework for Python. -API problems(?): - - process_event() and process_batch() should have db as argument. - - should ev.tag*() update db immidiately? +todo: + - pgq.next_batch_details() + - tag_done() by default """ @@ -11,7 +11,7 @@ import sys, time, skytools from pgq.event import * -__all__ = ['Consumer', 'RemoteConsumer', 'SerialConsumer'] +__all__ = ['Consumer'] class _WalkerEvent(Event): """Redirects status flags to BatchWalker. @@ -113,21 +113,41 @@ class Consumer(skytools.DBScript): skytools.DBScript.__init__(self, service_name, args) self.db_name = db_name - self.reg_list = [] - self.consumer_id = self.cf.get("pgq_consumer_id", self.job_name) - self.pgq_queue_name = self.cf.get("pgq_queue_name") + + # compat params + self.consumer_name = self.cf.get("pgq_consumer_id", '') + self.queue_name = self.cf.get("pgq_queue_name", '') + + # proper params + if not self.consumer_name: + self.consumer_name = self.cf.get("consumer_name", self.job_name) + if not self.queue_name: + self.queue_name = self.cf.get("queue_name") + self.pgq_lazy_fetch = self.cf.getint("pgq_lazy_fetch", 0) + self.stat_batch_start = 0 - def attach(self): - """Attach consumer to interesting queues.""" - res = self.register_consumer(self.pgq_queue_name) - return res + # compat vars + self.pgq_queue_name = self.queue_name + self.consumer_id = self.consumer_name + + def startup(self): + """Handle commands here. __init__ does not have error logging.""" + if self.options.register: + self.register_consumer() + sys.exit(0) + if self.options.unregister: + self.unregister_consumer() + sys.exit(0) + return skytools.DBScript.startup(self) - def detach(self): - """Detach consumer from all queues.""" - tmp = self.reg_list[:] - for q in tmp: - self.unregister_consumer(q) + def init_optparse(self, parser = None): + p = skytools.DBScript.init_optparse(self, parser) + p.add_option('--register', action='store_true', + help = 'register consumer on queue') + p.add_option('--unregister', action='store_true', + help = 'unregister consumer from queue') + return p def process_event(self, db, event): """Process one event. @@ -152,66 +172,59 @@ class Consumer(skytools.DBScript): self.process_event(db, ev) def work(self): - """Do the work loop, once (internal).""" - - if len(self.reg_list) == 0: - self.log.debug("Attaching") - self.attach() + """Do the work loop, once (internal). + Returns: true if wants to be called again, + false if script can sleep. 
+ """ db = self.get_database(self.db_name) curs = db.cursor() - data_avail = 0 - for queue in self.reg_list: - self.stat_start() - - # acquire batch - batch_id = self._load_next_batch(curs, queue) - db.commit() - if batch_id == None: - continue - data_avail = 1 - - # load events - list = self._load_batch_events(curs, batch_id, queue) - db.commit() - - # process events - self._launch_process_batch(db, batch_id, list) - - # done - self._finish_batch(curs, batch_id, list) - db.commit() - self.stat_end(len(list)) - - # if false, script sleeps - return data_avail - - def register_consumer(self, queue_name): + self.stat_start() + + # acquire batch + batch_id = self._load_next_batch(curs) + db.commit() + if batch_id == None: + return 0 + + # load events + ev_list = self._load_batch_events(curs, batch_id) + db.commit() + + # process events + self._launch_process_batch(db, batch_id, ev_list) + + # done + self._finish_batch(curs, batch_id, ev_list) + db.commit() + self.stat_end(len(ev_list)) + + return 1 + + def register_consumer(self): + self.log.info("Registering consumer on source queue") db = self.get_database(self.db_name) cx = db.cursor() cx.execute("select pgq.register_consumer(%s, %s)", - [queue_name, self.consumer_id]) + [self.queue_name, self.consumer_name]) res = cx.fetchone()[0] db.commit() - self.reg_list.append(queue_name) - return res - def unregister_consumer(self, queue_name): + def unregister_consumer(self): + self.log.info("Unregistering consumer from source queue") db = self.get_database(self.db_name) cx = db.cursor() cx.execute("select pgq.unregister_consumer(%s, %s)", - [queue_name, self.consumer_id]) + [self.queue_name, self.consumer_name]) db.commit() - self.reg_list.remove(queue_name) - def _launch_process_batch(self, db, batch_id, list): self.process_batch(db, batch_id, list) - def _load_batch_events_old(self, curs, batch_id, queue_name): + def _load_batch_events_old(self, curs, batch_id): """Fetch all events for this batch.""" # load events @@ -220,26 +233,26 @@ class Consumer(skytools.DBScript): rows = curs.dictfetchall() # map them to python objects - list = [] + ev_list = [] for r in rows: - ev = Event(queue_name, r) - list.append(ev) + ev = Event(self.queue_name, r) + ev_list.append(ev) - return list + return ev_list - def _load_batch_events(self, curs, batch_id, queue_name): + def _load_batch_events(self, curs, batch_id): """Fetch all events for this batch.""" if self.pgq_lazy_fetch: - return _BatchWalker(curs, batch_id, queue_name, self.pgq_lazy_fetch) + return _BatchWalker(curs, batch_id, self.queue_name, self.pgq_lazy_fetch) else: - return self._load_batch_events_old(curs, batch_id, queue_name) + return self._load_batch_events_old(curs, batch_id) - def _load_next_batch(self, curs, queue_name): + def _load_next_batch(self, curs): """Allocate next batch. 
(internal)""" q = "select pgq.next_batch(%s, %s)" - curs.execute(q, [queue_name, self.consumer_id]) + curs.execute(q, [self.queue_name, self.consumer_name]) return curs.fetchone()[0] def _finish_batch(self, curs, batch_id, list): @@ -255,23 +268,24 @@ class Consumer(skytools.DBScript): self._tag_failed(curs, batch_id, ev_id, stat[1]) failed += 1 elif stat[0] != EV_DONE: - raise Exception("Untagged event: %d" % ev_id) + raise Exception("Untagged event: id=%d" % ev_id) else: for ev in list: - if ev.status == EV_FAILED: + if ev._status == EV_FAILED: self._tag_failed(curs, batch_id, ev.id, ev.fail_reason) failed += 1 - elif ev.status == EV_RETRY: + elif ev._status == EV_RETRY: self._tag_retry(curs, batch_id, ev.id, ev.retry_time) retry += 1 - elif stat[0] != EV_DONE: - raise Exception("Untagged event: %d" % ev_id) + elif ev._status != EV_DONE: + raise Exception("Untagged event: (id=%d, type=%s, data=%s, ex1=%s" % ( + ev.id, ev.type, ev.data, ev.extra1)) # report weird events if retry: - self.stat_add('retry-events', retry) + self.stat_increase('retry-events', retry) if failed: - self.stat_add('failed-events', failed) + self.stat_increase('failed-events', failed) curs.execute("select pgq.finish_batch(%s)", [batch_id]) @@ -317,189 +331,3 @@ class Consumer(skytools.DBScript): self.stat_put('duration', t - self.stat_batch_start) -class RemoteConsumer(Consumer): - """Helper for doing event processing in another database. - - Requires that whole batch is processed in one TX. - """ - - def __init__(self, service_name, db_name, remote_db, args): - Consumer.__init__(self, service_name, db_name, args) - self.remote_db = remote_db - - def process_batch(self, db, batch_id, event_list): - """Process all events in batch. - - By default calls process_event for each. - """ - dst_db = self.get_database(self.remote_db) - curs = dst_db.cursor() - - if self.is_last_batch(curs, batch_id): - for ev in event_list: - ev.tag_done() - return - - self.process_remote_batch(db, batch_id, event_list, dst_db) - - self.set_last_batch(curs, batch_id) - dst_db.commit() - - def is_last_batch(self, dst_curs, batch_id): - """Helper function to keep track of last successful batch - in external database. - """ - q = "select pgq_ext.is_batch_done(%s, %s)" - dst_curs.execute(q, [ self.consumer_id, batch_id ]) - return dst_curs.fetchone()[0] - - def set_last_batch(self, dst_curs, batch_id): - """Helper function to set last successful batch - in external database. - """ - q = "select pgq_ext.set_batch_done(%s, %s)" - dst_curs.execute(q, [ self.consumer_id, batch_id ]) - - def process_remote_batch(self, db, batch_id, event_list, dst_db): - raise Exception('process_remote_batch not implemented') - -class SerialConsumer(Consumer): - """Consumer that applies batches sequentially in second database. - - Requirements: - - Whole batch in one TX. - - Must not use retry queue. - - Features: - - Can detect if several batches are already applied to dest db. - - If some ticks are lost. allows to seek back on queue. - Whether it succeeds, depends on pgq configuration. 
- """ - - def __init__(self, service_name, db_name, remote_db, args): - Consumer.__init__(self, service_name, db_name, args) - self.remote_db = remote_db - self.dst_schema = "pgq_ext" - self.cur_batch_info = None - - def startup(self): - if self.options.rewind: - self.rewind() - sys.exit(0) - if self.options.reset: - self.dst_reset() - sys.exit(0) - return Consumer.startup(self) - - def init_optparse(self, parser = None): - p = Consumer.init_optparse(self, parser) - p.add_option("--rewind", action = "store_true", - help = "change queue position according to destination") - p.add_option("--reset", action = "store_true", - help = "reset queue pos on destination side") - return p - - def process_batch(self, db, batch_id, event_list): - """Process all events in batch. - """ - - dst_db = self.get_database(self.remote_db) - curs = dst_db.cursor() - - self.cur_batch_info = self.get_batch_info(batch_id) - - # check if done - if self.is_batch_done(curs): - for ev in event_list: - ev.tag_done() - return - - # actual work - self.process_remote_batch(db, batch_id, event_list, dst_db) - - # finish work - self.set_batch_done(curs) - dst_db.commit() - - def is_batch_done(self, dst_curs): - """Helper function to keep track of last successful batch - in external database. - """ - - cur_tick = self.cur_batch_info['tick_id'] - prev_tick = self.cur_batch_info['prev_tick_id'] - - dst_tick = self.get_last_tick(dst_curs) - if not dst_tick: - # seems this consumer has not run yet against dst_db - return False - - if prev_tick == dst_tick: - # on track - return False - - if prev_tick < dst_tick: - if dst_tick - prev_tick > 5: - raise Exception('Difference too big, skipping dangerous') - self.log.warning('Got tick %d, dst has %d - skipping' % (prev_tick, dst_tick)) - return True - else: - self.log.error('Got tick %d, dst has %d - ticks lost' % (prev_tick, dst_tick)) - raise Exception('Lost ticks') - - def set_batch_done(self, dst_curs): - """Helper function to set last successful batch - in external database. 
- """ - tick_id = self.cur_batch_info['tick_id'] - self.set_last_tick(dst_curs, tick_id) - - def attach(self): - new = Consumer.attach(self) - if new: - self.dst_reset() - - def detach(self): - """If detaching, also clean completed tick table on dest.""" - - Consumer.detach(self) - self.dst_reset() - - def process_remote_batch(self, db, batch_id, event_list, dst_db): - raise Exception('process_remote_batch not implemented') - - def rewind(self): - self.log.info("Rewinding queue") - src_db = self.get_database(self.db_name) - dst_db = self.get_database(self.remote_db) - src_curs = src_db.cursor() - dst_curs = dst_db.cursor() - - dst_tick = self.get_last_tick(dst_curs) - if dst_tick: - q = "select pgq.register_consumer_at(%s, %s, %s)" - src_curs.execute(q, [self.pgq_queue_name, self.consumer_id, dst_tick]) - else: - self.log.warning('No tick found on dst side') - - dst_db.commit() - src_db.commit() - - def dst_reset(self): - self.log.info("Resetting queue tracking on dst side") - dst_db = self.get_database(self.remote_db) - dst_curs = dst_db.cursor() - self.set_last_tick(dst_curs, None) - dst_db.commit() - - def get_last_tick(self, dst_curs): - q = "select %s.get_last_tick(%%s)" % self.dst_schema - dst_curs.execute(q, [self.consumer_id]) - res = dst_curs.fetchone() - return res[0] - - def set_last_tick(self, dst_curs, tick_id): - q = "select %s.set_last_tick(%%s, %%s)" % self.dst_schema - dst_curs.execute(q, [ self.consumer_id, tick_id ]) - - diff --git a/python/pgq/event.py b/python/pgq/event.py index afaabbaf..80801b16 100644 --- a/python/pgq/event.py +++ b/python/pgq/event.py @@ -38,6 +38,9 @@ class Event(object): Consumer is supposed to tag them after processing. If not, events will stay in retry queue. """ + __slots__ = ('_event_row', '_status', 'retry_time', + 'fail_reason', 'queue_name') + def __init__(self, queue_name, row): self._event_row = row self._status = EV_UNTAGGED @@ -62,3 +65,16 @@ class Event(object): def get_status(self): return self._status + # be also dict-like + def __getitem__(self, k): return self._event_row.__getitem__(k) + def __contains__(self, k): return self._event_row.__contains__(k) + def get(self, k, d=None): return self._event_row.get(k, d) + def has_key(self, k): return self._event_row.has_key(k) + def keys(self): return self._event_row.keys() + def values(self): return self._event_row.keys() + def items(self): return self._event_row.items() + def iterkeys(self): return self._event_row.iterkeys() + def itervalues(self): return self._event_row.itervalues() + def __str__(self): + return "<id=%d type=%s data=%s e1=%s e2=%s e3=%s e4=%s>" % ( + self.id, self.type, self.data, self.extra1, self.extra2, self.extra3, self.extra4) diff --git a/python/pgq/maint.py b/python/pgq/maint.py index 396f9c0a..89014dd4 100644 --- a/python/pgq/maint.py +++ b/python/pgq/maint.py @@ -2,6 +2,8 @@ import skytools, time +__all__ = ['MaintenanceJob'] + def get_pgq_api_version(curs): q = "select count(1) from pg_proc p, pg_namespace n"\ " where n.oid = p.pronamespace and n.nspname='pgq'"\ diff --git a/python/pgq/rawconsumer.py b/python/pgq/rawconsumer.py deleted file mode 100644 index a43b86b7..00000000 --- a/python/pgq/rawconsumer.py +++ /dev/null @@ -1,52 +0,0 @@ - - -import sys, time, skytools, pgq.consumer - -class RawQueue: - queue_name = None - consumer_name = None - batch_id = None - cur_tick = None - prev_tick = None - def __init__(self, queue_name, consumer_name): - self.queue_name = queue_name - self.consumer_name = consumer_name - self.bulk_insert_buf = [] - 
self.bulk_insert_size = 200 - self.bulk_insert_fields = ['ev_id', 'ev_time', 'ev_type', 'ev_data', 'ev_extra1', 'ev_extra2', 'ev_extra3', 'ev_extra4'] - - def next_batch(self, curs): - q = "select * from pgq.next_batch(%s, %s)" - curs.execute(q, [self.queue_name, self.consumer_name]) - self.batch_id = curs.fetchone()[0] - - if not self.batch_id: - return self.batch_id - - q = "select tick_id, prev_tick_id, batch_end from pgq.get_batch_info(%s)" - curs.execute(q, [self.batch_id]) - inf = curs.dictfetchone() - self.cur_tick = inf['tick_id'] - self.prev_tick = inf['prev_tick_id'] - self.tick_time = inf['batch_end'] - - return self.batch_id - - def finish_batch(self, curs): - q = "select * from pgq.finish_batch(%s)" - curs.execute(q, [self.batch_id]) - - def get_batch_events(self, curs): - return pgq.consumer._BatchWalker(curs, self.batch_id, self.queue_name) - - def bulk_insert(self, curs, ev): - row = map(ev.__getattr__, self.bulk_insert_fields) - self.bulk_insert_buf.append(row) - if len(self.bulk_insert_buf) >= self.bulk_insert_size: - self.finish_bulk_insert(curs) - - def finish_bulk_insert(self, curs): - pgq.bulk_insert_events(curs, self.bulk_insert_buf, - self.bulk_insert_fields, self.queue_name) - self.bulk_insert_buf = [] - diff --git a/python/pgq/remoteconsumer.py b/python/pgq/remoteconsumer.py new file mode 100644 index 00000000..f5c2ced5 --- /dev/null +++ b/python/pgq/remoteconsumer.py @@ -0,0 +1,197 @@ + +""" +old RemoteConsumer / SerialConsumer classes. + +""" + +import sys, time, skytools + +from pgq.consumer import Consumer + +__all__ = ['RemoteConsumer', 'SerialConsumer'] + +class RemoteConsumer(Consumer): + """Helper for doing event processing in another database. + + Requires that whole batch is processed in one TX. + """ + + def __init__(self, service_name, db_name, remote_db, args): + Consumer.__init__(self, service_name, db_name, args) + self.remote_db = remote_db + + def process_batch(self, db, batch_id, event_list): + """Process all events in batch. + + By default calls process_event for each. + """ + dst_db = self.get_database(self.remote_db) + curs = dst_db.cursor() + + if self.is_last_batch(curs, batch_id): + for ev in event_list: + ev.tag_done() + return + + self.process_remote_batch(db, batch_id, event_list, dst_db) + + self.set_last_batch(curs, batch_id) + dst_db.commit() + + def is_last_batch(self, dst_curs, batch_id): + """Helper function to keep track of last successful batch + in external database. + """ + q = "select pgq_ext.is_batch_done(%s, %s)" + dst_curs.execute(q, [ self.consumer_name, batch_id ]) + return dst_curs.fetchone()[0] + + def set_last_batch(self, dst_curs, batch_id): + """Helper function to set last successful batch + in external database. + """ + q = "select pgq_ext.set_batch_done(%s, %s)" + dst_curs.execute(q, [ self.consumer_name, batch_id ]) + + def process_remote_batch(self, db, batch_id, event_list, dst_db): + raise Exception('process_remote_batch not implemented') + +class SerialConsumer(Consumer): + """Consumer that applies batches sequentially in second database. + + Requirements: + - Whole batch in one TX. + - Must not use retry queue. + + Features: + - Can detect if several batches are already applied to dest db. + - If some ticks are lost. allows to seek back on queue. + Whether it succeeds, depends on pgq configuration. 
+ """ + + def __init__(self, service_name, db_name, remote_db, args): + Consumer.__init__(self, service_name, db_name, args) + self.remote_db = remote_db + self.dst_schema = "pgq_ext" + self.cur_batch_info = None + + def startup(self): + if self.options.rewind: + self.rewind() + sys.exit(0) + if self.options.reset: + self.dst_reset() + sys.exit(0) + return Consumer.startup(self) + + def init_optparse(self, parser = None): + p = Consumer.init_optparse(self, parser) + p.add_option("--rewind", action = "store_true", + help = "change queue position according to destination") + p.add_option("--reset", action = "store_true", + help = "reset queue pos on destination side") + return p + + def process_batch(self, db, batch_id, event_list): + """Process all events in batch. + """ + + dst_db = self.get_database(self.remote_db) + curs = dst_db.cursor() + + self.cur_batch_info = self.get_batch_info(batch_id) + + # check if done + if self.is_batch_done(curs): + for ev in event_list: + ev.tag_done() + return + + # actual work + self.process_remote_batch(db, batch_id, event_list, dst_db) + + # finish work + self.set_batch_done(curs) + dst_db.commit() + + def is_batch_done(self, dst_curs): + """Helper function to keep track of last successful batch + in external database. + """ + + cur_tick = self.cur_batch_info['tick_id'] + prev_tick = self.cur_batch_info['prev_tick_id'] + + dst_tick = self.get_last_tick(dst_curs) + if not dst_tick: + # seems this consumer has not run yet against dst_db + return False + + if prev_tick == dst_tick: + # on track + return False + + if cur_tick == dst_tick: + # current batch is already applied, skip it + return True + + # anything else means problems + raise Exception('Lost position: batch %d..%d, dst has %d' % ( + prev_tick, cur_tick, dst_tick)) + + def set_batch_done(self, dst_curs): + """Helper function to set last successful batch + in external database. 
+ """ + tick_id = self.cur_batch_info['tick_id'] + self.set_last_tick(dst_curs, tick_id) + + def register_consumer(self): + new = Consumer.register_consumer(self) + if new: # fixme + self.dst_reset() + + def unregister_consumer(self): + """If unregistering, also clean completed tick table on dest.""" + + Consumer.unregister_consumer(self) + self.dst_reset() + + def process_remote_batch(self, db, batch_id, event_list, dst_db): + raise Exception('process_remote_batch not implemented') + + def rewind(self): + self.log.info("Rewinding queue") + src_db = self.get_database(self.db_name) + dst_db = self.get_database(self.remote_db) + src_curs = src_db.cursor() + dst_curs = dst_db.cursor() + + dst_tick = self.get_last_tick(dst_curs) + if dst_tick: + q = "select pgq.register_consumer_at(%s, %s, %s)" + src_curs.execute(q, [self.queue_name, self.consumer_name, dst_tick]) + else: + self.log.warning('No tick found on dst side') + + dst_db.commit() + src_db.commit() + + def dst_reset(self): + self.log.info("Resetting queue tracking on dst side") + dst_db = self.get_database(self.remote_db) + dst_curs = dst_db.cursor() + self.set_last_tick(dst_curs, None) + dst_db.commit() + + def get_last_tick(self, dst_curs): + q = "select %s.get_last_tick(%%s)" % self.dst_schema + dst_curs.execute(q, [self.consumer_name]) + res = dst_curs.fetchone() + return res[0] + + def set_last_tick(self, dst_curs, tick_id): + q = "select %s.set_last_tick(%%s, %%s)" % self.dst_schema + dst_curs.execute(q, [ self.consumer_name, tick_id ]) + + diff --git a/python/pgq/setadmin.py b/python/pgq/setadmin.py deleted file mode 100644 index 0c500468..00000000 --- a/python/pgq/setadmin.py +++ /dev/null @@ -1,463 +0,0 @@ -#! /usr/bin/env python - -import sys, time, optparse, skytools - -from pgq.setinfo import * - -__all__ = ['SetAdmin'] - -command_usage = """\ -%prog [options] INI CMD [subcmd args] - -Node Initialization: - init-root NODE_NAME NODE_CONSTR - init-branch NODE_NAME NODE_CONSTR --provider=<constr> - init-leaf NODE_NAME NODE_CONSTR --provider=<constr> - Initializes node. Given connstr is kept as global connstring - for that node. Those commands ignore node_db in .ini. - The --provider connstr is used only for initial set info - fetching, later actual provider's connect string is used. - -Node Administration: - status Show set state - members Show members in set - rename-node OLD NEW Rename a node - change-provider NODE NEWSRC - pause NODE - resume NODE - - switchover NEWROOT - failover NEWROOT - tag-dead NODE .. Tag node as dead - tag-alive NODE .. Tag node as alive -""" - -class SetAdmin(skytools.AdminScript): - set_name = None - extra_objs = [] - initial_db_name = 'node_db' - - def init_optparse(self, parser = None): - p = skytools.AdminScript.init_optparse(self, parser) - p.set_usage(command_usage.strip()) - - g = optparse.OptionGroup(p, "actual setadm options") - g.add_option("--connstr", action="store_true", - help = "initial connect string") - g.add_option("--provider", - help = "init: connect string for provider") - p.add_option_group(g) - return p - - def reload(self): - skytools.AdminScript.reload(self) - self.set_name = self.cf.get('set_name') - - # - # Node initialization. 
- # - - def cmd_init_root(self, node_name, node_location): - self.init_node('root', node_name, node_location) - - def cmd_init_branch(self, node_name, node_location): - if len(args) != 2: - raise Exception('init-branch needs 2 args') - self.init_node('branch', node_name, node_location) - - def cmd_init_leaf(self, node_name, node_location): - self.init_node('leaf', node_name, node_location) - - def init_node(self, node_type, node_name, node_location): - provider_loc = self.options.provider - - # connect to database - db = self.get_database("new_node", connstr = node_location) - - # check if code is installed - self.install_code(db) - - # query current status - res = self.exec_query(db, "select * from pgq_set.get_node_info(%s)", [self.set_name]) - info = res[0] - if info['node_type'] is not None: - self.log.info("Node is already initialized as %s" % info['node_type']) - return - - self.log.info("Initializing node") - - # register member - if node_type in ('root', 'combined-root'): - global_watermark = None - combined_set = None - provider_name = None - self.exec_cmd(db, "select * from pgq_set.add_member(%s, %s, %s, false)", - [self.set_name, node_name, node_location]) - self.exec_cmd(db, "select * from pgq_set.create_node(%s, %s, %s, %s, %s, %s)", - [self.set_name, node_type, node_name, provider_name, global_watermark, combined_set]) - provider_db = None - else: - root_db = self.find_root_db(provider_loc) - set = self.load_set_info(root_db) - - # check if member already exists - if set.get_member(node_name) is not None: - self.log.error("Node '%s' already exists" % node_name) - sys.exit(1) - - combined_set = None - - provider_db = self.get_database('provider_db', connstr = provider_loc) - q = "select node_type, node_name from pgq_set.get_node_info(%s)" - res = self.exec_query(provider_db, q, [self.set_name]) - row = res[0] - if not row['node_name']: - raise Exception("provider node not found") - provider_name = row['node_name'] - - # register member on root - self.exec_cmd(root_db, "select * from pgq_set.add_member(%s, %s, %s, false)", - [self.set_name, node_name, node_location]) - - # lookup provider - provider = set.get_member(provider_name) - if not provider: - self.log.error("Node %s does not exist" % provider_name) - sys.exit(1) - - # register on provider - self.exec_cmd(provider_db, "select * from pgq_set.add_member(%s, %s, %s, false)", - [self.set_name, node_name, node_location]) - rows = self.exec_cmd(provider_db, "select * from pgq_set.subscribe_node(%s, %s)", - [self.set_name, node_name]) - global_watermark = rows[0]['global_watermark'] - - # initialize node itself - - # insert members - self.exec_cmd(db, "select * from pgq_set.add_member(%s, %s, %s, false)", - [self.set_name, node_name, node_location]) - for m in set.member_map.values(): - self.exec_cmd(db, "select * from pgq_set.add_member(%s, %s, %s, %s)", - [self.set_name, m.name, m.location, m.dead]) - - # real init - self.exec_cmd(db, "select * from pgq_set.create_node(%s, %s, %s, %s, %s, %s)", - [self.set_name, node_type, node_name, provider_name, - global_watermark, combined_set]) - - - self.extra_init(node_type, db, provider_db) - - self.log.info("Done") - - def extra_init(self, node_type, node_db, provider_db): - pass - - def find_root_db(self, initial_loc = None): - if initial_loc: - loc = initial_loc - else: - loc = self.cf.get(self.initial_db_name) - - while 1: - db = self.get_database('root_db', connstr = loc) - - - # query current status - res = self.exec_query(db, "select * from pgq_set.get_node_info(%s)", 
[self.set_name]) - info = res[0] - type = info['node_type'] - if type is None: - self.log.info("Root node not initialized?") - sys.exit(1) - - self.log.debug("db='%s' -- type='%s' provider='%s'" % (loc, type, info['provider_location'])) - # configured db may not be root anymore, walk upwards then - if type in ('root', 'combined-root'): - db.commit() - return db - - self.close_database('root_db') - if loc == info['provider_location']: - raise Exception("find_root_db: got loop: %s" % loc) - loc = info['provider_location'] - if loc is None: - self.log.error("Sub node provider not initialized?") - sys.exit(1) - - def install_code(self, db): - objs = [ - skytools.DBLanguage("plpgsql"), - skytools.DBFunction("txid_current_snapshot", 0, sql_file="txid.sql"), - skytools.DBSchema("pgq", sql_file="pgq.sql"), - skytools.DBSchema("pgq_ext", sql_file="pgq_ext.sql"), - skytools.DBSchema("pgq_set", sql_file="pgq_set.sql"), - ] - objs += self.extra_objs - skytools.db_install(db.cursor(), objs, self.log) - db.commit() - - # - # Print status of whole set. - # - - def cmd_status(self): - root_db = self.find_root_db() - sinf = self.load_set_info(root_db) - - for mname, minf in sinf.member_map.iteritems(): - db = self.get_database('look_db', connstr = minf.location, autocommit = 1) - curs = db.cursor() - curs.execute("select * from pgq_set.get_node_info(%s)", [self.set_name]) - node = NodeInfo(self.set_name, curs.fetchone()) - node.load_status(curs) - self.load_extra_status(curs, node) - sinf.add_node(node) - self.close_database('look_db') - - sinf.print_tree() - - def load_extra_status(self, curs, node): - pass - - # - # Normal commands. - # - - def cmd_change_provider(self, node_name, new_provider): - old_provider = None - - self.load_local_info() - node_location = self.set_info.get_member(node_name).location - node_db = self.get_node_database(node_name) - node_set_info = self.load_set_info(node_db) - node = node_set_info.local_node - old_provider = node.provider_node - - if old_provider == new_provider: - self.log.info("Node %s has already %s as provider" % (node_name, new_provider)) - - # pause target node - self.pause_node(node_name) - - # reload node info - node_set_info = self.load_set_info(node_db) - node = node_set_info.local_node - - # subscribe on new provider - q = "select * from pgq_set.add_member(%s, %s, %s, false)" - self.node_cmd(new_provider, q, [self.set_name, node_name, node_location]) - q = 'select * from pgq_set.subscribe_node(%s, %s, %s)' - self.node_cmd(new_provider, q, [self.set_name, node_name, node.completed_tick]) - - # change provider on node - q = 'select * from pgq_set.change_provider(%s, %s)' - self.node_cmd(node_name, q, [self.set_name, new_provider]) - - # unsubscribe from old provider - q = "select * from pgq_set.unsubscribe_node(%s, %s)" - self.node_cmd(old_provider, q, [self.set_name, node_name]) - - # resume node - self.resume_node(node_name) - - def cmd_rename_node(self, old_name, new_name): - - self.load_local_info() - - root_db = self.find_root_db() - - # pause target node - self.pause_node(old_name) - node = self.load_node_info(old_name) - provider_node = node.provider_node - subscriber_list = self.get_node_subscriber_list(old_name) - - - # create copy of member info / subscriber+queue info - step1 = 'select * from pgq_set.rename_node_step1(%s, %s, %s)' - # rename node itself, drop copies - step2 = 'select * from pgq_set.rename_node_step2(%s, %s, %s)' - - # step1 - self.exec_cmd(root_db, step1, [self.set_name, old_name, new_name]) - self.node_cmd(provider_node, step1, 
[self.set_name, old_name, new_name]) - self.node_cmd(old_name, step1, [self.set_name, old_name, new_name]) - for child in subscriber_list: - self.node_cmd(child, step1, [self.set_name, old_name, new_name]) - - # step1 - self.node_cmd(old_name, step2, [self.set_name, old_name, new_name]) - self.node_cmd(provider_node, step1, [self.set_name, old_name, new_name]) - for child in subscriber_list: - self.node_cmd(child, step2, [self.set_name, old_name, new_name]) - self.exec_cmd(root_db, step2, [self.set_name, old_name, new_name]) - - # resume node - self.resume_node(old_name) - - def switchover_nonroot(self, old_node, new_node): - # see if we need to change new nodes' provider - tmp_node = new_node - while 1: - if tmp_node.is_root(): - break - if tmp_node.name == old_node: - # yes, old_node is new_nodes provider, - # switch it around - self.change_provider(new_node, old_node.parent_node) - break - self.change_provider(old_node.name, new_node.name) - - def switchover_root(self, old_node, new_node): - self.pause_node(old_node.name) - self.extra_lockdown(old_node) - - self.wait_for_catchup(new_node, old_node) - self.pause_node(new_node.name) - self.promote_node(new_node.name) - self.subscribe_node(new_node.name, old_node.name, tick_pos) - self.unsubscribe_node(new_node.parent_node, new_node.name) - self.resume_node(new_node.name) - - # demote & set provider on node - q = 'select * from pgq_set.demote_root(%s, %s)' - self.node_cmd(old_node.name, q, [self.set_name, new_node.name]) - - self.resume_node(old_node.name) - - def cmd_switchover(self, old_node_name, new_node_name): - self.load_local_info() - old_node = self.get_node_info(old_node_name) - new_node = self.get_node_info(new_node_name) - if old_node.name == new_node.name: - self.log.info("same node?") - return - - if old_node.is_root(): - self.switchover_root(old_node, new_node) - else: - self.switchover_nonroot(old_node, new_node) - - # switch subscribers around - if self.options.all: - for n in self.get_node_subscriber_list(old_node.name): - self.change_provider(n, new_node.name) - - def cmd_pause(self, node_name): - self.load_local_info() - self.pause_node(node_name) - - def cmd_resume(self, node_name): - self.load_local_info() - self.resume_node(node_name) - - def cmd_members(self): - db = self.get_database(self.initial_db_name) - q = "select node_name from pgq_set.get_node_info(%s)" - rows = self.exec_query(db, q, [self.set_name]) - - desc = 'Member info on %s:' % rows[0]['node_name'] - q = "select node_name, dead, node_location"\ - " from pgq_set.get_member_info(%s) order by 1" - self.display_table(db, desc, q, [self.set_name]) - - # - # Shortcuts for operating on nodes. - # - - def load_local_info(self): - """fetch set info from local node.""" - db = self.get_database(self.initial_db_name) - self.set_info = self.load_set_info(db) - - def get_node_database(self, node_name): - """Connect to node.""" - if node_name == self.set_info.local_node.name: - db = self.get_database(self.initial_db_name) - else: - m = self.set_info.get_member(node_name) - if not m: - self.log.error("cannot resolve %s" % node_name) - sys.exit(1) - loc = m.location - db = self.get_database('node.' + node_name, connstr = loc) - return db - - def close_node_database(self, node_name): - """Disconnect node's connection.""" - if node_name == self.set_info.local_node.name: - self.close_database(self.initial_db_name) - else: - self.close_database("node." 
+ node_name) - - def node_cmd(self, node_name, sql, args): - """Execute SQL command on particular node.""" - db = self.get_node_database(node_name) - return self.exec_cmd(db, sql, args) - - # - # Various operation on nodes. - # - - def set_paused(self, db, pause_flag): - q = "select * from pgq_set.set_node_paused(%s, %s)" - self.exec_cmd(db, q, [self.set_name, pause_flag]) - - self.log.info('Waiting for worker to accept') - while 1: - q = "select * from pgq_set.get_node_info(%s)" - stat = self.exec_query(db, q, [self.set_name])[0] - if stat['paused'] != pause_flag: - raise Exception('operation canceled? %s <> %s' % (repr(stat['paused']), repr(pause_flag))) - - if stat['uptodate']: - break - time.sleep(1) - - op = pause_flag and "paused" or "resumed" - - self.log.info("Node %s %s" % (stat['node_name'], op)) - - def pause_node(self, node_name): - db = self.get_node_database(node_name) - self.set_paused(db, True) - - def resume_node(self, node_name): - db = self.get_node_database(node_name) - self.set_paused(db, False) - - def subscribe_node(self, target_node, subscriber_node, tick_pos): - q = "select * from pgq_set.subscribe_node(%s, %s, %s)" - self.node_cmd(target_node, q, [self.set_name, subscribe_node, tick_pos]) - - def unsubscribe_node(self, target_node, subscriber_node): - q = "select * from pgq_set.unsubscribe_node(%s, %s)" - self.node_cmd(target_node, q, [self.set_name, subscribe_node]) - - def load_node_info(self, node_name): - db = self.get_node_database(node_name) - q = "select * from pgq_set.get_node_info(%s)" - rows = self.exec_query(db, q, [self.set_name]) - return NodeInfo(self.set_name, rows[0]) - - def load_set_info(self, db): - res = self.exec_query(db, "select * from pgq_set.get_node_info(%s)", [self.set_name]) - info = res[0] - - q = "select * from pgq_set.get_member_info(%s)" - member_list = self.exec_query(db, q, [self.set_name]) - - return SetInfo(self.set_name, info, member_list) - - def get_node_subscriber_list(self, node_name): - q = "select node_name, local_watermark from pgq_set.get_subscriber_info(%s)" - db = self.get_node_database(node_name) - rows = self.exec_query(db, q, [self.set_name]) - return [r['node_name'] for r in rows] - -if __name__ == '__main__': - script = SetAdmin('set_admin', sys.argv[1:]) - script.start() - diff --git a/python/pgq/setconsumer.py b/python/pgq/setconsumer.py deleted file mode 100644 index d17ecf5f..00000000 --- a/python/pgq/setconsumer.py +++ /dev/null @@ -1,247 +0,0 @@ -#! 
/usr/bin/env python - -import sys, time, skytools - -from pgq.rawconsumer import RawQueue -from pgq.setinfo import * - -__all__ = ['SetConsumer'] - -class SetConsumer(skytools.DBScript): - last_local_wm_publish_time = 0 - last_global_wm_publish_time = 0 - main_worker = True - reg_ok = False - actual_dst_event_id = 0 - batch_max_event_id = 0 - seq_buffer = 10000 - def __init__(self, service_name, args, - node_db_name = 'node_db'): - skytools.DBScript.__init__(self, service_name, args) - self.node_db_name = node_db_name - self.consumer_name = self.cf.get('consumer_name', self.job_name) - - def work(self): - self.tick_id_cache = {} - - self.set_name = self.cf.get('set_name') - dst_db = self.get_database(self.node_db_name) - dst_curs = dst_db.cursor() - - dst_node = self.load_node_info(dst_db) - if self.main_worker: - self.consumer_name = dst_node.name - if not dst_node.uptodate: - self.tag_node_uptodate(dst_db) - - if dst_node.paused: - return 0 - - if dst_node.need_action('global-wm-event'): - self.publish_global_watermark(dst_db, dst_node.local_watermark) - - if not dst_node.need_action('process-batch'): - return 0 - - # - # batch processing follows - # - - src_db = self.get_database('src_db', connstr = dst_node.provider_location) - src_curs = src_db.cursor() - src_node = self.load_node_info(src_db) - - # get batch - src_queue = RawQueue(src_node.queue_name, self.consumer_name) - self.src_queue = src_queue - self.dst_queue = None - - if not self.main_worker and not self.reg_ok: - self.register_consumer(src_curs) - - batch_id = src_queue.next_batch(src_curs) - src_db.commit() - if batch_id is None: - return 0 - - self.log.debug("New batch: tick_id=%d / batch_id=%d" % (src_queue.cur_tick, batch_id)) - - if dst_node.need_action('wait-behind'): - if dst_node.should_wait(src_queue.cur_tick): - return 0 - - if dst_node.need_action('process-events'): - # load and process batch data - ev_list = src_queue.get_batch_events(src_curs) - - if dst_node.need_action('copy-events'): - self.dst_queue = RawQueue(dst_node.get_target_queue(), self.consumer_name) - self.process_set_batch(src_db, dst_db, ev_list) - if self.dst_queue: - self.dst_queue.finish_bulk_insert(dst_curs) - self.copy_tick(dst_curs, src_queue, self.dst_queue) - - # COMBINED_BRANCH needs to sync with part sets - if dst_node.need_action('sync-part-pos'): - self.move_part_positions(dst_curs) - if dst_node.need_action('update-event-seq'): - self.update_event_seq(dst_curs) - - # we are done on target - self.set_tick_complete(dst_curs, src_queue.cur_tick) - dst_db.commit() - - # done on source - src_queue.finish_batch(src_curs) - src_db.commit() - - # occasinally send watermark upwards - if dst_node.need_action('local-wm-publish'): - self.send_local_watermark_upwards(src_db, dst_node) - - # got a batch so there can be more - return 1 - - def process_set_batch(self, src_db, dst_db, ev_list): - dst_curs = dst_db.cursor() - max_id = 0 - for ev in ev_list: - self.process_set_event(dst_curs, ev) - if self.dst_queue: - self.dst_queue.bulk_insert(dst_curs, ev) - if ev.id > max_id: - max_id = ev.id - self.batch_max_event_id = max_id - self.stat_increase('count', len(ev_list)) - - def update_event_seq(self, dst_curs): - qname = self.dst_queue.queue_name - if self.actual_dst_event_id == 0: - q = "select pgq.seq_getval(queue_event_seq) from pgq.queue where queue_name = %s" - dst_curs.execute(q, [qname]) - self.actual_dst_event_id = dst_curs.fetchone()[0] - self.log.debug('got local event_id value = %d' % self.actual_dst_event_id) - - if 
self.batch_max_event_id + self.seq_buffer >= self.actual_dst_event_id: - next_id = self.batch_max_event_id + 2 * self.seq_buffer - q = "select pgq.seq_setval(queue_event_seq, %s) from pgq.queue where queue_name = %s" - self.log.debug('set local event_id value = %d' % next_id) - dst_curs.execute(q, [next_id, qname]) - self.actual_dst_event_id = next_id - - def process_set_event(self, dst_curs, ev): - if ev.type == 'set-tick': - self.handle_set_tick(dst_curs, ev) - elif ev.type == 'member-info': - self.handle_member_info(dst_curs, ev) - elif ev.type == 'global-watermark': - self.handle_global_watermark(dst_curs, ev) - else: - raise Exception('bad event for set consumer') - - def handle_global_watermark(self, dst_curs, ev): - set_name = ev.extra1 - tick_id = ev.data - if set_name == self.set_name: - self.set_global_watermark(dst_curs, tick_id) - - def handle_set_tick(self, dst_curs, ev): - data = skytools.db_urldecode(ev.data) - set_name = data['set_name'] - tick_id = data['tick_id'] - self.tick_id_cache[set_name] = tick_id - - def move_part_positions(self, dst_curs): - q = "select * from pgq_set.set_partition_watermark(%s, %s, %s)" - for set_name, tick_id in self.tick_id_cache.items(): - dst_curs.execute(q, [self.set_name, set_name, tick_id]) - - def handle_member_info(self, dst_curs, ev): - node_name = ev.ev_data - set_name = ev.ev_extra1 - node_location = ev.ev_extra2 - dead = ev.ev_extra3 - # this can also be member for part set, ignore then - if set_name != self.set_name: - return - - q = "select * from pgq_set.add_member(%s, %s, %s, %s)" - dst_curs.execute(q, [set_name, node_name, node_location, dead]) - - def send_local_watermark_upwards(self, src_db, node): - # fixme - delay - now = time.time() - delay = now - self.last_local_wm_publish_time - if delay < 1*60: - return - self.last_local_wm_publish_time = now - - self.log.debug("send_local_watermark_upwards") - src_curs = src_db.cursor() - q = "select pgq_set.set_subscriber_watermark(%s, %s, %s)" - src_curs.execute(q, [self.set_name, node.name, node.local_watermark]) - src_db.commit() - - def set_global_watermark(self, dst_curs, tick_id): - self.log.debug("set_global_watermark: %s" % tick_id) - q = "select pgq_set.set_global_watermark(%s, %s)" - dst_curs.execute(q, [self.set_name, tick_id]) - - def publish_global_watermark(self, dst_db, watermark): - now = time.time() - delay = now - self.last_global_wm_publish_time - if delay < 1*60: - return - self.last_global_wm_publish_time = now - - self.set_global_watermark(dst_db.cursor(), watermark) - dst_db.commit() - - def load_node_info(self, db): - curs = db.cursor() - - q = "select * from pgq_set.get_node_info(%s)" - curs.execute(q, [self.set_name]) - node_row = curs.dictfetchone() - if not node_row: - raise Exception('node not initialized') - - q = "select * from pgq_set.get_member_info(%s)" - curs.execute(q, [self.set_name]) - mbr_list = curs.dictfetchall() - db.commit() - - return NodeInfo(self.set_name, node_row, self.main_worker) - - def tag_node_uptodate(self, dst_db): - dst_curs = dst_db.cursor() - q = "select * from pgq_set.set_node_uptodate(%s, true)" - dst_curs.execute(q, [self.set_name]) - dst_db.commit() - - def copy_tick(self, dst_curs, src_queue, dst_queue): - q = "select * from pgq.ticker(%s, %s, %s)" - dst_curs.execute(q, [dst_queue.queue_name, src_queue.cur_tick, src_queue.tick_time]) - - def set_tick_complete(self, dst_curs, tick_id): - q = "select * from pgq_set.set_completed_tick(%s, %s, %s)" - dst_curs.execute(q, [self.set_name, self.consumer_name, tick_id]) - - def 
register_consumer(self, src_curs): - if self.main_worker: - raise Exception('main set worker should not play with registrations') - - q = "select * from pgq.register_consumer(%s, %s)" - src_curs.execute(q, [self.src_queue.queue_name, self.consumer_name]) - - def unregister_consumer(self, src_curs): - if self.main_worker: - raise Exception('main set worker should not play with registrations') - - q = "select * from pgq.unregister_consumer(%s, %s)" - src_curs.execute(q, [self.src_queue.queue_name, self.consumer_name]) - -if __name__ == '__main__': - script = SetConsumer('setconsumer', sys.argv[1:]) - script.start() - diff --git a/python/pgq/status.py b/python/pgq/status.py index f17539de..d3f285c0 100644 --- a/python/pgq/status.py +++ b/python/pgq/status.py @@ -4,15 +4,18 @@ import sys, os, skytools -def ival(data, as = None): +__all__ = ['PGQStatus'] + +def ival(data, _as = None): "Format interval for output" - if not as: - as = data.split('.')[-1] + if not _as: + _as = data.split('.')[-1] numfmt = 'FM9999999' expr = "coalesce(to_char(extract(epoch from %s), '%s') || 's', 'NULL') as %s" - return expr % (data, numfmt, as) + return expr % (data, numfmt, _as) class PGQStatus(skytools.DBScript): + """Info gathering and display.""" def __init__(self, args, check = 0): skytools.DBScript.__init__(self, 'pgqadm', args) @@ -28,7 +31,7 @@ class PGQStatus(skytools.DBScript): pgver = cx.fetchone()[0] cx.execute("select pgq.version()") qver = cx.fetchone()[0] - print "Postgres version: %s PgQ version: %s" % (pgver, qver) + print("Postgres version: %s PgQ version: %s" % (pgver, qver)) q = """select f.queue_name, f.queue_ntables, %s, %s, %s, %s, q.queue_ticker_max_count @@ -50,36 +53,37 @@ class PGQStatus(skytools.DBScript): cx.execute(q) consumer_rows = cx.dictfetchall() - print "\n%-45s %9s %13s %6s" % ('Event queue', - 'Rotation', 'Ticker', 'TLag') - print '-' * 78 + print("\n%-45s %9s %13s %6s" % ('Event queue', + 'Rotation', 'Ticker', 'TLag')) + print('-' * 78) for ev_row in event_rows: tck = "%s/%s/%s" % (ev_row['queue_ticker_max_count'], ev_row['queue_ticker_max_lag'], ev_row['queue_ticker_idle_period']) rot = "%s/%s" % (ev_row['queue_ntables'], ev_row['queue_rotation_period']) - print "%-45s %9s %13s %6s" % ( + print("%-45s %9s %13s %6s" % ( ev_row['queue_name'], rot, tck, ev_row['ticker_lag'], - ) - print '-' * 78 - print "\n%-56s %9s %9s" % ( - 'Consumer', 'Lag', 'LastSeen') - print '-' * 78 + )) + print('-' * 78) + print("\n%-56s %9s %9s" % ( + 'Consumer', 'Lag', 'LastSeen')) + print('-' * 78) for ev_row in event_rows: cons = self.pick_consumers(ev_row, consumer_rows) self.show_queue(ev_row, cons) - print '-' * 78 + print('-' * 78) db.commit() def show_consumer(self, cons): - print " %-54s %9s %9s" % ( + print(" %-54s %9s %9s" % ( cons['consumer_name'], - cons['lag'], cons['last_seen']) + cons['lag'], cons['last_seen'])) + def show_queue(self, ev_row, consumer_rows): - print "%(queue_name)s:" % ev_row + print("%(queue_name)s:" % ev_row) for cons in consumer_rows: self.show_consumer(cons) diff --git a/python/pgq/ticker.py b/python/pgq/ticker.py index 327864d5..ba8aadb2 100644 --- a/python/pgq/ticker.py +++ b/python/pgq/ticker.py @@ -8,74 +8,10 @@ import skytools from maint import MaintenanceJob -__all__ = ['SmartTicker'] +__all__ = ['SmallTicker'] -def is_txid_sane(curs): - curs.execute("select txid_current()") - txid = curs.fetchone()[0] - - # on 8.2 theres no such table - if not skytools.exists_table(curs, 'txid.epoch'): - return 1 - - curs.execute("select epoch, last_value from txid.epoch") - 
epoch, last_val = curs.fetchone() - stored_val = (epoch << 32) | last_val - - if stored_val <= txid: - return 1 - else: - return 0 - -class QueueStatus(object): - def __init__(self, name): - self.queue_name = name - self.seq_name = None - self.idle_period = 60 - self.max_lag = 3 - self.max_count = 200 - self.last_tick_time = 0 - self.last_count = 0 - self.quiet_count = 0 - - def set_data(self, row): - self.seq_name = row['queue_event_seq'] - self.idle_period = row['queue_ticker_idle_period'] - self.max_lag = row['queue_ticker_max_lag'] - self.max_count = row['queue_ticker_max_count'] - - def need_tick(self, cur_count, cur_time): - # check if tick is needed - need_tick = 0 - lag = cur_time - self.last_tick_time - - if cur_count == self.last_count: - # totally idle database - - # don't go immidiately to big delays, as seq grows before commit - if self.quiet_count < 5: - if lag >= self.max_lag: - need_tick = 1 - self.quiet_count += 1 - else: - if lag >= self.idle_period: - need_tick = 1 - else: - self.quiet_count = 0 - # somewhat loaded machine - if cur_count - self.last_count >= self.max_count: - need_tick = 1 - elif lag >= self.max_lag: - need_tick = 1 - if need_tick: - self.last_tick_time = cur_time - self.last_count = cur_count - return need_tick - -class SmartTicker(skytools.DBScript): - last_tick_event = 0 - last_tick_time = 0 - quiet_count = 0 +class SmallTicker(skytools.DBScript): + """Ticker that periodically calls pgq.ticker().""" tick_count = 0 maint_thread = None @@ -84,8 +20,6 @@ class SmartTicker(skytools.DBScript): self.ticker_log_time = 0 self.ticker_log_delay = 5*60 - self.queue_map = {} - self.refresh_time = 0 def reload(self): skytools.DBScript.reload(self) @@ -95,79 +29,22 @@ class SmartTicker(skytools.DBScript): if self.maint_thread: return - db = self.get_database("db", autocommit = 1) - cx = db.cursor() - ok = is_txid_sane(cx) - if not ok: - self.log.error('txid in bad state') - sys.exit(1) - + # launch maint thread self.maint_thread = MaintenanceJob(self, [self.cf.filename]) t = threading.Thread(name = 'maint_thread', target = self.maint_thread.run) t.setDaemon(1) t.start() - def refresh_queues(self, cx): - q = "select queue_name, queue_event_seq,"\ - " extract('epoch' from queue_ticker_idle_period) as queue_ticker_idle_period,"\ - " extract('epoch' from queue_ticker_max_lag) as queue_ticker_max_lag,"\ - " queue_ticker_max_count"\ - " from pgq.queue"\ - " where not queue_external_ticker" - cx.execute(q) - new_map = {} - data_list = [] - from_list = [] - for row in cx.dictfetchall(): - queue_name = row['queue_name'] - try: - que = self.queue_map[queue_name] - except KeyError, x: - que = QueueStatus(queue_name) - que.set_data(row) - new_map[queue_name] = que - - p1 = "'%s', %s.last_value" % (queue_name, que.seq_name) - data_list.append(p1) - from_list.append(que.seq_name) - - self.queue_map = new_map - self.seq_query = "select %s from %s" % ( - ", ".join(data_list), - ", ".join(from_list)) - - if len(from_list) == 0: - self.seq_query = None - - self.refresh_time = time.time() - def work(self): db = self.get_database("db", autocommit = 1) cx = db.cursor() - queue_refresh = self.cf.getint('queue_refresh_period', 30) - cur_time = time.time() - - if cur_time >= self.refresh_time + queue_refresh: - self.refresh_queues(cx) - - if not self.seq_query: - return - - # now check seqs - cx.execute(self.seq_query) - res = cx.fetchone() - pos = 0 - while pos < len(res): - id = res[pos] - val = res[pos + 1] - pos += 2 - que = self.queue_map[id] - if que.need_tick(val, cur_time): - 
cx.execute("select pgq.ticker(%s)", [que.queue_name]) - self.tick_count += 1 + # run ticker + cx.execute("select pgq.ticker()") + self.tick_count += cx.fetchone()[0] + cur_time = time.time() if cur_time > self.ticker_log_time + self.ticker_log_delay: self.ticker_log_time = cur_time self.stat_increase('ticks', self.tick_count) diff --git a/python/pgqadm.py b/python/pgqadm.py index 7c2bf740..2687ed8a 100755 --- a/python/pgqadm.py +++ b/python/pgqadm.py @@ -4,11 +4,8 @@ """ import sys -import skytools - -from pgq.ticker import SmartTicker -from pgq.status import PGQStatus -#from pgq.admin import PGQAdmin +import skytools, pgq +from pgq.cascade.admin import CascadeAdmin """TODO: pgqadm ini check @@ -17,7 +14,7 @@ pgqadm ini check command_usage = """ %prog [options] INI CMD [subcmd args] -commands: +local queue commands: ticker start ticking & maintenance process status show overview of queue health @@ -28,6 +25,17 @@ commands: register QNAME CONS install code into db unregister QNAME CONS install code into db config QNAME [VAR=VAL] show or change queue config + +cascaded queue commands: + create-node + rename-node + pause-node + resume-node + change-provider + tag-alive + tag-dead + switchover + failover """ config_allowed_list = { @@ -38,12 +46,14 @@ config_allowed_list = { } class PGQAdmin(skytools.DBScript): + """PgQ admin + maint script.""" def __init__(self, args): + """Initialize pgqadm.""" skytools.DBScript.__init__(self, 'pgqadm', args) self.set_single_loop(1) if len(self.args) < 2: - print "need command" + print("need command") sys.exit(1) int_cmds = { @@ -55,16 +65,20 @@ class PGQAdmin(skytools.DBScript): 'config': self.change_config, } + cascade_cmds = ['create-node'] + cmd = self.args[1] if cmd == "ticker": - script = SmartTicker(args) + script = pgq.SmallTicker(args) elif cmd == "status": - script = PGQStatus(args) + script = pgq.PGQStatus(args) + elif cmd in cascade_cmds: + script = CascadeAdmin(self.service_name, 'db', args) elif cmd in int_cmds: script = None self.work = int_cmds[cmd] else: - print "unknown command" + print("unknown command") sys.exit(1) if self.pidfile: @@ -80,6 +94,7 @@ class PGQAdmin(skytools.DBScript): def init_optparse(self, parser=None): p = skytools.DBScript.init_optparse(self, parser) p.set_usage(command_usage.strip()) + p.add_option("--queue", help = 'cascading: specify queue name') return p def installer(self): @@ -118,8 +133,8 @@ class PGQAdmin(skytools.DBScript): def change_config(self): if len(self.args) < 3: - list = self.get_queue_list() - for qname in list: + qlist = self.get_queue_list() + for qname in qlist: self.show_config(qname) return @@ -139,7 +154,7 @@ class PGQAdmin(skytools.DBScript): expr = "%s=%s" % (k, skytools.quote_literal(v)) alist.append(expr) self.log.info('Change queue %s config to: %s' % (qname, ", ".join(alist))) - sql = "update pgq.queue set %s where queue_name = %s" % ( + sql = "update pgq.queue set %s where queue_name = %s" % ( ", ".join(alist), skytools.quote_literal(qname)) self.exec_sql(sql, []) @@ -168,15 +183,15 @@ class PGQAdmin(skytools.DBScript): db.commit() if res is None: - print "no such queue:", qname + print("no such queue: " + qname) return - print qname + print(qname) for k in config_allowed_list: n = k if k[:6] == "queue_": n = k[6:] - print " %s\t=%7s" % (n, res[k]) + print(" %s\t=%7s" % (n, res[k])) def get_queue_list(self): db = self.get_database('db') @@ -185,10 +200,10 @@ class PGQAdmin(skytools.DBScript): rows = curs.fetchall() db.commit() - list = [] + qlist = [] for r in rows: - list.append(r[0]) - 
return list + qlist.append(r[0]) + return qlist if __name__ == '__main__': script = PGQAdmin(sys.argv[1:]) diff --git a/python/setadm.py b/python/setadm.py index 61cfdc77..2c93d4ef 100755 --- a/python/setadm.py +++ b/python/setadm.py @@ -1,8 +1,11 @@ #! /usr/bin/env python -import sys, pgq.setadmin +"""SetAdmin launcher. +""" + +import sys, pgq.cascade.admin if __name__ == '__main__': - script = pgq.setadmin.SetAdmin('set_admin', sys.argv[1:]) + script = pgq.cascade.admin.CascadeAdmin('cascade_admin', 'node_db', sys.argv[1:], worker_setup = False) script.start()
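
A minimal usage sketch (illustration only, not part of this commit): the RemoteConsumer class moved into python/pgq/remoteconsumer.py above leaves process_remote_batch() unimplemented, so a subclass is expected to supply it, while the base class handles batch tracking through pgq_ext and commits the target database. The job name, database section names and the event_log table below are hypothetical; the .ini keys are assumed to follow the usual skytools.DBScript conventions and are not defined by this diff.

#! /usr/bin/env python

"""Illustration only: copy events from the source queue into a log
table on a second database, using the RemoteConsumer base class above.
Job name, db section names and the event_log table are hypothetical."""

import sys

from pgq.remoteconsumer import RemoteConsumer

class CopyConsumer(RemoteConsumer):
    """A subclass only needs to fill in process_remote_batch();
    is_last_batch()/set_last_batch() in the base class keep the
    pgq_ext batch tracking on the destination side."""

    def process_remote_batch(self, db, batch_id, event_list, dst_db):
        curs = dst_db.cursor()
        for ev in event_list:
            # Event rows are accessible both as attributes and dict-style
            # (see the event.py changes above); tag each one when handled.
            curs.execute("insert into event_log (ev_type, ev_data) values (%s, %s)",
                         [ev.type, ev.data])
            ev.tag_done()

if __name__ == '__main__':
    # 'src_db' and 'dst_db' are assumed connect-string entries in the .ini,
    # per skytools.DBScript conventions; positional args mirror
    # RemoteConsumer.__init__(service_name, db_name, remote_db, args).
    script = CopyConsumer('copy_consumer', 'src_db', 'dst_db', sys.argv[1:])
    script.start()

Because the base class's process_batch() checks pgq_ext.is_batch_done() before doing any work, an already-applied batch is only tagged done and the insert above is not re-run.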