author    Marko Kreen  2013-06-19 21:17:26 +0000
committer Marko Kreen  2013-06-19 21:17:26 +0000
commit    9d2b2d30a1736548d40f17eff5f9f05faa32aaf1 (patch)
tree      be947d1c90b61fa139bf765e5486650f960ac8fe
parent    609617c369cc5ad740ad1fa5c90a56a66d2da762 (diff)
parent    26be6819f4ecc80b7d01db3bfd4933ec1d02115a (diff)
Merge remote-tracking branch 'gamato/develop'
-rw-r--r--  doc/skytools3.txt                        33
-rw-r--r--  python/londiste/handler.py               18
-rw-r--r--  python/londiste/handlers/__init__.py      2
-rw-r--r--  python/londiste/handlers/dispatch.py     59
-rw-r--r--  python/londiste/handlers/shard.py (renamed from python/londiste/handlers/part.py)  73
-rw-r--r--  python/londiste/playback.py               5
-rw-r--r--  python/londiste/setup.py                 59
-rw-r--r--  python/londiste/util.py                  22
-rw-r--r--  python/skytools/gzlog.py                  7
-rw-r--r--  python/skytools/scripting.py              5
-rw-r--r--  scripts/data_maintainer.py                4
11 files changed, 176 insertions, 111 deletions
diff --git a/doc/skytools3.txt b/doc/skytools3.txt
index 70a95f79..0d346e4a 100644
--- a/doc/skytools3.txt
+++ b/doc/skytools3.txt
@@ -11,9 +11,9 @@ Keep old design from Skytools 2
- No pushing with LISTEN/NOTIFY is used for data transport.
- Administrative work happens in separate process.
- Can go down anytime, without affecting anything else.
-* Relaxed attitude about tables
- - Tables can be added/removed any time.
- - Inital data sync happens table-by-table, no attempt is made to keep
+* Relaxed attitude about tables.
+ - Tables can be added/removed at any time.
+ - Initial data sync happens table-by-table, no attempt is made to keep
consistent picture between tables during initial copy.
New features in Skytools 3
@@ -26,19 +26,19 @@ New features in Skytools 3
- For terminology and technical details see here: set.notes.txt.
* New Londiste features:
- - Parallel copy - during inital sync several tables can be
- copied at the same time. In 2.x the copy already happened in separate
- process, making it parallel was just a matter of tuning launching/syncing logic.
+ - Parallel copy - during initial sync several tables can be copied
+ at the same time. In 2.x the copy already happened in separate process,
+ making it parallel was just a matter of tuning launching/syncing logic.
- - EXECUTE command, to run random SQL script on all nodes. The script is executed
- in single a TX on root, and inserted as an event into the queue in the same TX.
- The goal is to emulate DDL AFTER TRIGGER that way.
- Londiste itself does no locking and no coordination between nodes. The assumption
- is that the DDL commands themselves do enough locking. If more locking is needed
- is can be added to script.
+ - EXECUTE command, to run random SQL script on all nodes. The script is
+ executed in a single TX on root, and inserted as an event into the queue
+ in the same TX. The goal is to emulate DDL AFTER TRIGGER that way.
+ Londiste itself does no locking and no coordination between nodes.
+ The assumption is that the DDL commands themselves do enough locking.
+ If more locking is needed it can be added to the script.
- Automatic table or sequence creation by importing the structure
- from provider node. Activeted with --create switch for add-table, add-seq.
+ from provider node. Activated with --create switch for add-table, add-seq.
By default *everything* is copied, including Londiste's own triggers.
The basic idea is that the triggers may be customized and that way
we avoid the need to keep track of trigger customizations.
@@ -58,8 +58,8 @@ New features in Skytools 3
- Target table can use different name (--dest-table)
-* New interactive admin console - qadmin. Because long command lines are not very
- user-friendly, this is an experiment on interactive console with
+* New interactive admin console - qadmin. Because long command lines are
+ not very user-friendly, this is an experiment on interactive console with
heavy emphasis on tab-completion.
* New multi-database ticker: `pgqd`. It is possible to set up one process that
@@ -95,7 +95,7 @@ Minor improvements
* Skytools 3 modules are parallel installable with Skytools 2.
Solved via loader module (like http://faq.pygtk.org/index.py?req=all#2.4[pygtk]).
-
+
import pkgloader
pkgloader.require('skytools', '3.0')
import skytools
@@ -105,4 +105,3 @@ Further reading
---------------
* http://skytools.projects.postgresql.org/skytools-3.0/[Documentation] for skytools3.
-
diff --git a/python/londiste/handler.py b/python/londiste/handler.py
index 51fa603a..287ad546 100644
--- a/python/londiste/handler.py
+++ b/python/londiste/handler.py
@@ -145,7 +145,7 @@ class BaseHandler:
def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
"""do actual table copy and return tuple with number of bytes and rows
- copyed
+ copied
"""
condition = self.get_copy_condition(src_curs, dst_curs)
return skytools.full_copy(src_tablename, src_curs, dst_curs,
@@ -178,9 +178,9 @@ class TableHandler(BaseHandler):
enc = args.get('encoding')
if enc:
- self.enc = EncodingValidator(self.log, enc)
+ self.encoding_validator = EncodingValidator(self.log, enc)
else:
- self.enc = None
+ self.encoding_validator = None
def process_event(self, ev, sql_queue_func, arg):
row = self.parse_row_data(ev)
@@ -212,13 +212,13 @@ class TableHandler(BaseHandler):
if len(ev.type) == 1:
if not self.allow_sql_event:
raise Exception('SQL events not supported by this handler')
- if self.enc:
- return self.enc.validate_string(ev.data, self.table_name)
+ if self.encoding_validator:
+ return self.encoding_validator.validate_string(ev.data, self.table_name)
return ev.data
else:
row = skytools.db_urldecode(ev.data)
- if self.enc:
- return self.enc.validate_dict(row, self.table_name)
+ if self.encoding_validator:
+ return self.encoding_validator.validate_dict(row, self.table_name)
return row
def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
@@ -226,9 +226,9 @@ class TableHandler(BaseHandler):
copied
"""
- if self.enc:
+ if self.encoding_validator:
def _write_hook(obj, data):
- return self.enc.validate_copy(data, column_list, src_tablename)
+ return self.encoding_validator.validate_copy(data, column_list, src_tablename)
else:
_write_hook = None
condition = self.get_copy_condition(src_curs, dst_curs)
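
The rename from self.enc to self.encoding_validator in the hunks above makes the attribute's role explicit. A minimal sketch of driving such a validator, assuming only the calls visible in this diff (constructor taking a logger and encoding name, validate_dict taking a row dict and table name); the logger and table name are illustrative:

    import logging
    import skytools
    from londiste.handler import EncodingValidator

    log = logging.getLogger('demo')
    validator = EncodingValidator(log, 'utf8')

    # event data arrives urlencoded; decode it to a dict first
    row = skytools.db_urldecode('id=1&note=hello')
    # replaces invalid symbols for the target encoding, logging warnings
    clean_row = validator.validate_dict(row, 'public.mytable')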
diff --git a/python/londiste/handlers/__init__.py b/python/londiste/handlers/__init__.py
index 92705462..b6b39100 100644
--- a/python/londiste/handlers/__init__.py
+++ b/python/londiste/handlers/__init__.py
@@ -5,7 +5,7 @@ import sys
DEFAULT_HANDLERS = [
'londiste.handlers.qtable',
'londiste.handlers.applyfn',
- 'londiste.handlers.part',
+ 'londiste.handlers.shard',
'londiste.handlers.multimaster',
'londiste.handlers.vtable',
diff --git a/python/londiste/handlers/dispatch.py b/python/londiste/handlers/dispatch.py
index 758034c7..0b02edcd 100644
--- a/python/londiste/handlers/dispatch.py
+++ b/python/londiste/handlers/dispatch.py
@@ -138,6 +138,10 @@ post_part:
retention_period:
how long to keep partitions around. examples: '3 months', '1 year'
+ignore_old_events:
+ * 0 - handle all events in the same way (default)
+ * 1 - ignore events coming for obsolete partitions
+
encoding:
name of destination encoding. handler replaces all invalid encoding symbols
and logs them as warnings
@@ -153,17 +157,20 @@ creating or copying initial data to destination table. --expect-sync and
--skip-truncate should be used and --create switch is to be avoided.
"""
-import sys
-import datetime
import codecs
+import datetime
import re
+import sys
+from functools import partial
+
import skytools
-from londiste.handler import BaseHandler, EncodingValidator
from skytools import quote_ident, quote_fqident, UsageError
from skytools.dbstruct import *
from skytools.utf8 import safe_utf8_decode
-from functools import partial
+
+from londiste.handler import EncodingValidator
from londiste.handlers import handler_args, update
+from londiste.handlers.shard import ShardHandler
__all__ = ['Dispatcher']
@@ -618,7 +625,7 @@ ROW_HANDLERS = {'plain': RowHandler,
#------------------------------------------------------------------------------
-class Dispatcher(BaseHandler):
+class Dispatcher (ShardHandler):
"""Partitioned loader.
Splits events into partitions, if requested.
Then applies them without further processing.
@@ -630,10 +637,11 @@ class Dispatcher(BaseHandler):
# compat for dest-table
dest_table = args.get('table', dest_table)
- BaseHandler.__init__(self, table_name, args, dest_table)
+ ShardHandler.__init__(self, table_name, args, dest_table)
# show args
self.log.debug("dispatch.init: table_name=%r, args=%r", table_name, args)
+ self.ignored_tables = set()
self.batch_info = None
self.dst_curs = None
self.pkeys = None
@@ -641,11 +649,6 @@ class Dispatcher(BaseHandler):
self.conf = self.get_config()
hdlr_cls = ROW_HANDLERS[self.conf.row_mode]
self.row_handler = hdlr_cls(self.log)
- if self.conf.encoding:
- self.encoding_validator = EncodingValidator(self.log,
- self.conf.encoding)
- else:
- self.encoding_validator = None
def _parse_args_from_doc (self):
doc = __doc__
@@ -688,6 +691,7 @@ class Dispatcher(BaseHandler):
conf.post_part = self.args.get('post_part')
conf.part_func = self.args.get('part_func', PART_FUNC_NEW)
conf.retention_period = self.args.get('retention_period')
+ conf.ignore_old_events = self.get_arg('ignore_old_events', [0, 1], 0)
# set row mode and event types to process
conf.row_mode = self.get_arg('row_mode', ROW_MODES)
event_types = self.args.get('event_types', '*')
@@ -717,8 +721,6 @@ class Dispatcher(BaseHandler):
conf.field_map[tmp[0]] = tmp[0]
else:
conf.field_map[tmp[0]] = tmp[1]
- # encoding validator
- conf.encoding = self.args.get('encoding')
return conf
def get_arg(self, name, value_list, default = None):
@@ -728,17 +730,20 @@ class Dispatcher(BaseHandler):
raise Exception('Bad argument %s value %r' % (name, val))
return val
+ def _validate_hash_key(self):
+ pass # no need for hash key when not sharding
+
def reset(self):
"""Called before starting to process a batch.
Should clean any pending data."""
- BaseHandler.reset(self)
+ ShardHandler.reset(self)
def prepare_batch(self, batch_info, dst_curs):
"""Called on first event for this table in current batch."""
if self.conf.table_mode != 'ignore':
self.batch_info = batch_info
self.dst_curs = dst_curs
- #BaseHandler.prepare_batch(self, batch_info, dst_curs)
+ ShardHandler.prepare_batch(self, batch_info, dst_curs)
def filter_data(self, data):
"""Process with fields skip and map"""
@@ -763,7 +768,7 @@ class Dispatcher(BaseHandler):
pkeys = [fmap[p] for p in pkeys if p in fmap]
return pkeys
- def process_event(self, ev, sql_queue_func, arg):
+ def _process_event(self, ev, sql_queue_func, arg):
"""Process a event.
Event should be added to sql_queue or executed directly.
"""
@@ -781,6 +786,7 @@ class Dispatcher(BaseHandler):
raise Exception('Unknown event type: %s' % ev.ev_type)
# process only operations specified
if not op in self.conf.event_types:
+ #self.log.debug('dispatch.process_event: ignored event type')
return
self.log.debug('dispatch.process_event: %s/%s', ev.ev_type, ev.ev_data)
if self.pkeys is None:
@@ -789,22 +795,25 @@ class Dispatcher(BaseHandler):
# prepare split table when needed
if self.conf.table_mode == 'part':
dst, part_time = self.split_format(ev, data)
+ if dst in self.ignored_tables:
+ return
if dst not in self.row_handler.table_map:
self.check_part(dst, part_time)
+ if dst in self.ignored_tables:
+ return
else:
dst = self.dest_table
if dst not in self.row_handler.table_map:
self.row_handler.add_table(dst, LOADERS[self.conf.load_mode],
- self.pkeys, self.conf)
+ self.pkeys, self.conf)
self.row_handler.process(dst, op, data)
- #BaseHandler.process_event(self, ev, sql_queue_func, arg)
def finish_batch(self, batch_info, dst_curs):
"""Called when batch finishes."""
if self.conf.table_mode != 'ignore':
self.row_handler.flush(dst_curs)
- #BaseHandler.finish_batch(self, batch_info, dst_curs)
+ #ShardHandler.finish_batch(self, batch_info, dst_curs)
def get_part_name(self):
# if custom part name template given, use it
@@ -902,6 +911,8 @@ class Dispatcher(BaseHandler):
if self.conf.retention_period:
self.drop_obsolete_partitions (self.dest_table, self.conf.retention_period, self.conf.period)
+ if self.conf.ignore_old_events and not skytools.exists_table(curs, dst):
+ self.ignored_tables.add(dst) # must have been just dropped
def drop_obsolete_partitions (self, parent_table, retention_period, partition_period):
""" Drop obsolete partitions of partition-by-date parent table.
@@ -918,12 +929,17 @@ class Dispatcher(BaseHandler):
if res:
self.log.info("Dropped tables: %s", ", ".join(res))
+ def get_copy_condition(self, src_curs, dst_curs):
+ """ Prepare where condition for copy and replay filtering.
+ """
+ return ShardHandler.get_copy_condition(self, src_curs, dst_curs)
+
def real_copy(self, tablename, src_curs, dst_curs, column_list):
"""do actual table copy and return tuple with number of bytes and rows
copied
"""
_src_cols = _dst_cols = column_list
- condition = ''
+ condition = self.get_copy_condition (src_curs, dst_curs)
if self.conf.skip_fields:
_src_cols = [col for col in column_list
@@ -940,7 +956,8 @@ class Dispatcher(BaseHandler):
else:
_write_hook = None
- return skytools.full_copy(tablename, src_curs, dst_curs, _src_cols, condition,
+ return skytools.full_copy(tablename, src_curs, dst_curs,
+ _src_cols, condition,
dst_tablename = self.dest_table,
dst_column_list = _dst_cols,
write_hook = _write_hook)
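
A standalone sketch of the ignore_old_events bookkeeping added above: after retention has run, an event whose target partition no longer exists is remembered in a set and skipped. skytools.exists_table is the same check the hunk uses; the cursor and table names here are hypothetical:

    import skytools

    ignored_tables = set()

    def note_if_dropped(curs, dst, ignore_old_events=True):
        # mirrors check_part(): a partition missing after retention ran
        # must have just been dropped as obsolete
        if ignore_old_events and not skytools.exists_table(curs, dst):
            ignored_tables.add(dst)

    def should_skip(dst):
        # _process_event() returns early for tables in this set
        return dst in ignored_tables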
diff --git a/python/londiste/handlers/part.py b/python/londiste/handlers/shard.py
index 247256e4..950ac7c6 100644
--- a/python/londiste/handlers/part.py
+++ b/python/londiste/handlers/shard.py
@@ -2,6 +2,7 @@
Parameters:
key=COLUMN: column name to use for hashing
+ hash_key=COLUMN: column name to use for hashing (overrides 'key' parameter)
hashfunc=NAME: function to use for hashing (default: partconf.get_hash_raw)
hashexpr=EXPR: full expression to use for hashing (deprecated)
encoding=ENC: validate and fix incoming data (only utf8 supported atm)
@@ -23,77 +24,89 @@ Local config:
import skytools
from londiste.handler import TableHandler
-__all__ = ['PartHandler']
+__all__ = ['ShardHandler', 'PartHandler']
-class PartHandler(TableHandler):
+class ShardHandler (TableHandler):
__doc__ = __doc__
- handler_name = 'part'
+ handler_name = 'shard'
DEFAULT_HASHFUNC = "partconf.get_hash_raw"
DEFAULT_HASHEXPR = "%s(%s)"
def __init__(self, table_name, args, dest_table):
TableHandler.__init__(self, table_name, args, dest_table)
- self.max_part = None # max part number
- self.local_part = None # part number of local node
+ self.hash_mask = None # aka max part number (atm)
+ self.shard_nr = None # part number of local node
# primary key columns
- self.key = args.get('key')
- if self.key is None:
- raise Exception('Specify key field as key argument')
+ self.hash_key = args.get('hash_key', args.get('key'))
+ self._validate_hash_key()
# hash function & full expression
hashfunc = args.get('hashfunc', self.DEFAULT_HASHFUNC)
self.hashexpr = self.DEFAULT_HASHEXPR % (
skytools.quote_fqident(hashfunc),
- skytools.quote_ident(self.key))
+ skytools.quote_ident(self.hash_key or ''))
self.hashexpr = args.get('hashexpr', self.hashexpr)
+ def _validate_hash_key(self):
+ if self.hash_key is None:
+ raise Exception('Specify hash key field as hash_key argument')
+
def reset(self):
"""Forget config info."""
- self.max_part = None
- self.local_part = None
+ self.hash_mask = None
+ self.shard_nr = None
TableHandler.reset(self)
def add(self, trigger_arg_list):
"""Let trigger put hash into extra3"""
-
arg = "ev_extra3='hash='||%s" % self.hashexpr
trigger_arg_list.append(arg)
TableHandler.add(self, trigger_arg_list)
def prepare_batch(self, batch_info, dst_curs):
"""Called on first event for this table in current batch."""
- if not self.max_part:
- self.load_part_info(dst_curs)
+ if self.hash_key is not None:
+ if not self.hash_mask:
+ self.load_shard_info(dst_curs)
TableHandler.prepare_batch(self, batch_info, dst_curs)
def process_event(self, ev, sql_queue_func, arg):
- """Filter event by hash in extra3, apply only local part."""
- if ev.extra3:
+ """Filter event by hash in extra3, apply only if for local shard."""
+ if ev.extra3 and self.hash_key is not None:
meta = skytools.db_urldecode(ev.extra3)
- self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d',
- int(meta['hash']), self.max_part, self.local_part)
- if (int(meta['hash']) & self.max_part) != self.local_part:
- self.log.debug('part.process_event: not my event')
+ self.log.debug('shard.process_event: hash=%i, hash_mask=%i, shard_nr=%i',
+ int(meta['hash']), self.hash_mask, self.shard_nr)
+ if (int(meta['hash']) & self.hash_mask) != self.shard_nr:
+ self.log.debug('shard.process_event: not my event')
return
- self.log.debug('part.process_event: my event, processing')
+ self._process_event(ev, sql_queue_func, arg)
+
+ def _process_event(self, ev, sql_queue_func, arg):
+ self.log.debug('shard.process_event: my event, processing')
TableHandler.process_event(self, ev, sql_queue_func, arg)
def get_copy_condition(self, src_curs, dst_curs):
"""Prepare the where condition for copy and replay filtering"""
- self.load_part_info(dst_curs)
- w = "(%s & %d) = %d" % (self.hashexpr, self.max_part, self.local_part)
- self.log.debug('part: copy_condition=%s', w)
+ if self.hash_key is None:
+ return TableHandler.get_copy_condition(self, src_curs, dst_curs)
+ self.load_shard_info(dst_curs)
+ w = "(%s & %d) = %d" % (self.hashexpr, self.hash_mask, self.shard_nr)
+ self.log.debug('shard: copy_condition=%r', w)
return w
- def load_part_info(self, curs):
- """Load slot info from database."""
+ def load_shard_info(self, curs):
+ """Load part/slot info from database."""
q = "select part_nr, max_part from partconf.conf"
curs.execute(q)
- self.local_part, self.max_part = curs.fetchone()
- if self.local_part is None or self.max_part is None:
- raise Exception('Error loading part info')
+ self.shard_nr, self.hash_mask = curs.fetchone()
+ if self.shard_nr is None or self.hash_mask is None:
+ raise Exception('Error loading shard info')
+
+class PartHandler (ShardHandler):
+ """ Deprecated compat name for shard handler. """
+ handler_name = 'part'
# register handler class
-__londiste_handlers__ = [PartHandler]
+__londiste_handlers__ = [ShardHandler, PartHandler]
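
The shard filter above boils down to a mask-and-compare on the hash carried in ev_extra3. A minimal standalone sketch of that arithmetic; hash_mask and shard_nr are hypothetical values (the real ones come from partconf.conf), and the metadata string mimics what the trigger stores:

    import skytools

    hash_mask = 15   # aka max part number: hypothetical 16-way sharding
    shard_nr = 2     # hypothetical shard number of the local node

    # the trigger puts e.g. 'hash=1234' into ev_extra3
    meta = skytools.db_urldecode('hash=1234')
    if (int(meta['hash']) & hash_mask) == shard_nr:
        print('my event, processing')
    else:
        print('not my event')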
diff --git a/python/londiste/playback.py b/python/londiste/playback.py
index 73b6d298..b04ad317 100644
--- a/python/londiste/playback.py
+++ b/python/londiste/playback.py
@@ -865,7 +865,7 @@ class Replicator(CascadedWorker):
return None
def launch_copy(self, tbl_stat):
- """Run paraller worker for copy."""
+ """Run parallel worker for copy."""
self.log.info("Launching copy process")
script = sys.argv[0]
conf = self.cf.filename
@@ -874,8 +874,7 @@ class Replicator(CascadedWorker):
# pass same verbosity options as main script got
if self.options.quiet:
cmd.append('-q')
- if self.options.verbose:
- cmd.append('-v')
+ cmd += self.options.verbose * ['-v']
# let existing copy finish and clean its pidfile,
# otherwise new copy will exit immediately.
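
The one-line replacement above relies on Python list repetition: an int times a list yields that many copies, so a counted -v option is forwarded exactly. A quick illustration with hypothetical command parts:

    verbose = 2                      # e.g. main script was run with -v -v
    cmd = ['script.py', 'conf.ini']  # hypothetical worker command
    cmd += verbose * ['-v']
    assert cmd == ['script.py', 'conf.ini', '-v', '-v']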
diff --git a/python/londiste/setup.py b/python/londiste/setup.py
index 3ce5446d..a23d079a 100644
--- a/python/londiste/setup.py
+++ b/python/londiste/setup.py
@@ -7,6 +7,7 @@ import sys, os, re, skytools
from pgq.cascade.admin import CascadeAdmin
from londiste.exec_attrs import ExecAttrs
+from londiste.util import find_copy_source
import londiste.handler
@@ -139,6 +140,25 @@ class LondisteSetup(CascadeAdmin):
needs_tbl = self.handler_needs_table()
args = self.expand_arg_list(dst_db, 'r', False, args, needs_tbl)
+ # pick proper create flags
+ if self.options.create_full:
+ create_flags = skytools.T_ALL
+ elif self.options.create:
+ create_flags = skytools.T_TABLE | skytools.T_PKEY
+ else:
+ create_flags = 0
+
+ # search for usable copy node if requested & needed
+ if (self.options.find_copy_node and create_flags != 0
+ and needs_tbl and not self.is_root()):
+ src_name, src_loc, _ = find_copy_source(self, self.queue_name, args, None, self.provider_location)
+ self.options.copy_node = src_name
+ self.close_database('provider_db')
+ src_db = self.get_provider_db()
+ src_curs = src_db.cursor()
+ src_tbls = self.fetch_set_tables(src_curs)
+ src_db.commit()
+
# dont check for exist/not here (root handling)
if not self.is_root() and not self.options.expect_sync and not self.options.find_copy_node:
problems = False
@@ -154,24 +174,11 @@ class LondisteSetup(CascadeAdmin):
self.log.error("Problems, canceling operation")
sys.exit(1)
- # pick proper create flags
- if self.options.create_full:
- create_flags = skytools.T_ALL
- elif self.options.create:
- create_flags = skytools.T_TABLE | skytools.T_PKEY
- else:
- create_flags = 0
-
# sanity check
if self.options.dest_table and len(args) > 1:
self.log.error("--dest-table can be given only for single table")
sys.exit(1)
- # not implemented
- if self.options.find_copy_node and create_flags != 0:
- self.log.error("--find-copy-node does not work with --create")
- sys.exit(1)
-
# seems ok
for tbl in args:
self.add_table(src_db, dst_db, tbl, create_flags, src_tbls)
@@ -448,6 +455,25 @@ class LondisteSetup(CascadeAdmin):
"""Reload data from provider node."""
db = self.get_database('db')
args = self.expand_arg_list(db, 'r', True, args)
+
+ if self.options.find_copy_node or self.options.copy_node:
+ q = "select table_name, table_attrs from londiste.get_table_list(%s) where local"
+ cur = db.cursor()
+ cur.execute(q, [self.set_name])
+ for row in cur.fetchall():
+ if row['table_name'] not in args:
+ continue
+ attrs = skytools.db_urldecode (row['table_attrs'] or '')
+
+ if self.options.find_copy_node:
+ attrs['copy_node'] = '?'
+ elif self.options.copy_node:
+ attrs['copy_node'] = self.options.copy_node
+
+ attrs = skytools.db_urlencode (attrs)
+ q = "select * from londiste.local_set_table_attrs (%s, %s, %s)"
+ self.exec_cmd(db, q, [self.set_name, row['table_name'], attrs])
+
q = "select * from londiste.local_set_table_state(%s, %s, null, null)"
self.exec_cmd_many(db, q, [self.set_name], args)
@@ -533,9 +559,8 @@ class LondisteSetup(CascadeAdmin):
db.commit()
def get_provider_db(self):
-
- # use custom node for copy
if self.options.copy_node:
+ # use custom node for copy
source_node = self.options.copy_node
m = self.queue_info.get_member(source_node)
if not m:
@@ -549,6 +574,7 @@ class LondisteSetup(CascadeAdmin):
q = 'select * from pgq_node.get_node_info(%s)'
res = self.exec_cmd(db, q, [self.queue_name], quiet = True)
self.provider_location = res[0]['provider_location']
+
return self.get_database('provider_db', connstr = self.provider_location, profile = 'remote')
def expand_arg_list(self, db, kind, existing, args, needs_tbl=True):
@@ -589,6 +615,9 @@ class LondisteSetup(CascadeAdmin):
res = self.solve_globbing(args, lst_exists, map_exists, map_missing, allow_nonexist)
else:
res = self.solve_globbing(args, lst_missing, map_missing, map_exists, allow_nonexist)
+
+ if not res:
+ self.log.info("what to do ?")
return res
def solve_globbing(self, args, full_list, full_map, reverse_map, allow_nonexist):
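
The create-flag selection that moves earlier in the hunks above is a plain bitmask composition over constants the diff itself references from skytools. The same decision as a standalone sketch, wrapped in a hypothetical helper:

    import skytools

    def pick_create_flags(create, create_full):
        # --create-full copies everything; --create only table + pkey
        if create_full:
            return skytools.T_ALL
        if create:
            return skytools.T_TABLE | skytools.T_PKEY
        return 0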
diff --git a/python/londiste/util.py b/python/londiste/util.py
index cba18f62..07ff9407 100644
--- a/python/londiste/util.py
+++ b/python/londiste/util.py
@@ -18,7 +18,7 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
@param script: DbScript
@param queue_name: name of the cascaded queue
- @param copy_table_name: name of the table
+ @param copy_table_name: name of the table (or list of names)
@param node_name: target node name
@param node_location: target node location
@returns (node_name, node_location, downstream_worker_name) of source node
@@ -27,6 +27,11 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
# None means no steps upwards were taken, so local consumer is worker
worker_name = None
+ if isinstance(copy_table_name, str):
+ need = set([copy_table_name])
+ else:
+ need = set(copy_table_name)
+
while 1:
src_db = script.get_database('_source_db', connstr = node_location, autocommit = 1, profile = 'remote')
src_curs = src_db.cursor()
@@ -39,12 +44,12 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
script.log.info("Checking if %s can be used for copy", info['node_name'])
- q = "select table_name, local, table_attrs from londiste.get_table_list(%s) where table_name = %s"
- src_curs.execute(q, [queue_name, copy_table_name])
- got = False
+ q = "select table_name, local, table_attrs from londiste.get_table_list(%s)"
+ src_curs.execute(q, [queue_name])
+ got = set()
for row in src_curs.fetchall():
tbl = row['table_name']
- if tbl != copy_table_name:
+ if tbl not in need:
continue
if not row['local']:
script.log.debug("Problem: %s is not local", tbl)
@@ -53,14 +58,15 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
script.log.debug("Problem: %s handler does not store data [%s]", tbl, row['table_attrs'])
continue
script.log.debug("Good: %s is usable", tbl)
- got = True
- break
+ got.add(tbl)
script.close_database('_source_db')
- if got:
+ if got == need:
script.log.info("Node %s seems good source, using it", info['node_name'])
return node_name, node_location, worker_name
+ else:
+ script.log.info("Node %s does not have all tables", info['node_name'])
if info['node_type'] == 'root':
raise skytools.UsageError("Found root and no source found")
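
find_copy_source now collects matches into a set, so a node qualifies only when every requested table is local and usable there. A minimal sketch of that set logic, with hypothetical table names and locality flags:

    need = set(['public.t1', 'public.t2'])             # tables the copy needs
    rows = [('public.t1', True), ('public.t2', False)] # (name, local) per node

    got = set()
    for tbl, is_local in rows:
        if tbl in need and is_local:
            got.add(tbl)

    if got == need:
        print('node seems good source, using it')
    else:
        print('node does not have all tables')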
diff --git a/python/skytools/gzlog.py b/python/skytools/gzlog.py
index 558e2813..0db40fc3 100644
--- a/python/skytools/gzlog.py
+++ b/python/skytools/gzlog.py
@@ -1,8 +1,8 @@
"""Atomic append of gzipped data.
-The point is - if several gzip streams are concated, they
-are read back as one whose stream.
+The point is - if several gzip streams are concatenated,
+they are read back as one whole stream.
"""
import gzip
@@ -22,7 +22,7 @@ def gzip_append(filename, data, level = 6):
g.write(data)
g.close()
zdata = buf.getvalue()
-
+
# append, safely
f = open(filename, "a+", 0)
f.seek(0, 2)
@@ -36,4 +36,3 @@ def gzip_append(filename, data, level = 6):
f.truncate()
f.close()
raise ex
-
diff --git a/python/skytools/scripting.py b/python/skytools/scripting.py
index a5e82663..840f3cf4 100644
--- a/python/skytools/scripting.py
+++ b/python/skytools/scripting.py
@@ -588,7 +588,10 @@ class BaseScript(object):
self.reset()
sys.exit(1)
except Exception, d:
- self.send_stats()
+ try: # this may fail too
+ self.send_stats()
+ except:
+ pass
emsg = str(d).rstrip()
self.reset()
self.exception_hook(d, emsg)
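
The generic shape of the fix above: a best-effort side action inside an exception handler must itself be guarded, or its failure replaces the error being handled. A small sketch with hypothetical callables:

    def run_once(work, report_stats):
        try:
            work()
        except Exception:
            try:             # this may fail too; never mask the real error
                report_stats()
            except:
                pass
            raise            # re-raise the original exception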
diff --git a/scripts/data_maintainer.py b/scripts/data_maintainer.py
index 0c2c48b0..5bd8cd87 100644
--- a/scripts/data_maintainer.py
+++ b/scripts/data_maintainer.py
@@ -7,7 +7,7 @@ either one by one or in batches.
Config template::
- [data_maintainer]
+ [data_maintainer3]
job_name = dm_remove_expired_services
dbread = dbname=sourcedb_test
@@ -81,7 +81,7 @@ class DataMaintainer (skytools.DBScript):
loop_delay = -1
def __init__(self, args):
- super(DataMaintainer, self).__init__("data_maintainer", args)
+ super(DataMaintainer, self).__init__("data_maintainer3", args)
# query for fetching the PK-s of the data set to be maintained
self.sql_pk = self.cf.get("sql_get_pk_list")
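
The constructor argument of a skytools DBScript doubles as the ini section it reads, which is why the config template's section header and the super() call change together above. A minimal sketch with a hypothetical script; calling start() enters the normal run loop:

    import sys
    import skytools

    class DemoMaintainer(skytools.DBScript):
        def __init__(self, args):
            # 'data_maintainer3' selects the [data_maintainer3] ini section
            super(DemoMaintainer, self).__init__('data_maintainer3', args)

    if __name__ == '__main__':
        DemoMaintainer(sys.argv[1:]).start()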