author      Marko Kreen   2013-06-19 21:17:26 +0000
committer   Marko Kreen   2013-06-19 21:17:26 +0000
commit      9d2b2d30a1736548d40f17eff5f9f05faa32aaf1
tree        be947d1c90b61fa139bf765e5486650f960ac8fe
parent      609617c369cc5ad740ad1fa5c90a56a66d2da762
parent      26be6819f4ecc80b7d01db3bfd4933ec1d02115a
Merge remote-tracking branch 'gamato/develop'
-rw-r--r--  doc/skytools3.txt                        | 33
-rw-r--r--  python/londiste/handler.py               | 18
-rw-r--r--  python/londiste/handlers/__init__.py     |  2
-rw-r--r--  python/londiste/handlers/dispatch.py     | 59
-rw-r--r--  python/londiste/handlers/shard.py (renamed from python/londiste/handlers/part.py) | 73
-rw-r--r--  python/londiste/playback.py              |  5
-rw-r--r--  python/londiste/setup.py                 | 59
-rw-r--r--  python/londiste/util.py                  | 22
-rw-r--r--  python/skytools/gzlog.py                 |  7
-rw-r--r--  python/skytools/scripting.py             |  5
-rw-r--r--  scripts/data_maintainer.py               |  4
11 files changed, 176 insertions, 111 deletions
diff --git a/doc/skytools3.txt b/doc/skytools3.txt
index 70a95f79..0d346e4a 100644
--- a/doc/skytools3.txt
+++ b/doc/skytools3.txt
@@ -11,9 +11,9 @@ Keep old design from Skytools 2
   - No pushing with LISTEN/NOTIFY is used for data transport.
   - Administrative work happens in separate process.
   - Can go down anytime, without affecting anything else.
-* Relaxed attitude about tables
-  - Tables can be added/removed any time.
-  - Inital data sync happens table-by-table, no attempt is made to keep
+* Relaxed attitude about tables.
+  - Tables can be added/removed at any time.
+  - Initial data sync happens table-by-table, no attempt is made to keep
     consistent picture between tables during initial copy.
 
 New features in Skytools 3
@@ -26,19 +26,19 @@ New features in Skytools 3
   - For terminology and technical details see here: set.notes.txt.
 
 * New Londiste features:
-  - Parallel copy - during inital sync several tables can be
-    copied at the same time. In 2.x the copy already happened in separate
-    process, making it parallel was just a matter of tuning launching/syncing logic.
+  - Parallel copy - during initial sync several tables can be copied
+    at the same time. In 2.x the copy already happened in separate process,
+    making it parallel was just a matter of tuning launching/syncing logic.
 
-  - EXECUTE command, to run random SQL script on all nodes. The script is executed
-    in single a TX on root, and inserted as an event into the queue in the same TX.
-    The goal is to emulate DDL AFTER TRIGGER that way.
-    Londiste itself does no locking and no coordination between nodes. The assumption
-    is that the DDL commands themselves do enough locking. If more locking is needed
-    is can be added to script.
+  - EXECUTE command, to run random SQL script on all nodes. The script is
+    executed in single TX on root, and inserted as an event into the queue
+    in the same TX. The goal is to emulate DDL AFTER TRIGGER that way.
+    Londiste itself does no locking and no coordination between nodes.
+    The assumption is that the DDL commands themselves do enough locking.
+    If more locking is needed is can be added to script.
 
   - Automatic table or sequence creation by importing the structure
-    from provider node. Activeted with --create switch for add-table, add-seq.
+    from provider node. Activated with --create switch for add-table, add-seq.
     By default *everything* is copied, including Londiste own triggers.
     The basic idea is that the triggers may be customized and that way
     we avoid the need to keep track of trigger customizations.
@@ -58,8 +58,8 @@ New features in Skytools 3
 
   - Target table can use different name (--dest-table)
 
-* New interactive admin console - qadmin. Because long command lines are not very
-  user-friendly, this is an experiment on interactive console with
+* New interactive admin console - qadmin. Because long command lines are
+  not very user-friendly, this is an experiment on interactive console with
   heavy emphasis on tab-completion.
 
 * New multi-database ticker: `pgqd`. It is possible to set up one process that
@@ -95,7 +95,7 @@ Minor improvements
 
 * Skytools 3 modules are parallel installable with Skytools 2.
   Solved via loader module (like http://faq.pygtk.org/index.py?req=all#2.4[pygtk]).
-  
+
     import pkgloader
     pkgloader.require('skytools', '3.0')
     import skytools
@@ -105,4 +105,3 @@
 Further reading
 ---------------
 * http://skytools.projects.postgresql.org/skytools-3.0/[Documentation] for skytools3.
-
diff --git a/python/londiste/handler.py b/python/londiste/handler.py
index 51fa603a..287ad546 100644
--- a/python/londiste/handler.py
+++ b/python/londiste/handler.py
@@ -145,7 +145,7 @@ class BaseHandler:
     def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
         """do actual table copy and return tuple with number of bytes and rows
-        copyed
+        copied
         """
         condition = self.get_copy_condition(src_curs, dst_curs)
         return skytools.full_copy(src_tablename, src_curs, dst_curs,
@@ -178,9 +178,9 @@ class TableHandler(BaseHandler):
 
         enc = args.get('encoding')
         if enc:
-            self.enc = EncodingValidator(self.log, enc)
+            self.encoding_validator = EncodingValidator(self.log, enc)
         else:
-            self.enc = None
+            self.encoding_validator = None
 
     def process_event(self, ev, sql_queue_func, arg):
         row = self.parse_row_data(ev)
@@ -212,13 +212,13 @@ class TableHandler(BaseHandler):
         if len(ev.type) == 1:
             if not self.allow_sql_event:
                 raise Exception('SQL events not supported by this handler')
-            if self.enc:
-                return self.enc.validate_string(ev.data, self.table_name)
+            if self.encoding_validator:
+                return self.encoding_validator.validate_string(ev.data, self.table_name)
             return ev.data
         else:
             row = skytools.db_urldecode(ev.data)
-            if self.enc:
-                return self.enc.validate_dict(row, self.table_name)
+            if self.encoding_validator:
+                return self.encoding_validator.validate_dict(row, self.table_name)
             return row
 
     def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
@@ -226,9 +226,9 @@ class TableHandler(BaseHandler):
             copied
         """
 
-        if self.enc:
+        if self.encoding_validator:
             def _write_hook(obj, data):
-                return self.enc.validate_copy(data, column_list, src_tablename)
+                return self.encoding_validator.validate_copy(data, column_list, src_tablename)
         else:
             _write_hook = None
         condition = self.get_copy_condition(src_curs, dst_curs)
diff --git a/python/londiste/handlers/__init__.py b/python/londiste/handlers/__init__.py
index 92705462..b6b39100 100644
--- a/python/londiste/handlers/__init__.py
+++ b/python/londiste/handlers/__init__.py
@@ -5,7 +5,7 @@ import sys
 DEFAULT_HANDLERS = [
     'londiste.handlers.qtable',
     'londiste.handlers.applyfn',
-    'londiste.handlers.part',
+    'londiste.handlers.shard',
     'londiste.handlers.multimaster',
     'londiste.handlers.vtable',
diff --git a/python/londiste/handlers/dispatch.py b/python/londiste/handlers/dispatch.py
index 758034c7..0b02edcd 100644
--- a/python/londiste/handlers/dispatch.py
+++ b/python/londiste/handlers/dispatch.py
@@ -138,6 +138,10 @@ post_part:
 retention_period:
     how long to keep partitions around. examples: '3 months', '1 year'
 
+ignore_old_events:
+    * 0 - handle all events in the same way (default)
+    * 1 - ignore events coming for obsolete partitions
+
 encoding:
     name of destination encoding. handler replaces all invalid encoding symbols
     and logs them as warnings
@@ -153,17 +157,20 @@
 creating or coping initial data to destination table. --expect-sync and
 --skip-truncate should be used and --create switch is to be avoided.
""" -import sys -import datetime import codecs +import datetime import re +import sys +from functools import partial + import skytools -from londiste.handler import BaseHandler, EncodingValidator from skytools import quote_ident, quote_fqident, UsageError from skytools.dbstruct import * from skytools.utf8 import safe_utf8_decode -from functools import partial + +from londiste.handler import EncodingValidator from londiste.handlers import handler_args, update +from londiste.handlers.shard import ShardHandler __all__ = ['Dispatcher'] @@ -618,7 +625,7 @@ ROW_HANDLERS = {'plain': RowHandler, #------------------------------------------------------------------------------ -class Dispatcher(BaseHandler): +class Dispatcher (ShardHandler): """Partitioned loader. Splits events into partitions, if requested. Then applies them without further processing. @@ -630,10 +637,11 @@ class Dispatcher(BaseHandler): # compat for dest-table dest_table = args.get('table', dest_table) - BaseHandler.__init__(self, table_name, args, dest_table) + ShardHandler.__init__(self, table_name, args, dest_table) # show args self.log.debug("dispatch.init: table_name=%r, args=%r", table_name, args) + self.ignored_tables = set() self.batch_info = None self.dst_curs = None self.pkeys = None @@ -641,11 +649,6 @@ class Dispatcher(BaseHandler): self.conf = self.get_config() hdlr_cls = ROW_HANDLERS[self.conf.row_mode] self.row_handler = hdlr_cls(self.log) - if self.conf.encoding: - self.encoding_validator = EncodingValidator(self.log, - self.conf.encoding) - else: - self.encoding_validator = None def _parse_args_from_doc (self): doc = __doc__ @@ -688,6 +691,7 @@ class Dispatcher(BaseHandler): conf.post_part = self.args.get('post_part') conf.part_func = self.args.get('part_func', PART_FUNC_NEW) conf.retention_period = self.args.get('retention_period') + conf.ignore_old_events = self.get_arg('ignore_old_events', [0, 1], 0) # set row mode and event types to process conf.row_mode = self.get_arg('row_mode', ROW_MODES) event_types = self.args.get('event_types', '*') @@ -717,8 +721,6 @@ class Dispatcher(BaseHandler): conf.field_map[tmp[0]] = tmp[0] else: conf.field_map[tmp[0]] = tmp[1] - # encoding validator - conf.encoding = self.args.get('encoding') return conf def get_arg(self, name, value_list, default = None): @@ -728,17 +730,20 @@ class Dispatcher(BaseHandler): raise Exception('Bad argument %s value %r' % (name, val)) return val + def _validate_hash_key(self): + pass # no need for hash key when not sharding + def reset(self): """Called before starting to process a batch. Should clean any pending data.""" - BaseHandler.reset(self) + ShardHandler.reset(self) def prepare_batch(self, batch_info, dst_curs): """Called on first event for this table in current batch.""" if self.conf.table_mode != 'ignore': self.batch_info = batch_info self.dst_curs = dst_curs - #BaseHandler.prepare_batch(self, batch_info, dst_curs) + ShardHandler.prepare_batch(self, batch_info, dst_curs) def filter_data(self, data): """Process with fields skip and map""" @@ -763,7 +768,7 @@ class Dispatcher(BaseHandler): pkeys = [fmap[p] for p in pkeys if p in fmap] return pkeys - def process_event(self, ev, sql_queue_func, arg): + def _process_event(self, ev, sql_queue_func, arg): """Process a event. Event should be added to sql_queue or executed directly. 
""" @@ -781,6 +786,7 @@ class Dispatcher(BaseHandler): raise Exception('Unknown event type: %s' % ev.ev_type) # process only operations specified if not op in self.conf.event_types: + #self.log.debug('dispatch.process_event: ignored event type') return self.log.debug('dispatch.process_event: %s/%s', ev.ev_type, ev.ev_data) if self.pkeys is None: @@ -789,22 +795,25 @@ class Dispatcher(BaseHandler): # prepare split table when needed if self.conf.table_mode == 'part': dst, part_time = self.split_format(ev, data) + if dst in self.ignored_tables: + return if dst not in self.row_handler.table_map: self.check_part(dst, part_time) + if dst in self.ignored_tables: + return else: dst = self.dest_table if dst not in self.row_handler.table_map: self.row_handler.add_table(dst, LOADERS[self.conf.load_mode], - self.pkeys, self.conf) + self.pkeys, self.conf) self.row_handler.process(dst, op, data) - #BaseHandler.process_event(self, ev, sql_queue_func, arg) def finish_batch(self, batch_info, dst_curs): """Called when batch finishes.""" if self.conf.table_mode != 'ignore': self.row_handler.flush(dst_curs) - #BaseHandler.finish_batch(self, batch_info, dst_curs) + #ShardHandler.finish_batch(self, batch_info, dst_curs) def get_part_name(self): # if custom part name template given, use it @@ -902,6 +911,8 @@ class Dispatcher(BaseHandler): if self.conf.retention_period: self.drop_obsolete_partitions (self.dest_table, self.conf.retention_period, self.conf.period) + if self.conf.ignore_old_events and not skytools.exists_table(curs, dst): + self.ignored_tables.add(dst) # must have been just dropped def drop_obsolete_partitions (self, parent_table, retention_period, partition_period): """ Drop obsolete partitions of partition-by-date parent table. @@ -918,12 +929,17 @@ class Dispatcher(BaseHandler): if res: self.log.info("Dropped tables: %s", ", ".join(res)) + def get_copy_condition(self, src_curs, dst_curs): + """ Prepare where condition for copy and replay filtering. 
+ """ + return ShardHandler.get_copy_condition(self, src_curs, dst_curs) + def real_copy(self, tablename, src_curs, dst_curs, column_list): """do actual table copy and return tuple with number of bytes and rows copied """ _src_cols = _dst_cols = column_list - condition = '' + condition = self.get_copy_condition (src_curs, dst_curs) if self.conf.skip_fields: _src_cols = [col for col in column_list @@ -940,7 +956,8 @@ class Dispatcher(BaseHandler): else: _write_hook = None - return skytools.full_copy(tablename, src_curs, dst_curs, _src_cols, condition, + return skytools.full_copy(tablename, src_curs, dst_curs, + _src_cols, condition, dst_tablename = self.dest_table, dst_column_list = _dst_cols, write_hook = _write_hook) diff --git a/python/londiste/handlers/part.py b/python/londiste/handlers/shard.py index 247256e4..950ac7c6 100644 --- a/python/londiste/handlers/part.py +++ b/python/londiste/handlers/shard.py @@ -2,6 +2,7 @@ Parameters: key=COLUMN: column name to use for hashing + hash_key=COLUMN: column name to use for hashing (overrides 'key' parameter) hashfunc=NAME: function to use for hashing (default: partconf.get_hash_raw) hashexpr=EXPR: full expression to use for hashing (deprecated) encoding=ENC: validate and fix incoming data (only utf8 supported atm) @@ -23,77 +24,89 @@ Local config: import skytools from londiste.handler import TableHandler -__all__ = ['PartHandler'] +__all__ = ['ShardHandler', 'PartHandler'] -class PartHandler(TableHandler): +class ShardHandler (TableHandler): __doc__ = __doc__ - handler_name = 'part' + handler_name = 'shard' DEFAULT_HASHFUNC = "partconf.get_hash_raw" DEFAULT_HASHEXPR = "%s(%s)" def __init__(self, table_name, args, dest_table): TableHandler.__init__(self, table_name, args, dest_table) - self.max_part = None # max part number - self.local_part = None # part number of local node + self.hash_mask = None # aka max part number (atm) + self.shard_nr = None # part number of local node # primary key columns - self.key = args.get('key') - if self.key is None: - raise Exception('Specify key field as key argument') + self.hash_key = args.get('hash_key', args.get('key')) + self._validate_hash_key() # hash function & full expression hashfunc = args.get('hashfunc', self.DEFAULT_HASHFUNC) self.hashexpr = self.DEFAULT_HASHEXPR % ( skytools.quote_fqident(hashfunc), - skytools.quote_ident(self.key)) + skytools.quote_ident(self.hash_key or '')) self.hashexpr = args.get('hashexpr', self.hashexpr) + def _validate_hash_key(self): + if self.hash_key is None: + raise Exception('Specify hash key field as hash_key argument') + def reset(self): """Forget config info.""" - self.max_part = None - self.local_part = None + self.hash_mask = None + self.shard_nr = None TableHandler.reset(self) def add(self, trigger_arg_list): """Let trigger put hash into extra3""" - arg = "ev_extra3='hash='||%s" % self.hashexpr trigger_arg_list.append(arg) TableHandler.add(self, trigger_arg_list) def prepare_batch(self, batch_info, dst_curs): """Called on first event for this table in current batch.""" - if not self.max_part: - self.load_part_info(dst_curs) + if self.hash_key is not None: + if not self.hash_mask: + self.load_shard_info(dst_curs) TableHandler.prepare_batch(self, batch_info, dst_curs) def process_event(self, ev, sql_queue_func, arg): - """Filter event by hash in extra3, apply only local part.""" - if ev.extra3: + """Filter event by hash in extra3, apply only if for local shard.""" + if ev.extra3 and self.hash_key is not None: meta = skytools.db_urldecode(ev.extra3) - 
self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d', - int(meta['hash']), self.max_part, self.local_part) - if (int(meta['hash']) & self.max_part) != self.local_part: - self.log.debug('part.process_event: not my event') + self.log.debug('shard.process_event: hash=%i, hash_mask=%i, shard_nr=%i', + int(meta['hash']), self.hash_mask, self.shard_nr) + if (int(meta['hash']) & self.hash_mask) != self.shard_nr: + self.log.debug('shard.process_event: not my event') return - self.log.debug('part.process_event: my event, processing') + self._process_event(ev, sql_queue_func, arg) + + def _process_event(self, ev, sql_queue_func, arg): + self.log.debug('shard.process_event: my event, processing') TableHandler.process_event(self, ev, sql_queue_func, arg) def get_copy_condition(self, src_curs, dst_curs): """Prepare the where condition for copy and replay filtering""" - self.load_part_info(dst_curs) - w = "(%s & %d) = %d" % (self.hashexpr, self.max_part, self.local_part) - self.log.debug('part: copy_condition=%s', w) + if self.hash_key is None: + return TableHandler.get_copy_condition(self, src_curs, dst_curs) + self.load_shard_info(dst_curs) + w = "(%s & %d) = %d" % (self.hashexpr, self.hash_mask, self.shard_nr) + self.log.debug('shard: copy_condition=%r', w) return w - def load_part_info(self, curs): - """Load slot info from database.""" + def load_shard_info(self, curs): + """Load part/slot info from database.""" q = "select part_nr, max_part from partconf.conf" curs.execute(q) - self.local_part, self.max_part = curs.fetchone() - if self.local_part is None or self.max_part is None: - raise Exception('Error loading part info') + self.shard_nr, self.hash_mask = curs.fetchone() + if self.shard_nr is None or self.hash_mask is None: + raise Exception('Error loading shard info') + +class PartHandler (ShardHandler): + """ Deprecated compat name for shard handler. """ + handler_name = 'part' # register handler class -__londiste_handlers__ = [PartHandler] +__londiste_handlers__ = [ShardHandler, PartHandler] diff --git a/python/londiste/playback.py b/python/londiste/playback.py index 73b6d298..b04ad317 100644 --- a/python/londiste/playback.py +++ b/python/londiste/playback.py @@ -865,7 +865,7 @@ class Replicator(CascadedWorker): return None def launch_copy(self, tbl_stat): - """Run paraller worker for copy.""" + """Run parallel worker for copy.""" self.log.info("Launching copy process") script = sys.argv[0] conf = self.cf.filename @@ -874,8 +874,7 @@ class Replicator(CascadedWorker): # pass same verbosity options as main script got if self.options.quiet: cmd.append('-q') - if self.options.verbose: - cmd.append('-v') + cmd += self.options.verbose * ['-v'] # let existing copy finish and clean its pidfile, # otherwise new copy will exit immediately. 
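Note on the handlers/part.py -> handlers/shard.py rename above: the filtering rule itself is unchanged - an event is applied locally only when the hash the trigger stored in ev_extra3 falls on the local shard. A minimal standalone sketch of that rule in Python (illustration only, not londiste API; row_hash stands for the partconf.get_hash_raw() value, and hash_mask/shard_nr correspond to max_part/part_nr from partconf.conf):

    # An event belongs to this shard when its hash, masked down to the
    # number of shards, equals the local shard number.
    def is_local_event(row_hash, hash_mask, shard_nr):
        return (row_hash & hash_mask) == shard_nr

    # Example: 4 shards => hash_mask = 3; shard 2 keeps rows where hash & 3 == 2.
    # is_local_event(6, 3, 2) -> True
    # is_local_event(5, 3, 2) -> False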
diff --git a/python/londiste/setup.py b/python/londiste/setup.py
index 3ce5446d..a23d079a 100644
--- a/python/londiste/setup.py
+++ b/python/londiste/setup.py
@@ -7,6 +7,7 @@ import sys, os, re, skytools
 
 from pgq.cascade.admin import CascadeAdmin
 from londiste.exec_attrs import ExecAttrs
+from londiste.util import find_copy_source
 
 import londiste.handler
@@ -139,6 +140,25 @@ class LondisteSetup(CascadeAdmin):
         needs_tbl = self.handler_needs_table()
         args = self.expand_arg_list(dst_db, 'r', False, args, needs_tbl)
 
+        # pick proper create flags
+        if self.options.create_full:
+            create_flags = skytools.T_ALL
+        elif self.options.create:
+            create_flags = skytools.T_TABLE | skytools.T_PKEY
+        else:
+            create_flags = 0
+
+        # search for usable copy node if requested & needed
+        if (self.options.find_copy_node and create_flags != 0
+                and needs_tbl and not self.is_root()):
+            src_name, src_loc, _ = find_copy_source(self, self.queue_name, args, None, self.provider_location)
+            self.options.copy_node = src_name
+            self.close_database('provider_db')
+            src_db = self.get_provider_db()
+            src_curs = src_db.cursor()
+            src_tbls = self.fetch_set_tables(src_curs)
+            src_db.commit()
+
         # dont check for exist/not here (root handling)
         if not self.is_root() and not self.options.expect_sync and not self.options.find_copy_node:
             problems = False
@@ -154,24 +174,11 @@ class LondisteSetup(CascadeAdmin):
                 self.log.error("Problems, canceling operation")
                 sys.exit(1)
 
-        # pick proper create flags
-        if self.options.create_full:
-            create_flags = skytools.T_ALL
-        elif self.options.create:
-            create_flags = skytools.T_TABLE | skytools.T_PKEY
-        else:
-            create_flags = 0
-
         # sanity check
         if self.options.dest_table and len(args) > 1:
             self.log.error("--dest-table can be given only for single table")
             sys.exit(1)
 
-        # not implemented
-        if self.options.find_copy_node and create_flags != 0:
-            self.log.error("--find-copy-node does not work with --create")
-            sys.exit(1)
-
         # seems ok
         for tbl in args:
             self.add_table(src_db, dst_db, tbl, create_flags, src_tbls)
@@ -448,6 +455,25 @@ class LondisteSetup(CascadeAdmin):
         """Reload data from provider node."""
         db = self.get_database('db')
         args = self.expand_arg_list(db, 'r', True, args)
+
+        if self.options.find_copy_node or self.options.copy_node:
+            q = "select table_name, table_attrs from londiste.get_table_list(%s) where local"
+            cur = db.cursor()
+            cur.execute(q, [self.set_name])
+            for row in cur.fetchall():
+                if row['table_name'] not in args:
+                    continue
+                attrs = skytools.db_urldecode (row['table_attrs'] or '')
+
+                if self.options.find_copy_node:
+                    attrs['copy_node'] = '?'
+                elif self.options.copy_node:
+                    attrs['copy_node'] = self.options.copy_node
+
+                attrs = skytools.db_urlencode (attrs)
+                q = "select * from londiste.local_set_table_attrs (%s, %s, %s)"
+                self.exec_cmd(db, q, [self.set_name, row['table_name'], attrs])
+
         q = "select * from londiste.local_set_table_state(%s, %s, null, null)"
         self.exec_cmd_many(db, q, [self.set_name], args)
@@ -533,9 +559,8 @@ class LondisteSetup(CascadeAdmin):
         db.commit()
 
     def get_provider_db(self):
-
-        # use custom node for copy
         if self.options.copy_node:
+            # use custom node for copy
             source_node = self.options.copy_node
             m = self.queue_info.get_member(source_node)
             if not m:
@@ -549,6 +574,7 @@ class LondisteSetup(CascadeAdmin):
             q = 'select * from pgq_node.get_node_info(%s)'
             res = self.exec_cmd(db, q, [self.queue_name], quiet = True)
             self.provider_location = res[0]['provider_location']
+
         return self.get_database('provider_db', connstr = self.provider_location, profile = 'remote')
 
     def expand_arg_list(self, db, kind, existing, args, needs_tbl=True):
@@ -589,6 +615,9 @@ class LondisteSetup(CascadeAdmin):
             res = self.solve_globbing(args, lst_exists, map_exists, map_missing, allow_nonexist)
         else:
             res = self.solve_globbing(args, lst_missing, map_missing, map_exists, allow_nonexist)
+
+        if not res:
+            self.log.info("what to do ?")
         return res
 
     def solve_globbing(self, args, full_list, full_map, reverse_map, allow_nonexist):
diff --git a/python/londiste/util.py b/python/londiste/util.py
index cba18f62..07ff9407 100644
--- a/python/londiste/util.py
+++ b/python/londiste/util.py
@@ -18,7 +18,7 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
 
     @param script: DbScript
     @param queue_name: name of the cascaded queue
-    @param copy_table_name: name of the table
+    @param copy_table_name: name of the table (or list of names)
    @param node_name: target node name
     @param node_location: target node location
     @returns (node_name, node_location, downstream_worker_name) of source node
@@ -27,6 +27,11 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
     # None means no steps upwards were taken, so local consumer is worker
     worker_name = None
 
+    if isinstance(copy_table_name, str):
+        need = set([copy_table_name])
+    else:
+        need = set(copy_table_name)
+
     while 1:
         src_db = script.get_database('_source_db', connstr = node_location, autocommit = 1, profile = 'remote')
         src_curs = src_db.cursor()
@@ -39,12 +44,12 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
 
         script.log.info("Checking if %s can be used for copy", info['node_name'])
 
-        q = "select table_name, local, table_attrs from londiste.get_table_list(%s) where table_name = %s"
-        src_curs.execute(q, [queue_name, copy_table_name])
-        got = False
+        q = "select table_name, local, table_attrs from londiste.get_table_list(%s)"
+        src_curs.execute(q, [queue_name])
+        got = set()
         for row in src_curs.fetchall():
             tbl = row['table_name']
-            if tbl != copy_table_name:
+            if tbl not in need:
                 continue
             if not row['local']:
                 script.log.debug("Problem: %s is not local", tbl)
@@ -53,14 +58,15 @@ def find_copy_source(script, queue_name, copy_table_name, node_name, node_locati
                 script.log.debug("Problem: %s handler does not store data [%s]", tbl, row['table_attrs'])
                 continue
             script.log.debug("Good: %s is usable", tbl)
-            got = True
-            break
+            got.add(tbl)
 
         script.close_database('_source_db')
 
-        if got:
+        if got == need:
             script.log.info("Node %s seems good source, using it", info['node_name'])
             return node_name, node_location, worker_name
+        else:
+            script.log.info("Node %s does not have all tables", info['node_name'])
 
         if info['node_type'] == 'root':
             raise skytools.UsageError("Found root and no source found")
diff --git a/python/skytools/gzlog.py b/python/skytools/gzlog.py
index 558e2813..0db40fc3 100644
--- a/python/skytools/gzlog.py
+++ b/python/skytools/gzlog.py
@@ -1,8 +1,8 @@
 """Atomic append of gzipped data.
 
-The point is - if several gzip streams are concated, they
-are read back as one whose stream.
+The point is - if several gzip streams are concatenated,
+they are read back as one whole stream.
 """
 
 import gzip
@@ -22,7 +22,7 @@ def gzip_append(filename, data, level = 6):
     g.write(data)
     g.close()
     zdata = buf.getvalue()
-    
+
     # append, safely
     f = open(filename, "a+", 0)
     f.seek(0, 2)
@@ -36,4 +36,3 @@ def gzip_append(filename, data, level = 6):
         f.truncate()
         f.close()
         raise ex
-
diff --git a/python/skytools/scripting.py b/python/skytools/scripting.py
index a5e82663..840f3cf4 100644
--- a/python/skytools/scripting.py
+++ b/python/skytools/scripting.py
@@ -588,7 +588,10 @@ class BaseScript(object):
                 self.reset()
                 sys.exit(1)
             except Exception, d:
-                self.send_stats()
+                try: # this may fail too
+                    self.send_stats()
+                except:
+                    pass
                 emsg = str(d).rstrip()
                 self.reset()
                 self.exception_hook(d, emsg)
diff --git a/scripts/data_maintainer.py b/scripts/data_maintainer.py
index 0c2c48b0..5bd8cd87 100644
--- a/scripts/data_maintainer.py
+++ b/scripts/data_maintainer.py
@@ -7,7 +7,7 @@ either one by one or in batches.
 
 Config template::
 
-    [data_maintainer]
+    [data_maintainer3]
     job_name = dm_remove_expired_services
 
     dbread = dbname=sourcedb_test
@@ -81,7 +81,7 @@ class DataMaintainer (skytools.DBScript):
     loop_delay = -1
 
     def __init__(self, args):
-        super(DataMaintainer, self).__init__("data_maintainer", args)
+        super(DataMaintainer, self).__init__("data_maintainer3", args)
 
         # query for fetching the PK-s of the data set to be maintained
         self.sql_pk = self.cf.get("sql_get_pk_list")