author     martinko    2013-05-15 12:32:06 +0000
committer  martinko    2013-05-15 12:32:06 +0000
commit     dd1901c533b8a07ab9bde03374ac59ed185e7113 (patch)
tree       82376def65cdd21e4ea90aa818395526a7f6a497
parent     c3d00f0ba488ffc3aab36842dfd6bcde3e1e72e9 (diff)
parent     ab9e8f02531c1ee7400858af14b33bfda9b069d5 (diff)
Merge branch 'feature/dispatch_handler_with_sharding' into develop
-rw-r--r--  python/londiste/handler.py            | 16
-rw-r--r--  python/londiste/handlers/dispatch.py  | 44
-rw-r--r--  python/londiste/handlers/part.py      | 25
3 files changed, 49 insertions, 36 deletions
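
Note on the change (not part of the original commit page): the merge makes Dispatcher derive from PartHandler instead of BaseHandler, so the dispatch handler can reuse the part handler's hash-based sharding (event filtering via ev_extra3 and a WHERE condition for copy/replay), while the hash key stays optional for plain dispatching. The snippet below is a condensed, illustrative sketch of that override pattern only; class bodies, the args handling and the placeholder condition string are simplified and are not the real londiste code.

    # Condensed sketch of the inheritance change in this merge (illustrative only).
    class TableHandler(object):
        def get_copy_condition(self, src_curs, dst_curs):
            return ''                        # plain handler: no copy/replay filter

    class PartHandler(TableHandler):
        def __init__(self, args):
            # 'hash_key' overrides the older 'key' argument
            self.hash_key = args.get('hash_key', args.get('key'))
            self._validate_hash_key()

        def _validate_hash_key(self):
            if self.hash_key is None:
                raise Exception('Specify key field as key argument')

        def get_copy_condition(self, src_curs, dst_curs):
            if self.hash_key is None:        # no hash key: behave like a plain handler
                return TableHandler.get_copy_condition(self, src_curs, dst_curs)
            return "(hashexpr & max_part) = local_part"   # placeholder for the real expression

    class Dispatcher(PartHandler):
        def _validate_hash_key(self):
            pass                             # hash key is optional when not sharding
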
diff --git a/python/londiste/handler.py b/python/londiste/handler.py
index 72d98b43..287ad546 100644
--- a/python/londiste/handler.py
+++ b/python/londiste/handler.py
@@ -178,9 +178,9 @@ class TableHandler(BaseHandler):
 
         enc = args.get('encoding')
         if enc:
-            self.enc = EncodingValidator(self.log, enc)
+            self.encoding_validator = EncodingValidator(self.log, enc)
         else:
-            self.enc = None
+            self.encoding_validator = None
 
     def process_event(self, ev, sql_queue_func, arg):
         row = self.parse_row_data(ev)
@@ -212,13 +212,13 @@ class TableHandler(BaseHandler):
         if len(ev.type) == 1:
             if not self.allow_sql_event:
                 raise Exception('SQL events not supported by this handler')
-            if self.enc:
-                return self.enc.validate_string(ev.data, self.table_name)
+            if self.encoding_validator:
+                return self.encoding_validator.validate_string(ev.data, self.table_name)
             return ev.data
         else:
             row = skytools.db_urldecode(ev.data)
-            if self.enc:
-                return self.enc.validate_dict(row, self.table_name)
+            if self.encoding_validator:
+                return self.encoding_validator.validate_dict(row, self.table_name)
             return row
 
     def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
@@ -226,9 +226,9 @@ class TableHandler(BaseHandler):
        copied
        """
 
-        if self.enc:
+        if self.encoding_validator:
            def _write_hook(obj, data):
-                return self.enc.validate_copy(data, column_list, src_tablename)
+                return self.encoding_validator.validate_copy(data, column_list, src_tablename)
        else:
            _write_hook = None
        condition = self.get_copy_condition(src_curs, dst_curs)
diff --git a/python/londiste/handlers/dispatch.py b/python/londiste/handlers/dispatch.py
index 758034c7..d1af2f02 100644
--- a/python/londiste/handlers/dispatch.py
+++ b/python/londiste/handlers/dispatch.py
@@ -153,17 +153,20 @@ creating or coping initial data to destination table. --expect-sync and
 --skip-truncate should be used and --create switch is to be avoided.
 """
 
-import sys
-import datetime
 import codecs
+import datetime
 import re
+import sys
+from functools import partial
+
 import skytools
-from londiste.handler import BaseHandler, EncodingValidator
 from skytools import quote_ident, quote_fqident, UsageError
 from skytools.dbstruct import *
 from skytools.utf8 import safe_utf8_decode
-from functools import partial
+
+from londiste.handler import EncodingValidator
 from londiste.handlers import handler_args, update
+from londiste.handlers.part import PartHandler
 
 __all__ = ['Dispatcher']
 
@@ -618,7 +621,7 @@ ROW_HANDLERS = {'plain': RowHandler,
 
 #------------------------------------------------------------------------------
 
-class Dispatcher(BaseHandler):
+class Dispatcher (PartHandler):
     """Partitioned loader.
     Splits events into partitions, if requested.
     Then applies them without further processing.
@@ -630,7 +633,7 @@ class Dispatcher(BaseHandler):
 
         # compat for dest-table
         dest_table = args.get('table', dest_table)
-        BaseHandler.__init__(self, table_name, args, dest_table)
+        super(Dispatcher, self).__init__(table_name, args, dest_table)
 
         # show args
         self.log.debug("dispatch.init: table_name=%r, args=%r", table_name, args)
@@ -641,11 +644,6 @@ class Dispatcher(BaseHandler):
         self.conf = self.get_config()
         hdlr_cls = ROW_HANDLERS[self.conf.row_mode]
         self.row_handler = hdlr_cls(self.log)
-        if self.conf.encoding:
-            self.encoding_validator = EncodingValidator(self.log,
-                                                        self.conf.encoding)
-        else:
-            self.encoding_validator = None
 
     def _parse_args_from_doc (self):
         doc = __doc__
@@ -717,8 +715,6 @@ class Dispatcher(BaseHandler):
                 conf.field_map[tmp[0]] = tmp[0]
             else:
                 conf.field_map[tmp[0]] = tmp[1]
-        # encoding validator
-        conf.encoding = self.args.get('encoding')
         return conf
 
     def get_arg(self, name, value_list, default = None):
@@ -728,17 +724,20 @@ class Dispatcher(BaseHandler):
             raise Exception('Bad argument %s value %r' % (name, val))
         return val
 
+    def _validate_hash_key(self):
+        pass # no need for hash key when not sharding
+
     def reset(self):
         """Called before starting to process a batch. Should clean any pending
         data."""
-        BaseHandler.reset(self)
+        super(Dispatcher, self).reset()
 
     def prepare_batch(self, batch_info, dst_curs):
         """Called on first event for this table in current batch."""
         if self.conf.table_mode != 'ignore':
             self.batch_info = batch_info
             self.dst_curs = dst_curs
-        #BaseHandler.prepare_batch(self, batch_info, dst_curs)
+        super(Dispatcher, self).prepare_batch(batch_info, dst_curs)
 
     def filter_data(self, data):
         """Process with fields skip and map"""
@@ -763,7 +762,7 @@ class Dispatcher(BaseHandler):
             pkeys = [fmap[p] for p in pkeys if p in fmap]
         return pkeys
 
-    def process_event(self, ev, sql_queue_func, arg):
+    def _process_event(self, ev, sql_queue_func, arg):
         """Process a event.
         Event should be added to sql_queue or executed directly.
         """
@@ -798,13 +797,12 @@ class Dispatcher(BaseHandler):
             self.row_handler.add_table(dst, LOADERS[self.conf.load_mode],
                                        self.pkeys, self.conf)
         self.row_handler.process(dst, op, data)
-        #BaseHandler.process_event(self, ev, sql_queue_func, arg)
 
     def finish_batch(self, batch_info, dst_curs):
         """Called when batch finishes."""
         if self.conf.table_mode != 'ignore':
             self.row_handler.flush(dst_curs)
-        #BaseHandler.finish_batch(self, batch_info, dst_curs)
+        #super(Dispatcher, self).finish_batch(batch_info, dst_curs)
 
     def get_part_name(self):
         # if custom part name template given, use it
@@ -918,12 +916,17 @@ class Dispatcher(BaseHandler):
         if res:
             self.log.info("Dropped tables: %s", ", ".join(res))
 
+    def get_copy_condition(self, src_curs, dst_curs):
+        """ Prepare where condition for copy and replay filtering.
+        """
+        return super(Dispatcher, self).get_copy_condition(src_curs, dst_curs)
+
     def real_copy(self, tablename, src_curs, dst_curs, column_list):
         """do actual table copy and return tuple with number of bytes and rows
         copied
         """
         _src_cols = _dst_cols = column_list
-        condition = ''
+        condition = self.get_copy_condition (src_curs, dst_curs)
 
         if self.conf.skip_fields:
             _src_cols = [col for col in column_list
@@ -940,7 +943,8 @@ class Dispatcher(BaseHandler):
         else:
             _write_hook = None
 
-        return skytools.full_copy(tablename, src_curs, dst_curs, _src_cols, condition,
+        return skytools.full_copy(tablename, src_curs, dst_curs,
+                                  _src_cols, condition,
                                   dst_tablename = self.dest_table,
                                   dst_column_list = _dst_cols,
                                   write_hook = _write_hook)
diff --git a/python/londiste/handlers/part.py b/python/londiste/handlers/part.py
index 247256e4..1cbd99bc 100644
--- a/python/londiste/handlers/part.py
+++ b/python/londiste/handlers/part.py
@@ -2,6 +2,7 @@
 
 Parameters:
   key=COLUMN: column name to use for hashing
+  hash_key=COLUMN: column name to use for hashing (overrides 'key' parameter)
   hashfunc=NAME: function to use for hashing (default: partconf.get_hash_raw)
   hashexpr=EXPR: full expression to use for hashing (deprecated)
   encoding=ENC: validate and fix incoming data (only utf8 supported atm)
@@ -38,17 +39,20 @@ class PartHandler(TableHandler):
         self.local_part = None  # part number of local node
 
         # primary key columns
-        self.key = args.get('key')
-        if self.key is None:
-            raise Exception('Specify key field as key argument')
+        self.hash_key = args.get('hash_key', args.get('key'))
+        self._validate_hash_key()
 
         # hash function & full expression
         hashfunc = args.get('hashfunc', self.DEFAULT_HASHFUNC)
         self.hashexpr = self.DEFAULT_HASHEXPR % (
                 skytools.quote_fqident(hashfunc),
-                skytools.quote_ident(self.key))
+                skytools.quote_ident(self.hash_key))
         self.hashexpr = args.get('hashexpr', self.hashexpr)
 
+    def _validate_hash_key(self):
+        if self.hash_key is None:
+            raise Exception('Specify key field as key argument')
+
     def reset(self):
         """Forget config info."""
         self.max_part = None
@@ -57,31 +61,36 @@ class PartHandler(TableHandler):
 
     def add(self, trigger_arg_list):
         """Let trigger put hash into extra3"""
-
         arg = "ev_extra3='hash='||%s" % self.hashexpr
         trigger_arg_list.append(arg)
         TableHandler.add(self, trigger_arg_list)
 
     def prepare_batch(self, batch_info, dst_curs):
         """Called on first event for this table in current batch."""
-        if not self.max_part:
-            self.load_part_info(dst_curs)
+        if self.hash_key is not None:
+            if not self.max_part:
+                self.load_part_info(dst_curs)
         TableHandler.prepare_batch(self, batch_info, dst_curs)
 
     def process_event(self, ev, sql_queue_func, arg):
         """Filter event by hash in extra3, apply only local part."""
-        if ev.extra3:
+        if ev.extra3 and self.hash_key is not None:
             meta = skytools.db_urldecode(ev.extra3)
             self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d',
                            int(meta['hash']), self.max_part, self.local_part)
             if (int(meta['hash']) & self.max_part) != self.local_part:
                 self.log.debug('part.process_event: not my event')
                 return
+        self._process_event(ev, sql_queue_func, arg)
+
+    def _process_event(self, ev, sql_queue_func, arg):
         self.log.debug('part.process_event: my event, processing')
         TableHandler.process_event(self, ev, sql_queue_func, arg)
 
     def get_copy_condition(self, src_curs, dst_curs):
         """Prepare the where condition for copy and replay filtering"""
+        if self.hash_key is None:
+            return TableHandler.get_copy_condition(self, src_curs, dst_curs)
         self.load_part_info(dst_curs)
         w = "(%s & %d) = %d" % (self.hashexpr, self.max_part, self.local_part)
         self.log.debug('part: copy_condition=%s', w)
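
For reference, a small illustrative snippet of the copy/replay filter that PartHandler builds once a hash key is configured. Only the "(expr & max_part) = local_part" shape comes from the diff above; the column name and part numbers below are invented values, not from this commit.

    # Illustrative only: shape of the sharding filter built by PartHandler.
    hashexpr   = "partconf.get_hash_raw(user_id)"   # default hashfunc applied to a hypothetical hash_key
    max_part   = 15                                  # loaded from partconf on this node
    local_part = 3

    condition = "(%s & %d) = %d" % (hashexpr, max_part, local_part)
    print(condition)    # (partconf.get_hash_raw(user_id) & 15) = 3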