summary refs log tree commit diff
diff options
context:
space:
mode:
author martinko 2013-05-15 12:32:06 +0000
committer martinko 2013-05-15 12:32:06 +0000
commit dd1901c533b8a07ab9bde03374ac59ed185e7113 (patch)
tree 82376def65cdd21e4ea90aa818395526a7f6a497
parent c3d00f0ba488ffc3aab36842dfd6bcde3e1e72e9 (diff)
parent ab9e8f02531c1ee7400858af14b33bfda9b069d5 (diff)
Merge branch 'feature/dispatch_handler_with_sharding' into develop
-rw-r--r-- python/londiste/handler.py 16
-rw-r--r-- python/londiste/handlers/dispatch.py 44
-rw-r--r-- python/londiste/handlers/part.py 25
3 files changed, 49 insertions, 36 deletions
diff --git a/python/londiste/handler.py b/python/londiste/handler.py
index 72d98b43..287ad546 100644
--- a/python/londiste/handler.py
+++ b/python/londiste/handler.py
@@ -178,9 +178,9 @@ class TableHandler(BaseHandler):
enc = args.get('encoding')
if enc:
- self.enc = EncodingValidator(self.log, enc)
+ self.encoding_validator = EncodingValidator(self.log, enc)
else:
- self.enc = None
+ self.encoding_validator = None
def process_event(self, ev, sql_queue_func, arg):
row = self.parse_row_data(ev)
@@ -212,13 +212,13 @@ class TableHandler(BaseHandler):
if len(ev.type) == 1:
if not self.allow_sql_event:
raise Exception('SQL events not supported by this handler')
- if self.enc:
- return self.enc.validate_string(ev.data, self.table_name)
+ if self.encoding_validator:
+ return self.encoding_validator.validate_string(ev.data, self.table_name)
return ev.data
else:
row = skytools.db_urldecode(ev.data)
- if self.enc:
- return self.enc.validate_dict(row, self.table_name)
+ if self.encoding_validator:
+ return self.encoding_validator.validate_dict(row, self.table_name)
return row
def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
@@ -226,9 +226,9 @@ class TableHandler(BaseHandler):
copied
"""
- if self.enc:
+ if self.encoding_validator:
def _write_hook(obj, data):
- return self.enc.validate_copy(data, column_list, src_tablename)
+ return self.encoding_validator.validate_copy(data, column_list, src_tablename)
else:
_write_hook = None
condition = self.get_copy_condition(src_curs, dst_curs)
diff --git a/python/londiste/handlers/dispatch.py b/python/londiste/handlers/dispatch.py
index 758034c7..d1af2f02 100644
--- a/python/londiste/handlers/dispatch.py
+++ b/python/londiste/handlers/dispatch.py
@@ -153,17 +153,20 @@ creating or copying initial data to destination table. --expect-sync and
--skip-truncate should be used and --create switch is to be avoided.
"""
-import sys
-import datetime
import codecs
+import datetime
import re
+import sys
+from functools import partial
+
import skytools
-from londiste.handler import BaseHandler, EncodingValidator
from skytools import quote_ident, quote_fqident, UsageError
from skytools.dbstruct import *
from skytools.utf8 import safe_utf8_decode
-from functools import partial
+
+from londiste.handler import EncodingValidator
from londiste.handlers import handler_args, update
+from londiste.handlers.part import PartHandler
__all__ = ['Dispatcher']
@@ -618,7 +621,7 @@ ROW_HANDLERS = {'plain': RowHandler,
#------------------------------------------------------------------------------
-class Dispatcher(BaseHandler):
+class Dispatcher (PartHandler):
"""Partitioned loader.
Splits events into partitions, if requested.
Then applies them without further processing.
@@ -630,7 +633,7 @@ class Dispatcher(BaseHandler):
# compat for dest-table
dest_table = args.get('table', dest_table)
- BaseHandler.__init__(self, table_name, args, dest_table)
+ super(Dispatcher, self).__init__(table_name, args, dest_table)
# show args
self.log.debug("dispatch.init: table_name=%r, args=%r", table_name, args)
@@ -641,11 +644,6 @@ class Dispatcher(BaseHandler):
self.conf = self.get_config()
hdlr_cls = ROW_HANDLERS[self.conf.row_mode]
self.row_handler = hdlr_cls(self.log)
- if self.conf.encoding:
- self.encoding_validator = EncodingValidator(self.log,
- self.conf.encoding)
- else:
- self.encoding_validator = None
def _parse_args_from_doc (self):
doc = __doc__
@@ -717,8 +715,6 @@ class Dispatcher(BaseHandler):
conf.field_map[tmp[0]] = tmp[0]
else:
conf.field_map[tmp[0]] = tmp[1]
- # encoding validator
- conf.encoding = self.args.get('encoding')
return conf
def get_arg(self, name, value_list, default = None):
@@ -728,17 +724,20 @@ class Dispatcher(BaseHandler):
raise Exception('Bad argument %s value %r' % (name, val))
return val
+ def _validate_hash_key(self):
+ pass # no need for hash key when not sharding
+
def reset(self):
"""Called before starting to process a batch.
Should clean any pending data."""
- BaseHandler.reset(self)
+ super(Dispatcher, self).reset()
def prepare_batch(self, batch_info, dst_curs):
"""Called on first event for this table in current batch."""
if self.conf.table_mode != 'ignore':
self.batch_info = batch_info
self.dst_curs = dst_curs
- #BaseHandler.prepare_batch(self, batch_info, dst_curs)
+ super(Dispatcher, self).prepare_batch(batch_info, dst_curs)
def filter_data(self, data):
"""Process with fields skip and map"""
@@ -763,7 +762,7 @@ class Dispatcher(BaseHandler):
pkeys = [fmap[p] for p in pkeys if p in fmap]
return pkeys
- def process_event(self, ev, sql_queue_func, arg):
+ def _process_event(self, ev, sql_queue_func, arg):
"""Process a event.
Event should be added to sql_queue or executed directly.
"""
@@ -798,13 +797,12 @@ class Dispatcher(BaseHandler):
self.row_handler.add_table(dst, LOADERS[self.conf.load_mode],
self.pkeys, self.conf)
self.row_handler.process(dst, op, data)
- #BaseHandler.process_event(self, ev, sql_queue_func, arg)
def finish_batch(self, batch_info, dst_curs):
"""Called when batch finishes."""
if self.conf.table_mode != 'ignore':
self.row_handler.flush(dst_curs)
- #BaseHandler.finish_batch(self, batch_info, dst_curs)
+ #super(Dispatcher, self).finish_batch(batch_info, dst_curs)
def get_part_name(self):
# if custom part name template given, use it
@@ -918,12 +916,17 @@ class Dispatcher(BaseHandler):
if res:
self.log.info("Dropped tables: %s", ", ".join(res))
+ def get_copy_condition(self, src_curs, dst_curs):
+ """ Prepare where condition for copy and replay filtering.
+ """
+ return super(Dispatcher, self).get_copy_condition(src_curs, dst_curs)
+
def real_copy(self, tablename, src_curs, dst_curs, column_list):
"""do actual table copy and return tuple with number of bytes and rows
copied
"""
_src_cols = _dst_cols = column_list
- condition = ''
+ condition = self.get_copy_condition (src_curs, dst_curs)
if self.conf.skip_fields:
_src_cols = [col for col in column_list
@@ -940,7 +943,8 @@ class Dispatcher(BaseHandler):
else:
_write_hook = None
- return skytools.full_copy(tablename, src_curs, dst_curs, _src_cols, condition,
+ return skytools.full_copy(tablename, src_curs, dst_curs,
+ _src_cols, condition,
dst_tablename = self.dest_table,
dst_column_list = _dst_cols,
write_hook = _write_hook)
diff --git a/python/londiste/handlers/part.py b/python/londiste/handlers/part.py
index 247256e4..1cbd99bc 100644
--- a/python/londiste/handlers/part.py
+++ b/python/londiste/handlers/part.py
@@ -2,6 +2,7 @@
Parameters:
key=COLUMN: column name to use for hashing
+ hash_key=COLUMN: column name to use for hashing (overrides 'key' parameter)
hashfunc=NAME: function to use for hashing (default: partconf.get_hash_raw)
hashexpr=EXPR: full expression to use for hashing (deprecated)
encoding=ENC: validate and fix incoming data (only utf8 supported atm)
@@ -38,17 +39,20 @@ class PartHandler(TableHandler):
self.local_part = None # part number of local node
# primary key columns
- self.key = args.get('key')
- if self.key is None:
- raise Exception('Specify key field as key argument')
+ self.hash_key = args.get('hash_key', args.get('key'))
+ self._validate_hash_key()
# hash function & full expression
hashfunc = args.get('hashfunc', self.DEFAULT_HASHFUNC)
self.hashexpr = self.DEFAULT_HASHEXPR % (
skytools.quote_fqident(hashfunc),
- skytools.quote_ident(self.key))
+ skytools.quote_ident(self.hash_key))
self.hashexpr = args.get('hashexpr', self.hashexpr)
+ def _validate_hash_key(self):
+ if self.hash_key is None:
+ raise Exception('Specify key field as key argument')
+
def reset(self):
"""Forget config info."""
self.max_part = None
@@ -57,31 +61,36 @@ class PartHandler(TableHandler):
def add(self, trigger_arg_list):
"""Let trigger put hash into extra3"""
-
arg = "ev_extra3='hash='||%s" % self.hashexpr
trigger_arg_list.append(arg)
TableHandler.add(self, trigger_arg_list)
def prepare_batch(self, batch_info, dst_curs):
"""Called on first event for this table in current batch."""
- if not self.max_part:
- self.load_part_info(dst_curs)
+ if self.hash_key is not None:
+ if not self.max_part:
+ self.load_part_info(dst_curs)
TableHandler.prepare_batch(self, batch_info, dst_curs)
def process_event(self, ev, sql_queue_func, arg):
"""Filter event by hash in extra3, apply only local part."""
- if ev.extra3:
+ if ev.extra3 and self.hash_key is not None:
meta = skytools.db_urldecode(ev.extra3)
self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d',
int(meta['hash']), self.max_part, self.local_part)
if (int(meta['hash']) & self.max_part) != self.local_part:
self.log.debug('part.process_event: not my event')
return
+ self._process_event(ev, sql_queue_func, arg)
+
+ def _process_event(self, ev, sql_queue_func, arg):
self.log.debug('part.process_event: my event, processing')
TableHandler.process_event(self, ev, sql_queue_func, arg)
def get_copy_condition(self, src_curs, dst_curs):
"""Prepare the where condition for copy and replay filtering"""
+ if self.hash_key is None:
+ return TableHandler.get_copy_condition(self, src_curs, dst_curs)
self.load_part_info(dst_curs)
w = "(%s & %d) = %d" % (self.hashexpr, self.max_part, self.local_part)
self.log.debug('part: copy_condition=%s', w)