summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Kreen2011-07-12 11:17:49 +0000
committerMarko Kreen2011-07-12 11:17:49 +0000
commit152b3de9c49364cb3454b0dfbe5d58da140a10b7 (patch)
tree3e4b2f04b38c19b6bac9679bbb936007f908f9ff
parent3c77b8a218c92cd2fb8126112f599a67f7f48e92 (diff)
sql/conflicthandler: experimental conflict handler.
skytools.plpy_applyrow: low-level helper module. conflicthandler/merge_on_time.sql: sample time-based conflict handler.
-rw-r--r--python/skytools/plpy_applyrow.py217
-rw-r--r--sql/conflicthandler/Makefile16
-rw-r--r--sql/conflicthandler/README5
-rw-r--r--sql/conflicthandler/expected/test_merge.out46
-rw-r--r--sql/conflicthandler/merge_on_time.sql28
-rw-r--r--sql/conflicthandler/sql/test_merge.sql25
6 files changed, 337 insertions, 0 deletions
diff --git a/python/skytools/plpy_applyrow.py b/python/skytools/plpy_applyrow.py
new file mode 100644
index 00000000..bd2ef924
--- /dev/null
+++ b/python/skytools/plpy_applyrow.py
@@ -0,0 +1,217 @@
+
+"""
+PLPY helper module for applying row events from pgq.logutriga().
+"""
+
+
+import plpy
+
+import pkgloader
+pkgloader.require('skytools', '3.0')
+import skytools
+
+## TODO: automatic fkey detection
+# find FK columns
+FK_SQL = """
+SELECT (SELECT array_agg( (SELECT attname::text FROM pg_attribute
+ WHERE attrelid = conrelid AND attnum = conkey[i]))
+ FROM generate_series(1, array_upper(conkey, 1)) i) AS kcols,
+ (SELECT array_agg( (SELECT attname::text FROM pg_attribute
+ WHERE attrelid = confrelid AND attnum = confkey[i]))
+ FROM generate_series(1, array_upper(confkey, 1)) i) AS fcols,
+ confrelid::regclass::text AS ftable
+ FROM pg_constraint
+ WHERE conrelid = {tbl}::regclass AND contype='f'
+"""
+
class DataError(Exception):
    """Raised when an incoming event or row value cannot be accepted."""
+
def colfilter_full(rnew, rold):
    """Column filter that keeps every column of the new row.

    @param rnew: new row (dict of column -> value)
    @param rold: old row; accepted only to match the callback signature
    @return: rnew itself, unmodified
    """
    return rnew
+
def colfilter_changed(rnew, rold):
    """Column filter that keeps only columns whose value differs from the old row.

    @param rnew: new row (dict of column -> value)
    @param rold: old row (dict of column -> value)
    @return: new dict holding the columns of rnew that are absent from rold
        or whose value compares unequal to the one in rold
    """
    res = {}
    # Iterate over items(): iterating the dict itself yields only keys,
    # which cannot be unpacked into (key, value) pairs.
    for k, v in rnew.items():
        # A column missing from the old row counts as changed.
        # NOTE(review): values are compared with plain '!=' -- if the old
        # row carries typed values and the new row strings, every column
        # will look changed; confirm against callers.
        if k not in rold or rold[k] != v:
            res[k] = v
    return res
+
def canapply_dummy(rnew, rold):
    """Default apply-check callback: accept every new row.

    @param rnew: new row (ignored)
    @param rold: old row (ignored)
    @return: always True
    """
    return True
+
def canapply_tstamp_helper(rnew, rold, tscol):
    """Decide whether the new row is newer than the old one by a timestamp column.

    @param rnew: new row (dict of column -> value)
    @param rold: old row (dict of column -> value)
    @param tscol: name of the timestamp column present in both rows
    @return: True if the new timestamp compares greater than the old one
    @raise DataError: if either timestamp is missing/empty or does not
        start with a digit
    """
    tnew = rnew[tscol]
    told = rold[tscol]
    for ts in (tnew, told):
        # Reject None / empty values explicitly; indexing them would fail
        # with TypeError / IndexError instead of a DataError.
        if not ts or not ts[0].isdigit():
            raise DataError('invalid timestamp')
    # NOTE(review): this is a plain '>' on the values as given; for text
    # timestamps that means lexicographic order, which presumably relies
    # on both sides using the same ISO-style format -- confirm.
    return tnew > told
+
def applyrow(tblname, ev_type, new_row,
             backup_row = None,
             alt_pkey_cols = None,
             fkey_cols = None,
             fkey_ref_table = None,
             fkey_ref_cols = None,
             fn_canapply = canapply_dummy,
             fn_colfilter = colfilter_full):
    """Core logic. Actual decisions will be done in callback functions.

    - If fkey_ref_table is given and the referenced row does not exist,
      the event is not applied.
    - If a row matching alt_pkey exists but its pkey values differ from
      the event's, that row is deleted and the event is then handled as
      if no old row existed.
    - An insert hitting an existing row is turned into an update, and an
      update with no existing row is turned into an insert.

    @param tblname: table name, schema-qualified
    @param ev_type: [IUD]:pkey1,pkey2
    @param new_row: urlencoded row data, decoded with skytools.db_urldecode()
    @param backup_row: currently unused
    @param alt_pkey_cols: list of alternative key columns to consider
    @param fkey_cols: columns in this table that refer to other table
    @param fkey_ref_table: other table referenced here
    @param fkey_ref_cols: columns in other table that must match fkey_cols
    @param fn_canapply: callback function, gets new and old row, returns whether the row should be applied
    @param fn_colfilter: callback function, gets new and old row, returns dict of final columns to be applied
    @return: log string describing what was done; starts with "IGN:" when
        the event was ignored
    @raise DataError: on unsupported ev_type, missing pkey, or suspicious
        '}' characters in ev_type / column names
    @raise ValueError: if fkey_ref_table is given without matching
        fkey_cols / fkey_ref_cols lists
    """

    # First argument handed to skytools.plpy_exec(); always None here.
    # NOTE(review): presumably a plan-cache dict -- confirm in skytools.
    gd = None

    # parse ev_type
    tmp = ev_type.split(':', 1)
    if len(tmp) != 2 or tmp[0] not in ('I', 'U', 'D'):
        raise DataError('Unsupported ev_type: '+repr(ev_type))
    if not tmp[1]:
        raise DataError('No pkey in event')

    cmd = tmp[0]
    pkey_cols = tmp[1].split(',')
    qtblname = skytools.quote_fqident(tblname)

    # parse ev_data
    fields = skytools.db_urldecode(new_row)

    # Column names are embedded below as {name} placeholders in query
    # templates, so a '}' inside a name must never get through.
    if ev_type.find('}') >= 0:
        raise DataError('Really suspicious activity')
    if ",".join(fields.keys()).find('}') >= 0:
        raise DataError('Really suspicious activity 2')

    # generate pkey expressions
    tmp = ["%s = {%s}" % (skytools.quote_ident(k), k) for k in pkey_cols]
    pkey_expr = " and ".join(tmp)
    alt_pkey_expr = None
    if alt_pkey_cols:
        tmp = ["%s = {%s}" % (skytools.quote_ident(k), k) for k in alt_pkey_cols]
        alt_pkey_expr = " and ".join(tmp)

    log = "data ok"

    #
    # Row data seems fine, now apply it
    #

    if fkey_ref_table:
        # Fail with a clear message instead of a TypeError from zip(None, ...)
        # or a silently truncated column list.
        if not fkey_cols or not fkey_ref_cols:
            raise ValueError('fkey_ref_table requires fkey_cols and fkey_ref_cols')
        if len(fkey_cols) != len(fkey_ref_cols):
            raise ValueError('fkey_cols and fkey_ref_cols must have same length')
        tmp = ["%s = {%s}" % (skytools.quote_ident(rk), k)
               for k, rk in zip(fkey_cols, fkey_ref_cols)]
        fkey_expr = " and ".join(tmp)
        q = "select 1 from only %s where %s" % (
                skytools.quote_fqident(fkey_ref_table),
                fkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if not res:
            return "IGN: parent row does not exist"
        log += ", fkey ok"

    # fetch old row
    if alt_pkey_expr:
        q = "select * from only %s where %s for update" % (qtblname, alt_pkey_expr)
        res = skytools.plpy_exec(gd, q, fields)
        if res:
            oldrow = res[0]
            # if altpk matches, but pk not, then delete
            need_del = 0
            for k in pkey_cols:
                # fixme: proper type cmp?
                if fields[k] != str(oldrow[k]):
                    need_del = 1
                    break
            if need_del:
                log += ", altpk del"
                q = "delete from only %s where %s" % (qtblname, alt_pkey_expr)
                skytools.plpy_exec(gd, q, fields)
                # the conflicting row is gone, continue as "no old row"
                res = None
            else:
                log += ", altpk ok"
    else:
        # no altpk
        q = "select * from only %s where %s for update" % (qtblname, pkey_expr)
        res = skytools.plpy_exec(gd, q, fields)

    # got old row, with same pk and altpk
    if res:
        oldrow = res[0]
        log += ", old row"
        if not fn_canapply(fields, oldrow):
            # ignore the update
            return "IGN:" + log + ", current row more up-to-date"
        log += ", new row better"
        # row already exists: an insert must become an update
        if cmd == 'I':
            cmd = 'U'
    else:
        log += ", no old row"
        oldrow = None
        # nothing to update: an update must become an insert
        if cmd == 'U':
            cmd = 'I'

    # allow column changes
    if oldrow:
        fields2 = fn_colfilter(fields, oldrow)
        # pkey columns are always needed to build the statement
        for k in pkey_cols:
            if k not in fields2:
                fields2[k] = fields[k]
        fields = fields2

    # apply change
    if cmd == 'I':
        q = skytools.mk_insert_sql(fields, tblname, pkey_cols)
    elif cmd == 'U':
        q = skytools.mk_update_sql(fields, tblname, pkey_cols)
    elif cmd == 'D':
        q = skytools.mk_delete_sql(fields, tblname, pkey_cols)
    else:
        # cannot happen: cmd was validated above
        plpy.error('Huh')

    plpy.execute(q)

    return log
+
+
def ts_conflict_handler(gd, args):
    """Conflict handling based on timestamp column.

    @param gd: accepted from the caller but not used here
    @param args: sequence of (fn_conf, ev_type, ev_data, ev_extra1,
        ev_extra2, ...); fn_conf is an urlencoded config string that must
        contain 'timefield' and may contain comma-separated 'altpk',
        'fkey_cols', 'fkey_ref_cols' and a 'fkey_ref_table'.
        ev_extra1 is used as the table name.
    @return: result string of applyrow()
    """

    conf = skytools.db_urldecode(args[0])
    timefield = conf['timefield']
    ev_type = args[1]
    ev_data = args[2]
    ev_extra1 = args[3]
    ev_extra2 = args[4]

    def conf_list(key):
        # Comma-separated config value -> list of names; None when the
        # key is absent or has no value.
        val = conf.get(key)
        if not val:
            return None
        return val.split(',')

    def ts_canapply(rnew, rold):
        return canapply_tstamp_helper(rnew, rold, timefield)

    # applyrow() pairs fkey_cols with fkey_ref_cols element by element,
    # so both must be passed as lists, not as raw comma-joined strings.
    return applyrow(ev_extra1, ev_type, ev_data,
                    backup_row = ev_extra2,
                    alt_pkey_cols = conf_list('altpk'),
                    fkey_ref_table = conf.get('fkey_ref_table'),
                    fkey_ref_cols = conf_list('fkey_ref_cols'),
                    fkey_cols = conf_list('fkey_cols'),
                    fn_canapply = ts_canapply)
+
diff --git a/sql/conflicthandler/Makefile b/sql/conflicthandler/Makefile
new file mode 100644
index 00000000..79e387d8
--- /dev/null
+++ b/sql/conflicthandler/Makefile
@@ -0,0 +1,16 @@
+
# Regression test setup for the conflict handler, built on PGXS.

REGRESS = test_merge
REGRESS_OPTS = --load-language=plpgsql --load-language=plpythonu

#include ../../config.mak

PG_CONFIG = pg_config
PGXS = $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

# 'test' and 'ack' are commands, not files to be built.
.PHONY: test ack

# Run the regression suite; on failure show the diffs and fail.
# $(MAKE) is used instead of plain 'make' so the same make program is
# used for the sub-make.
test:
	$(MAKE) installcheck || { less regression.diffs ; exit 1; }

# Accept current results as the new expected output.
ack:
	cp results/* expected/
+
diff --git a/sql/conflicthandler/README b/sql/conflicthandler/README
new file mode 100644
index 00000000..7ad19cda
--- /dev/null
+++ b/sql/conflicthandler/README
@@ -0,0 +1,5 @@
+
+Merge function to be used with londiste 'applyfn' handler.
+
+londiste3 add-table foo --handler="applyfn(func_name=merge_on_time,func_conf=timefield%3dmodified_date)"
+
diff --git a/sql/conflicthandler/expected/test_merge.out b/sql/conflicthandler/expected/test_merge.out
new file mode 100644
index 00000000..28861bb2
--- /dev/null
+++ b/sql/conflicthandler/expected/test_merge.out
@@ -0,0 +1,46 @@
+\set ECHO none
+set DateStyle='ISO';
+create table mergetest (
+ intcol int4,
+ txtcol text,
+ timecol timestamp
+);
+-- insert to empty
+select merge_on_time('timefield=timecol', 'I:intcol', 'intcol=5&txtcol=v1&timecol=2010-09-09+12:12', 'mergetest', null, null, null);
+ merge_on_time
+---------------------
+ data ok, no old row
+(1 row)
+
+select * from mergetest;
+ intcol | txtcol | timecol
+--------+--------+---------------------
+ 5 | v1 | 2010-09-09 12:12:00
+(1 row)
+
+-- insert to with time earlier
+select merge_on_time('timefield=timecol', 'I:intcol', 'intcol=5&txtcol=v2&timecol=2010-09-08+12:12', 'mergetest', null, null, null);
+ merge_on_time
+---------------------------------------------------
+ IGN:data ok, old row, current row more up-to-date
+(1 row)
+
+select * from mergetest;
+ intcol | txtcol | timecol
+--------+--------+---------------------
+ 5 | v1 | 2010-09-09 12:12:00
+(1 row)
+
+-- insert to with time later
+select merge_on_time('timefield=timecol', 'I:intcol', 'intcol=5&txtcol=v3&timecol=2010-09-10+12:12', 'mergetest', null, null, null);
+ merge_on_time
+----------------------------------
+ data ok, old row, new row better
+(1 row)
+
+select * from mergetest;
+ intcol | txtcol | timecol
+--------+--------+---------------------
+ 5 | v3 | 2010-09-10 12:12:00
+(1 row)
+
diff --git a/sql/conflicthandler/merge_on_time.sql b/sql/conflicthandler/merge_on_time.sql
new file mode 100644
index 00000000..e495aeda
--- /dev/null
+++ b/sql/conflicthandler/merge_on_time.sql
@@ -0,0 +1,28 @@
+
create or replace function merge_on_time(
    fn_conf text,
    ev_type text,
    ev_data text,
    ev_extra1 text,
    ev_extra2 text,
    ev_extra3 text,
    ev_extra4 text)
returns text as $$
# Callback for the londiste applyfn handler: hands all arguments to
# skytools.plpy_applyrow.ts_conflict_handler and returns its result.
try:
    import pkgloader
    pkgloader.require('skytools', '3.0')
    from skytools.plpy_applyrow import ts_conflict_handler

    return ts_conflict_handler(SD, args)
except:
    # Emit the traceback line by line as warnings, then let the
    # original exception propagate.
    import traceback
    tb_lines = traceback.format_exc().split('\n')
    for ln in filter(None, tb_lines):
        plpy.warning(ln)
    raise

$$ language plpythonu;
+
+-- select merge_on_time('timefield=modified_date', 'I:id_ccard', 'key_user=foo&id_ccard=1&modified_date=2005-01-01', 'ccdb.ccard', '', '', '');
+
diff --git a/sql/conflicthandler/sql/test_merge.sql b/sql/conflicthandler/sql/test_merge.sql
new file mode 100644
index 00000000..15103df6
--- /dev/null
+++ b/sql/conflicthandler/sql/test_merge.sql
@@ -0,0 +1,25 @@
+
+\set ECHO none
+\i merge_on_time.sql
+\set ECHO all
+
+set DateStyle='ISO';
+
+create table mergetest (
+ intcol int4,
+ txtcol text,
+ timecol timestamp
+);
+
+-- insert to empty
+select merge_on_time('timefield=timecol', 'I:intcol', 'intcol=5&txtcol=v1&timecol=2010-09-09+12:12', 'mergetest', null, null, null);
+select * from mergetest;
+
+-- insert to with time earlier
+select merge_on_time('timefield=timecol', 'I:intcol', 'intcol=5&txtcol=v2&timecol=2010-09-08+12:12', 'mergetest', null, null, null);
+select * from mergetest;
+
+-- insert to with time later
+select merge_on_time('timefield=timecol', 'I:intcol', 'intcol=5&txtcol=v3&timecol=2010-09-10+12:12', 'mergetest', null, null, null);
+select * from mergetest;
+