diff options
author | Marko Kreen | 2012-07-05 15:47:47 +0000 |
---|---|---|
committer | Marko Kreen | 2012-07-05 18:19:54 +0000 |
commit | e8a70f0d2fb424b9e9fda06f11695b40a173a0be (patch) | |
tree | ce6cbdd541dbc3fc54e6e6e6669975076d711739 | |
parent | e85a1184a070779ac21528662c7cce682cbff308 (diff) |
londiste execute: support meta attributes in sql
This allows use of EXECUTE in situations where a table
can be missing on a few nodes.
Currently, this needed to be solved with temp update functions
that detect the local situation, or by avoiding Londiste completely.
Both of these are non-optimal solutions.
-rw-r--r-- | doc/londiste3.txt | 63 | ||||
-rw-r--r-- | python/londiste/exec_attrs.py | 367 | ||||
-rw-r--r-- | python/londiste/playback.py | 31 | ||||
-rw-r--r-- | python/londiste/setup.py | 28 | ||||
-rw-r--r-- | sql/londiste/functions/londiste.execute_finish.sql | 6 | ||||
-rw-r--r-- | sql/londiste/functions/londiste.execute_start.sql | 50 | ||||
-rw-r--r-- | sql/londiste/functions/londiste.upgrade_schema.sql | 9 | ||||
-rw-r--r-- | sql/londiste/structure/tables.sql | 1 |
8 files changed, 540 insertions, 15 deletions
diff --git a/doc/londiste3.txt b/doc/londiste3.txt index 92ca7a63..89351651 100644 --- a/doc/londiste3.txt +++ b/doc/londiste3.txt @@ -327,7 +327,68 @@ Repair data on subscriber. === execute [filepath] === -Execute SQL files on each nodes of the set. +Execute SQL files on each node of the cascaded queue. The SQL file is +executed locally in single transaction and inserted into queue in same +transaction. Thus guaranteeing that is will be replayed in subscriber +databases at correct position. + +The filename is stored in `londiste.applied_execute` table, and checked +before execution. If same filename already exists, the SQL execution +is skipped. + +==== SQL meta-data attributes ==== + +SQL file can contain attributes that limit where the SQL is executed: + + --*-- + --*-- Local-Table: mytable, othertable, + --*-- thirdtable + --*-- Local-Sequence: thisseq + --*-- + +The magic comments are searched only in file start, before any actual SQL +statement is seen. Empty lines and lines with regular SQL comments +are ignored. + +Supported keys: + + Local-Table:: + Table must be added to local node with `add-table`. + + Local-Sequence:: + Sequence must be added to local node with `add-seq`. + + Local-Destination:: + Table must be added to local node and actual destination table must exists. + This is for cases where table is added to some nodes with handler + that does not need actual table to exist. + + Need-Table:: + Physical table must exist in database. It does not matter if it is + replicated or not. + + Need-Sequence:: + Sequence must exist in database. + + Need-Function:: + Database function must exists. The function name is in form `function_name(nargs)`. + If the `(nargs)` portion is missed then nargs is taken as 0. + + Need-View:: + A view must exist in database. + + Need-Schema:: + Schema mist exist in database. + +Londiste supports table renaming, where table is attached to queue with one name +but events are applied to local table with different name. 
To make this +work with EXECUTE, the Local-Toble and Local-Destination support tag replacement, +where queue's table name that is mentioned in attribute is replaced +with actual table name in local database: + + --*-- Local-Table: mytable + ALTER TABLE @mytable@ ...; + === show-handlers ['handler'] === diff --git a/python/londiste/exec_attrs.py b/python/londiste/exec_attrs.py new file mode 100644 index 00000000..7e35723c --- /dev/null +++ b/python/londiste/exec_attrs.py @@ -0,0 +1,367 @@ +"""Custom parser for EXECUTE attributes. + +The values are parsed from SQL file given to EXECUTE. + +Format rules: + * Only lines starting with meta-comment prefix will be parsed: --*-- + * Empty or regular SQL comment lines are ignored. + * Parsing stops on first SQL statement. + * Meta-line format: "--*-- Key: value1, value2" + * If line ends with ',' then next line is taken as continuation. + +Supported keys: + * Local-Table: + * Local-Sequence: + * Local-Destination: + + * Need-Table + * Need-Sequence + * Need-Function + * Need-Schema + * Need-View + +Sample file:: + --*-- Local-Sequence: myseq + --*-- + --*-- Local-Table: table1, + --*-- table2, table3 + --*-- + +Tests: + +>>> a = ExecAttrs() +>>> a.add_value("Local-Table", "mytable") +>>> a.add_value("Local-Sequence", "seq1") +>>> a.add_value("Local-Sequence", "seq2") +>>> a.to_urlenc() +'local-table=mytable&local-sequence=seq1%2Cseq2' +>>> a.add_value("Local-Destination", "mytable-longname-more1") +>>> a.add_value("Local-Destination", "mytable-longname-more2") +>>> a.add_value("Local-Destination", "mytable-longname-more3") +>>> a.add_value("Local-Destination", "mytable-longname-more4") +>>> a.add_value("Local-Destination", "mytable-longname-more5") +>>> a.add_value("Local-Destination", "mytable-longname-more6") +>>> a.add_value("Local-Destination", "mytable-longname-more7") +>>> print a.to_sql() +--*-- Local-Table: mytable +--*-- Local-Sequence: seq1, seq2 +--*-- Local-Destination: mytable-longname-more1, 
mytable-longname-more2, +--*-- mytable-longname-more3, mytable-longname-more4, mytable-longname-more5, +--*-- mytable-longname-more6, mytable-longname-more7 +>>> a = ExecAttrs(sql = ''' +... +... -- +... +... --*-- Local-Table: foo , +... -- +... --*-- bar , +... --*-- +... --*-- zoo +... --*-- +... --*-- Local-Sequence: goo +... --*-- +... -- +... +... create fooza; +... ''') +>>> print a.to_sql() +--*-- Local-Table: foo, bar, zoo +--*-- Local-Sequence: goo +>>> seqs = {'public.goo': 'public.goo'} +>>> tables = {} +>>> tables['public.foo'] = 'public.foo' +>>> tables['public.bar'] = 'other.Bar' +>>> tables['public.zoo'] = 'Other.Foo' +>>> a.need_execute(None, tables, seqs) +True +>>> a.need_execute(None, [], []) +False +>>> sql = '''alter table @foo@; +... alter table @bar@; +... alter table @zoo@;''' +>>> print a.process_sql(sql, tables, seqs) +alter table public.foo; +alter table other."Bar"; +alter table "Other"."Foo"; +""" + +import skytools + +META_PREFIX = "--*--" + +class Matcher: + nice_name = None + def match(self, objname, curs, tables, seqs): + pass + def get_key(self): + return self.nice_name.lower() + def local_rename(self): + return False + +class LocalTable(Matcher): + nice_name = "Local-Table" + def match(self, objname, curs, tables, seqs): + return objname in tables + def local_rename(self): + return True + +class LocalSequence(Matcher): + nice_name = "Local-Sequence" + def match(self, objname, curs, tables, seqs): + return objname in seqs + def local_rename(self): + return True + +class LocalDestination(Matcher): + nice_name = "Local-Destination" + def match(self, objname, curs, tables, seqs): + if objname not in tables: + return False + dest_name = tables[objname] + return skytools.exists_table(curs, dest_name) + def local_rename(self): + return True + +class NeedTable(Matcher): + nice_name = "Need-Table" + def match(self, objname, curs, tables, seqs): + return skytools.exists_table(curs, objname) + +class NeedSequence(Matcher): + nice_name = 
"Need-Sequence" + def match(self, objname, curs, tables, seqs): + return skytools.exists_sequence(curs, objname) + +class NeedSchema(Matcher): + nice_name = "Need-Schema" + def match(self, objname, curs, tables, seqs): + return skytools.exists_schema(curs, objname) + +class NeedFunction(Matcher): + nice_name = "Need-Function" + def match(self, objname, curs, tables, seqs): + nargs = 0 + pos1 = objname.find('(') + if pos1 > 0: + pos2 = objname.find(')') + if pos2 > 0: + s = objname[pos1+1 : pos2] + objname = objname[:pos1] + nargs = int(s) + return skytools.exists_function(curs, objname, nargs) + +class NeedView(Matcher): + nice_name = "Need-View" + def match(self, objname, curs, tables, seqs): + return skytools.exists_view(curs, objname) + +META_SPLITLINE = 70 + +# list of matches, in order they need to be probed +META_MATCHERS = [ + LocalTable(), LocalSequence(), LocalDestination(), + NeedTable(), NeedSequence(), NeedFunction(), + NeedSchema(), NeedView() +] + +# key to nice key +META_KEYS = {} +for m in META_MATCHERS: + k = m.nice_name.lower() + META_KEYS[k] = m + +class ExecAttrsException(skytools.UsageError): + """Some parsing problem.""" + +class ExecAttrs: + """Container and parser for EXECUTE attributes.""" + def __init__(self, sql=None, ustr=None): + """Create container and parse either sql or urlenc string.""" + + self.attrs = {} + if sql and ustr: + raise Exception("Both sql and ustr set.") + if ustr: + self.parse_urlenc(ustr) + elif sql: + self.parse_sql(sql) + + def add_value(self, k, v): + """Add single value to key.""" + + xk = k.lower().strip() + if xk not in META_KEYS: + raise ExecAttrsException("Invalid key: %s" % k) + if xk not in self.attrs: + self.attrs[xk] = [] + + xv = v.strip() + self.attrs[xk].append(xv) + + def to_urlenc(self): + """Convert container to urlencoded string.""" + sdict = {} + for k, v in self.attrs.items(): + sdict[k] = ','.join(v) + return skytools.db_urlencode(sdict) + + def parse_urlenc(self, ustr): + """Parse urlencoded 
string adding values to current container.""" + sdict = skytools.db_urldecode(ustr) + for k, v in sdict: + for v1 in v.split(','): + self.add_value(k, v1) + + def to_sql(self): + """Convert container to SQL meta-comments.""" + lines = [] + for m in META_MATCHERS: + k = m.get_key() + if k not in self.attrs: + continue + vlist = self.attrs[k] + ln = "%s %s: " % (META_PREFIX, m.nice_name) + start = 0 + for nr, v in enumerate(vlist): + if nr > start: + ln = ln + ", " + v + else: + ln = ln + v + + if len(ln) >= META_SPLITLINE and nr < len(vlist) - 1: + ln += ',' + lines.append(ln) + ln = META_PREFIX + " " + start = nr + 1 + lines.append(ln) + return '\n'.join(lines) + + def parse_sql(self, sql): + """Parse SQL meta-comments.""" + + cur_key = None + cur_continued = False + lineno = 1 + for nr, ln in enumerate(sql.splitlines()): + lineno = nr+1 + + # skip empty lines + ln = ln.strip() + if not ln: + continue + + # stop at non-comment + if ln[:2] != '--': + break + + # parse only meta-comments + if ln[:len(META_PREFIX)] != META_PREFIX: + continue + + # cut prefix, skip empty comments + ln = ln[len(META_PREFIX):].strip() + if not ln: + continue + + # continuation of previous key + if cur_continued: + # collect values + for v in ln.split(','): + v = v.strip() + if not v: + continue + self.add_value(cur_key, v) + + # does this key continue? 
+ if ln[-1] != ',': + cur_key = None + cur_continued = False + + # go to next line + continue + + # parse key + pos = ln.find(':') + if pos < 0: + continue + k = ln[:pos].strip() + + # collect values + for v in ln[pos+1:].split(','): + v = v.strip() + if not v: + continue + self.add_value(k, v) + + # check if current key values will continue + if ln[-1] == ',': + cur_key = k + cur_continued = True + else: + cur_key = None + cur_continued = False + + def need_execute(self, curs, local_tables, local_seqs): + # if no attrs, always execute + if not self.attrs: + return True + + matched = 0 + missed = 0 + good_list = [] + miss_list = [] + for m in META_MATCHERS: + k = m.get_key() + if k not in self.attrs: + continue + for v in self.attrs[k]: + fqname = skytools.fq_name(v) + if m.match(fqname, curs, local_tables, local_seqs): + matched += 1 + good_list.append(v) + else: + missed += 1 + miss_list.append(v) + # should be drop out early? + if matched > 0 and missed == 0: + return True + elif missed > 0 and matched == 0: + return False + elif missed == 0 and matched == 0: + # should not happen, but lets restore old behaviour? 
+ return True + else: + raise Exception("SQL only partially matches local setup: matches=%r misses=%r" % (good_list, miss_list)) + + def get_attr(self, k): + k = k.lower().strip() + if k not in META_KEYS: + raise Exception("Bug: invalid key requested: " + k) + if k not in self.attrs: + return [] + return self.attrs[k] + + def process_sql(self, sql, local_tables, local_seqs): + """Replace replacement tags in sql with actual local names.""" + for k, vlist in self.attrs.items(): + m = META_KEYS[k] + if not m.local_rename(): + continue + for v in vlist: + repname = '@%s@' % v + fqname = skytools.fq_name(v) + if fqname in local_tables: + localname = local_tables[fqname] + elif fqname in local_seqs: + localname = local_seqs[fqname] + else: + # should not happen + raise Exception("bug: lost table: "+v) + qdest = skytools.quote_fqident(localname) + sql = sql.replace(repname, qdest) + return sql + +if __name__ == "__main__": + import doctest + doctest.testmod() + diff --git a/python/londiste/playback.py b/python/londiste/playback.py index a8c2a51a..6742f02d 100644 --- a/python/londiste/playback.py +++ b/python/londiste/playback.py @@ -8,6 +8,7 @@ import skytools from pgq.cascade.worker import CascadedWorker from londiste.handler import * +from londiste.exec_attrs import ExecAttrs __all__ = ['Replicator', 'TableState', 'TABLE_MISSING', 'TABLE_IN_COPY', 'TABLE_CATCHING_UP', @@ -653,20 +654,42 @@ class Replicator(CascadedWorker): # parse event fname = ev.extra1 + s_attrs = ev.extra2 + exec_attrs = ExecAttrs(urlenc = s_attrs) sql = ev.data # fixme: curs? 
pgver = dst_curs.connection.server_version if pgver >= 80300: dst_curs.execute("set local session_replication_role = 'local'") - q = "select * from londiste.execute_start(%s, %s, %s, false)" - res = self.exec_cmd(dst_curs, q, [self.queue_name, fname, sql], commit = False) + + seq_map = {} + q = "select seq_name, local from londiste.get_seq_list(%s) where local" + dst_curs.execute(q, [self.queue_name]) + for row in dst_curs.fetchall(): + seq_map[row['seq_name']] = row['seq_name'] + + tbl_map = {} + for tbl, t in self.table_map.items(): + if not t.local: + continue + tbl_map[t.name] = t.dest_table + + q = "select * from londiste.execute_start(%s, %s, %s, false, %s)" + res = self.exec_cmd(dst_curs, q, [self.queue_name, fname, sql, s_attrs], commit = False) ret = res[0]['ret_code'] if ret >= 300: self.log.warning("Skipping execution of '%s'", fname) return - for stmt in skytools.parse_statements(sql): - dst_curs.execute(stmt) + + if exec_attrs.need_execute(dst_curs, tbl_map, seq_map): + self.log.info("%s: executing sql") + xsql = exec_attrs.process_sql(sql, tbl_map, seq_map) + for stmt in skytools.parse_statements(xsql): + dst_curs.execute(stmt) + else: + self.log.info("%s: execution not needed on this node") + q = "select * from londiste.execute_finish(%s, %s)" self.exec_cmd(dst_curs, q, [self.queue_name, fname], commit = False) if pgver >= 80300: diff --git a/python/londiste/setup.py b/python/londiste/setup.py index b055fcb1..7f1a02e2 100644 --- a/python/londiste/setup.py +++ b/python/londiste/setup.py @@ -6,6 +6,7 @@ import sys, os, re, skytools from pgq.cascade.admin import CascadeAdmin +from londiste.exec_attrs import ExecAttrs import londiste.handler @@ -422,6 +423,19 @@ class LondisteSetup(CascadeAdmin): db = self.get_database('db') curs = db.cursor() + tables = self.fetch_set_tables(curs) + seqs = self.fetch_seqs(curs) + + # generate local maps + local_tables = {} + local_seqs = {} + for tbl in tables: + if tbl['local']: + local_tables[tbl['table_name']] = 
tbl['dest_table'] + for seq in seqs: + if seq['local']: + local_seqs[seq['seq_name']] = seq['seq_name'] + # set replica role for EXECUTE transaction if db.server_version >= 80300: curs.execute("set local session_replication_role = 'local'") @@ -429,14 +443,20 @@ class LondisteSetup(CascadeAdmin): for fn in files: fname = os.path.basename(fn) sql = open(fn, "r").read() - q = "select * from londiste.execute_start(%s, %s, %s, true)" - res = self.exec_cmd(db, q, [self.queue_name, fname, sql], commit = False) + attrs = ExecAttrs(sql = sql) + q = "select * from londiste.execute_start(%s, %s, %s, true, %s)" + res = self.exec_cmd(db, q, [self.queue_name, fname, sql, attrs.to_urlenc()], commit = False) ret = res[0]['ret_code'] if ret >= 300: self.log.warning("Skipping execution of '%s'" % fname) continue - for stmt in skytools.parse_statements(sql): - curs.execute(stmt) + if attrs.need_execute(curs, local_tables, local_seqs): + self.log.info("%s: executing sql", fname) + xsql = attrs.process_sql(sql, local_tables, local_seqs) + for stmt in skytools.parse_statements(xsql): + curs.execute(stmt) + else: + self.log.info("%s: This SQL does not need to run on this node.", fname) q = "select * from londiste.execute_finish(%s, %s)" self.exec_cmd(db, q, [self.queue_name, fname], commit = False) db.commit() diff --git a/sql/londiste/functions/londiste.execute_finish.sql b/sql/londiste/functions/londiste.execute_finish.sql index a7d510eb..884f61dc 100644 --- a/sql/londiste/functions/londiste.execute_finish.sql +++ b/sql/londiste/functions/londiste.execute_finish.sql @@ -20,10 +20,12 @@ as $$ declare is_root boolean; sql text; + attrs text; begin is_root := pgq_node.is_root_node(i_queue_name); - select execute_sql into sql + select execute_sql, execute_attrs + into sql, attrs from londiste.applied_execute where queue_name = i_queue_name and execute_file = i_file_name; @@ -34,7 +36,7 @@ begin end if; if is_root then - perform pgq.insert_event(i_queue_name, 'EXECUTE', sql, i_file_name, 
null, null, null); + perform pgq.insert_event(i_queue_name, 'EXECUTE', sql, i_file_name, attrs, null, null); end if; select 200, 'Execute finished: ' || i_file_name into ret_code, ret_note; diff --git a/sql/londiste/functions/londiste.execute_start.sql b/sql/londiste/functions/londiste.execute_start.sql index 9ce0071f..c6898195 100644 --- a/sql/londiste/functions/londiste.execute_start.sql +++ b/sql/londiste/functions/londiste.execute_start.sql @@ -3,11 +3,12 @@ create or replace function londiste.execute_start( in i_file_name text, in i_sql text, in i_expect_root boolean, + in i_attrs text, out ret_code int4, out ret_note text) as $$ -- ---------------------------------------------------------------------- --- Function: londiste.execute_start(4) +-- Function: londiste.execute_start(5) -- -- Start execution of DDL. Should be called at the -- start of the transaction that does the SQL execution. @@ -21,6 +22,9 @@ as $$ -- i_sql - Actual script (informative, not used here) -- i_expect_root - Is this on root? Setup tool sets this to avoid -- execution on branches. +-- i_attrs - urlencoded dict of extra attributes. +-- The value will be put into ev_extra2 +-- field of outgoing event. -- -- Returns: -- 200 - Proceed. 
@@ -50,11 +54,49 @@ begin end if; -- this also lock against potetial parallel execute - insert into londiste.applied_execute (queue_name, execute_file, execute_sql) - values (i_queue_name, i_file_name, i_sql); + insert into londiste.applied_execute (queue_name, execute_file, execute_sql, execute_attrs) + values (i_queue_name, i_file_name, i_sql, i_attrs); select 200, 'Executing: ' || i_file_name into ret_code, ret_note; return; end; -$$ language plpgsql strict; +$$ language plpgsql; + +create or replace function londiste.execute_start( + in i_queue_name text, + in i_file_name text, + in i_sql text, + in i_expect_root boolean, + out ret_code int4, + out ret_note text) +as $$ +-- ---------------------------------------------------------------------- +-- Function: londiste.execute_start(4) +-- +-- Start execution of DDL. Should be called at the +-- start of the transaction that does the SQL execution. +-- +-- Called-by: +-- Londiste setup tool on root, replay on branches/leafs. +-- +-- Parameters: +-- i_queue_name - cascaded queue name +-- i_file_name - Unique ID for SQL +-- i_sql - Actual script (informative, not used here) +-- i_expect_root - Is this on root? Setup tool sets this to avoid +-- execution on branches. +-- +-- Returns: +-- 200 - Proceed. +-- 301 - Already applied +-- 401 - Not root. 
+-- 404 - No such queue +-- ---------------------------------------------------------------------- +begin + select f.ret_code, f.ret_note + from londiste.execute_start(i_queue_name, i_file_name, i_sql, i_expect_root, null) f + into ret_code, ret_note; + return; +end; +$$ language plpgsql; diff --git a/sql/londiste/functions/londiste.upgrade_schema.sql b/sql/londiste/functions/londiste.upgrade_schema.sql index 9067ed7d..2767f00e 100644 --- a/sql/londiste/functions/londiste.upgrade_schema.sql +++ b/sql/londiste/functions/londiste.upgrade_schema.sql @@ -28,6 +28,15 @@ begin alter table londiste.table_info add column dest_table text; end if; + -- applied_execute.dest_table + perform 1 from information_schema.columns + where table_schema = 'londiste' + and table_name = 'applied_execute' + and column_name = 'execute_attrs'; + if not found then + alter table londiste.applied_execute add column execute_attrs text; + end if; + -- create roles perform 1 from pg_catalog.pg_roles where rolname = 'londiste_writer'; if not found then diff --git a/sql/londiste/structure/tables.sql b/sql/londiste/structure/tables.sql index 7d082dbb..35eda747 100644 --- a/sql/londiste/structure/tables.sql +++ b/sql/londiste/structure/tables.sql @@ -146,6 +146,7 @@ create table londiste.applied_execute ( execute_file text not null, execute_time timestamptz not null default now(), execute_sql text not null, + execute_attrs text, primary key (queue_name, execute_file), foreign key (queue_name) references pgq_node.node_info (queue_name) |