diff options
author | Tarvi Pillessaar | 2013-03-05 14:02:46 +0000 |
---|---|---|
committer | Tarvi Pillessaar | 2013-03-05 14:02:46 +0000 |
commit | 611de298f91776b4d6cdf439395d5bd59d2e3f7c (patch) | |
tree | bbc8b8d1206054cbeba081cb9b113a484befe05a | |
parent | 1190f5d3534e2a5f8ad765bf3c34870edc511af0 (diff) | |
parent | 8c5e6b151808b72b9c408ff81354af89b60acea9 (diff) |
Merge branch 'master' of https://github.com/markokr/skytools
38 files changed, 1042 insertions, 480 deletions
@@ -4,7 +4,7 @@ Skytools modules use some other code to run, so you need to install the following dependencies on the system where you want to build and run -skytool applications. +skytools based applications. === Build dependencies === @@ -64,10 +64,10 @@ The following configure options are available: --prefix:: Path prefix where to install skytools files (default: /usr/local) - + --with-python:: Name or full path of the Python executable to use. (default: python) - + --with-pgconfig:: Name or full path to pg_config (default: pg_config) @@ -75,6 +75,16 @@ The following configure options are available: If asciidoc should be used to re-generate manpages. Required when building from GIT as the generated pages are not in repo. +== Building without Makefile == + +It is possible to build and install Python part of Skytools only. +This is useful when no required build / devel tools (dependencies) +are available (typically on Windows platform). + +Commands to run vary across platforms but usually go like this: + + $ [sudo] python setup_pkgloader.py install + $ [sudo] python setup_skytools.py install [--sk3-subdir] == Documentation == @@ -104,7 +114,7 @@ that simple anymore. But simple nonetheless: install PgQ on the machine where you want to run it, provider being the straightforward choice. Then install londiste only on the box from where you want to launch the replication -process, a good default begin the subscriber box this time. +process, a good default being the subscriber box this time. You can then install londiste on provider from the subscriber box with the +londiste.py+ command line tool. @@ -112,4 +122,3 @@ the +londiste.py+ command line tool. == Usage == Please refer to the documentation. 
- @@ -73,9 +73,10 @@ tgz: config.mak clean rm -f source.list $(PYTHON) setup_skytools.py sdist -t source.cfg -m source.list -debclean: distclean - rm -rf debian/tmp-* debian/build* debian/control debian/packages-tmp* - rm -f debian/files debian/rules debian/sub* debian/packages +debclean: clean + rm -rf debian/tmp-* debian/build* debian/packages-tmp* + rm -f debian/files debian/sub* debian/packages + grep-dctrl -vP PGVER debian/control.in > debian/control boot: configure @@ -11,7 +11,7 @@ It contains the following modules: === PgQ === -PgQ is a queuing system written in PL/pgsql, Python and C code. It is +PgQ is a queuing system written in PL/pgSQL, Python and C code. It is based on snapshot-based event handling ideas from Slony-I, and is written for general usage. @@ -53,8 +53,7 @@ Replication tool written in Python, using PgQ as event transport. Features: - Tables can be added one-by-one into set. -- Initial COPY for one table does not block event replay for other - tables. +- Initial COPY for one table does not block event replay for other tables. - Can compare tables on both sides. Documentation: @@ -62,7 +61,7 @@ Documentation: - Londiste script usage: doc/londiste3.txt (also available as `man 1 londiste`) -- Londiste HOWTOS: doc/howto/ +- Londiste HOWTOs: doc/howto/ === walmgr === @@ -76,8 +75,7 @@ database server. == Source tree contents == doc/:: - Documentation in asciidoc format. Source for both html - and man pages. + Documentation in AsciiDoc format. Source for both html and man pages. python/:: Python modules and primary executables - walmgr, londiste, qadmin, pgqadm. @@ -108,7 +106,7 @@ sql/londiste/:: sql/ticker/:: PgQ ticker written in C. - + scripts/:: Python scripts with lesser priority. @@ -117,7 +115,7 @@ lib/:: debian/:: Debian packaging. This is for creating private packages, - official Debian packages uses it's own packagin code. + official Debian packages use their own packaging code. misc/:: Random scripts used for building. 
@@ -135,4 +133,4 @@ Londiste 3 in parallel with Londiste 2. 5. Apply londiste.sql - this will throw error on CREATE SCHEMA, but should otherwise apply fine. 6. Start pgqd. -The files mentined above are installed under $PREFIX/share/skytools3/ directory. +The files mentioned above are installed under $PREFIX/share/skytools3/ directory. diff --git a/configure.ac b/configure.ac index 2b0698d9..fbc36fe7 100644 --- a/configure.ac +++ b/configure.ac @@ -145,6 +145,7 @@ dnl Postres headers on Solaris define incompat unsetenv without that AC_CHECK_FUNCS(unsetenv) dnl Optional use of libevent +AC_SEARCH_LIBS(clock_gettime, rt) AC_USUAL_LIBEVENT_OPT dnl Needed on SmartOS (Solaris) diff --git a/doc/howto/londiste3_cascaded_rep_howto.txt b/doc/howto/londiste3_cascaded_rep_howto.txt index c5fe2e8a..e6a6a3bb 100644 --- a/doc/howto/londiste3_cascaded_rep_howto.txt +++ b/doc/howto/londiste3_cascaded_rep_howto.txt @@ -109,19 +109,19 @@ $ londiste3 -q conf/londiste_db1.ini add-seq mytable_id_seq Register table on other node with creation ---- -$ psql -d db2 -c create sequence mytable_id_seq +$ psql -d db2 -c "create sequence mytable_id_seq" CREATE SEQUENCE $ londiste3 -q conf/londiste_db2.ini add-seq mytable_id_seq $ londiste3 -q conf/londiste_db2.ini add-table mytable --create-full -$ psql -d db3 -c create sequence mytable_id_seq +$ psql -d db3 -c "create sequence mytable_id_seq" CREATE SEQUENCE $ londiste3 -q conf/londiste_db3.ini add-seq mytable_id_seq $ londiste3 -q conf/londiste_db3.ini add-table mytable --create-full -$ psql -d db4 -c create sequence mytable_id_seq +$ psql -d db4 -c "create sequence mytable_id_seq" CREATE SEQUENCE $ londiste3 -q conf/londiste_db4.ini add-seq mytable_id_seq $ londiste3 -q conf/londiste_db4.ini add-table mytable --create-full -$ psql -d db5 -c create sequence mytable_id_seq +$ psql -d db5 -c "create sequence mytable_id_seq" CREATE SEQUENCE $ londiste3 -q conf/londiste_db5.ini add-seq mytable_id_seq $ londiste3 -q conf/londiste_db5.ini add-table 
mytable --create-full @@ -255,5 +255,3 @@ node2 (root) ---- That's it! - - diff --git a/misc/kwcheck.py b/misc/kwcheck.py new file mode 100755 index 00000000..713167a8 --- /dev/null +++ b/misc/kwcheck.py @@ -0,0 +1,50 @@ +#! /usr/bin/env python + +import sys +import re + +import pkgloader +pkgloader.require('skytools', '3.0') +import skytools.quoting + +kwmap = skytools.quoting._ident_kwmap + +fn = "/opt/src/pgsql/postgresql/src/include/parser/kwlist.h" +if len(sys.argv) == 2: + fn = sys.argv[1] + +rc = re.compile(r'PG_KEYWORD[(]"(.*)" , \s* \w+ , \s* (\w+) [)]', re.X) + +data = open(fn, 'r').read() +full_map = {} +cur_map = {} +print "== new ==" +for kw, cat in rc.findall(data): + full_map[kw] = 1 + if cat == 'UNRESERVED_KEYWORD': + continue + if cat == 'COL_NAME_KEYWORD': + continue + cur_map[kw] = 1 + if kw not in kwmap: + print kw, cat + kwmap[kw] = 1 + +print "== obsolete ==" +kws = kwmap.keys() +kws.sort() +for k in kws: + if k not in full_map: + print k, '(not in full_map)' + elif k not in cur_map: + print k, '(not in cur_map)' + +print "== full list ==" +ln = "" +for k in kws: + ln += '"%s":1, ' % k + if len(ln) > 70: + print ln.strip() + ln = "" +print ln.strip() + diff --git a/python/londiste/compare.py b/python/londiste/compare.py index 83dac2e4..3f74862f 100644 --- a/python/londiste/compare.py +++ b/python/londiste/compare.py @@ -27,7 +27,7 @@ class Comparator(Syncer): dst_where = t2.plugin.get_copy_condition(src_curs, dst_curs) src_where = dst_where - self.log.info('Counting %s' % dst_tbl) + self.log.info('Counting %s', dst_tbl) # get common cols cols = self.calc_cols(src_curs, src_tbl, dst_curs, dst_tbl) @@ -61,22 +61,22 @@ class Comparator(Syncer): f += ", checksum=%(chksum)s" f = self.cf.get('compare_fmt', f) - self.log.debug("srcdb: " + src_q) + self.log.debug("srcdb: %s", src_q) src_curs.execute(src_q) src_row = src_curs.fetchone() src_str = f % src_row - self.log.info("srcdb: %s" % src_str) + self.log.info("srcdb: %s", src_str) src_db.commit() - 
self.log.debug("dstdb: " + dst_q) + self.log.debug("dstdb: %s", dst_q) dst_curs.execute(dst_q) dst_row = dst_curs.fetchone() dst_str = f % dst_row - self.log.info("dstdb: %s" % dst_str) + self.log.info("dstdb: %s", dst_str) dst_db.commit() if src_str != dst_str: - self.log.warning("%s: Results do not match!" % dst_tbl) + self.log.warning("%s: Results do not match!", dst_tbl) return 1 return 0 diff --git a/python/londiste/handlers/applyfn.py b/python/londiste/handlers/applyfn.py index b7b1173c..cbbf603b 100644 --- a/python/londiste/handlers/applyfn.py +++ b/python/londiste/handlers/applyfn.py @@ -34,7 +34,7 @@ class ApplyFuncHandler(BaseHandler): qfn = skytools.quote_fqident(fn) qargs = [skytools.quote_literal(a) for a in args] sql = "select %s(%s);" % (qfn, ', '.join(qargs)) - self.log.debug('applyfn.sql: %s' % sql) + self.log.debug('applyfn.sql: %s', sql) sql_queue_func(sql, qfunc_arg) #------------------------------------------------------------------------------ diff --git a/python/londiste/handlers/bulk.py b/python/londiste/handlers/bulk.py index 0c0167ac..e8b9104b 100644 --- a/python/londiste/handlers/bulk.py +++ b/python/londiste/handlers/bulk.py @@ -82,7 +82,7 @@ class BulkLoader(BaseHandler): if not self.method in (0,1,2): raise Exception('unknown method: %s' % self.method) - self.log.debug('bulk_init(%s), method=%d' % (repr(args), self.method)) + self.log.debug('bulk_init(%r), method=%d', args, self.method) def reset(self): self.pkey_ev_map = {} @@ -98,7 +98,7 @@ class BulkLoader(BaseHandler): op = ev.ev_type[0] if op not in 'IUD': raise Exception('Unknown event type: '+ev.ev_type) - self.log.debug('bulk.process_event: %s/%s' % (ev.ev_type, ev.ev_data)) + self.log.debug('bulk.process_event: %s/%s', ev.ev_type, ev.ev_data) # pkey_list = ev.ev_type[2:].split(',') data = skytools.db_urldecode(ev.ev_data) @@ -184,8 +184,8 @@ class BulkLoader(BaseHandler): real_update_count = len(upd_list) - self.log.debug("bulk_flush: %s (I/U/D = %d/%d/%d)" % ( - 
self.table_name, len(ins_list), len(upd_list), len(del_list))) + self.log.debug("bulk_flush: %s (I/U/D = %d/%d/%d)", + self.table_name, len(ins_list), len(upd_list), len(del_list)) # hack to unbroke stuff if self.method == METH_MERGED: @@ -200,8 +200,8 @@ class BulkLoader(BaseHandler): for fld in self.dist_fields: if fld not in key_fields: key_fields.append(fld) - self.log.debug("PKey fields: %s Dist fields: %s" % ( - ",".join(self.pkey_list), ",".join(self.dist_fields))) + self.log.debug("PKey fields: %s Dist fields: %s", + ",".join(self.pkey_list), ",".join(self.dist_fields)) # create temp table temp, qtemp = self.create_temp_table(curs) @@ -241,67 +241,67 @@ class BulkLoader(BaseHandler): # process deleted rows if len(del_list) > 0: - self.log.debug("bulk: Deleting %d rows from %s" % (len(del_list), tbl)) + self.log.debug("bulk: Deleting %d rows from %s", len(del_list), tbl) # delete old rows q = "truncate %s" % qtemp - self.log.debug('bulk: %s' % q) + self.log.debug('bulk: %s', q) curs.execute(q) # copy rows - self.log.debug("bulk: COPY %d rows into %s" % (len(del_list), temp)) + self.log.debug("bulk: COPY %d rows into %s", len(del_list), temp) skytools.magic_insert(curs, qtemp, del_list, col_list, quoted_table=1) # delete rows - self.log.debug('bulk: ' + del_sql) + self.log.debug('bulk: %s', del_sql) curs.execute(del_sql) - self.log.debug("bulk: %s - %d" % (curs.statusmessage, curs.rowcount)) + self.log.debug("bulk: %s - %d", curs.statusmessage, curs.rowcount) if len(del_list) != curs.rowcount: - self.log.warning("Delete mismatch: expected=%s deleted=%d" - % (len(del_list), curs.rowcount)) + self.log.warning("Delete mismatch: expected=%s deleted=%d", + len(del_list), curs.rowcount) temp_used = True # process updated rows if len(upd_list) > 0: - self.log.debug("bulk: Updating %d rows in %s" % (len(upd_list), tbl)) + self.log.debug("bulk: Updating %d rows in %s", len(upd_list), tbl) # delete old rows q = "truncate %s" % qtemp - self.log.debug('bulk: ' + q) + 
self.log.debug('bulk: %s', q) curs.execute(q) # copy rows - self.log.debug("bulk: COPY %d rows into %s" % (len(upd_list), temp)) + self.log.debug("bulk: COPY %d rows into %s", len(upd_list), temp) skytools.magic_insert(curs, qtemp, upd_list, col_list, quoted_table=1) temp_used = True if self.method == METH_CORRECT: # update main table - self.log.debug('bulk: ' + upd_sql) + self.log.debug('bulk: %s', upd_sql) curs.execute(upd_sql) - self.log.debug("bulk: %s - %d" % (curs.statusmessage, curs.rowcount)) + self.log.debug("bulk: %s - %d", curs.statusmessage, curs.rowcount) # check count if len(upd_list) != curs.rowcount: - self.log.warning("Update mismatch: expected=%s updated=%d" - % (len(upd_list), curs.rowcount)) + self.log.warning("Update mismatch: expected=%s updated=%d", + len(upd_list), curs.rowcount) else: # delete from main table - self.log.debug('bulk: ' + del_sql) + self.log.debug('bulk: %s', del_sql) curs.execute(del_sql) - self.log.debug('bulk: ' + curs.statusmessage) + self.log.debug('bulk: %s', curs.statusmessage) # check count if real_update_count != curs.rowcount: - self.log.warning("bulk: Update mismatch: expected=%s deleted=%d" - % (real_update_count, curs.rowcount)) + self.log.warning("bulk: Update mismatch: expected=%s deleted=%d", + real_update_count, curs.rowcount) # insert into main table if AVOID_BIZGRES_BUG: # copy again, into main table - self.log.debug("bulk: COPY %d rows into %s" % (len(upd_list), tbl)) + self.log.debug("bulk: COPY %d rows into %s", len(upd_list), tbl) skytools.magic_insert(curs, qtbl, upd_list, col_list, quoted_table=1) else: # better way, but does not work due bizgres bug - self.log.debug('bulk: ' + ins_sql) + self.log.debug('bulk: %s', ins_sql) curs.execute(ins_sql) - self.log.debug('bulk: ' + curs.statusmessage) + self.log.debug('bulk: %s', curs.statusmessage) # process new rows if len(ins_list) > 0: - self.log.debug("bulk: Inserting %d rows into %s" % (len(ins_list), tbl)) - self.log.debug("bulk: COPY %d rows into %s" % 
(len(ins_list), tbl)) + self.log.debug("bulk: Inserting %d rows into %s", len(ins_list), tbl) + self.log.debug("bulk: COPY %d rows into %s", len(ins_list), tbl) skytools.magic_insert(curs, qtbl, ins_list, col_list, quoted_table=1) # delete remaining rows @@ -311,7 +311,7 @@ class BulkLoader(BaseHandler): else: # fscking problems with long-lived temp tables q = "drop table %s" % qtemp - self.log.debug('bulk: ' + q) + self.log.debug('bulk: %s', q) curs.execute(q) self.reset() @@ -326,19 +326,19 @@ class BulkLoader(BaseHandler): # check if exists if USE_REAL_TABLE: if skytools.exists_table(curs, tempname): - self.log.debug("bulk: Using existing real table %s" % tempname) + self.log.debug("bulk: Using existing real table %s", tempname) return tempname, quote_fqident(tempname) # create non-temp table q = "create table %s (like %s)" % ( quote_fqident(tempname), quote_fqident(self.dest_table)) - self.log.debug("bulk: Creating real table: %s" % q) + self.log.debug("bulk: Creating real table: %s", q) curs.execute(q) return tempname, quote_fqident(tempname) elif USE_LONGLIVED_TEMP_TABLES: if skytools.exists_temp_table(curs, tempname): - self.log.debug("bulk: Using existing temp table %s" % tempname) + self.log.debug("bulk: Using existing temp table %s", tempname) return tempname, quote_ident(tempname) # bizgres crashes on delete rows @@ -347,7 +347,7 @@ class BulkLoader(BaseHandler): # create temp table for loading q = "create temp table %s (like %s) %s" % ( quote_ident(tempname), quote_fqident(self.dest_table), arg) - self.log.debug("bulk: Creating temp table: %s" % q) + self.log.debug("bulk: Creating temp table: %s", q) curs.execute(q) return tempname, quote_ident(tempname) diff --git a/python/londiste/handlers/dispatch.py b/python/londiste/handlers/dispatch.py index 17e88a24..758034c7 100644 --- a/python/londiste/handlers/dispatch.py +++ b/python/londiste/handlers/dispatch.py @@ -135,6 +135,9 @@ post_part: sql statement(s) to execute after creating partition table. 
Usable variables are the same as in part_template +retention_period: + how long to keep partitions around. examples: '3 months', '1 year' + encoding: name of destination encoding. handler replaces all invalid encoding symbols and logs them as warnings @@ -190,6 +193,8 @@ PART_FUNC_OLD = 'public.create_partition' PART_FUNC_NEW = 'londiste.create_partition' PART_FUNC_ARGS = ['parent', 'part', 'pkeys', 'part_field', 'part_time', 'period'] +RETENTION_FUNC = "londiste.drop_obsolete_partitions" + #------------------------------------------------------------------------------ @@ -311,10 +316,9 @@ class BaseBulkTempLoader(BaseBulkCollectingLoader): def logexec(self, curs, sql): """Logs and executes sql statement""" - self.log.debug('exec: %s' % sql) + self.log.debug('exec: %s', sql) curs.execute(sql) - self.log.debug('msg: %s, rows: %s' % ( - curs.statusmessage, curs.rowcount)) + self.log.debug('msg: %s, rows: %s', curs.statusmessage, curs.rowcount) # create sql parts @@ -398,15 +402,15 @@ class BulkLoader(BaseBulkTempLoader): cnt = len(data) if (cnt == 0): return - self.log.debug("bulk: Deleting %d rows from %s" % (cnt, self.table)) + self.log.debug("bulk: Deleting %d rows from %s", cnt, self.table) # copy rows to temp self.bulk_insert(curs, data) # delete rows using temp self.delete(curs) # check if right amount of rows deleted (only in direct mode) if self.conf.table_mode == 'direct' and cnt != curs.rowcount: - self.log.warning("%s: Delete mismatch: expected=%s deleted=%d" - % (self.table, cnt, curs.rowcount)) + self.log.warning("%s: Delete mismatch: expected=%s deleted=%d", + self.table, cnt, curs.rowcount) def process_update(self, curs, op_map): """Process update list""" @@ -419,7 +423,7 @@ class BulkLoader(BaseBulkTempLoader): cnt = len(data) if (cnt == 0): return - self.log.debug("bulk: Updating %d rows in %s" % (cnt, self.table)) + self.log.debug("bulk: Updating %d rows in %s", cnt, self.table) # copy rows to temp self.bulk_insert(curs, data) if self.method == 
METH_CORRECT: @@ -427,15 +431,15 @@ class BulkLoader(BaseBulkTempLoader): self.update(curs) # check count (only in direct mode) if self.conf.table_mode == 'direct' and cnt != curs.rowcount: - self.log.warning("%s: Update mismatch: expected=%s updated=%d" - % (self.table, cnt, curs.rowcount)) + self.log.warning("%s: Update mismatch: expected=%s updated=%d", + self.table, cnt, curs.rowcount) else: # delete from main table using temp self.delete(curs) # check count (only in direct mode) if self.conf.table_mode == 'direct' and real_cnt != curs.rowcount: - self.log.warning("%s: Update mismatch: expected=%s deleted=%d" - % (self.table, real_cnt, curs.rowcount)) + self.log.warning("%s: Update mismatch: expected=%s deleted=%d", + self.table, real_cnt, curs.rowcount) # insert into main table if AVOID_BIZGRES_BUG: # copy again, into main table @@ -452,19 +456,19 @@ class BulkLoader(BaseBulkTempLoader): # merged method loads inserts together with updates if (cnt == 0) or (self.method == METH_MERGED): return - self.log.debug("bulk: Inserting %d rows into %s" % (cnt, self.table)) + self.log.debug("bulk: Inserting %d rows into %s", cnt, self.table) # copy into target table (no temp used) self.bulk_insert(curs, data, table = self.qtable) def bulk_flush(self, curs, op_map): - self.log.debug("bulk_flush: %s (I/U/D = %d/%d/%d)" % ( - self.table, len(op_map['I']), len(op_map['U']), len(op_map['D']))) + self.log.debug("bulk_flush: %s (I/U/D = %d/%d/%d)", self.table, + len(op_map['I']), len(op_map['U']), len(op_map['D'])) # fetch distribution fields if self.dist_fields is None: self.dist_fields = self.find_dist_fields(curs) - self.log.debug("Key fields: %s Dist fields: %s" % ( - ",".join(self.pkeys), ",".join(self.dist_fields))) + self.log.debug("Key fields: %s Dist fields: %s", + ",".join(self.pkeys), ",".join(self.dist_fields)) # add them to key for key in self.dist_fields: if key not in self.keys: @@ -500,7 +504,7 @@ class BulkLoader(BaseBulkTempLoader): """ if 
USE_LONGLIVED_TEMP_TABLES or USE_REAL_TABLE: if self.temp_present: - self.log.debug("bulk: Using existing temp table %s" % self.temp) + self.log.debug("bulk: Using existing temp table %s", self.temp) return False self.create(curs) self.temp_present = True @@ -520,7 +524,7 @@ class BulkLoader(BaseBulkTempLoader): # truncate when re-using existing table if not self.create_temp(curs): self.truncate(curs) - self.log.debug("bulk: COPY %d rows into %s" % (len(data), table)) + self.log.debug("bulk: COPY %d rows into %s", len(data), table) skytools.magic_insert(curs, table, data, self.fields, quoted_table = True) if _use_temp and self.run_analyze: @@ -629,8 +633,7 @@ class Dispatcher(BaseHandler): BaseHandler.__init__(self, table_name, args, dest_table) # show args - self.log.debug("dispatch.init: table_name=%r, args=%r" % \ - (table_name, args)) + self.log.debug("dispatch.init: table_name=%r, args=%r", table_name, args) self.batch_info = None self.dst_curs = None self.pkeys = None @@ -684,6 +687,7 @@ class Dispatcher(BaseHandler): conf.pre_part = self.args.get('pre_part') conf.post_part = self.args.get('post_part') conf.part_func = self.args.get('part_func', PART_FUNC_NEW) + conf.retention_period = self.args.get('retention_period') # set row mode and event types to process conf.row_mode = self.get_arg('row_mode', ROW_MODES) event_types = self.args.get('event_types', '*') @@ -778,8 +782,7 @@ class Dispatcher(BaseHandler): # process only operations specified if not op in self.conf.event_types: return - self.log.debug('dispatch.process_event: %s/%s' % ( - ev.ev_type, ev.ev_data)) + self.log.debug('dispatch.process_event: %s/%s', ev.ev_type, ev.ev_data) if self.pkeys is None: self.pkeys = self.filter_pkeys(pkeys.split(',')) data = self.filter_data(data) @@ -880,7 +883,7 @@ class Dispatcher(BaseHandler): have_func = skytools.exists_function(curs, PART_FUNC_OLD, len(PART_FUNC_ARGS)) if have_func: - self.log.debug('check_part.exec: func:%s, args: %s' % (pfcall, vals)) + 
self.log.debug('check_part.exec: func: %s, args: %s', pfcall, vals) curs.execute(pfcall, vals) else: # @@ -890,12 +893,30 @@ class Dispatcher(BaseHandler): # - check constraints # - inheritance # - self.log.debug('part func %s not found, cloning table' % self.conf.part_func) + self.log.debug('part func %s not found, cloning table', self.conf.part_func) struct = TableStruct(curs, self.dest_table) struct.create(curs, T_ALL, dst) exec_with_vals(self.conf.post_part) - self.log.info("Created table: %s" % dst) + self.log.info("Created table: %s", dst) + + if self.conf.retention_period: + self.drop_obsolete_partitions (self.dest_table, self.conf.retention_period, self.conf.period) + + def drop_obsolete_partitions (self, parent_table, retention_period, partition_period): + """ Drop obsolete partitions of partition-by-date parent table. + """ + curs = self.dst_curs + func = RETENTION_FUNC + args = [parent_table, retention_period, partition_period] + sql = "select " + func + " (%s, %s, %s)" + self.log.debug("func: %s, args: %s", func, args) + curs.execute(sql, args) + res = [] + for row in curs.fetchall(): + res.append(row[0]) + if res: + self.log.info("Dropped tables: %s", ", ".join(res)) def real_copy(self, tablename, src_curs, dst_curs, column_list): """do actual table copy and return tuple with number of bytes and rows diff --git a/python/londiste/handlers/part.py b/python/londiste/handlers/part.py index bdabb3e6..247256e4 100644 --- a/python/londiste/handlers/part.py +++ b/python/londiste/handlers/part.py @@ -72,8 +72,8 @@ class PartHandler(TableHandler): """Filter event by hash in extra3, apply only local part.""" if ev.extra3: meta = skytools.db_urldecode(ev.extra3) - self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d' %\ - (int(meta['hash']), self.max_part, self.local_part)) + self.log.debug('part.process_event: hash=%d, max_part=%s, local_part=%d', + int(meta['hash']), self.max_part, self.local_part) if (int(meta['hash']) & self.max_part) != 
self.local_part: self.log.debug('part.process_event: not my event') return @@ -84,7 +84,7 @@ class PartHandler(TableHandler): """Prepare the where condition for copy and replay filtering""" self.load_part_info(dst_curs) w = "(%s & %d) = %d" % (self.hashexpr, self.max_part, self.local_part) - self.log.debug('part: copy_condition=%s' % w) + self.log.debug('part: copy_condition=%s', w) return w def load_part_info(self, curs): diff --git a/python/londiste/playback.py b/python/londiste/playback.py index 4129cbe1..4fa87014 100644 --- a/python/londiste/playback.py +++ b/python/londiste/playback.py @@ -103,7 +103,7 @@ class TableState(object): """Set snapshot.""" if self.str_snapshot == str_snapshot: return - self.log.debug("%s: change_snapshot to %s" % (self.name, str_snapshot)) + self.log.debug("%s: change_snapshot to %s", self.name, str_snapshot) self.str_snapshot = str_snapshot if str_snapshot: self.from_snapshot = skytools.Snapshot(str_snapshot) @@ -122,8 +122,7 @@ class TableState(object): self.state = state self.sync_tick_id = tick_id self.changed = 1 - self.log.debug("%s: change_state to %s" % (self.name, - self.render_state())) + self.log.debug("%s: change_state to %s", self.name, self.render_state()) def render_state(self): """Make a string to be stored in db.""" @@ -172,8 +171,8 @@ class TableState(object): def loaded_state(self, row): """Update object with info from db.""" - self.log.debug("loaded_state: %s: %s / %s" % ( - self.name, row['merge_state'], row['custom_snapshot'])) + self.log.debug("loaded_state: %s: %s / %s", + self.name, row['merge_state'], row['custom_snapshot']) self.change_snapshot(row['custom_snapshot'], 0) self.state = self.parse_state(row['merge_state']) self.changed = 0 @@ -278,6 +277,10 @@ class Replicator(CascadedWorker): # target database db = dbname=somedb host=127.0.0.1 + # public connect string for target node, which other nodes use + # to access this one. 
+ #public_node_location = + # how many tables can be copied in parallel #parallel_copies = 1 @@ -290,7 +293,7 @@ class Replicator(CascadedWorker): # compare: sql to use #compare_sql = select count(1) as cnt, sum(hashtext(t.*::text)) as chksum from only _TABLE_ t # workaround for hashtext change between 8.3 and 8.4 - #compare_sql = select count(1) as cnt, sum(('x'||substr(md5(t.*::text),1,16))::bit(64)::bigint) as chksum from only _TABLE_ t + #compare_sql = select count(1) as cnt, sum(('x'||substr(md5(t.*::text),1,16))::bit(64)::bigint) as chksum from only _TABLE_ t #compare_fmt = %(cnt)d rows, checksum=%(chksum)s """ @@ -485,11 +488,11 @@ class Replicator(CascadedWorker): else: # regular provider is used if t.name not in pmap: - self.log.warning("Table %s not available on provider" % t.name) + self.log.warning("Table %s not available on provider", t.name) continue pt = pmap[t.name] if pt.state != TABLE_OK: # or pt.custom_snapshot: # FIXME: does snapsnot matter? - self.log.info("Table %s not OK on provider, waiting" % t.name) + self.log.info("Table %s not OK on provider, waiting", t.name) continue # dont allow more copies than configured @@ -519,7 +522,7 @@ class Replicator(CascadedWorker): # somebody may have done remove-table in the meantime if self.copy_table_name not in self.table_map: - self.log.error("copy_sync: lost table: %s" % self.copy_table_name) + self.log.error("copy_sync: lost table: %s", self.copy_table_name) return SYNC_EXIT # This operates on single table @@ -537,8 +540,8 @@ class Replicator(CascadedWorker): elif self.cur_tick < t.sync_tick_id: return SYNC_OK else: - self.log.error("copy_sync: cur_tick=%d sync_tick=%d" % ( - self.cur_tick, t.sync_tick_id)) + self.log.error("copy_sync: cur_tick=%d sync_tick=%d", + self.cur_tick, t.sync_tick_id) raise Exception('Invalid table state') elif t.state == TABLE_WANNA_SYNC: # wait for main thread to react @@ -597,7 +600,7 @@ class Replicator(CascadedWorker): def process_remote_event(self, src_curs, dst_curs, 
ev): """handle one event""" - self.log.debug("New event: id=%s / type=%s / data=%s / extra1=%s" % (ev.id, ev.type, ev.data, ev.extra1)) + self.log.debug("New event: id=%s / type=%s / data=%s / extra1=%s", ev.id, ev.type, ev.data, ev.extra1) # set current_event only if processing them one-by-one if self.work_state < 0: @@ -824,8 +827,8 @@ class Replicator(CascadedWorker): if not t.changed: continue merge_state = t.render_state() - self.log.info("storing state of %s: copy:%d new_state:%s" % ( - t.name, self.copy_thread, merge_state)) + self.log.info("storing state of %s: copy:%d new_state:%s", + t.name, self.copy_thread, merge_state) q = "select londiste.local_set_table_state(%s, %s, %s, %s)" curs.execute(q, [self.set_name, t.name, t.str_snapshot, merge_state]) @@ -838,8 +841,8 @@ class Replicator(CascadedWorker): self.save_table_state(dst_db.cursor()) dst_db.commit() - self.log.info("Table %s status changed to '%s'" % ( - tbl.name, tbl.render_state())) + self.log.info("Table %s status changed to '%s'", + tbl.name, tbl.render_state()) def get_tables_in_state(self, state): "get all tables with specific state" @@ -878,11 +881,11 @@ class Replicator(CascadedWorker): time.sleep(2) # launch and wait for daemonization result - self.log.debug("Launch args: "+repr(cmd)) + self.log.debug("Launch args: %r", cmd) res = os.spawnvp(os.P_WAIT, script, cmd) - self.log.debug("Launch result: "+repr(res)) + self.log.debug("Launch result: %r", res) if res != 0: - self.log.error("Failed to launch copy process, result=%d" % res) + self.log.error("Failed to launch copy process, result=%d", res) def sync_database_encodings(self, src_db, dst_db): """Make sure client_encoding is same on both side.""" @@ -979,4 +982,3 @@ class Replicator(CascadedWorker): if __name__ == '__main__': script = Replicator(sys.argv[1:]) script.start() - diff --git a/python/londiste/repair.py b/python/londiste/repair.py index d33e6d62..46ad067b 100644 --- a/python/londiste/repair.py +++ b/python/londiste/repair.py 
@@ -49,7 +49,7 @@ class Repairer(Syncer): src_curs = src_db.cursor() dst_curs = dst_db.cursor() - self.log.info('Checking %s' % dst_tbl) + self.log.info('Checking %s', dst_tbl) self.common_fields = [] self.fq_common_fields = [] @@ -62,16 +62,16 @@ class Repairer(Syncer): dst_where = t2.plugin.get_copy_condition(src_curs, dst_curs) src_where = dst_where - self.log.info("Dumping src table: %s" % src_tbl) + self.log.info("Dumping src table: %s", src_tbl) self.dump_table(src_tbl, src_curs, dump_src, src_where) src_db.commit() - self.log.info("Dumping dst table: %s" % dst_tbl) + self.log.info("Dumping dst table: %s", dst_tbl) self.dump_table(dst_tbl, dst_curs, dump_dst, dst_where) dst_db.commit() - - self.log.info("Sorting src table: %s" % dump_src) + + self.log.info("Sorting src table: %s", dump_src) self.do_sort(dump_src, dump_src + '.sorted') - self.log.info("Sorting dst table: %s" % dump_dst) + self.log.info("Sorting dst table: %s", dump_dst) self.do_sort(dump_dst, dump_dst + '.sorted') self.dump_compare(dst_tbl, dump_src + ".sorted", dump_dst + ".sorted") @@ -127,7 +127,7 @@ class Repairer(Syncer): self.fq_common_fields = fqlist cols = ",".join(fqlist) - self.log.debug("using columns: %s" % cols) + self.log.debug("using columns: %s", cols) def dump_table(self, tbl, curs, fn, whr): """Dump table to disk.""" @@ -135,12 +135,12 @@ class Repairer(Syncer): if len(whr) == 0: whr = 'true' q = "copy (SELECT %s FROM %s WHERE %s) to stdout" % (cols, skytools.quote_fqident(tbl), whr) - self.log.debug("Query: %s" % q) + self.log.debug("Query: %s", q) f = open(fn, "w", 64*1024) curs.copy_expert(q, f) size = f.tell() f.close() - self.log.info('%s: Got %d bytes' % (tbl, size)) + self.log.info('%s: Got %d bytes', tbl, size) def get_row(self, ln): """Parse a row into dict.""" @@ -154,7 +154,7 @@ class Repairer(Syncer): def dump_compare(self, tbl, src_fn, dst_fn): """Dump + compare single table.""" - self.log.info("Comparing dumps: %s" % tbl) + self.log.info("Comparing dumps: %s", 
tbl) self.cnt_insert = 0 self.cnt_update = 0 self.cnt_delete = 0 @@ -197,10 +197,10 @@ class Repairer(Syncer): dst_ln = f2.readline() if dst_ln: self.total_dst += 1 - self.log.info("finished %s: src: %d rows, dst: %d rows,"\ - " missed: %d inserts, %d updates, %d deletes" % ( + self.log.info("finished %s: src: %d rows, dst: %d rows," + " missed: %d inserts, %d updates, %d deletes", tbl, self.total_src, self.total_dst, - self.cnt_insert, self.cnt_update, self.cnt_delete)) + self.cnt_insert, self.cnt_update, self.cnt_delete) def got_missed_insert(self, tbl, src_row): """Create sql for missed insert.""" @@ -248,7 +248,7 @@ class Repairer(Syncer): def show_fix(self, tbl, q, desc): """Print/write/apply repair sql.""" - self.log.info("missed %s: %s" % (desc, q)) + self.log.info("missed %s: %s", desc, q) if self.apply_curs: self.apply_curs.execute(q) else: @@ -300,7 +300,7 @@ class Repairer(Syncer): def cmp_keys(self, src_row, dst_row): """Compare primary keys of the rows. - + Returns 1 if src > dst, -1 if src < dst and 0 if src == dst""" # None means table is done. tag it larger than any existing row. 
@@ -319,4 +319,3 @@ class Repairer(Syncer): elif v1 > v2: return 1 return 0 - diff --git a/python/londiste/setup.py b/python/londiste/setup.py index 0cc4401e..8a64cc8d 100644 --- a/python/londiste/setup.py +++ b/python/londiste/setup.py @@ -140,7 +140,7 @@ class LondisteSetup(CascadeAdmin): for tbl in args: tbl = skytools.fq_name(tbl) if (tbl in src_tbls) and not src_tbls[tbl]['local']: - self.log.error("Table %s does not exist on provider, need to switch to different provider" % tbl) + self.log.error("Table %s does not exist on provider, need to switch to different provider", tbl) problems = True if problems: self.log.error("Problems, canceling operation") @@ -189,12 +189,12 @@ class LondisteSetup(CascadeAdmin): if create_flags: if tbl_exists: - self.log.info('Table %s already exist, not touching' % desc) + self.log.info('Table %s already exist, not touching', desc) else: src_dest_table = src_tbls[tbl]['dest_table'] if not skytools.exists_table(src_curs, src_dest_table): # table not present on provider - nowhere to get the DDL from - self.log.warning('Table %s missing on provider, cannot create, skipping' % desc) + self.log.warning('Table %s missing on provider, cannot create, skipping', desc) return schema = skytools.fq_name_parts(dest_table)[0] if not skytools.exists_schema(dst_curs, schema): @@ -276,7 +276,7 @@ class LondisteSetup(CascadeAdmin): for tbl in src_tbls.keys(): q = "select * from londiste.global_add_table(%s, %s)" if tbl not in dst_tbls: - self.log.info("Table %s info missing from subscriber, adding" % tbl) + self.log.info("Table %s info missing from subscriber, adding", tbl) self.exec_cmd(dst_curs, q, [self.set_name, tbl]) dst_tbls[tbl] = {'local': False, 'dest_table': tbl} for tbl in dst_tbls.keys(): @@ -317,7 +317,7 @@ class LondisteSetup(CascadeAdmin): " where table_name = %s and local" curs.execute(q, [self.set_name, tbl]) if curs.rowcount == 0: - self.log.error("Table %s not found on this node" % tbl) + self.log.error("Table %s not found on 
this node", tbl) sys.exit(1) attrs, dest_table = curs.fetchone() @@ -382,17 +382,17 @@ class LondisteSetup(CascadeAdmin): seq_exists = skytools.exists_sequence(dst_curs, seq) if create_flags: if seq_exists: - self.log.info('Sequence %s already exist, not creating' % seq) + self.log.info('Sequence %s already exist, not creating', seq) else: if not skytools.exists_sequence(src_curs, seq): # sequence not present on provider - nowhere to get the DDL from - self.log.warning('Sequence "%s" missing on provider, skipping' % seq) + self.log.warning('Sequence "%s" missing on provider, skipping', seq) return s = skytools.SeqStruct(src_curs, seq) src_db.commit() s.create(dst_curs, create_flags, log = self.log) elif not seq_exists: - self.log.warning('Sequence "%s" missing on subscriber, use --create if necessary' % seq) + self.log.warning('Sequence "%s" missing on subscriber, use --create if necessary', seq) return q = "select * from londiste.local_add_seq(%s, %s)" @@ -410,7 +410,7 @@ class LondisteSetup(CascadeAdmin): for seq in src_seqs.keys(): q = "select * from londiste.global_update_seq(%s, %s, %s)" if seq not in dst_seqs: - self.log.info("Sequence %s info missing from subscriber, adding" % seq) + self.log.info("Sequence %s info missing from subscriber, adding", seq) self.exec_cmd(dst_curs, q, [self.set_name, seq, src_seqs[seq]['last_value']]) tmp = src_seqs[seq].copy() tmp['local'] = False @@ -504,7 +504,7 @@ class LondisteSetup(CascadeAdmin): res = self.exec_cmd(db, q, [self.queue_name, fname, sql, attrs.to_urlenc()], commit = False) ret = res[0]['ret_code'] if ret >= 300: - self.log.warning("Skipping execution of '%s'" % fname) + self.log.warning("Skipping execution of '%s'", fname) continue if attrs.need_execute(curs, local_tables, local_seqs): self.log.info("%s: executing sql", fname) @@ -536,7 +536,6 @@ class LondisteSetup(CascadeAdmin): self.provider_location = res[0]['provider_location'] return self.get_database('provider_db', connstr = self.provider_location) - 
def expand_arg_list(self, db, kind, existing, args, needs_tbl=True): curs = db.cursor() @@ -570,7 +569,6 @@ class LondisteSetup(CascadeAdmin): else: return lst_missing - allow_nonexist = not needs_tbl if existing: res = self.solve_globbing(args, lst_exists, map_exists, map_missing, allow_nonexist) @@ -578,7 +576,6 @@ class LondisteSetup(CascadeAdmin): res = self.solve_globbing(args, lst_missing, map_missing, map_exists, allow_nonexist) return res - def solve_globbing(self, args, full_list, full_map, reverse_map, allow_nonexist): def glob2regex(s): s = s.replace('.', '[.]').replace('?', '.').replace('*', '.*') @@ -605,16 +602,16 @@ class LondisteSetup(CascadeAdmin): res_list.append(a) res_map[a] = 1 elif a in reverse_map: - self.log.info("%s already processed" % a) + self.log.info("%s already processed", a) elif allow_nonexist: res_list.append(a) res_map[a] = 1 elif self.options.force: - self.log.warning("%s not available, but --force is used" % a) + self.log.warning("%s not available, but --force is used", a) res_list.append(a) res_map[a] = 1 else: - self.log.warning("%s not available" % a) + self.log.warning("%s not available", a) err = 1 if err: raise skytools.UsageError("Cannot proceed") @@ -622,6 +619,7 @@ class LondisteSetup(CascadeAdmin): def load_extra_status(self, curs, node): """Fetch extra info.""" + # must be thread-safe (!) 
CascadeAdmin.load_extra_status(self, curs, node) curs.execute("select * from londiste.get_table_list(%s)", [self.queue_name]) n_ok = n_half = n_ign = 0 diff --git a/python/londiste/syncer.py b/python/londiste/syncer.py index befeaded..1713f6e9 100644 --- a/python/londiste/syncer.py +++ b/python/londiste/syncer.py @@ -145,11 +145,11 @@ class Syncer(skytools.DBScript): for tbl in tlist: tbl = skytools.fq_name(tbl) if not tbl in dst_tables: - self.log.warning('Table not subscribed: %s' % tbl) + self.log.warning('Table not subscribed: %s', tbl) continue t2 = dst_tables[tbl] if t2.merge_state != 'ok': - self.log.warning('Table %s not synced yet, no point' % tbl) + self.log.warning('Table %s not synced yet, no point', tbl) continue pnode, ploc, wname = find_copy_source(self, self.queue_name, tbl, pnode, ploc) @@ -179,12 +179,12 @@ class Syncer(skytools.DBScript): src_tables, ignore = self.get_tables(src_db) if not tbl in src_tables: - self.log.warning('Table not available on provider: %s' % tbl) + self.log.warning('Table not available on provider: %s', tbl) return t1 = src_tables[tbl] if t1.merge_state != 'ok': - self.log.warning('Table %s not ready yet on provider' % tbl) + self.log.warning('Table %s not ready yet on provider', tbl) return #self.check_consumer(setup_db, dst_db) @@ -231,10 +231,10 @@ class Syncer(skytools.DBScript): dst_curs = dst_db.cursor() if not skytools.exists_table(src_curs, src_tbl): - self.log.warning("Table %s does not exist on provider side" % src_tbl) + self.log.warning("Table %s does not exist on provider side", src_tbl) return if not skytools.exists_table(dst_curs, dst_tbl): - self.log.warning("Table %s does not exist on subscriber side" % dst_tbl) + self.log.warning("Table %s does not exist on subscriber side", dst_tbl) return # lock table against changes @@ -273,14 +273,14 @@ class Syncer(skytools.DBScript): lock_curs = lock_db.cursor() # lock table in separate connection - self.log.info('Locking %s' % src_tbl) + self.log.info('Locking 
%s', src_tbl) lock_db.commit() self.set_lock_timeout(lock_curs) lock_time = time.time() lock_curs.execute("LOCK TABLE %s IN SHARE MODE" % skytools.quote_fqident(src_tbl)) # now wait until consumer has updated target table until locking - self.log.info('Syncing %s' % dst_tbl) + self.log.info('Syncing %s', dst_tbl) # consumer must get futher than this tick tick_id = self.force_tick(setup_curs) @@ -313,7 +313,7 @@ class Syncer(skytools.DBScript): self.old_worker_paused = self.pause_consumer(setup_curs, self.provider_info['worker_name']) lock_curs = lock_db.cursor() - self.log.info('Syncing %s' % dst_tbl) + self.log.info('Syncing %s', dst_tbl) # consumer must get futher than this tick tick_id = self.force_tick(setup_curs, False) @@ -375,4 +375,3 @@ class Syncer(skytools.DBScript): break time.sleep(0.5) return oldflag - diff --git a/python/londiste/table_copy.py b/python/londiste/table_copy.py index f325a7c2..8e026235 100644 --- a/python/londiste/table_copy.py +++ b/python/londiste/table_copy.py @@ -64,7 +64,7 @@ class CopyTable(Replicator): if tbl_stat.copy_role == 'wait-copy': self.log.info('waiting for first partition to initialize copy') elif tbl_stat.max_parallel_copies_reached(): - self.log.info('number of max parallel copies (%s) reached' %\ + self.log.info('number of max parallel copies (%s) reached', tbl_stat.max_parallel_copy) else: break @@ -81,7 +81,7 @@ class CopyTable(Replicator): if pt.state == TABLE_OK: break - self.log.warning("table %s not in sync yet on provider, waiting" % tbl_stat.name) + self.log.warning("table %s not in sync yet on provider, waiting", tbl_stat.name) time.sleep(10) src_real_table = pt.dest_table @@ -102,7 +102,7 @@ class CopyTable(Replicator): self.sync_database_encodings(src_db, dst_db) - self.log.info("Starting full copy of %s" % tbl_stat.name) + self.log.info("Starting full copy of %s", tbl_stat.name) # just in case, drop all fkeys (in case "replay" was skipped) # !! this may commit, so must be done before anything else !! 
@@ -124,14 +124,14 @@ class CopyTable(Replicator): common_cols = [] for c in slist: if c not in dlist: - self.log.warning("Table %s column %s does not exist on subscriber" - % (tbl_stat.name, c)) + self.log.warning("Table %s column %s does not exist on subscriber", + tbl_stat.name, c) else: common_cols.append(c) for c in dlist: if c not in slist: - self.log.warning("Table %s column %s does not exist on provider" - % (tbl_stat.name, c)) + self.log.warning("Table %s column %s does not exist on provider", + tbl_stat.name, c) # drop unnecessary stuff if cmode > 0: @@ -140,9 +140,9 @@ class CopyTable(Replicator): # drop data if tbl_stat.table_attrs.get('skip_truncate'): - self.log.info("%s: skipping truncate" % tbl_stat.name) + self.log.info("%s: skipping truncate", tbl_stat.name) else: - self.log.info("%s: truncating" % tbl_stat.name) + self.log.info("%s: truncating", tbl_stat.name) q = "truncate " if dst_db.server_version >= 80400: q += "only " @@ -160,12 +160,12 @@ class CopyTable(Replicator): tbl_stat.dropped_ddl = ddl # do truncate & copy - self.log.info("%s: start copy" % tbl_stat.name) + self.log.info("%s: start copy", tbl_stat.name) p = tbl_stat.get_plugin() stats = p.real_copy(src_real_table, src_curs, dst_curs, common_cols) if stats: - self.log.info("%s: copy finished: %d bytes, %d rows" % ( - tbl_stat.name, stats[0], stats[1])) + self.log.info("%s: copy finished: %d bytes, %d rows", + tbl_stat.name, stats[0], stats[1]) # get snapshot src_curs.execute("select txid_current_snapshot()") @@ -269,4 +269,3 @@ class CopyTable(Replicator): if __name__ == '__main__': script = CopyTable(sys.argv[1:]) script.start() - diff --git a/python/pgq/cascade/admin.py b/python/pgq/cascade/admin.py index 200a1015..dd9215d8 100644 --- a/python/pgq/cascade/admin.py +++ b/python/pgq/cascade/admin.py @@ -2,17 +2,22 @@ ## NB: not all commands work ## -"""cascaded queue administration. +"""Cascaded queue administration. 
londiste.py INI pause [NODE [CONS]] setadm.py INI pause NODE [CONS] - """ -import sys, time, optparse, skytools, os.path +import optparse +import os.path +import Queue +import sys +import threading +import time +import skytools from skytools import UsageError, DBError from pgq.cascade.nodeinfo import * @@ -24,9 +29,9 @@ command_usage = """\ %prog [options] INI CMD [subcmd args] Node Initialization: - create-root NAME PUBLIC_CONNSTR - create-branch NAME PUBLIC_CONNSTR --provider=<public_connstr> - create-leaf NAME PUBLIC_CONNSTR --provider=<public_connstr> + create-root NAME [PUBLIC_CONNSTR] + create-branch NAME [PUBLIC_CONNSTR] --provider=<public_connstr> + create-leaf NAME [PUBLIC_CONNSTR] --provider=<public_connstr> Initializes node. Node Administration: @@ -67,7 +72,7 @@ setadm extra switches: class CascadeAdmin(skytools.AdminScript): - """Cascaded pgq administration.""" + """Cascaded PgQ administration.""" queue_name = None queue_info = None extra_objs = [] @@ -136,24 +141,43 @@ class CascadeAdmin(skytools.AdminScript): db = self.get_database("db") self.install_code(db) - def cmd_create_root(self, node_name, node_location): - return self.create_node('root', node_name, node_location) + def cmd_create_root(self, node_name, *args): + return self.create_node('root', node_name, args) - def cmd_create_branch(self, node_name, node_location): - return self.create_node('branch', node_name, node_location) + def cmd_create_branch(self, node_name, *args): + return self.create_node('branch', node_name, args) - def cmd_create_leaf(self, node_name, node_location): - return self.create_node('leaf', node_name, node_location) + def cmd_create_leaf(self, node_name, *args): + return self.create_node('leaf', node_name, args) - def create_node(self, node_type, node_name, node_location): + def create_node(self, node_type, node_name, args): """Generic node init.""" provider_loc = self.options.provider if node_type not in ('root', 'branch', 'leaf'): raise Exception('unknown node type') 
+ # load public location + if len(args) > 1: + raise UsageError('Too many args, only public connect string allowed') + elif len(args) == 1: + node_location = args[0] + else: + node_location = self.cf.get('public_node_location', '') + if not node_location: + raise UsageError('Node public location must be given either in command line or config') + + # check if sane + ok = 0 + for k, v in skytools.parse_connect_string(node_location): + if k in ('host', 'service'): + ok = 1 + break + if not ok: + raise UsageError('No host= in public connect string, bad idea') + # connect to database - db = self.get_database("new_node", connstr = node_location) + db = self.get_database("db") # check if code is installed self.install_code(db) @@ -162,9 +186,12 @@ class CascadeAdmin(skytools.AdminScript): res = self.exec_query(db, "select * from pgq_node.get_node_info(%s)", [self.queue_name]) info = res[0] if info['node_type'] is not None: - self.log.info("Node is already initialized as %s" % info['node_type']) + self.log.info("Node is already initialized as %s", info['node_type']) return + # check if public connstr is sane + self.check_public_connstr(db, node_location) + self.log.info("Initializing node") node_attrs = {} @@ -199,7 +226,7 @@ class CascadeAdmin(skytools.AdminScript): # check if member already exists if queue_info.get_member(node_name) is not None: - self.log.error("Node '%s' already exists" % node_name) + self.log.error("Node '%s' already exists", node_name) sys.exit(1) combined_set = None @@ -219,7 +246,7 @@ class CascadeAdmin(skytools.AdminScript): # lookup provider provider = queue_info.get_member(provider_name) if not provider: - self.log.error("Node %s does not exist" % provider_name) + self.log.error("Node %s does not exist", provider_name) sys.exit(1) # register on provider @@ -243,7 +270,6 @@ class CascadeAdmin(skytools.AdminScript): [ self.queue_name, node_type, node_name, worker_name, provider_name, global_watermark, combined_queue ]) - self.extra_init(node_type, 
db, provider_db) if node_attrs: @@ -253,6 +279,43 @@ class CascadeAdmin(skytools.AdminScript): self.log.info("Done") + def check_public_connstr(self, db, pub_connstr): + """Look if public and local connect strings point to same db's. + """ + pub_db = self.get_database("pub_db", connstr = pub_connstr) + curs1 = db.cursor() + curs2 = pub_db.cursor() + q = "select oid, datname, txid_current() as txid, txid_current_snapshot() as snap"\ + " from pg_catalog.pg_database where datname = current_database()" + curs1.execute(q) + res1 = curs1.fetchone() + db.commit() + + curs2.execute(q) + res2 = curs2.fetchone() + pub_db.commit() + + curs1.execute(q) + res3 = curs1.fetchone() + db.commit() + + self.close_database("pub_db") + + failure = 0 + if (res1['oid'], res1['datname']) != (res2['oid'], res2['datname']): + failure += 1 + + sn1 = skytools.Snapshot(res1['snap']) + tx = res2['txid'] + sn2 = skytools.Snapshot(res3['snap']) + if sn1.contains(tx): + failure += 2 + elif not sn2.contains(tx): + failure += 4 + + if failure: + raise UsageError("Public connect string points to different database than local connect string (fail=%d)" % failure) + def extra_init(self, node_type, node_db, provider_db): """Callback to do specific init.""" pass @@ -267,7 +330,6 @@ class CascadeAdmin(skytools.AdminScript): while 1: db = self.get_database('root_db', connstr = loc) - # query current status res = self.exec_query(db, "select * from pgq_node.get_node_info(%s)", [self.queue_name]) info = res[0] @@ -276,7 +338,7 @@ class CascadeAdmin(skytools.AdminScript): self.log.info("Root node not initialized?") sys.exit(1) - self.log.debug("db='%s' -- type='%s' provider='%s'" % (loc, node_type, info['provider_location'])) + self.log.debug("db='%s' -- type='%s' provider='%s'", loc, node_type, info['provider_location']) # configured db may not be root anymore, walk upwards then if node_type in ('root', 'combined-root'): db.commit() @@ -353,32 +415,68 @@ class CascadeAdmin(skytools.AdminScript): """Show set 
status.""" self.load_local_info() - for mname, minf in self.queue_info.member_map.iteritems(): - #inf = self.get_node_info(mname) - #self.queue_info.add_node(inf) - #continue - - if not self.node_alive(mname): - node = NodeInfo(self.queue_name, None, node_name = mname) - self.queue_info.add_node(node) - continue + # prepare structs for workers + members = Queue.Queue() + for m in self.queue_info.member_map.itervalues(): + members.put(m) + nodes = Queue.Queue() + + # launch workers and wait + num_nodes = len(self.queue_info.member_map) + num_threads = max (min (num_nodes / 4, 100), 1) + tlist = [] + for i in range(num_threads): + t = threading.Thread (target = self._cmd_status_worker, args = (members, nodes)) + t.daemon = True + t.start() + tlist.append(t) + #members.join() + for t in tlist: + t.join() + + while True: try: - db = self.get_database('look_db', connstr = minf.location, autocommit = 1) - curs = db.cursor() - curs.execute("select * from pgq_node.get_node_info(%s)", [self.queue_name]) - node = NodeInfo(self.queue_name, curs.fetchone()) - node.load_status(curs) - self.load_extra_status(curs, node) - self.queue_info.add_node(node) - except DBError, d: - msg = str(d).strip().split('\n', 1)[0] - print('Node %s failure: %s' % (mname, msg)) - node = NodeInfo(self.queue_name, None, node_name = mname) - self.queue_info.add_node(node) - self.close_database('look_db') + node = nodes.get_nowait() + except Queue.Empty: + break + self.queue_info.add_node(node) self.queue_info.print_tree() + def _cmd_status_worker (self, members, nodes): + # members in, nodes out, both thread-safe + while True: + try: + m = members.get_nowait() + except Queue.Empty: + break + node = self.load_node_status (m.name, m.location) + nodes.put(node) + members.task_done() + + def load_node_status (self, name, location): + """ Load node info & status """ + # must be thread-safe (!) 
+ if not self.node_alive(name): + node = NodeInfo(self.queue_name, None, node_name = name) + return node + try: + db = None + db = skytools.connect_database (location) + db.set_isolation_level (skytools.I_AUTOCOMMIT) + curs = db.cursor() + curs.execute("select * from pgq_node.get_node_info(%s)", [self.queue_name]) + node = NodeInfo(self.queue_name, curs.fetchone()) + node.load_status(curs) + self.load_extra_status(curs, node) + except DBError, d: + msg = str(d).strip().split('\n', 1)[0].strip() + print('Node %r failure: %s' % (name, msg)) + node = NodeInfo(self.queue_name, None, node_name = name) + finally: + if db: db.close() + return node + def cmd_node_status(self): """ Show status of a local node. @@ -402,6 +500,7 @@ class CascadeAdmin(skytools.AdminScript): def load_extra_status(self, curs, node): """Fetch extra info.""" + # must be thread-safe (!) pass # @@ -433,8 +532,8 @@ class CascadeAdmin(skytools.AdminScript): old_provider = cinfo['provider_node'] if old_provider == new_provider: - self.log.info("Consumer '%s' at node '%s' has already '%s' as provider" % ( - consumer, node, new_provider)) + self.log.info("Consumer '%s' at node '%s' has already '%s' as provider", + consumer, node, new_provider) return # pause target node @@ -558,9 +657,6 @@ class CascadeAdmin(skytools.AdminScript): except skytools.DBError, d: self.log.warning("Failed to remove from '%s': %s", n.name, str(d)) - - - def node_depends(self, sub_node, top_node): cur_node = sub_node # walk upstream @@ -598,13 +694,12 @@ class CascadeAdmin(skytools.AdminScript): self.log.info('new node seems paused, resuming') self.resume_node(new) while 1: - self.log.debug('waiting for catchup: need=%d, cur=%d' % (last_tick, info.completed_tick)) + self.log.debug('waiting for catchup: need=%d, cur=%d', last_tick, info.completed_tick) time.sleep(1) info = self.load_node_info(new) if info.completed_tick >= last_tick: return info - def takeover_root(self, old_node_name, new_node_name, failover = False): """Root 
switchover.""" @@ -682,7 +777,7 @@ class CascadeAdmin(skytools.AdminScript): def cmd_takeover(self, old_node_name): """Generic node switchover.""" - self.log.info("old: %s" % old_node_name) + self.log.info("old: %s", old_node_name) self.load_local_info() new_node_name = self.options.node if not new_node_name: @@ -764,7 +859,7 @@ class CascadeAdmin(skytools.AdminScript): self.load_local_info() # tag node dead in memory - self.log.info("Tagging node '%s' as dead" % dead_node_name) + self.log.info("Tagging node '%s' as dead", dead_node_name) self.queue_info.tag_dead(dead_node_name) # tag node dead in local node @@ -955,7 +1050,7 @@ class CascadeAdmin(skytools.AdminScript): node_db.commit() if len(cons_rows) == 1: if prov_node: - raise Exception('Unexcpeted situation: there are two gravestones - on nodes %s and %s' % (prov_node, node_name)) + raise Exception('Unexpected situation: there are two gravestones - on nodes %s and %s' % (prov_node, node_name)) prov_node = node_name failover_tick = cons_rows[0]['last_tick'] self.log.info("Found gravestone on node: %s", node_name) @@ -1065,7 +1160,7 @@ class CascadeAdmin(skytools.AdminScript): klist.sort() for k in klist: v = stats[k] - self.log.info(" %s: %s", k, str(v)) + self.log.info(" %s: %s", k, v) self.log.info("** Resurrection done, worker paused **") def resurrect_process_lost_events(self, db, failover_tick): @@ -1163,7 +1258,7 @@ class CascadeAdmin(skytools.AdminScript): self.log.info("Deleting lost events") for i in range(ntables): del_count = 0 - self.log.debug("Deleting events from table %d" % i) + self.log.debug("Deleting events from table %d", i) qtbl = "%s.%s" % (skytools.quote_ident(schema), skytools.quote_ident(table + '_' + str(i))) q = "delete from " + qtbl + " where " @@ -1247,9 +1342,9 @@ class CascadeAdmin(skytools.AdminScript): else: m = self.queue_info.get_member(node_name) if not m: - self.log.error("get_node_database: cannot resolve %s" % node_name) + self.log.error("get_node_database: cannot resolve 
%s", node_name) sys.exit(1) - #self.log.info("%s: dead=%s" % (m.name, m.dead)) + #self.log.info("%s: dead=%s", m.name, m.dead) if m.dead: return None loc = m.location @@ -1264,7 +1359,7 @@ class CascadeAdmin(skytools.AdminScript): res = False else: res = True - #self.log.warning('node_alive(%s) = %s' % (node_name, res)) + #self.log.warning('node_alive(%s) = %s', node_name, res) return res def close_node_database(self, node_name): @@ -1278,8 +1373,8 @@ class CascadeAdmin(skytools.AdminScript): """Execute SQL command on particular node.""" db = self.get_node_database(node_name) if not db: - self.log.warning("ignoring cmd for dead node '%s': %s" % ( - node_name, skytools.quote_statement(sql, args))) + self.log.warning("ignoring cmd for dead node '%s': %s", + node_name, skytools.quote_statement(sql, args)) return None return self.exec_cmd(db, sql, args, quiet = quiet, prefix=node_name) @@ -1302,7 +1397,7 @@ class CascadeAdmin(skytools.AdminScript): if stat['uptodate']: op = pause_flag and "paused" or "resumed" - self.log.info("Consumer '%s' on node '%s' %s" % (consumer, node, op)) + self.log.info("Consumer '%s' on node '%s' %s", consumer, node, op) return time.sleep(1) raise Exception('process canceled') @@ -1349,7 +1444,7 @@ class CascadeAdmin(skytools.AdminScript): """Non-cached node info lookup.""" db = self.get_node_database(node_name) if not db: - self.log.warning('load_node_info(%s): ignoring dead node' % node_name) + self.log.warning('load_node_info(%s): ignoring dead node', node_name) return None q = "select * from pgq_node.get_node_info(%s)" rows = self.exec_query(db, q, [self.queue_name]) @@ -1366,7 +1461,7 @@ class CascadeAdmin(skytools.AdminScript): qinf = QueueInfo(self.queue_name, info, member_list) if self.options.dead: for node in self.options.dead: - self.log.info("Assuming node '%s' as dead" % node) + self.log.info("Assuming node '%s' as dead", node) qinf.tag_dead(node) return qinf diff --git a/python/pgq/cascade/nodeinfo.py 
b/python/pgq/cascade/nodeinfo.py index 726b311e..60f07583 100644 --- a/python/pgq/cascade/nodeinfo.py +++ b/python/pgq/cascade/nodeinfo.py @@ -150,7 +150,7 @@ class NodeInfo: err = err[:pos] lst.append("ERR: %s: %s" % (cname, err)) return lst - + def add_info_line(self, ln): self._info_lines.append(ln) @@ -166,6 +166,7 @@ class NodeInfo: for row in curs.fetchall(): cname = row['consumer_name'] self.consumer_map[cname] = row + q = "select current_timestamp - ticker_lag as tick_time,"\ " ticker_lag, current_timestamp as now "\ "from pgq.get_queue_info(%s)" @@ -180,7 +181,7 @@ class NodeInfo: class QueueInfo: """Info about cascaded queue. - + Slightly broken, as all info is per-node. """ @@ -192,8 +193,11 @@ class QueueInfo: self.add_node(self.local_node) for r in member_rows: - n = MemberInfo(r) - self.member_map[n.name] = n + m = MemberInfo(r) + self._add_member(m) + + def _add_member(self, member): + self.member_map[member.name] = member def get_member(self, name): return self.member_map.get(name) @@ -288,4 +292,3 @@ def _setpfx(pfx, sfx): def _node_key(n): return (n.levels, n.total_childs, n.name) - diff --git a/python/pgq/cascade/worker.py b/python/pgq/cascade/worker.py index 59775683..dbe6ba3c 100644 --- a/python/pgq/cascade/worker.py +++ b/python/pgq/cascade/worker.py @@ -121,7 +121,7 @@ class CascadedWorker(CascadedConsumer): def __init__(self, service_name, db_name, args): """Initialize new consumer. 
- + @param service_name: service_name for DBScript @param db_name: target database name for get_database() @param args: cmdline args for DBScript @@ -229,7 +229,7 @@ class CascadedWorker(CascadedConsumer): # ahead from source queue, use current batch then wm = self.batch_info['cur_tick_id'] - self.log.debug("Publishing local watermark: %d" % wm) + self.log.debug("Publishing local watermark: %d", wm) src_curs = src_db.cursor() q = "select * from pgq_node.set_subscriber_watermark(%s, %s, %s)" src_curs.execute(q, [self.pgq_queue_name, st.node_name, wm]) @@ -257,7 +257,7 @@ class CascadedWorker(CascadedConsumer): continue if node not in nmap: # dont ignore missing nodes - cluster may be partially set up - self.log.warning('Unknown node in sync_watermark list: %s' % node) + self.log.warning('Unknown node in sync_watermark list: %s', node) return n = nmap[node] if n['dead']: @@ -270,7 +270,7 @@ class CascadedWorker(CascadedConsumer): row = wmcurs.fetchone() if not row: # partially set up node? 
- self.log.warning('Node not working: %s' % node) + self.log.warning('Node not working: %s', node) elif row['local_watermark'] < wm: # keep lowest wm wm = row['local_watermark'] @@ -310,7 +310,7 @@ class CascadedWorker(CascadedConsumer): if ev.ev_extra1 != self.pgq_queue_name and t != "pgq.tick-id": raise Exception("bad event in queue: "+str(ev)) - self.log.debug("got cascade event: %s(%s)" % (t, ev.ev_data)) + self.log.debug("got cascade event: %s(%s)", t, ev.ev_data) st = self._worker_state if t == "pgq.location-info": node = ev.ev_data @@ -444,4 +444,3 @@ class CascadedWorker(CascadedConsumer): dst_curs.execute(q, [self.pgq_queue_name]) dst_db.commit() self.global_wm_publish_time = t - diff --git a/python/skytools/__init__.py b/python/skytools/__init__.py index 8f2c52a3..048d41fb 100644 --- a/python/skytools/__init__.py +++ b/python/skytools/__init__.py @@ -47,7 +47,9 @@ _symbols = { # skytools.parsing 'dedent': 'skytools.parsing:dedent', 'hsize_to_bytes': 'skytools.parsing:hsize_to_bytes', + 'merge_connect_string': 'skytools.parsing:merge_connect_string', 'parse_acl': 'skytools.parsing:parse_acl', + 'parse_connect_string': 'skytools.parsing:parse_connect_string', 'parse_logtriga_sql': 'skytools.parsing:parse_logtriga_sql', 'parse_pgarray': 'skytools.parsing:parse_pgarray', 'parse_sqltriga_sql': 'skytools.parsing:parse_sqltriga_sql', diff --git a/python/skytools/parsing.py b/python/skytools/parsing.py index decc7e7e..318b1bf9 100644 --- a/python/skytools/parsing.py +++ b/python/skytools/parsing.py @@ -7,7 +7,8 @@ import skytools __all__ = [ "parse_pgarray", "parse_logtriga_sql", "parse_tabbed_table", "parse_statements", 'sql_tokenizer', 'parse_sqltriga_sql', - "parse_acl", "dedent", "hsize_to_bytes"] + "parse_acl", "dedent", "hsize_to_bytes", + "parse_connect_string", "merge_connect_string"] _rc_listelem = re.compile(r'( [^,"}]+ | ["] ( [^"\\]+ | [\\]. 
)* ["] )', re.X) @@ -445,6 +446,61 @@ def hsize_to_bytes (input): bytes = int(m.group(1)) * 1024 ** units.index(m.group(2).upper()) return bytes +# +# Connect string parsing +# + +_cstr_rx = r""" \s* (\w+) \s* = \s* ( ' ( \\.| [^'\\] )* ' | \S+ ) \s* """ +_cstr_unesc_rx = r"\\(.)" +_cstr_badval_rx = r"[\s'\\]" +_cstr_rc = None +_cstr_unesc_rc = None +_cstr_badval_rc = None + +def parse_connect_string(cstr): + r"""Parse Postgres connect string. + + >>> parse_connect_string("host=foo") + [('host', 'foo')] + >>> parse_connect_string(r" host = foo password = ' f\\\o\'o ' ") + [('host', 'foo'), ('password', "' f\\o'o '")] + """ + global _cstr_rc, _cstr_unesc_rc + if not _cstr_rc: + _cstr_rc = re.compile(_cstr_rx, re.X) + _cstr_unesc_rc = re.compile(_cstr_unesc_rx) + pos = 0 + res = [] + while pos < len(cstr): + m = _cstr_rc.match(cstr, pos) + if not m: + raise ValueError('Invalid connect string') + pos = m.end() + k = m.group(1) + v = m.group(2) + if v[0] == "'": + v = _cstr_unesc_rc.sub(r"\1", v) + res.append( (k,v) ) + return res + +def merge_connect_string(cstr_arg_list): + """Put fragments back together. 
+ + >>> merge_connect_string([('host', 'ip'), ('pass', ''), ('x', ' ')]) + "host=ip pass='' x=' '" + """ + global _cstr_badval_rc + if not _cstr_badval_rc: + _cstr_badval_rc = re.compile(_cstr_badval_rx) + + buf = [] + for k, v in cstr_arg_list: + if not v or _cstr_badval_rc.search(v): + v = v.replace('\\', r'\\') + v = v.replace("'", r"\'") + v = "'" + v + "'" + buf.append("%s=%s" % (k, v)) + return ' '.join(buf) if __name__ == '__main__': import doctest diff --git a/python/skytools/quoting.py b/python/skytools/quoting.py index bb1263a6..16b85d9d 100644 --- a/python/skytools/quoting.py +++ b/python/skytools/quoting.py @@ -50,22 +50,25 @@ def quote_statement(sql, dict_or_list): qvals = [quote_literal(v) for v in dict_or_list] return sql % tuple(qvals) -# reserved keywords +# reserved keywords (RESERVED_KEYWORD + TYPE_FUNC_NAME_KEYWORD) _ident_kwmap = { "all":1, "analyse":1, "analyze":1, "and":1, "any":1, "array":1, "as":1, -"asc":1, "asymmetric":1, "both":1, "case":1, "cast":1, "check":1, "collate":1, -"column":1, "constraint":1, "create":1, "current_date":1, "current_role":1, -"current_time":1, "current_timestamp":1, "current_user":1, "default":1, -"deferrable":1, "desc":1, "distinct":1, "do":1, "else":1, "end":1, "except":1, -"false":1, "for":1, "foreign":1, "from":1, "grant":1, "group":1, "having":1, -"in":1, "initially":1, "intersect":1, "into":1, "leading":1, "limit":1, -"localtime":1, "localtimestamp":1, "new":1, "not":1, "null":1, "off":1, -"offset":1, "old":1, "on":1, "only":1, "or":1, "order":1, "placing":1, -"primary":1, "references":1, "returning":1, "select":1, "session_user":1, -"some":1, "symmetric":1, "table":1, "then":1, "to":1, "trailing":1, "true":1, -"union":1, "unique":1, "user":1, "using":1, "when":1, "where":1, -# greenplum? 
-"errors":1, +"asc":1, "asymmetric":1, "authorization":1, "binary":1, "both":1, "case":1, +"cast":1, "check":1, "collate":1, "collation":1, "column":1, "concurrently":1, +"constraint":1, "create":1, "cross":1, "current_catalog":1, "current_date":1, +"current_role":1, "current_schema":1, "current_time":1, "current_timestamp":1, +"current_user":1, "default":1, "deferrable":1, "desc":1, "distinct":1, +"do":1, "else":1, "end":1, "errors":1, "except":1, "false":1, "fetch":1, +"for":1, "foreign":1, "freeze":1, "from":1, "full":1, "grant":1, "group":1, +"having":1, "ilike":1, "in":1, "initially":1, "inner":1, "intersect":1, +"into":1, "is":1, "isnull":1, "join":1, "lateral":1, "leading":1, "left":1, +"like":1, "limit":1, "localtime":1, "localtimestamp":1, "natural":1, "new":1, +"not":1, "notnull":1, "null":1, "off":1, "offset":1, "old":1, "on":1, "only":1, +"or":1, "order":1, "outer":1, "over":1, "overlaps":1, "placing":1, "primary":1, +"references":1, "returning":1, "right":1, "select":1, "session_user":1, +"similar":1, "some":1, "symmetric":1, "table":1, "then":1, "to":1, "trailing":1, +"true":1, "union":1, "unique":1, "user":1, "using":1, "variadic":1, "verbose":1, +"when":1, "where":1, "window":1, "with":1, } _ident_bad = re.compile(r"[^a-z0-9_]|^[0-9]") diff --git a/python/skytools/scripting.py b/python/skytools/scripting.py index 23c3ef56..e4efdf00 100644 --- a/python/skytools/scripting.py +++ b/python/skytools/scripting.py @@ -227,7 +227,7 @@ class BaseScript(object): # >0 - sleep time if work() requests sleep # 0 - exit if work requests sleep # <0 - run work() once [same as looping=0] - loop_delay = 0 + loop_delay = 1.0 # 0 - run work() once # 1 - run work() repeatedly @@ -466,7 +466,7 @@ class BaseScript(object): self.log.info ("Config reloaded") self.job_name = self.cf.get("job_name") self.pidfile = self.cf.getfile("pidfile", '') - self.loop_delay = self.cf.getfloat("loop_delay", 1.0) + self.loop_delay = self.cf.getfloat("loop_delay", self.loop_delay) 
self.exception_sleep = self.cf.getfloat("exception_sleep", 20) def hook_sighup(self, sig, frame): diff --git a/python/walmgr.py b/python/walmgr.py index 60b1df3b..5f7292ee 100755 --- a/python/walmgr.py +++ b/python/walmgr.py @@ -23,7 +23,7 @@ Common commands: backup Copies all master data to slave. Will keep backup history if slave keep_backups is set. EXPERIMENTAL: If run on slave, creates backup from in-recovery slave data. - restore [set][dst] Stop postmaster, move new data dir to right location and start + restore [set][dst] Stop postmaster, move new data dir to right location and start postmaster in playback mode. Optionally use [set] as the backupset name to restore. In this case the directory is copied, not moved. cleanup Cleanup any walmgr files after stop. @@ -63,7 +63,7 @@ def die(err,msg): def yesno(prompt): """Ask a Yes/No question""" - while True: + while True: sys.stderr.write(prompt + " ") sys.stderr.flush() answer = sys.stdin.readline() @@ -313,12 +313,12 @@ class PostgresConfiguration: m = r_active.search(self.cf_buf) if m: old_val = m.group(1) - self.log.debug("found parameter %s with value '%s'" % (param, old_val)) + self.log.debug("found parameter %s with value %r", param, old_val) self.cf_buf = "%s%s%s" % (self.cf_buf[:m.start()], cf_full, self.cf_buf[m.end():]) else: m = r_disabled.search(self.cf_buf) if m: - self.log.debug("found disabled parameter %s" % param) + self.log.debug("found disabled parameter %s", param) self.cf_buf = "%s\n%s%s" % (self.cf_buf[:m.end()], cf_full, self.cf_buf[m.end():]) else: # not found, append to the end @@ -345,7 +345,7 @@ class PostgresConfiguration: def set_synchronous_standby_names(self,param_value): """Helper function to change synchronous_standby_names and signal postmaster""" - self.log.info("Changing synchronous_standby_names from '%s' to '%s'" % (self.synchronous_standby_names(),param_value)) + self.log.info("Changing synchronous_standby_names from %r to %r", self.synchronous_standby_names(), param_value) 
cf_params = dict() cf_params['synchronous_standby_names'] = param_value self.modify(cf_params) @@ -484,7 +484,7 @@ class WalMgr(skytools.DBScript): self.args = [] if self.cmd not in ('sync', 'syncdaemon'): - # don't let pidfile interfere with normal operations, but + # don't let pidfile interfere with normal operations, but # disallow concurrent syncing self.pidfile = None @@ -524,7 +524,7 @@ class WalMgr(skytools.DBScript): def pg_start_backup(self, code): q = "select pg_start_backup('FullBackup')" - self.log.info("Execute SQL: %s; [%s]" % (q, self.cf.get("master_db"))) + self.log.info("Execute SQL: %s; [%s]", q, self.cf.get("master_db")) if self.not_really: self.pg_backup = 1 return @@ -539,7 +539,7 @@ class WalMgr(skytools.DBScript): return q = "select pg_stop_backup()" - self.log.debug("Execute SQL: %s; [%s]" % (q, self.cf.get("master_db"))) + self.log.info("Execute SQL: %s; [%s]", q, self.cf.get("master_db")) if self.not_really: return db = self.get_database("master_db") @@ -554,7 +554,7 @@ class WalMgr(skytools.DBScript): return False buf = open(pidfile, "r").readline() pid = int(buf.strip()) - self.log.debug("Signal %d to process %d" % (sgn, pid)) + self.log.debug("Signal %d to process %d", sgn, pid) if sgn == 0 or not self.not_really: try: os.kill(pid, sgn) @@ -573,15 +573,15 @@ class WalMgr(skytools.DBScript): cmdline += args cmd = "' '".join(cmdline) - self.log.debug("Execute rsync cmd: '%s'" % (cmd)) + self.log.debug("Execute rsync cmd: %r", cmd) if self.not_really: return 0 - res = os.spawnvp(os.P_WAIT, cmdline[0], cmdline) + res = os.spawnvp(os.P_WAIT, cmdline[0], cmdline) if res == 24: self.log.info("Some files vanished, but thats OK") res = 0 elif res != 0: - self.log.fatal("rsync exec failed, res=%d" % res) + self.log.fatal("rsync exec failed, res=%d", res) if die_on_error: sys.exit(1) return res @@ -593,21 +593,21 @@ class WalMgr(skytools.DBScript): sys.exit(1) def rsync_log_directory(self, source_dir, dst_loc): - """rsync a pg_log or pg_xlog 
directory - ignore most of the + """rsync a pg_log or pg_xlog directory - ignore most of the directory contents, and pay attention to symlinks """ keep_symlinks = self.cf.getint("keep_symlinks", 1) subdir = os.path.basename(source_dir) if not os.path.exists(source_dir): - self.log.info("%s does not exist, skipping" % subdir) + self.log.info("%s does not exist, skipping", subdir) return cmdline = [] # if this is a symlink, copy it's target first if os.path.islink(source_dir) and keep_symlinks: - self.log.info('%s is a symlink, attempting to create link target' % subdir) + self.log.info('%s is a symlink, attempting to create link target', subdir) # expand the link link = os.readlink(source_dir) @@ -621,7 +621,7 @@ class WalMgr(skytools.DBScript): if self.exec_rsync( options + [ link_target, remote_target ]): # unable to create the link target, just convert the links # to directories in PGDATA - self.log.warning('Unable to create symlinked %s on target, copying' % subdir) + self.log.warning('Unable to create symlinked %s on target, copying', subdir) cmdline += [ "--copy-unsafe-links" ] cmdline += [ "--exclude=pg_log/*" ] @@ -633,27 +633,27 @@ class WalMgr(skytools.DBScript): def exec_cmd(self, cmdline, allow_error=False): cmd = "' '".join(cmdline) - self.log.debug("Execute cmd: '%s'" % (cmd)) + self.log.debug("Execute cmd: %r", cmd) if self.not_really: return process = subprocess.Popen(cmdline,stdout=subprocess.PIPE) output = process.communicate() res = process.returncode - + if res != 0 and not allow_error: - self.log.fatal("exec failed, res=%d (%s)" % (res, repr(cmdline))) + self.log.fatal("exec failed, res=%d (%r)", res, cmdline) sys.exit(1) return (res,output[0]) def exec_system(self, cmdline): - self.log.debug("Execute cmd: '%s'" % (cmdline)) + self.log.debug("Execute cmd: %r", cmdline) if self.not_really: return 0 return os.WEXITSTATUS(os.system(cmdline)) def chdir(self, loc): - self.log.debug("chdir: '%s'" % (loc)) + self.log.debug("chdir: %r", loc) if 
self.not_really: return try: @@ -672,7 +672,7 @@ class WalMgr(skytools.DBScript): last = open(fn, "r").read().strip() return last except: - self.log.info("Failed to read %s" % fn) + self.log.info("Failed to read %s", fn) return None def set_last_complete(self, last): @@ -687,7 +687,7 @@ class WalMgr(skytools.DBScript): f.close() os.rename(fn_tmp, fn) except: - self.log.fatal("Cannot write to %s" % fn) + self.log.fatal("Cannot write to %s", fn) def master_stop(self): @@ -711,11 +711,11 @@ class WalMgr(skytools.DBScript): # stop any running syncdaemons pidfile = self.cf.getfile("pidfile", "") if os.path.exists(pidfile): - self.log.info('Pidfile %s exists, attempting to stop syncdaemon.' % pidfile) + self.log.info('Pidfile %s exists, attempting to stop syncdaemon.', pidfile) self.exec_cmd([self.script, self.cfgfile, "syncdaemon", "-s"]) self.log.info("Done") - + def walmgr_cleanup(self): """ Clean up any walmgr files on slave and master. @@ -724,14 +724,14 @@ class WalMgr(skytools.DBScript): if not self.is_master: # remove walshipping directory dirname = self.cf.getfile("walmgr_data") - self.log.info("Removing walmgr data directory: %s" % dirname) + self.log.info("Removing walmgr data directory: %s", dirname) if not self.not_really: shutil.rmtree(dirname) # remove backup 8.3/main.X directories backups = glob.glob(self.cf.getfile("slave_data") + ".[0-9]") for dirname in backups: - self.log.info("Removing backup main directory: %s" % dirname) + self.log.info("Removing backup main directory: %s", dirname) if not self.not_really: shutil.rmtree(dirname) @@ -745,7 +745,7 @@ class WalMgr(skytools.DBScript): if not self.options.ssh_remove_key in key: keys += key else: - self.log.info("Removed %s from %s" % (self.options.ssh_remove_key, auth_file)) + self.log.info("Removed %s from %s", self.options.ssh_remove_key, auth_file) self.log.info("Overwriting authorized_keys file") @@ -756,7 +756,7 @@ class WalMgr(skytools.DBScript): f.close() os.rename(tmpfile, auth_file) else: - 
self.log.debug("authorized_keys:\n%s" % keys) + self.log.debug("authorized_keys:\n%s", keys) # remove password from .pgpass primary_conninfo = self.cf.get("primary_conninfo", "") @@ -768,7 +768,7 @@ class WalMgr(skytools.DBScript): pg.write() # get rid of the configuration file, both master and slave - self.log.info("Removing config file: %s" % self.cfgfile) + self.log.info("Removing config file: %s", self.cfgfile) if not self.not_really: os.remove(self.cfgfile) @@ -805,7 +805,7 @@ class WalMgr(skytools.DBScript): slave_found = None for new_synch_slave in re.findall(r"[^\s,]+",names): if new_synch_slave not in slave_names: - self.log.warning("No slave available with name %s" % new_synch_slave) + self.log.warning("No slave available with name %s", new_synch_slave) else: slave_found = True break @@ -842,7 +842,7 @@ class WalMgr(skytools.DBScript): # wal level set in config, enable it wal_level = self.cf.getboolean("hot_standby", False) and "hot_standby" or "archive" - self.log.debug("found 'wal_level' in config -- setting to '%s'" % wal_level) + self.log.debug("found 'wal_level' in config -- setting to '%s'", wal_level) cf_params["wal_level"] = wal_level if curr_wal_level not in ("archive", "hot_standby") and not can_restart: @@ -878,7 +878,7 @@ class WalMgr(skytools.DBScript): if cf.synchronous_standby_names(): cf_params['synchronous_standby_names'] = '' - self.log.debug("modifying configuration: %s" % cf_params) + self.log.debug("modifying configuration: %s", cf_params) cf.modify(cf_params) cf.write() @@ -889,12 +889,12 @@ class WalMgr(skytools.DBScript): re-enabled without restarting postgres. 
Needed when slave is booted with postgresql.conf from master.""" - self.log.debug("Disable archiving in %s" % cf_file) + self.log.debug("Disable archiving in %s", cf_file) cf = PostgresConfiguration(self, cf_file) cf_params = { "archive_command": "/bin/true" } - self.log.debug("modifying configuration: %s" % cf_params) + self.log.debug("modifying configuration: %s", cf_params) cf.modify(cf_params) cf.write() @@ -1046,11 +1046,11 @@ class WalMgr(skytools.DBScript): dirname = os.path.dirname(cf_name) if not os.path.isdir(dirname): - self.log.info('Creating config directory: %s' % dirname) + self.log.info('Creating config directory: %s', dirname) os.makedirs(dirname) - self.log.info('Writing configuration file: %s' % cf_name) - self.log.debug("config data:\n%s" % config_data) + self.log.info('Writing configuration file: %s', cf_name) + self.log.debug("config data:\n%s", config_data) if not self.not_really: cf = open(cf_name, "w") cf.write(config_data) @@ -1112,15 +1112,15 @@ compression = %(compression)s if self.options.ssh_keygen: keyfile = os.path.expanduser("~/.ssh/id_dsa") if os.path.isfile(keyfile): - self.log.info("SSH key %s already exists, skipping" % keyfile) + self.log.info("SSH key %s already exists, skipping", keyfile) else: - self.log.info("Generating ssh key: %s" % keyfile) + self.log.info("Generating ssh key: %s", keyfile) cmdline = ["ssh-keygen", "-t", "dsa", "-N", "", "-q", "-f", keyfile ] self.log.debug(' '.join(cmdline)) if not self.not_really: subprocess.call(cmdline) key = open(keyfile + ".pub").read().strip() - self.log.info("public key: %s" % key) + self.log.info("public key: %s", key) def walmgr_init_slave(self): """ @@ -1174,22 +1174,22 @@ primary_conninfo = %(primary_conninfo)s auth_file = os.path.join(ssh_dir, "authorized_keys") if not os.path.isdir(ssh_dir): - self.log.info("Creating directory: %s" % ssh_dir) + self.log.info("Creating directory: %s", ssh_dir) if not self.not_really: os.mkdir(ssh_dir) - self.log.debug("Reading public key 
from %s" % self.options.ssh_add_key) + self.log.debug("Reading public key from %s", self.options.ssh_add_key) master_pubkey = open(self.options.ssh_add_key).read() key_present = False if os.path.isfile(auth_file): for key in open(auth_file): if key == master_pubkey: - self.log.info("Key already present in %s, skipping" % auth_file) + self.log.info("Key already present in %s, skipping", auth_file) key_present = True if not key_present: - self.log.info("Adding %s to %s" % (self.options.ssh_add_key, auth_file)) + self.log.info("Adding %s to %s", self.options.ssh_add_key, auth_file) if not self.not_really: af = open(auth_file, "a") af.write(master_pubkey) @@ -1198,7 +1198,7 @@ primary_conninfo = %(primary_conninfo)s if self.options.add_password and self.options.primary_conninfo: # add password to pgpass - self.log.debug("Reading password from file %s" % self.options.add_password) + self.log.debug("Reading password from file %s", self.options.add_password) pwd = open(self.options.add_password).readline().rstrip('\n\r') pg = Pgpass('~/.pgpass') @@ -1206,7 +1206,7 @@ primary_conninfo = %(primary_conninfo)s pg.ensure_user(host, port, user, pwd) pg.write() - self.log.info("Added password from %s to .pgpass" % self.options.add_password) + self.log.info("Added password from %s to .pgpass", self.options.add_password) @@ -1234,7 +1234,7 @@ primary_conninfo = %(primary_conninfo)s # create slave directory structure def mkdirs(dir): if not os.path.exists(dir): - self.log.debug("Creating directory %s" % dir) + self.log.debug("Creating directory %s", dir) if not self.not_really: os.makedirs(dir) @@ -1248,9 +1248,9 @@ primary_conninfo = %(primary_conninfo)s def master_periodic(self): """ - Run periodic command on master node. + Run periodic command on master node. 
- We keep time using .walshipping.last file, so this has to be run before + We keep time using .walshipping.last file, so this has to be run before set_last_complete() """ @@ -1267,19 +1267,19 @@ primary_conninfo = %(primary_conninfo)s if os.path.isfile(check_file): elapsed = time.time() - os.stat(check_file).st_mtime - self.log.info("Running periodic command: %s" % periodic_command) + self.log.info("Running periodic command: %s", periodic_command) if not elapsed or elapsed > command_interval: if not self.not_really: rc = os.WEXITSTATUS(self.exec_system(periodic_command)) if rc != 0: - self.log.error("Periodic command exited with status %d" % rc) + self.log.error("Periodic command exited with status %d", rc) # dont update timestamp - try again next time else: open(check_file,"w").write("1") else: - self.log.debug("%d seconds elapsed, not enough to run periodic." % elapsed) + self.log.debug("%d seconds elapsed, not enough to run periodic.", elapsed) except Exception, det: - self.log.error("Failed to run periodic command: %s" % str(det)) + self.log.error("Failed to run periodic command: %s", det) def master_backup(self): """ @@ -1334,21 +1334,21 @@ primary_conninfo = %(primary_conninfo)s continue tfn = os.path.join(master_spc_dir, tblspc) if not os.path.islink(tfn): - self.log.info("Suspicious pg_tblspc entry: "+tblspc) + self.log.info("Suspicious pg_tblspc entry: %s", tblspc) continue spc_path = os.path.realpath(tfn) - self.log.info("Got tablespace %s: %s" % (tblspc, spc_path)) + self.log.info("Got tablespace %s: %s", tblspc, spc_path) dstfn = slave_spc_dir + "/" + tblspc try: os.chdir(spc_path) except Exception, det: - self.log.warning("Broken link:" + str(det)) + self.log.warning("Broken link: %s", det) continue cmdline = [ "--delete", "--exclude", ".*", "--copy-unsafe-links", ".", dstfn] self.exec_big_rsync(cmdline) - # copy the pg_log and pg_xlog directories, these may be + # copy the pg_log and pg_xlog directories, these may be # symlinked to nonstandard 
location, so pay attention self.rsync_log_directory(os.path.join(data_dir, "pg_log"), dst_loc) self.rsync_log_directory(os.path.join(data_dir, "pg_xlog"), dst_loc) @@ -1357,7 +1357,7 @@ primary_conninfo = %(primary_conninfo)s conf_dst_loc = self.cf.getfile("config_backup", "") if conf_dst_loc: master_conf_dir = os.path.dirname(self.cf.getfile("master_config")) - self.log.info("Backup conf files from %s" % master_conf_dir) + self.log.info("Backup conf files from %s", master_conf_dir) self.chdir(master_conf_dir) cmdline = [ "--include", "*.conf", @@ -1414,7 +1414,7 @@ primary_conninfo = %(primary_conninfo)s start_time = time.localtime() cmdline = ["cp", "-a", src, dst ] - self.log.info("Executing %s" % " ".join(cmdline)) + self.log.info("Executing %s", " ".join(cmdline)) if not self.not_really: self.exec_cmd(cmdline) stop_time = time.localtime() @@ -1429,7 +1429,7 @@ primary_conninfo = %(primary_conninfo)s self.log.warning("Unable to determine last restart point, backup_label not created.") else: # Write backup label and history file - + backup_label = \ """START WAL LOCATION: %(xlogid)X/%(xrecoff)X (file %(wal_name)s) CHECKPOINT LOCATION: %(xlogid)X/%(xrecoff)X @@ -1456,7 +1456,7 @@ STOP TIME: %(stop_time)s # Write the label filename = os.path.join(dst, "backup_label") if self.not_really: - self.log.info("Writing backup label to %s" % filename) + self.log.info("Writing backup label to %s", filename) else: lf = open(filename, "w") lf.write(backup_label % label_params) @@ -1467,10 +1467,10 @@ STOP TIME: %(stop_time)s completed_wals = self.cf.getfile("completed_wals") filename = os.path.join(completed_wals, histfile) if os.path.exists(filename): - self.log.warning("%s: already exists, refusing to overwrite." 
% filename) + self.log.warning("%s: already exists, refusing to overwrite.", filename) else: if self.not_really: - self.log.info("Writing backup history to %s" % filename) + self.log.info("Writing backup history to %s", filename) else: lf = open(filename, "w") lf.write(backup_history % label_params) @@ -1481,7 +1481,7 @@ STOP TIME: %(stop_time)s self.slave_continue() finally: self.slave_resume_backups() - + def run_backup(self): if self.is_master: self.master_backup() @@ -1500,9 +1500,9 @@ STOP TIME: %(stop_time)s start_time = time.time() self.log.debug("%s: start copy", srcname) - + self.master_periodic() - + dst_loc = self.cf.getfile("completed_wals") if dst_loc[-1] != "/": dst_loc += "/" @@ -1527,16 +1527,16 @@ STOP TIME: %(stop_time)s def slave_append_partial(self): """ - Read 'bytes' worth of data from stdin, append to the partial log file - starting from 'offset'. On error it is assumed that master restarts + Read 'bytes' worth of data from stdin, append to the partial log file + starting from 'offset'. On error it is assumed that master restarts from zero. - - The resulting file is always padded to XLOG_SEGMENT_SIZE bytes to + + The resulting file is always padded to XLOG_SEGMENT_SIZE bytes to simplify recovery. 
""" def fail(message): - self.log.error("Slave: %s: %s" % (filename, message)) + self.log.error("Slave: %s: %s", filename, message) sys.exit(1) self.assert_is_master(False) @@ -1552,12 +1552,12 @@ STOP TIME: %(stop_time)s fail("not enough data, expected %d, got %d" % (bytes, len(data))) chunk = WalChunk(filename, offset, bytes) - self.log.debug("Slave: adding to %s" % chunk) + self.log.debug("Slave: adding to %s", chunk) name = os.path.join(self.cf.getfile("partial_wals"), filename) if self.not_really: - self.log.info("Adding to partial: %s" % name) + self.log.info("Adding to partial: %s", name) return try: @@ -1584,7 +1584,7 @@ STOP TIME: %(stop_time)s try: xlog = open(os.path.join(xlog_dir, chunk.filename)) except IOError, det: - self.log.warning("Cannot access file %s" % chunk.filename) + self.log.warning("Cannot access file %s", chunk.filename) return xlog.seek(chunk.pos) @@ -1602,7 +1602,7 @@ STOP TIME: %(stop_time)s chunk.sync_time += (time.time() - syncstart) status = os.waitpid(childpid, 0) - rc = os.WEXITSTATUS(status[1]) + rc = os.WEXITSTATUS(status[1]) if rc == 0: log = daemon_mode and self.log.debug or self.log.info log("sent to slave: %s" % chunk) @@ -1611,7 +1611,7 @@ STOP TIME: %(stop_time)s else: # Start from zero after an error chunk.pos = 0 - self.log.error("xpartialsync exited with status %d, restarting from zero." % rc) + self.log.error("xpartialsync exited with status %d, restarting from zero.", rc) time.sleep(5) def master_syncdaemon(self): @@ -1623,10 +1623,10 @@ STOP TIME: %(stop_time)s """ Copy partial WAL segments to slave. - On 8.2 set use_xlog_functions=1 in config file - this enables record based + On 8.2 set use_xlog_functions=1 in config file - this enables record based walshipping. On 8.0 the only option is to sync files. - If daemon_mode is specified it never switches from record based shipping to + If daemon_mode is specified it never switches from record based shipping to file based shipping. 
""" @@ -1657,11 +1657,11 @@ STOP TIME: %(stop_time)s # Switched to new WAL segment. Don't bother to copy the last bits - it # will be obsoleted by the archive_command. if self.walchunk and self.walchunk.sync_count > 0: - self.log.info("Switched in %d seconds, %f sec in %d interim syncs, avg %f" - % (time.time() - self.walchunk.start_time, - self.walchunk.sync_time, - self.walchunk.sync_count, - self.walchunk.sync_time / self.walchunk.sync_count)) + self.log.info("Switched in %d seconds, %f sec in %d interim syncs, avg %f", + time.time() - self.walchunk.start_time, + self.walchunk.sync_time, + self.walchunk.sync_count, + self.walchunk.sync_time / self.walchunk.sync_count) self.walchunk = WalChunk(file_name, 0, file_offs) else: self.walchunk.bytes = file_offs - self.walchunk.pos @@ -1674,7 +1674,7 @@ STOP TIME: %(stop_time)s last = self.get_last_complete() if last: - self.log.info("%s: last complete" % last) + self.log.info("%s: last complete", last) else: self.log.info("last complete not found, copying all") @@ -1690,7 +1690,7 @@ STOP TIME: %(stop_time)s else: # ok, the database is shut down, we can use last checkpoint wal checkpoint_wal = ctl.wal_name - self.log.info("last checkpoint wal: %s" % checkpoint_wal) + self.log.info("last checkpoint wal: %s", checkpoint_wal) else: self.log.info("Unable to obtain control file information, copying all") @@ -1719,9 +1719,9 @@ STOP TIME: %(stop_time)s # got interesting WAL xlog = os.path.join(xlog_dir, fn) # copy data - self.log.info('Syncing %s' % xlog) + self.log.info('Syncing %s', xlog) if self.exec_rsync([xlog, dst_loc], not daemon_mode) != 0: - self.log.error('Cannot sync %s' % xlog) + self.log.error('Cannot sync %s', xlog) break else: self.log.info("Partial copy done") @@ -1750,12 +1750,12 @@ STOP TIME: %(stop_time)s sys.exit(1) except Exception, d: exc, msg, tb = sys.exc_info() - self.log.fatal("xrestore %s crashed: %s: '%s' (%s: %s)" % ( - srcname, str(exc), str(msg).rstrip(), - str(tb), 
repr(traceback.format_tb(tb)))) + self.log.fatal("xrestore %s crashed: %s: '%s' (%s: %r)", + srcname, exc, str(msg).rstrip(), + tb, traceback.format_tb(tb)) del tb time.sleep(10) - self.log.info("Re-exec: %s", repr(sys.argv)) + self.log.info("Re-exec: %r", sys.argv) os.execv(sys.argv[0], sys.argv) def master_xrestore(self, srcname, dstpath): @@ -1763,14 +1763,14 @@ STOP TIME: %(stop_time)s Restore the xlog file from slave. """ paths = [ self.cf.getfile("completed_wals"), self.cf.getfile("partial_wals") ] - - self.log.info("Restore %s to %s" % (srcname, dstpath)) + + self.log.info("Restore %s to %s", srcname, dstpath) for src in paths: - self.log.debug("Looking in %s" % src) + self.log.debug("Looking in %s", src) srcfile = os.path.join(src, srcname) if self.exec_rsync([srcfile, dstpath]) == 0: return - self.log.warning("Could not restore file %s" % srcname) + self.log.warning("Could not restore file %s", srcname) def is_parent_alive(self, parent_pid): if os.getppid() != parent_pid or parent_pid <= 1: @@ -1786,14 +1786,14 @@ STOP TIME: %(stop_time)s srcfile = os.path.join(srcdir, srcname) partfile = os.path.join(partdir, srcname) - # if we are using streaming replication, exit immediately + # if we are using streaming replication, exit immediately # if the srcfile is not here yet primary_conninfo = self.cf.get("primary_conninfo", "") if primary_conninfo and not os.path.isfile(srcfile): - self.log.info("%s: not found (ignored)" % srcname) + self.log.info("%s: not found (ignored)", srcname) sys.exit(1) - - # assume that postgres has processed the WAL file and is + + # assume that postgres has processed the WAL file and is # asking for next - hence work not in progress anymore if os.path.isfile(prgrfile): os.remove(prgrfile) @@ -1806,23 +1806,23 @@ STOP TIME: %(stop_time)s continue if os.path.isfile(srcfile): - self.log.info("%s: Found" % srcname) + self.log.info("%s: Found", srcname) break # ignore .history files unused, ext = os.path.splitext(srcname) if ext == 
".history": - self.log.info("%s: not found, ignoring" % srcname) + self.log.info("%s: not found, ignoring", srcname) sys.exit(1) # if stopping, include also partial wals if os.path.isfile(stopfile): if os.path.isfile(partfile): - self.log.info("%s: found partial" % srcname) + self.log.info("%s: found partial", srcname) srcfile = partfile break else: - self.log.info("%s: not found, stopping" % srcname) + self.log.info("%s: not found, stopping", srcname) sys.exit(1) # nothing to do, just in case check if parent is alive @@ -1831,7 +1831,7 @@ STOP TIME: %(stop_time)s sys.exit(1) # nothing to do, sleep - self.log.debug("%s: not found, sleeping" % srcname) + self.log.debug("%s: not found, sleeping", srcname) time.sleep(1) # got one, copy it @@ -1845,10 +1845,10 @@ STOP TIME: %(stop_time)s # Note that historic WAL files are removed during backup rotation if lstname == None: lstname = self.last_restart_point(srcname) - self.log.debug("calculated restart point: %s" % lstname) + self.log.debug("calculated restart point: %s", lstname) else: - self.log.debug("using supplied restart point: %s" % lstname) - self.log.debug("%s: copy done, cleanup" % srcname) + self.log.debug("using supplied restart point: %s", lstname) + self.log.debug("%s: copy done, cleanup", srcname) self.slave_cleanup(lstname) # create a PROGRESS file to notify that postgres is processing the WAL @@ -1861,8 +1861,8 @@ STOP TIME: %(stop_time)s def restore_database(self): """Restore the database from backup - If setname is specified, the contents of that backup set directory are - restored instead of "full_backup". Also copy is used instead of rename to + If setname is specified, the contents of that backup set directory are + restored instead of "full_backup". Also copy is used instead of rename to restore the directory (unless a pg_xlog directory has been specified). Restore to altdst if specified. Complain if it exists. 
@@ -1890,7 +1890,7 @@ STOP TIME: %(stop_time)s # stop postmaster if ordered if stop_cmd and os.path.isfile(pidfile): - self.log.info("Stopping postmaster: " + stop_cmd) + self.log.info("Stopping postmaster: %s", stop_cmd) self.exec_system(stop_cmd) time.sleep(3) @@ -1922,7 +1922,7 @@ STOP TIME: %(stop_time)s # nothing to back up createbackup = False - # see if we have to make a backup of the data directory + # see if we have to make a backup of the data directory backup_datadir = self.cf.getboolean('backup_datadir', True) if os.path.isdir(data_dir) and not backup_datadir: @@ -1936,12 +1936,12 @@ STOP TIME: %(stop_time)s # move old data away if createbackup and os.path.isdir(data_dir): - self.log.info("Move %s to %s" % (data_dir, bak)) + self.log.info("Move %s to %s", data_dir, bak) if not self.not_really: os.rename(data_dir, bak) # move new data, copy if setname specified - self.log.info("%s %s to %s" % (setname and "Copy" or "Move", full_dir, data_dir)) + self.log.info("%s %s to %s", setname and "Copy" or "Move", full_dir, data_dir) if self.cf.getfile('slave_pg_xlog', ''): link_xlog_dir = True @@ -1965,7 +1965,7 @@ STOP TIME: %(stop_time)s os.symlink(self.cf.getfile('slave_pg_xlog'), "%s/pg_xlog" % data_dir) if (self.is_master and createbackup and os.path.isdir(bak)): - # restore original xlog files to data_dir/pg_xlog + # restore original xlog files to data_dir/pg_xlog # symlinked directories are dereferenced self.exec_cmd(["cp", "-rL", "%s/pg_xlog/" % full_dir, "%s/pg_xlog" % data_dir ]) else: @@ -1983,9 +1983,9 @@ STOP TIME: %(stop_time)s cfsrc = os.path.join(bak, cf) cfdst = os.path.join(data_dir, cf) if os.path.exists(cfdst): - self.log.info("Already exists: %s" % cfdst) + self.log.info("Already exists: %s", cfdst) elif os.path.exists(cfsrc): - self.log.debug("Copy %s to %s" % (cfsrc, cfdst)) + self.log.debug("Copy %s to %s", cfsrc, cfdst) if not self.not_really: copy_conf(cfsrc, cfdst) @@ -1997,19 +1997,19 @@ STOP TIME: %(stop_time)s os.mkdir(spc_dir) if 
os.path.isdir(tmp_dir): self.log.info("Linking tablespaces to temporary location") - + # don't look into spc_dir, thus allowing # user to move them before. re-link only those # that are still in tmp_dir list = os.listdir(tmp_dir) list.sort() - + for d in list: if d[0] == ".": continue link_loc = os.path.abspath(os.path.join(spc_dir, d)) link_dst = os.path.abspath(os.path.join(tmp_dir, d)) - self.log.info("Linking tablespace %s to %s" % (d, link_dst)) + self.log.info("Linking tablespace %s to %s", d, link_dst) if not self.not_really: if os.path.islink(link_loc): os.remove(link_loc) @@ -2023,7 +2023,7 @@ STOP TIME: %(stop_time)s # determine if we can use %r in restore_command ctl = PgControlData(self.cf.getfile("slave_bin", ""), data_dir, True) if ctl.pg_version > 830: - self.log.debug('pg_version is %s, adding %%r to restore command' % ctl.pg_version) + self.log.debug('pg_version is %s, adding %%r to restore command', ctl.pg_version) restore_command = 'xrestore %f "%p" %r' else: if not ctl.is_valid: @@ -2044,7 +2044,7 @@ STOP TIME: %(stop_time)s (os.path.join(self.cf.getfile("slave_bin"), "pg_archivecleanup"), self.cf.getfile("completed_wals")) - self.log.info("Write %s" % rconf) + self.log.info("Write %s", rconf) if self.not_really: print conf else: @@ -2056,7 +2056,7 @@ STOP TIME: %(stop_time)s if not self.is_master: stopfile = os.path.join(self.cf.getfile("completed_wals"), "STOP") if os.path.isfile(stopfile): - self.log.info("Removing stopfile: "+stopfile) + self.log.info("Removing stopfile: %s", stopfile) if not self.not_really: os.remove(stopfile) @@ -2066,7 +2066,7 @@ STOP TIME: %(stop_time)s self.slave_restore_config() # run database in recovery mode - self.log.info("Starting postmaster: " + start_cmd) + self.log.info("Starting postmaster: %s", start_cmd) self.exec_system(start_cmd) else: self.log.info("Data files restored, recovery.conf created.") @@ -2088,7 +2088,7 @@ STOP TIME: %(stop_time)s return if not os.path.exists(cf_target_dir): - 
self.log.warning("Configuration directory does not exist: %s" % cf_target_dir) + self.log.warning("Configuration directory does not exist: %s", cf_target_dir) return self.log.info("Restoring configuration files") @@ -2097,15 +2097,15 @@ STOP TIME: %(stop_time)s cfdst = os.path.join(cf_target_dir, cf) if not os.path.isfile(cfsrc): - self.log.warning("Missing configuration file backup: %s" % cf) + self.log.warning("Missing configuration file backup: %s", cf) continue - self.log.debug("Copy %s to %s" % (cfsrc, cfdst)) + self.log.debug("Copy %s to %s", cfsrc, cfdst) if not self.not_really: copy_conf(cfsrc, cfdst) if cf == 'postgresql.conf': self.slave_deconfigure_archiving(cfdst) - + def slave_boot(self): self.assert_is_master(False) @@ -2114,7 +2114,7 @@ STOP TIME: %(stop_time)s stopfile = os.path.join(srcdir, "STOP") if self.not_really: - self.log.info("Writing STOP file: %s" % stopfile) + self.log.info("Writing STOP file: %s", stopfile) else: open(stopfile, "w").write("1") self.log.info("Stopping recovery mode") @@ -2128,7 +2128,7 @@ STOP TIME: %(stop_time)s if not self.not_really: open(pausefile, "w").write("1") else: - self.log.info("Writing PAUSE file: %s" % pausefile) + self.log.info("Writing PAUSE file: %s", pausefile) self.log.info("Pausing recovery mode") # wait for log apply to complete @@ -2172,7 +2172,7 @@ STOP TIME: %(stop_time)s pid = int(pidstring) print("%d" % pid) except ValueError: - self.log.error("lock file does not contain a pid:" + pidstring) + self.log.error("lock file does not contain a pid: %s", pidstring) return 1 if not self.not_really: @@ -2219,7 +2219,7 @@ STOP TIME: %(stop_time)s """Returns the name of the first needed WAL segment for backupset""" label = BackupLabel(backupdir) if not label.first_wal: - self.log.error("WAL name not found at %s" % backupdir) + self.log.error("WAL name not found at %s", backupdir) return None return label.first_wal @@ -2234,7 +2234,7 @@ STOP TIME: %(stop_time)s if os.path.exists(backup_label): # Label file 
still exists, use it for determining the restart point lbl = BackupLabel(slave_data) - self.log.debug("Last restart point from backup_label: %s" % lbl.first_wal) + self.log.debug("Last restart point from backup_label: %s", lbl.first_wal) return lbl.first_wal ctl = PgControlData(self.cf.getfile("slave_bin", ""), ".", True) @@ -2243,7 +2243,7 @@ STOP TIME: %(stop_time)s self.log.warning("Unable to determine last restart point") return walname - self.log.debug("Last restart point: %s" % ctl.wal_name) + self.log.debug("Last restart point: %s", ctl.wal_name) return ctl.wal_name def order_backupdirs(self,prefix,a,b): @@ -2257,12 +2257,12 @@ STOP TIME: %(stop_time)s if not b_indx: b_indx = -1 return cmp(int(a_indx), int(b_indx)) - + def get_backup_list(self,dst_loc): """Return the list of backup directories""" dirlist = glob.glob(os.path.abspath(dst_loc) + "*") dirlist.sort(lambda x,y: self.order_backupdirs(dst_loc, x,y)) - backupdirs = [ dir for dir in dirlist + backupdirs = [ dir for dir in dirlist if os.path.isdir(dir) and os.path.isfile(os.path.join(dir, "backup_label")) or os.path.isfile(os.path.join(dir, "backup_label.old"))] return backupdirs @@ -2276,7 +2276,7 @@ STOP TIME: %(stop_time)s if backups: lastwal = self.get_first_walname(backups[-1]) if lastwal: - self.log.info("First useful WAL file is: %s" % lastwal) + self.log.info("First useful WAL file is: %s", lastwal) self.slave_cleanup(lastwal) else: self.log.debug("No WAL-s to clean up.") @@ -2284,7 +2284,7 @@ STOP TIME: %(stop_time)s def slave_rotate_backups(self): """ Rotate backups by increasing backup directory suffixes. Note that since - we also have to make room for next backup, we actually have + we also have to make room for next backup, we actually have keep_backups - 1 backups available after this. Unneeded WAL files are not removed here, handled by xpurgewals command instead. 
@@ -2297,7 +2297,7 @@ STOP TIME: %(stop_time)s backupdirs = self.get_backup_list(dst_loc) if not backupdirs or maxbackups < 1: self.log.debug("Nothing to rotate") - + # remove expired backups while len(backupdirs) >= maxbackups and len(backupdirs) > 0: last = backupdirs.pop() @@ -2306,13 +2306,13 @@ STOP TIME: %(stop_time)s # Resume only if archive command succeeds. if archive_command: cmdline = archive_command.replace("$BACKUPDIR", last) - self.log.info("Executing archive_command: " + cmdline) + self.log.info("Executing archive_command: %s", cmdline) rc = self.exec_system(cmdline) if rc != 0: - self.log.error("Backup archiving returned %d, exiting!" % rc) + self.log.error("Backup archiving returned %d, exiting!", rc) sys.exit(1) - self.log.info("Removing expired backup directory: %s" % last) + self.log.info("Removing expired backup directory: %s", last) if self.not_really: continue cmdline = [ "rm", "-r", last ] @@ -2328,7 +2328,7 @@ STOP TIME: %(stop_time)s index = 0 else: index = int(index[1:])+1 - self.log.debug("Rename %s to %s.%s" % (dir, name, index)) + self.log.debug("Rename %s to %s.%s", dir, name, index) if self.not_really: continue os.rename(dir, "%s.%s" % (name,index)) @@ -2337,14 +2337,14 @@ STOP TIME: %(stop_time)s completed_wals = self.cf.getfile("completed_wals") partial_wals = self.cf.getfile("partial_wals") - self.log.debug("cleaning completed wals before %s" % last_applied) + self.log.debug("cleaning completed wals before %s", last_applied) self.del_wals(completed_wals, last_applied) if os.path.isdir(partial_wals): - self.log.debug("cleaning partial wals before %s" % last_applied) + self.log.debug("cleaning partial wals before %s", last_applied) self.del_wals(partial_wals, last_applied) else: - self.log.warning("partial_wals dir does not exist: %s" % partial_wals) + self.log.warning("partial_wals dir does not exist: %s", partial_wals) self.log.debug("cleaning done") @@ -2367,7 +2367,7 @@ STOP TIME: %(stop_time)s ok_del = 0 if fname < last: - 
self.log.debug("deleting %s" % full) + self.log.debug("deleting %s", full) if not self.not_really: try: os.remove(full) diff --git a/scripts/data_maintainer.py b/scripts/data_maintainer.py new file mode 100644 index 00000000..0c2c48b0 --- /dev/null +++ b/scripts/data_maintainer.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python + +"""Generic script for processing large data sets in small batches. + +Reads events from one datasource and commits them into another one, +either one by one or in batches. + +Config template:: + + [data_maintainer] + job_name = dm_remove_expired_services + + dbread = dbname=sourcedb_test + dbwrite = dbname=destdb port=1234 host=dbhost.com user=guest password=secret + dbbefore = dbname=destdb_test + dbafter = dbname=destdb_test + dbcrash = dbname=destdb_test + + sql_get_pk_list = + select username + from user_service + where expire_date < now(); + + # It is a good practice to include same where condition on target side as on read side, + # to ensure that you are actually changing the same data you think you are, + # especially when reading from replica database or when processing takes days. 
+ sql_modify = + delete from user_service + where username = %%(username)s + and expire_date < now(); + + # This will be run before executing the sql_get_pk_list query (optional) + sql_before_run = + select * from somefunction1(%(job_name)s); + + # This will be run when the DM finishes (optional) + sql_after_run = + select * from somefunction2(%(job_name)s); + + # Determines whether the sql_after_run query will be run in case the pk list query returns no rows + after_zero_rows = 1 + + # This will be run if the DM crashes (optional) + sql_on_crash = + select * from somefunction3(%(job_name)s); + + # materialize query so that transaction should not be open while processing it + #with_hold = 1 + + # how many records process to fetch at once and if batch processing is used then + # also how many records are processed in one commit + #fetch_count = 100 + + # by default commit after each row (safe when behind plproxy, bouncer or whatever) + # can be turned off for better performance when connected directly to database + #autocommit = 1 + + # just for tuning to throttle how much load we let onto write database + #commit_delay = 0.0 + + # quite often data_maintainer is run from crontab and then loop delay is not needed + # in case it has to be run as daemon set loop delay in seconds + #loop_delay = 1 + + logfile = ~/log/%(job_name)s.log + pidfile = ~/pid/%(job_name)s.pid + use_skylog = 0 +""" + +import datetime +import sys +import time + +import pkgloader +pkgloader.require('skytools', '3.0') +import skytools + + +class DataMaintainer (skytools.DBScript): + __doc__ = __doc__ + loop_delay = -1 + + def __init__(self, args): + super(DataMaintainer, self).__init__("data_maintainer", args) + + # query for fetching the PK-s of the data set to be maintained + self.sql_pk = self.cf.get("sql_get_pk_list") + + # query for changing data tuple ( autocommit ) + self.sql_modify = self.cf.get("sql_modify") + + # query to be run before starting the data maintainer, + # useful for 
retrieving initialization parameters of the query + self.sql_before = self.cf.get("sql_before_run", "") + + # query to be run after finishing the data maintainer + self.sql_after = self.cf.get("sql_after_run", "") + + # whether to run the sql_after query in case of 0 rows + self.after_zero_rows = self.cf.getint("after_zero_rows", 1) + + # query to be run if the process crashes + self.sql_crash = self.cf.get("sql_on_crash", "") + + # how many records to fetch at once + self.fetchcnt = self.cf.getint("fetchcnt", 100) + self.fetchcnt = self.cf.getint("fetch_count", self.fetchcnt) + + # specifies if non-transactional cursor should be created (0 -> without hold) + self.withhold = self.cf.getint("with_hold", 1) + + # execution mode (0 -> whole batch is committed / 1 -> autocommit) + self.autocommit = self.cf.getint("autocommit", 1) + + # delay in seconds after each commit + self.commit_delay = self.cf.getfloat("commit_delay", 0.0) + + def work(self): + self.log.info('Starting..') + started = lap_time = time.time() + total_count = 0 + bres = {} + + if self.sql_before: + bdb = self.get_database("dbbefore", autocommit=1) + bcur = bdb.cursor() + bcur.execute(self.sql_before) + if bcur.statusmessage.startswith('SELECT'): + res = bcur.dictfetchall() + assert len(res)==1, "Result of a 'before' query must be 1 row" + bres = res[0].copy() + + if self.autocommit: + self.log.info("Autocommit after each modify") + dbw = self.get_database("dbwrite", autocommit=1) + else: + self.log.info("Commit in %i record batches", self.fetchcnt) + dbw = self.get_database("dbwrite", autocommit=0) + if self.withhold: + dbr = self.get_database("dbread", autocommit=1) + sql = "DECLARE data_maint_cur NO SCROLL CURSOR WITH HOLD FOR %s" + else: + dbr = self.get_database("dbread", autocommit=0) + sql = "DECLARE data_maint_cur NO SCROLL CURSOR FOR %s" + rcur = dbr.cursor() + mcur = dbw.cursor() + rcur.execute(sql % self.sql_pk, bres) # pass results from before_query into sql_pk + self.log.debug(rcur.query) 
+ self.log.debug(rcur.statusmessage) + + while True: # loop while fetch returns fetch_count rows + self.fetch_started = time.time() + rcur.execute("FETCH FORWARD %s FROM data_maint_cur" % self.fetchcnt) + self.log.debug(rcur.query) + self.log.debug(rcur.statusmessage) + res = rcur.dictfetchall() + count, lastitem = self.process_batch(res, mcur, bres) + total_count += count + if not self.autocommit: + dbw.commit() + self.stat_put("duration", time.time() - self.fetch_started) + self.send_stats() + if len(res) < self.fetchcnt: + break + if not self.looping: + self.log.info("Exiting on user request") + break + if self.commit_delay > 0.0: + time.sleep(self.commit_delay) + if time.time() - lap_time > 60.0: # if one minute has passed print running totals + self.log.info("--- Running count: %s duration: %s ---", + total_count, datetime.timedelta(0, round(time.time() - started))) + lap_time = time.time() + + rcur.execute("CLOSE data_maint_cur") + if not self.withhold: + dbr.rollback() + self.log.info("--- Total count: %s duration: %s ---", + total_count, datetime.timedelta(0, round(time.time() - started))) + + if self.sql_after and (self.after_zero_rows > 0 or total_count > 0): + adb = self.get_database("dbafter", autocommit=1) + acur = adb.cursor() + + # FIXME: neither of those can be None? 
+ if bres != None and lastitem != None: + bres.update(lastitem) + lastitem = bres + if lastitem != None: + acur.execute(self.sql_after, lastitem) + else: + acur.execute(self.sql_after) + + def process_batch(self, res, mcur, bres): + """ Process events in autocommit mode reading results back and trying to make some sense out of them + """ + try: + count = 0 + item = bres + for i in res: # for each row in read query result + item.update(i) + mcur.execute(self.sql_modify, item) + self.log.debug(mcur.query) + if mcur.statusmessage.startswith('SELECT'): # if select was used we can expect some result + mres = mcur.dictfetchall() + for r in mres: + if 'stats' in r: # if specially handled column 'stats' is present + for k, v in skytools.db_urldecode(r['stats']).items(): + self.stat_increase(k, int(v)) + self.log.debug(r) + else: + self.stat_increase('processed', mcur.rowcount) + self.log.debug(mcur.statusmessage) + if 'cnt' in item: + count += item['cnt'] + self.stat_increase("count", item['cnt']) + else: + count += 1 + self.stat_increase("count") + if not self.looping: + break + return count, item + except: # process has crashed, run sql_crash and re-raise the exception + if self.sql_crash: + dbc = self.get_database("dbcrash", autocommit=1) + ccur = dbc.cursor() + ccur.execute(self.sql_crash, item) + raise + +if __name__ == '__main__': + script = DataMaintainer(sys.argv[1:]) + script.start() diff --git a/scripts/queue_loader.py b/scripts/queue_loader.py index 97742b82..062970d1 100755 --- a/scripts/queue_loader.py +++ b/scripts/queue_loader.py @@ -9,9 +9,9 @@ Config template:: logfile = pidfile = - db = + db = - #rename_tables = + #rename_tables = [DEFAULT] @@ -51,7 +51,7 @@ Config template:: # alter table only %%(part)s add primary key (%%(pkey)s); # ### Inherited partitions - #split_part_template = + #split_part_template = # create table %%(part)s () inherits (%%(parent)s); # alter table only %%(part)s add primary key (%%(pkey)s); @@ -75,7 +75,7 @@ Config template:: 
#bulk_mode=correct [table public.foo] - mode = + mode = create_sql = """ @@ -88,7 +88,7 @@ import skytools from pgq.cascade.worker import CascadedWorker from skytools import quote_ident, quote_fqident, UsageError -# todo: auto table detect +# TODO: auto table detect # BulkLoader load method METH_CORRECT = 0 @@ -99,6 +99,7 @@ LOAD_METHOD = METH_CORRECT AVOID_BIZGRES_BUG = 0 USE_LONGLIVED_TEMP_TABLES = True + class BasicLoader: """Apply events as-is.""" def __init__(self, table_name, parent_name, log): @@ -123,6 +124,7 @@ class BasicLoader: curs.execute("\n".join(self.sql_list)) self.sql_list = [] + class KeepLatestLoader(BasicLoader): """Keep latest row version. @@ -161,6 +163,7 @@ class BulkEvent(object): self.data = data self.pk_data = pk_data + class BulkLoader(BasicLoader): """Instead of statement-per event, load all data with one big COPY, UPDATE or DELETE statement. @@ -246,7 +249,7 @@ class BulkLoader(BasicLoader): # take last event ev = ev_list[-1] - + # generate needed commands if exists_before and exists_after: upd_list.append(ev.data) @@ -268,8 +271,8 @@ class BulkLoader(BasicLoader): real_update_count = len(upd_list) - #self.log.debug("process_one_table: %s (I/U/D = %d/%d/%d)" % ( - # tbl, len(ins_list), len(upd_list), len(del_list))) + #self.log.debug("process_one_table: %s (I/U/D = %d/%d/%d)", + # tbl, len(ins_list), len(upd_list), len(del_list)) # hack to unbroke stuff if LOAD_METHOD == METH_MERGED: @@ -284,13 +287,13 @@ class BulkLoader(BasicLoader): for fld in self.dist_fields: if fld not in key_fields: key_fields.append(fld) - #self.log.debug("PKey fields: %s Extra fields: %s" % ( - # ",".join(cache.pkey_list), ",".join(extra_fields))) + #self.log.debug("PKey fields: %s Extra fields: %s", + # ",".join(cache.pkey_list), ",".join(extra_fields)) # create temp table temp = self.create_temp_table(curs) tbl = self.table_name - + # where expr must have pkey and dist fields klist = [] for pk in key_fields: @@ -321,43 +324,43 @@ class 
BulkLoader(BasicLoader): # process deleted rows if len(del_list) > 0: - #self.log.info("Deleting %d rows from %s" % (len(del_list), tbl)) + #self.log.info("Deleting %d rows from %s", len(del_list), tbl) # delete old rows q = "truncate %s" % quote_fqident(temp) self.log.debug(q) curs.execute(q) # copy rows - self.log.debug("COPY %d rows into %s" % (len(del_list), temp)) + self.log.debug("COPY %d rows into %s", len(del_list), temp) skytools.magic_insert(curs, temp, del_list, col_list) # delete rows self.log.debug(del_sql) curs.execute(del_sql) - self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount)) + self.log.debug("%s - %d", curs.statusmessage, curs.rowcount) if len(del_list) != curs.rowcount: - self.log.warning("Delete mismatch: expected=%s deleted=%d" - % (len(del_list), curs.rowcount)) + self.log.warning("Delete mismatch: expected=%d deleted=%d", + len(del_list), curs.rowcount) temp_used = True # process updated rows if len(upd_list) > 0: - #self.log.info("Updating %d rows in %s" % (len(upd_list), tbl)) + #self.log.info("Updating %d rows in %s", len(upd_list), tbl) # delete old rows q = "truncate %s" % quote_fqident(temp) self.log.debug(q) curs.execute(q) # copy rows - self.log.debug("COPY %d rows into %s" % (len(upd_list), temp)) + self.log.debug("COPY %d rows into %s", len(upd_list), temp) skytools.magic_insert(curs, temp, upd_list, col_list) temp_used = True if LOAD_METHOD == METH_CORRECT: # update main table self.log.debug(upd_sql) curs.execute(upd_sql) - self.log.debug("%s - %d" % (curs.statusmessage, curs.rowcount)) + self.log.debug("%s - %d", curs.statusmessage, curs.rowcount) # check count if len(upd_list) != curs.rowcount: - self.log.warning("Update mismatch: expected=%s updated=%d" - % (len(upd_list), curs.rowcount)) + self.log.warning("Update mismatch: expected=%d updated=%d", + len(upd_list), curs.rowcount) else: # delete from main table self.log.debug(del_sql) @@ -365,12 +368,12 @@ class BulkLoader(BasicLoader): 
self.log.debug(curs.statusmessage) # check count if real_update_count != curs.rowcount: - self.log.warning("Update mismatch: expected=%s deleted=%d" - % (real_update_count, curs.rowcount)) + self.log.warning("Update mismatch: expected=%d deleted=%d", + real_update_count, curs.rowcount) # insert into main table if AVOID_BIZGRES_BUG: # copy again, into main table - self.log.debug("COPY %d rows into %s" % (len(upd_list), tbl)) + self.log.debug("COPY %d rows into %s", len(upd_list), tbl) skytools.magic_insert(curs, tbl, upd_list, col_list) else: # better way, but does not work due bizgres bug @@ -380,7 +383,7 @@ class BulkLoader(BasicLoader): # process new rows if len(ins_list) > 0: - self.log.info("Inserting %d rows into %s" % (len(ins_list), tbl)) + self.log.info("Inserting %d rows into %s", len(ins_list), tbl) skytools.magic_insert(curs, tbl, ins_list, col_list) # delete remaining rows @@ -402,16 +405,16 @@ class BulkLoader(BasicLoader): # check if exists if USE_LONGLIVED_TEMP_TABLES: if skytools.exists_temp_table(curs, tempname): - self.log.debug("Using existing temp table %s" % tempname) + self.log.debug("Using existing temp table %s", tempname) return tempname - + # bizgres crashes on delete rows arg = "on commit delete rows" arg = "on commit preserve rows" # create temp table for loading q = "create temp table %s (like %s) %s" % ( quote_fqident(tempname), quote_fqident(self.table_name), arg) - self.log.debug("Creating temp table: %s" % q) + self.log.debug("Creating temp table: %s", q) curs.execute(q) return tempname @@ -464,8 +467,8 @@ class TableHandler: self.split_format = self.split_date_from_field else: raise UsageError('Bad value for split_mode: '+smode) - self.log.debug("%s: split_mode=%s, split_field=%s, split_part=%s" % ( - self.table_name, smode, self.split_field, self.split_part)) + self.log.debug("%s: split_mode=%s, split_field=%s, split_part=%s", + self.table_name, smode, self.split_field, self.split_part) elif table_mode == 'ignore': pass else: @@ 
-603,6 +606,7 @@ class QueueLoader(CascadedWorker): st.flush(curs) CascadedWorker.finish_remote_batch(self, src_db, dst_db, tick_id) + if __name__ == '__main__': script = QueueLoader('queue_loader', 'db', sys.argv[1:]) script.start() diff --git a/scripts/scriptmgr.py b/scripts/scriptmgr.py index e62b0642..9c65a954 100755 --- a/scripts/scriptmgr.py +++ b/scripts/scriptmgr.py @@ -95,7 +95,7 @@ def full_path(job, fn): # find home if user: - home = pwd.getpwuid(os.getuid()).pw_dir + home = pwd.getpwnam(user).pw_dir elif 'HOME' in os.environ: home = os.environ['HOME'] else: @@ -178,7 +178,7 @@ class ScriptMgr(skytools.DBScript): got = 1 self.add_job(fn, sect) if not got: - self.log.warning('Cannot find service for %s' % fn) + self.log.warning('Cannot find service for %s', fn) def add_job(self, cf_file, service_name): svc = self.svc_map[service_name] @@ -237,23 +237,23 @@ class ScriptMgr(skytools.DBScript): job = self.get_job_by_name (job_name) if isinstance (job, int): return job # ret.code - self.log.info('Starting %s' % job_name) + self.log.info('Starting %s', job_name) pidfile = job['pidfile'] if not pidfile: - self.log.warning("No pidfile for %s, cannot launch" % job_name) + self.log.warning("No pidfile for %s, cannot launch", job_name) return 0 if os.path.isfile(pidfile): if skytools.signal_pidfile(pidfile, 0): - self.log.warning("Script %s seems running" % job_name) + self.log.warning("Script %s seems running", job_name) return 0 else: - self.log.info("Ignoring stale pidfile for %s" % job_name) + self.log.info("Ignoring stale pidfile for %s", job_name) os.chdir(job['cwd']) cmd = "%(script)s %(config)s %(args)s -d" % job res = launch_cmd(job, cmd) self.log.debug(res) if res != 0: - self.log.error('startup failed: %s' % job_name) + self.log.error('startup failed: %s', job_name) return 1 else: return 0 @@ -262,23 +262,23 @@ class ScriptMgr(skytools.DBScript): job = self.get_job_by_name (job_name) if isinstance (job, int): return job # ret.code - 
self.log.info('Stopping %s' % job_name) + self.log.info('Stopping %s', job_name) self.signal_job(job, signal.SIGINT) def cmd_reload(self, job_name): job = self.get_job_by_name (job_name) if isinstance (job, int): return job # ret.code - self.log.info('Reloading %s' % job_name) + self.log.info('Reloading %s', job_name) self.signal_job(job, signal.SIGHUP) def get_job_by_name (self, job_name): if job_name not in self.job_map: - self.log.error ("Unknown job: %s" % job_name) + self.log.error ("Unknown job: %s", job_name) return 1 job = self.job_map[job_name] if job['disabled']: - self.log.info ("Skipping %s" % job_name) + self.log.info ("Skipping %s", job_name) return 0 return job @@ -290,7 +290,7 @@ class ScriptMgr(skytools.DBScript): while True: if skytools.signal_pidfile (job['pidfile'], 0): if not msg: - self.log.info ("Waiting for %s to stop" % job_name) + self.log.info ("Waiting for %s to stop", job_name) msg = True time.sleep (0.1) else: @@ -299,7 +299,7 @@ class ScriptMgr(skytools.DBScript): def signal_job(self, job, sig): pidfile = job['pidfile'] if not pidfile: - self.log.warning("No pidfile for %s (%s)" % (job['job_name'], job['config'])) + self.log.warning("No pidfile for %s (%s)", job['job_name'], job['config']) return if os.path.isfile(pidfile): pid = int(open(pidfile).read()) @@ -307,15 +307,15 @@ class ScriptMgr(skytools.DBScript): # run sudo + kill to avoid killing unrelated processes res = os.system("sudo -u %s kill %d" % (job['user'], pid)) if res: - self.log.warning("Signaling %s failed" % (job['job_name'],)) + self.log.warning("Signaling %s failed", job['job_name']) else: # direct kill try: os.kill(pid, sig) except Exception, det: - self.log.warning("Signaling %s failed: %s" % (job['job_name'], str(det))) + self.log.warning("Signaling %s failed: %s", job['job_name'], det) else: - self.log.warning("Job %s not running" % job['job_name']) + self.log.warning("Job %s not running", job['job_name']) def work(self): self.set_single_loop(1) diff --git 
a/scripts/simple_consumer.py b/scripts/simple_consumer.py index df0db11c..55ccc31a 100755 --- a/scripts/simple_consumer.py +++ b/scripts/simple_consumer.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python """Consumer that simply calls SQL query for each event. @@ -66,4 +66,3 @@ class SimpleConsumer(pgq.Consumer): if __name__ == '__main__': script = SimpleConsumer("simple_consumer3", "src_db", sys.argv[1:]) script.start() - diff --git a/scripts/simple_local_consumer.py b/scripts/simple_local_consumer.py index 6e3eb601..1c8f97dd 100755 --- a/scripts/simple_local_consumer.py +++ b/scripts/simple_local_consumer.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python """Consumer that simply calls SQL query for each event. @@ -63,4 +63,3 @@ class SimpleLocalConsumer(pgq.LocalConsumer): if __name__ == '__main__': script = SimpleLocalConsumer("simple_local_consumer3", "src_db", sys.argv[1:]) script.start() - diff --git a/setup_skytools.py b/setup_skytools.py index d8c909d4..cc9a1a4c 100755 --- a/setup_skytools.py +++ b/setup_skytools.py @@ -34,9 +34,10 @@ ac_ver = m.group(1) sfx_scripts = [ 'python/londiste.py', 'python/walmgr.py', - 'scripts/scriptmgr.py', - 'scripts/queue_splitter.py', + 'scripts/data_maintainer.py', 'scripts/queue_mover.py', + 'scripts/queue_splitter.py', + 'scripts/scriptmgr.py', 'scripts/simple_consumer.py', 'scripts/simple_local_consumer.py', 'scripts/skytools_upgrade.py', diff --git a/sql/londiste/functions/londiste.create_partition.sql b/sql/londiste/functions/londiste.create_partition.sql index 3d540e33..1a59e2ee 100644 --- a/sql/londiste/functions/londiste.create_partition.sql +++ b/sql/londiste/functions/londiste.create_partition.sql @@ -123,7 +123,7 @@ begin -- set proper part table ownership if owner != user then - sql = 'alter table ' || fq_part || ' owner to ' || owner; + sql = 'alter table ' || fq_part || ' owner to ' || quote_ident(owner); execute sql; end if; @@ -235,6 +235,7 @@ begin if r_extra is not null then sql := 
'ALTER TABLE ' || fq_part || r_extra || quote_ident(r.rulename); + execute sql; end if; end loop; diff --git a/sql/londiste/functions/londiste.create_trigger.sql b/sql/londiste/functions/londiste.create_trigger.sql index 9dbf4cab..336460ed 100644 --- a/sql/londiste/functions/londiste.create_trigger.sql +++ b/sql/londiste/functions/londiste.create_trigger.sql @@ -106,7 +106,7 @@ begin trigger_name := '_londiste_' || i_queue_name; lg_func := 'pgq.logutriga'; lg_event := ''; - lg_args := array[i_queue_name]; + lg_args := array[quote_literal(i_queue_name)]; lg_pos := 'after'; if array_lower(_args, 1) is not null then @@ -187,7 +187,7 @@ begin into _old_tgargs; if found then - _new_tgargs := lg_args[1]; + _new_tgargs := decode(lg_args[1], 'escape'); for i in 2 .. array_upper(lg_args, 1) loop _new_tgargs := _new_tgargs || E'\\000'::bytea || decode(lg_args[i], 'escape'); end loop; @@ -230,7 +230,7 @@ begin where tgrelid = londiste.find_table_oid(i_dest_table) and tgname = trunctrg_name; if not found then - _extra_args := i_queue_name || _extra_args; + _extra_args := quote_literal(i_queue_name) || _extra_args; sql := 'create trigger ' || quote_ident(trunctrg_name) || ' after truncate on ' || londiste.quote_fqname(i_dest_table) || ' for each statement execute procedure pgq.sqltriga(' diff --git a/sql/londiste/functions/londiste.drop_obsolete_partitions.sql b/sql/londiste/functions/londiste.drop_obsolete_partitions.sql new file mode 100644 index 00000000..ee18d904 --- /dev/null +++ b/sql/londiste/functions/londiste.drop_obsolete_partitions.sql @@ -0,0 +1,62 @@ + +create or replace function londiste.drop_obsolete_partitions +( + in i_parent_table text, + in i_retention_period interval, + in i_partition_period text +) + returns setof text +as $$ +------------------------------------------------------------------------------- +-- Function: londiste.drop_obsolete_partitions(3) +-- +-- Drop obsolete partitions of partition-by-date parent table. 
+-- +-- Parameters: +-- i_parent_table Master table from which partitions are inherited +-- i_retention_period How long to keep partitions around +-- i_partition_period One of: year, month, day, hour +-- +-- Returns: +-- Names of partitions dropped +------------------------------------------------------------------------------- +declare + _schema text not null := lower (split_part (i_parent_table, '.', 1)); + _table text not null := lower (split_part (i_parent_table, '.', 2)); + _part text; + _expr text; + _dfmt text; +begin + if i_partition_period in ('year', 'yearly') then + _expr := '_[0-9]{4}'; + _dfmt := '_YYYY'; + elsif i_partition_period in ('month', 'monthly') then + _expr := '_[0-9]{4}_[0-9]{2}'; + _dfmt := '_YYYY_MM'; + elsif i_partition_period in ('day', 'daily') then + _expr := '_[0-9]{4}_[0-9]{2}_[0-9]{2}'; + _dfmt := '_YYYY_MM_DD'; + elsif i_partition_period in ('hour', 'hourly') then + _expr := '_[0-9]{4}_[0-9]{2}_[0-9]{2}_[0-9]{2}'; + _dfmt := '_YYYY_MM_DD_HH24'; + else + raise exception 'not supported i_partition_period: %', i_partition_period; + end if; + + if length (_table) = 0 then + _table := _schema; + _schema := 'public'; + end if; + + for _part in + select quote_ident (t.schemaname) ||'.'|| quote_ident (t.tablename) + from pg_catalog.pg_tables t + where t.schemaname = _schema + and t.tablename ~ ('^'|| _table || _expr ||'$') + and t.tablename < _table || to_char (now() - i_retention_period, _dfmt) + loop + execute 'drop table '|| _part; + return next _part; + end loop; +end; +$$ language plpgsql; diff --git a/sql/londiste/structure/functions.sql b/sql/londiste/structure/functions.sql index 560185ae..5936536b 100644 --- a/sql/londiste/structure/functions.sql +++ b/sql/londiste/structure/functions.sql @@ -52,4 +52,5 @@ select londiste.upgrade_schema(); -- Group: Utility functions for handlers \i functions/londiste.create_partition.sql +\i functions/londiste.drop_obsolete_partitions.sql diff --git a/sql/londiste/structure/grants.ini 
b/sql/londiste/structure/grants.ini index 0c704014..ca2a3765 100644 --- a/sql/londiste/structure/grants.ini +++ b/sql/londiste/structure/grants.ini @@ -1,14 +1,14 @@ [GrantFu] # roles that we maintain in this file -roles = londiste_local, londiste_remote, public +roles = londiste_writer, londiste_reader, public, pgq_admin [1.tables] on.tables = londiste.table_info, londiste.seq_info, londiste.pending_fkeys, londiste.applied_execute -londiste_local = select, insert, update, delete -londiste_remote = select +pgq_admin = select, insert, update, delete +londiste_reader = select # backwards compat, should be dropped? public = select @@ -21,13 +21,23 @@ public = execute [3.remote.node] on.functions = %(londiste_remote_fns)s -londiste_remote = execute -londiste_local = execute +londiste_reader = execute +londiste_writer = execute -[3.local.node] +[4.local.node] on.functions = %(londiste_local_fns)s, %(londiste_internal_fns)s -londiste_local = execute +londiste_writer = execute + +[5.seqs] +londiste_writer = usage +on.sequences = + londiste.table_info_nr_seq, + londiste.seq_info_nr_seq + +[6.maint] +pgq_admin = execute +on.functions = londiste.periodic_maintenance() # define various groups of functions @@ -85,5 +95,8 @@ londiste_local_fns = londiste.local_set_table_struct(text, text, text), londiste.drop_table_triggers(text, text), londiste.table_info_trigger(), - londiste.create_partition(text,text,text,text,timestamptz,text) + londiste.create_partition(text, text, text, text, timestamptz, text), + londiste.drop_obsolete_partitions (text, interval, text), + londiste.create_trigger(text,text,text[],text,text) + diff --git a/sql/pgq/structure/grants.ini b/sql/pgq/structure/grants.ini index 451695da..b83d27c5 100644 --- a/sql/pgq/structure/grants.ini +++ b/sql/pgq/structure/grants.ini @@ -24,16 +24,22 @@ on.tables = pgq.queue, pgq.tick, pgq.subscription +pgq_admin = select, insert, update, delete pgq_reader = select public = select [5.event.tables] -on.tables = 
pgq.event_template, pgq.retry_queue +on.tables = pgq.event_template pgq_reader = select +pgq_admin = select, truncate # drop public access to events public = +[6.retry.event] +on.tables = pgq.retry_queue +pgq_admin = select, insert, update, delete + # # define various groups of functions @@ -68,6 +74,7 @@ pgq_read_fns = pgq.event_retry(bigint, bigint, timestamptz), pgq.event_retry(bigint, bigint, integer), pgq.batch_retry(bigint, integer), + pgq.force_tick(text), pgq.finish_batch(bigint) pgq_write_fns = @@ -90,7 +97,6 @@ pgq_system_fns = pgq.grant_perms(text), pgq._grant_perms_from(text,text,text,text), pgq.tune_storage(text), - pgq.force_tick(text), pgq.seq_setval(text, int8), pgq.create_queue(text), pgq.drop_queue(text, bool), diff --git a/sql/pgq_node/structure/grants.ini b/sql/pgq_node/structure/grants.ini index d1cc4558..39a01265 100644 --- a/sql/pgq_node/structure/grants.ini +++ b/sql/pgq_node/structure/grants.ini @@ -28,10 +28,22 @@ pgq_admin = execute on.functions = %(pgq_node_admin_fns)s pgq_admin = execute +[5.tables] +pgq_reader = select +pgq_writer = select +pgq_admin = select, insert, update, delete +on.tables = + pgq_node.node_location, + pgq_node.node_info, + pgq_node.local_state, + pgq_node.subscriber_info + # define various groups of functions [DEFAULT] pgq_node_remote_fns = + pgq_node.get_consumer_info(text), + pgq_node.get_consumer_state(text, text), pgq_node.get_queue_locations(text), pgq_node.get_node_info(text), pgq_node.get_subscriber_info(text), @@ -45,12 +57,12 @@ pgq_node_public_fns = pgq_node.version() pgq_node_admin_fns = + pgq_node.register_location(text, text, text, boolean), + pgq_node.unregister_location(text, text), pgq_node.upgrade_schema(), pgq_node.maint_watermark(text) pgq_node_consumer_fns = - pgq_node.get_consumer_info(text), - pgq_node.get_consumer_state(text, text), pgq_node.register_consumer(text, text, text, int8), pgq_node.unregister_consumer(text, text), pgq_node.change_consumer_provider(text, text, text), @@ -60,8 +72,6 
@@ pgq_node_consumer_fns = pgq_node.set_consumer_error(text, text, text) pgq_node_worker_fns = - pgq_node.register_location(text, text, text, boolean), - pgq_node.unregister_location(text, text), pgq_node.create_node(text, text, text, text, text, bigint, text), pgq_node.drop_node(text, text), pgq_node.demote_root(text, int4, text), diff --git a/tests/londiste/regen.sh b/tests/londiste/regen.sh index b40ae4ec..031d29ad 100755 --- a/tests/londiste/regen.sh +++ b/tests/londiste/regen.sh @@ -33,6 +33,7 @@ cat > conf/londiste_$db.ini <<EOF [londiste3] job_name = londiste_$db db = dbname=$db +public_node_location = dbname=$db host=/tmp queue_name = replika logfile = log/%(job_name)s.log pidfile = pid/%(job_name)s.pid @@ -68,11 +69,11 @@ run cat conf/pgqd.ini run cat conf/londiste_db1.ini msg "Install londiste3 and initialize nodes" -run londiste3 $v conf/londiste_db1.ini create-root node1 'dbname=db1' -run londiste3 $v conf/londiste_db2.ini create-branch node2 'dbname=db2' --provider='dbname=db1' -run londiste3 $v conf/londiste_db3.ini create-branch node3 'dbname=db3' --provider='dbname=db1' -run londiste3 $v conf/londiste_db4.ini create-branch node4 'dbname=db4' --provider='dbname=db2' --sync-watermark=node4,node5 -run londiste3 $v conf/londiste_db5.ini create-branch node5 'dbname=db5' --provider='dbname=db3' --sync-watermark=node4,node5 +run londiste3 $v conf/londiste_db1.ini create-root node1 +run londiste3 $v conf/londiste_db2.ini create-branch node2 --provider='dbname=db1' +run londiste3 $v conf/londiste_db3.ini create-branch node3 --provider='dbname=db1' +run londiste3 $v conf/londiste_db4.ini create-branch node4 --provider='dbname=db2' --sync-watermark=node4,node5 +run londiste3 $v conf/londiste_db5.ini create-branch node5 --provider='dbname=db3' --sync-watermark=node4,node5 msg "Run ticker" run pgqd $v -d conf/pgqd.ini |