diff options
author | Marko Kreen | 2012-09-18 13:39:07 +0000 |
---|---|---|
committer | Marko Kreen | 2012-09-18 13:39:07 +0000 |
commit | c49f3ec9548db549b68533016a113336bdf05ff4 (patch) | |
tree | 4d656872cf40c7459015d391e8940c7a94f4791a | |
parent | 6a58e74aa18aa5a43761418b29eefd7e3dfa026f (diff) |
londiste compare improvements
* Calculate common column names, to allow different order
or different subset.
* Use different hash function on 8.3 <> something else comparison
as hashtext() changed in 8.4.
* Always calculate final hash as bigint.
-rw-r--r-- | python/londiste/compare.py | 48 |
1 files changed, 47 insertions, 1 deletions
diff --git a/python/londiste/compare.py b/python/londiste/compare.py index f053e60d..b08a04e4 100644 --- a/python/londiste/compare.py +++ b/python/londiste/compare.py @@ -23,8 +23,19 @@ class Comparator(Syncer): self.log.info('Counting %s' % dst_tbl) - q = "select count(1) as cnt, sum(hashtext(t.*::text)) as chksum from only _TABLE_ t" + # get common cols + cols = self.calc_cols(src_curs, src_tbl, dst_curs, dst_tbl) + + # get sane query + v1 = src_db.server_version + v2 = dst_db.server_version + if (v1 < 80400 or v2 < 80400) and v1 != v2: + q = "select count(1) as cnt, sum(md5('x'||substr(md5(_COLS_::text),1,16))::bit(64)::bigint) as chksum from only _TABLE_" + else: + q = "select count(1) as cnt, sum(hashtext(_COLS_::text)::bigint) as chksum from only _TABLE_" + q = self.cf.get('compare_sql', q) + q = q.replace("_COLS_", cols) src_q = q.replace('_TABLE_', skytools.quote_fqident(src_tbl)) dst_q = q.replace('_TABLE_', skytools.quote_fqident(dst_tbl)) @@ -50,6 +61,41 @@ class Comparator(Syncer): return 1 return 0 + def calc_cols(self, src_curs, src_tbl, dst_curs, dst_tbl): + cols1 = self.load_cols(src_curs, src_tbl) + cols2 = self.load_cols(dst_curs, dst_tbl) + + qcols = [] + for c in self.calc_common(cols1, cols2): + qcols.append(skytools.quote_ident(c)) + return "(%s)" % ",".join(qcols) + + def load_cols(self, curs, tbl): + schema, table = skytools.fq_name_parts(tbl) + q = "select column_name from information_schema.columns"\ + " where table_schema = %s and table_name = %s" + curs.execute(q, [schema, table]) + cols = [] + for row in curs.fetchall(): + cols.append(row[0]) + return cols + + def calc_common(self, cols1, cols2): + common = [] + map2 = {} + for c in cols2: + map2[c] = 1 + for c in cols1: + if c in map2: + common.append(c) + if len(common) == 0: + raise Exception("no common columns found") + + if len(common) != len(cols1) or len(cols2) != len(cols1): + self.log.warning("Ignoring some columns") + + return common + if __name__ == '__main__': script = 
Comparator(sys.argv[1:]) script.start() |