summary refs log tree commit diff
diff options
context:
space:
mode:
author Marko Kreen 2012-09-18 13:39:07 +0000
committer Marko Kreen 2012-09-18 13:39:07 +0000
commit c49f3ec9548db549b68533016a113336bdf05ff4 (patch)
tree 4d656872cf40c7459015d391e8940c7a94f4791a
parent 6a58e74aa18aa5a43761418b29eefd7e3dfa026f (diff)
londiste compare improvements
* Calculate common column names, to allow different order or different subset. * Use a different hash function when comparing 8.3 against a different server version, as hashtext() changed in 8.4. * Always calculate final hash as bigint.
-rw-r--r-- python/londiste/compare.py 48
1 files changed, 47 insertions, 1 deletions
diff --git a/python/londiste/compare.py b/python/londiste/compare.py
index f053e60d..b08a04e4 100644
--- a/python/londiste/compare.py
+++ b/python/londiste/compare.py
@@ -23,8 +23,19 @@ class Comparator(Syncer):
self.log.info('Counting %s' % dst_tbl)
- q = "select count(1) as cnt, sum(hashtext(t.*::text)) as chksum from only _TABLE_ t"
+ # get common cols
+ cols = self.calc_cols(src_curs, src_tbl, dst_curs, dst_tbl)
+
+ # get sane query
+ v1 = src_db.server_version
+ v2 = dst_db.server_version
+ if (v1 < 80400 or v2 < 80400) and v1 != v2:
+ q = "select count(1) as cnt, sum(md5('x'||substr(md5(_COLS_::text),1,16))::bit(64)::bigint) as chksum from only _TABLE_"
+ else:
+ q = "select count(1) as cnt, sum(hashtext(_COLS_::text)::bigint) as chksum from only _TABLE_"
+
q = self.cf.get('compare_sql', q)
+ q = q.replace("_COLS_", cols)
src_q = q.replace('_TABLE_', skytools.quote_fqident(src_tbl))
dst_q = q.replace('_TABLE_', skytools.quote_fqident(dst_tbl))
@@ -50,6 +61,41 @@ class Comparator(Syncer):
return 1
return 0
+ def calc_cols(self, src_curs, src_tbl, dst_curs, dst_tbl):
+ cols1 = self.load_cols(src_curs, src_tbl)
+ cols2 = self.load_cols(dst_curs, dst_tbl)
+
+ qcols = []
+ for c in self.calc_common(cols1, cols2):
+ qcols.append(skytools.quote_ident(c))
+ return "(%s)" % ",".join(qcols)
+
+ def load_cols(self, curs, tbl):
+ schema, table = skytools.fq_name_parts(tbl)
+ q = "select column_name from information_schema.columns"\
+ " where table_schema = %s and table_name = %s"
+ curs.execute(q, [schema, table])
+ cols = []
+ for row in curs.fetchall():
+ cols.append(row[0])
+ return cols
+
+ def calc_common(self, cols1, cols2):
+ common = []
+ map2 = {}
+ for c in cols2:
+ map2[c] = 1
+ for c in cols1:
+ if c in map2:
+ common.append(c)
+ if len(common) == 0:
+ raise Exception("no common columns found")
+
+ if len(common) != len(cols1) or len(cols2) != len(cols1):
+ self.log.warning("Ignoring some columns")
+
+ return common
+
if __name__ == '__main__':
script = Comparator(sys.argv[1:])
script.start()