author     Asko Oja    2012-10-08 14:38:10 +0000
committer  Asko Oja    2012-10-09 06:42:58 +0000
commit     ccfcf87df7fe0a123b563efec1639dec8367c2da (patch)
tree       bb76b6bc8648c7617d1d527bcc5b723704b29c8d
parent     7c85ff093c17a819f23ecd522b5614aaf753d7ee (diff)
Londiste compare and repair fixes
Deprecated the Londiste command line parameter --copy-condition; the Londiste
handler part.py must be used instead, as it leaves the configuration in a
maintainable state. Fixed Londiste compare and repair for one-to-many and
many-to-one replication use cases: the filtering condition is now applied on
both ends, so the compared datasets should be the same. Cleaned up the
database splitting howto.
-rw-r--r--  doc/howto/londiste3_partitioning_howto.txt | 158
-rwxr-xr-x  python/londiste.py                         |   2
-rw-r--r--  python/londiste/compare.py                 |  12
-rw-r--r--  python/londiste/handler.py                 |  16
-rw-r--r--  python/londiste/handlers/dispatch.py       |   4
-rw-r--r--  python/londiste/handlers/part.py           |   9
-rw-r--r--  python/londiste/handlers/qtable.py         |   2
-rw-r--r--  python/londiste/repair.py                  |  19
-rw-r--r--  python/londiste/setup.py                   |   4
-rw-r--r--  python/londiste/syncer.py                  |  11
-rw-r--r--  python/londiste/table_copy.py              |  23
11 files changed, 124 insertions, 136 deletions
diff --git a/doc/howto/londiste3_partitioning_howto.txt b/doc/howto/londiste3_partitioning_howto.txt
index ba83fabb..d206add1 100644
--- a/doc/howto/londiste3_partitioning_howto.txt
+++ b/doc/howto/londiste3_partitioning_howto.txt
@@ -9,51 +9,46 @@ The target databases will have a `partconf` schema which is usually
used to drive PL/Proxy. Here it is used simply to provide
configuration to the `part` handler.
+== Prerequisites ==
+
+Obviously skytools must be installed, but in addition we need pghashlib and
+pgbench.
+
== Setting up the Root Database ==
-=== Create database ===
+=== Create databases ===
+
+Create the root database that will contain all data, and two shard databases.
Run the following commands:
----
-CREATE DATABASE l3part_root;
+psql -c "CREATE DATABASE rootdb;"
+psql -c "CREATE DATABASE sharddb_0;"
+psql -c "CREATE DATABASE sharddb_1;"
----
+Deploy the hash function everywhere. This is needed because the internal hashtext
+function was changed between versions 8.3 and 8.4 and may be changed again
+in the future without consideration for its users.
+
+----
+psql rootdb < /usr/share/postgresql/8.4/contrib/hashlib.sql
+psql sharddb_0 < /usr/share/postgresql/8.4/contrib/hashlib.sql
+psql sharddb_1 < /usr/share/postgresql/8.4/contrib/hashlib.sql
+----
+
=== Set up pgbench schema ===
In this HowTo we are using pgbench for setting up the schema,
populating it with sample data and later running SQL loads to be replicated.
-
This command will create pgbench tables and fill them with data:
----
-/usr/lib/postgresql/9.1/bin/pgbench -i -s 2 -F 80 l3part_root
-----
-
-=== Change primary key columns to text ===
-
-Standard pgbench schema has integer primary key columns for its tables.
-The standard partitioning handler is able to partition only text columns,
-so we change the primary key column types to text
-
-
-----
-alter table pgbench_accounts alter column aid type text;
-alter table pgbench_branches alter column bid type text;
-alter table pgbench_tellers alter column tid type text;
-----
-
-Now create the partition databases to replicate to.
-Each of these will get roughly half of the individual data rows.
-
-
-Create database for partition #0:
-----
-createdb l3part_part0;
+/usr/lib/postgresql/8.4/bin/pgbench -i -s 2 -F 80 rootdb
----
-And create a partition configuration table in this database
+Write partconf.sql that will be deployed to all databases:
----
-
CREATE SCHEMA partconf;
CREATE TABLE partconf.conf (
part_nr integer,
@@ -63,91 +58,85 @@ CREATE TABLE partconf.conf (
max_slot integer,
cluster_name text
);
-insert into partconf.conf(part_nr, max_part) values(0,1);
+CREATE FUNCTION partconf.get_hash_raw
+( i_input integer
+) RETURNS integer
+AS
+$_$
+-- used to wrap hashtext so that we can replace it in 8.4 with the
+-- older implementation to keep compatibility
+select hash_string($1::text, 'lookup2');
+$_$
+LANGUAGE sql;
----
+Populate the shard configuration tables. These values are used inside part.py:
-Create database for partition #1:
----
-CREATE DATABASE l3part_part1;
-----
-
-
-----
-
-CREATE SCHEMA partconf;
-CREATE TABLE partconf.conf (
- part_nr integer,
- max_part integer,
- db_code bigint,
- is_primary boolean,
- max_slot integer,
- cluster_name text
-);
-insert into partconf.conf(part_nr, max_part) values(1,1);
+psql rootdb < partconf.sql
+psql sharddb_0 < partconf.sql
+psql sharddb_1 < partconf.sql
+psql sharddb_0 -c "insert into partconf.conf(part_nr, max_part) values(0,1);"
+psql sharddb_1 -c "insert into partconf.conf(part_nr, max_part) values(1,1);"
----
Next create configuration files for the root node and both partitions.
-st3partsplit/st3_l3part_root.ini
+st3partsplit/st3_rootdb.ini
----
[londiste3]
-job_name = st3_l3part_root
-db = dbname=l3part_root
+job_name = st3_rootdb
+db = dbname=rootdb
queue_name = replika
-logfile = st3partsplit/log/st3_l3part_root.log
-pidfile = st3partsplit/pid/st3_l3part_root.pid
+logfile = st3partsplit/log/st3_rootdb.log
+pidfile = st3partsplit/pid/st3_rootdb.pid
----
-st3partsplit/st3_l3part_part0.ini
+st3partsplit/st3_sharddb_0.ini
----
[londiste3]
-job_name = st3_l3part_part0
-db = dbname=l3part_part0
+job_name = st3_sharddb_0
+db = dbname=sharddb_0
queue_name = replika
-logfile = st3partsplit/log/st3_l3part_part0.log
-pidfile = st3partsplit/pid/st3_l3part_part0.pid
+logfile = st3partsplit/log/st3_sharddb_0.log
+pidfile = st3partsplit/pid/st3_sharddb_0.pid
----
-st3partsplit/st3_l3part_part1.ini
+st3partsplit/st3_sharddb_1.ini
----
[londiste3]
-job_name = st3_l3part_part1
-db = dbname=l3part_part1
+job_name = st3_sharddb_1
+db = dbname=sharddb_1
queue_name = replika
-logfile = st3partsplit/log/st3_l3part_part1.log
-pidfile = st3partsplit/pid/st3_l3part_part1.pid
+logfile = st3partsplit/log/st3_sharddb_1.log
+pidfile = st3partsplit/pid/st3_sharddb_1.pid
----
Then create the root node:
----
-londiste3 st3partsplit/st3_l3part_root.ini create-root node1 dbname=l3part_root
+londiste3 st3partsplit/st3_rootdb.ini create-root node1 dbname=rootdb
----
And start the worker on the root node:
----
-londiste3 -d st3partsplit/st3_l3part_root.ini worker
+londiste3 -d st3partsplit/st3_rootdb.ini worker
----
-
-
And create the leaf nodes and start the workers on the partitions:
----
-londiste3 st3partsplit/st3_l3part_part0.ini create-leaf node2_0 dbname=l3part_part0 --provider=dbname=l3part_root
-londiste3 -d st3partsplit/st3_l3part_part0.ini worker
+londiste3 st3partsplit/st3_sharddb_0.ini create-leaf node2_0 dbname=sharddb_0 --provider=dbname=rootdb
+londiste3 -d st3partsplit/st3_sharddb_0.ini worker
----
Second node:
----
-londiste3 st3partsplit/st3_l3part_part1.ini create-leaf node2_1 dbname=l3part_part1 --provider=dbname=l3part_root
-londiste3 -d st3partsplit/st3_l3part_part1.ini worker
+londiste3 st3partsplit/st3_sharddb_1.ini create-leaf node2_1 dbname=sharddb_1 --provider=dbname=rootdb
+londiste3 -d st3partsplit/st3_sharddb_1.ini worker
----
-
-
Create the config file st3partsplit/pgqd.ini for `pgqd` ("the ticker"):
----
[pgqd]
@@ -157,14 +146,11 @@ pidfile = st3partsplit/pid/pgqd.pid
----
-
Start the ticker process:
----
pgqd -d st3partsplit/pgqd.ini
----
-
-
Now add the replicated tables to root and partitions.
Here we use the `--create` switch to add them to the partitions,
which means Londiste takes the schema from the root node and
@@ -177,31 +163,31 @@ the `--handler-arg=key=*id` specifies which key field to partition on.
Run the following commands:
----
-londiste3 st3partsplit/st3_l3part_root.ini add-table pgbench_accounts --handler=part --handler-arg=key=aid
-londiste3 st3partsplit/st3_l3part_part0.ini add-table pgbench_accounts --create --handler=part --handler-arg=key=aid
-londiste3 st3partsplit/st3_l3part_part1.ini add-table pgbench_accounts --create --handler=part --handler-arg=key=aid
+londiste3 st3partsplit/st3_rootdb.ini add-table pgbench_accounts --handler=part --handler-arg=key=aid
+londiste3 st3partsplit/st3_sharddb_0.ini add-table pgbench_accounts --create --handler=part --handler-arg=key=aid
+londiste3 st3partsplit/st3_sharddb_1.ini add-table pgbench_accounts --create --handler=part --handler-arg=key=aid
-londiste3 st3partsplit/st3_l3part_root.ini add-table pgbench_branches --handler=part --handler-arg=key=bid
-londiste3 st3partsplit/st3_l3part_part0.ini add-table pgbench_branches --create --handler=part --handler-arg=key=bid
-londiste3 st3partsplit/st3_l3part_part1.ini add-table pgbench_branches --create --handler=part --handler-arg=key=bid
+londiste3 st3partsplit/st3_rootdb.ini add-table pgbench_branches --handler=part --handler-arg=key=bid
+londiste3 st3partsplit/st3_sharddb_0.ini add-table pgbench_branches --create --handler=part --handler-arg=key=bid
+londiste3 st3partsplit/st3_sharddb_1.ini add-table pgbench_branches --create --handler=part --handler-arg=key=bid
-londiste3 st3partsplit/st3_l3part_root.ini add-table pgbench_tellers --handler=part --handler-arg=key=tid
-londiste3 st3partsplit/st3_l3part_part0.ini add-table pgbench_tellers --create --handler=part --handler-arg=key=tid
-londiste3 st3partsplit/st3_l3part_part1.ini add-table pgbench_tellers --create --handler=part --handler-arg=key=tid
+londiste3 st3partsplit/st3_rootdb.ini add-table pgbench_tellers --handler=part --handler-arg=key=tid
+londiste3 st3partsplit/st3_sharddb_0.ini add-table pgbench_tellers --create --handler=part --handler-arg=key=tid
+londiste3 st3partsplit/st3_sharddb_1.ini add-table pgbench_tellers --create --handler=part --handler-arg=key=tid
----
The following command will run pgbench at full speed with 5 parallel
database connections for 10 seconds.
----
-/usr/lib/postgresql/9.1/bin/pgbench -T 10 -c 5 l3part_root
+/usr/lib/postgresql/8.4/bin/pgbench -T 10 -c 5 rootdb
----
After this is done, you can check that the tables on both sides have the same data with:
----
-londiste3 st3partsplit/st3_l3part_part0.ini compare
-londiste3 st3partsplit/st3_l3part_part0.ini compare
+londiste3 st3partsplit/st3_sharddb_0.ini compare
+londiste3 st3partsplit/st3_sharddb_1.ini compare
----
Except of course that they don't: each partition will only have roughly half
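
A quick sanity check for the setup above: the Python sketch below confirms that
pghashlib's hash_string() is installed in every database and previews which shard
a key lands on. This is only a sketch, not part of the commit: it assumes psycopg2,
local passwordless connections, and that the part handler's hash expression reduces
to hash_string(key::text, 'lookup2') via partconf.get_hash_raw as configured above.
----
# Sketch under the assumptions stated above.
import psycopg2

def shard_for(key, max_part=1):
    """Mirror the part handler's predicate: (hash & max_part) = part_nr."""
    con = psycopg2.connect(dbname="rootdb")
    cur = con.cursor()
    cur.execute("select hash_string(%s::text, 'lookup2')", (str(key),))
    h = cur.fetchone()[0]
    con.close()
    return h & max_part

# hash_string() must exist in all three databases, or COPY filtering will fail
for db in ("rootdb", "sharddb_0", "sharddb_1"):
    con = psycopg2.connect(dbname=db)
    con.cursor().execute("select hash_string('42', 'lookup2')")
    con.close()

print(shard_for(42))  # prints 0 or 1: the shard that receives aid=42
----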
diff --git a/python/londiste.py b/python/londiste.py
index 5d46bbdc..b6c59e15 100755
--- a/python/londiste.py
+++ b/python/londiste.py
@@ -138,8 +138,6 @@ class Londiste(skytools.DBScript):
help = "add: walk upstream to find node to copy from")
g.add_option("--copy-node", dest="copy_node",
help = "add: use NODE as source for initial COPY")
- g.add_option("--copy-condition", dest="copy_condition",
- help = "add: set WHERE expression for copy")
g.add_option("--merge-all", action="store_true",
help="merge tables from all source queues", default=False)
g.add_option("--no-merge", action="store_true",
diff --git a/python/londiste/compare.py b/python/londiste/compare.py
index b08a04e4..1412336c 100644
--- a/python/londiste/compare.py
+++ b/python/londiste/compare.py
@@ -15,12 +15,18 @@ class Comparator(Syncer):
"""Simple checker based in Syncer.
When tables are in sync runs simple SQL query on them.
"""
- def process_sync(self, src_tbl, dst_tbl, src_db, dst_db):
+ def process_sync(self, t1, t2, src_db, dst_db):
"""Actual comparision."""
+ src_tbl = t1.dest_table
+ dst_tbl = t2.dest_table
+
src_curs = src_db.cursor()
dst_curs = dst_db.cursor()
+ src_where = t1.plugin.get_copy_condition(src_curs, dst_curs)
+ dst_where = t2.plugin.get_copy_condition(src_curs, dst_curs)
+
self.log.info('Counting %s' % dst_tbl)
# get common cols
@@ -37,7 +43,11 @@ class Comparator(Syncer):
q = self.cf.get('compare_sql', q)
q = q.replace("_COLS_", cols)
src_q = q.replace('_TABLE_', skytools.quote_fqident(src_tbl))
+ if src_where:
+ src_q = src_q + " WHERE " + src_where
dst_q = q.replace('_TABLE_', skytools.quote_fqident(dst_tbl))
+ if dst_where:
+ dst_q = dst_q + " WHERE " + dst_where
f = "%(cnt)d rows, checksum=%(chksum)s"
f = self.cf.get('compare_fmt', f)
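
For the howto's two-shard setup the effect is that both sides of a compare now
carry the shard's condition, so one-to-many and many-to-one comparisons run over
the same subset. An illustrative Python sketch (the real base query comes from
the compare_sql config option; table and condition are placeholders):
----
# Illustrative only: base query and names are placeholders.
q = "select count(1) as cnt from only _TABLE_"   # actual default comes from compare_sql
where = "(partconf.get_hash_raw(aid) & 1) = 0"   # shard 0's condition from the part handler
src_q = q.replace('_TABLE_', 'public.pgbench_accounts') + " WHERE " + where
dst_q = q.replace('_TABLE_', 'public.pgbench_accounts') + " WHERE " + where
# both sides now count only shard 0's rows, so the counts can match
----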
diff --git a/python/londiste/handler.py b/python/londiste/handler.py
index 87a16b62..ad4239ff 100644
--- a/python/londiste/handler.py
+++ b/python/londiste/handler.py
@@ -106,11 +106,15 @@ class BaseHandler:
"""Called when batch finishes."""
pass
- def real_copy(self, src_tablename, src_curs, dst_curs, column_list, cond_list):
+ def get_copy_condition(self, src_curs, dst_curs):
+ """ Use if you want to filter data """
+ return ''
+
+ def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
"""do actual table copy and return tuple with number of bytes and rows
copied
"""
- condition = ' and '.join(cond_list)
+ condition = self.get_copy_condition(src_curs, dst_curs)
return skytools.full_copy(src_tablename, src_curs, dst_curs,
column_list, condition,
dst_tablename = self.dest_table)
@@ -184,19 +188,17 @@ class TableHandler(BaseHandler):
return self.enc.validate_dict(row, self.table_name)
return row
- def real_copy(self, src_tablename, src_curs, dst_curs, column_list, cond_list):
+ def real_copy(self, src_tablename, src_curs, dst_curs, column_list):
"""do actual table copy and return tuple with number of bytes and rows
copied
"""
-
- condition = ' and '.join(cond_list)
-
+
if self.enc:
def _write_hook(obj, data):
return self.enc.validate_copy(data, column_list, src_tablename)
else:
_write_hook = None
-
+ condition = self.get_copy_condition(src_curs, dst_curs)
return skytools.full_copy(src_tablename, src_curs, dst_curs,
column_list, condition,
dst_tablename = self.dest_table,
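
After this refactoring, get_copy_condition() is the single extension point for
row filtering: the base real_copy() applies it to the initial COPY, and
compare/repair apply it on both ends. A minimal custom handler could look like
the sketch below; the class and its registration follow the pattern of the
bundled handlers such as part.py, and the table column is illustrative.
----
# Sketch: a handler that limits both COPY and compare/repair to recent rows.
from londiste.handler import TableHandler

class RecentRowsHandler(TableHandler):
    handler_name = 'recent_rows'

    def get_copy_condition(self, src_curs, dst_curs):
        # applied on both ends, so the compared datasets stay identical
        return "created_at > now() - interval '30 days'"

# handler modules export their handlers this way (see part.py)
__londiste_handlers__ = [RecentRowsHandler]
----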
diff --git a/python/londiste/handlers/dispatch.py b/python/londiste/handlers/dispatch.py
index dcfede52..2917472b 100644
--- a/python/londiste/handlers/dispatch.py
+++ b/python/londiste/handlers/dispatch.py
@@ -866,12 +866,12 @@ class Dispatcher(BaseHandler):
exec_with_vals(self.conf.post_part)
self.log.info("Created table: %s" % dst)
- def real_copy(self, tablename, src_curs, dst_curs, column_list, cond_list):
+ def real_copy(self, tablename, src_curs, dst_curs, column_list):
"""do actual table copy and return tuple with number of bytes and rows
copied
"""
_src_cols = _dst_cols = column_list
- condition = ' and '.join(cond_list)
+ condition = ''
if self.conf.skip_fields:
_src_cols = [col for col in column_list
diff --git a/python/londiste/handlers/part.py b/python/londiste/handlers/part.py
index 8f0eb378..5213f67d 100644
--- a/python/londiste/handlers/part.py
+++ b/python/londiste/handlers/part.py
@@ -81,15 +81,12 @@ class PartHandler(TableHandler):
self.log.debug('part.process_event: my event, processing')
TableHandler.process_event(self, ev, sql_queue_func, arg)
- def real_copy(self, tablename, src_curs, dst_curs, column_list, cond_list):
- """Copy only slots needed locally."""
+ def get_copy_condition(self, src_curs, dst_curs):
+ """Prepare the where condition for copy and replay filtering"""
self.load_part_info(dst_curs)
w = "(%s & %d) = %d" % (self.hashexpr, self.max_part, self.local_part)
self.log.debug('part: copy_condition=%s' % w)
- cond_list.append(w)
-
- return TableHandler.real_copy(self, tablename, src_curs, dst_curs,
- column_list, cond_list)
+ return w
def load_part_info(self, curs):
"""Load slot info from database."""
diff --git a/python/londiste/handlers/qtable.py b/python/londiste/handlers/qtable.py
index b93543e9..13ca4099 100644
--- a/python/londiste/handlers/qtable.py
+++ b/python/londiste/handlers/qtable.py
@@ -33,7 +33,7 @@ class QueueTableHandler(BaseHandler):
trigger_arg_list.append('SKIP')
trigger_arg_list.append('expect_sync')
- def real_copy(self, tablename, src_curs, dst_curs, column_list, cond_list):
+ def real_copy(self, tablename, src_curs, dst_curs, column_list):
"""Force copy not to start"""
return (0,0)
diff --git a/python/londiste/repair.py b/python/londiste/repair.py
index 02494b53..5a04d7a0 100644
--- a/python/londiste/repair.py
+++ b/python/londiste/repair.py
@@ -33,7 +33,7 @@ class Repairer(Syncer):
p.add_option("--apply", action="store_true", help="apply fixes")
return p
- def process_sync(self, src_tbl, dst_tbl, src_db, dst_db):
+ def process_sync(self, t1, t2, src_db, dst_db):
"""Actual comparision."""
apply_db = None
@@ -43,6 +43,9 @@ class Repairer(Syncer):
self.apply_curs = apply_db.cursor()
self.apply_curs.execute("set session_replication_role = 'replica'")
+ src_tbl = t1.dest_table
+ dst_tbl = t2.dest_table
+
src_curs = src_db.cursor()
dst_curs = dst_db.cursor()
@@ -57,10 +60,12 @@ class Repairer(Syncer):
dump_dst = dst_tbl + ".dst"
self.log.info("Dumping src table: %s" % src_tbl)
- self.dump_table(src_tbl, src_curs, dump_src)
+ src_where = t1.plugin.get_copy_condition(src_curs, dst_curs)
+ self.dump_table(src_tbl, src_curs, dump_src, src_where)
src_db.commit()
self.log.info("Dumping dst table: %s" % dst_tbl)
- self.dump_table(dst_tbl, dst_curs, dump_dst)
+ dst_where = t2.plugin.get_copy_condition(src_curs, dst_curs)
+ self.dump_table(dst_tbl, dst_curs, dump_dst, dst_where)
dst_db.commit()
self.log.info("Sorting src table: %s" % dump_src)
@@ -123,11 +128,13 @@ class Repairer(Syncer):
cols = ",".join(fqlist)
self.log.debug("using columns: %s" % cols)
- def dump_table(self, tbl, curs, fn):
+ def dump_table(self, tbl, curs, fn, whr):
"""Dump table to disk."""
cols = ','.join(self.fq_common_fields)
- q = "copy %s (%s) to stdout" % (skytools.quote_fqident(tbl), cols)
-
+ if len(whr) == 0:
+ whr = 'true'
+ q = "copy (SELECT %s FROM %s WHERE %s) to stdout" % (cols, skytools.quote_fqident(tbl), whr)
+ self.log.debug("Query: %s" % q)
f = open(fn, "w", 64*1024)
curs.copy_expert(q, f)
size = f.tell()
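
dump_table() now routes the COPY through a SELECT so the handler condition can
filter the dump, falling back to WHERE true when there is no condition. The
generated statement looks like this sketch (table and columns are illustrative):
----
# Illustrative: the dump query repair builds after this change.
import skytools

cols = "aid,bid,abalance"
whr = "(partconf.get_hash_raw(aid) & 1) = 0"   # handler condition; '' falls back to 'true'
if len(whr) == 0:
    whr = "true"
q = "copy (SELECT %s FROM %s WHERE %s) to stdout" % (
    cols, skytools.quote_fqident("public.pgbench_accounts"), whr)
# -> copy (SELECT aid,bid,abalance FROM public.pgbench_accounts
#          WHERE (partconf.get_hash_raw(aid) & 1) = 0) to stdout
----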
diff --git a/python/londiste/setup.py b/python/londiste/setup.py
index b8ca4a0c..f06d9b0b 100644
--- a/python/londiste/setup.py
+++ b/python/londiste/setup.py
@@ -50,8 +50,6 @@ class LondisteSetup(CascadeAdmin):
help = "add: find table source for copy by walking upwards")
p.add_option("--copy-node", dest="copy_node",
help = "add: use NODE as source for initial copy")
- p.add_option("--copy-condition", dest="copy_condition",
- help = "copy: where expression")
p.add_option("--force", action="store_true",
help="force", default=False)
p.add_option("--all", action="store_true",
@@ -247,8 +245,6 @@ class LondisteSetup(CascadeAdmin):
if not self.options.expect_sync:
if self.options.skip_truncate:
attrs['skip_truncate'] = 1
- if self.options.copy_condition:
- attrs['copy_condition'] = self.options.copy_condition
if self.options.max_parallel_copy:
attrs['max_parallel_copy'] = self.options.max_parallel_copy
diff --git a/python/londiste/syncer.py b/python/londiste/syncer.py
index 05df41ad..5eb5da73 100644
--- a/python/londiste/syncer.py
+++ b/python/londiste/syncer.py
@@ -155,7 +155,7 @@ class Syncer(skytools.DBScript):
self.check_consumer(setup_db)
- self.check_table(t1.dest_table, t2.dest_table, lock_db, src_db, dst_db, setup_db)
+ self.check_table(t1, t2, lock_db, src_db, dst_db, setup_db)
lock_db.commit()
src_db.commit()
dst_db.commit()
@@ -185,9 +185,12 @@ class Syncer(skytools.DBScript):
if dur > 10 and not self.options.force:
raise Exception("Ticker seems dead")
- def check_table(self, src_tbl, dst_tbl, lock_db, src_db, dst_db, setup_db):
+ def check_table(self, t1, t2, lock_db, src_db, dst_db, setup_db):
"""Get transaction to same state, then process."""
+ src_tbl = t1.dest_table
+ dst_tbl = t2.dest_table
+
lock_curs = lock_db.cursor()
src_curs = src_db.cursor()
dst_curs = dst_db.cursor()
@@ -221,7 +224,7 @@ class Syncer(skytools.DBScript):
self.unlock_table_branch(lock_db, setup_db)
# do work
- bad = self.process_sync(src_tbl, dst_tbl, src_db, dst_db)
+ bad = self.process_sync(t1, t2, src_db, dst_db)
if bad:
self.bad_tables += 1
@@ -320,7 +323,7 @@ class Syncer(skytools.DBScript):
setup_curs = setup_db.cursor()
self.resume_consumer(setup_curs, self.provider_node['worker_name'])
- def process_sync(self, src_tbl, dst_tbl, src_db, dst_db):
+ def process_sync(self, t1, t2, src_db, dst_db):
"""It gets 2 connections in state where tbl should be in same state.
"""
raise Exception('process_sync not implemented')
diff --git a/python/londiste/table_copy.py b/python/londiste/table_copy.py
index 65a702fb..a2138d98 100644
--- a/python/londiste/table_copy.py
+++ b/python/londiste/table_copy.py
@@ -159,7 +159,12 @@ class CopyTable(Replicator):
tbl_stat.dropped_ddl = ddl
# do truncate & copy
- self.real_copy(src_curs, dst_curs, tbl_stat, common_cols, src_real_table)
+ self.log.info("%s: start copy" % tbl_stat.name)
+ p = tbl_stat.get_plugin()
+ stats = p.real_copy(src_real_table, src_curs, dst_curs, common_cols)
+ if stats:
+ self.log.info("%s: copy finished: %d bytes, %d rows" % (
+ tbl_stat.name, stats[0], stats[1]))
# get snapshot
src_curs.execute("select txid_current_snapshot()")
@@ -215,22 +220,6 @@ class CopyTable(Replicator):
src_curs.execute(q, [self.queue_name])
src_db.commit()
- def real_copy(self, srccurs, dstcurs, tbl_stat, col_list, src_real_table):
- "Actual copy."
-
- tablename = tbl_stat.name
- # do copy
- self.log.info("%s: start copy" % tablename)
- p = tbl_stat.get_plugin()
- cond_list = []
- cond = tbl_stat.table_attrs.get('copy_condition')
- if cond:
- cond_list.append(cond)
- stats = p.real_copy(src_real_table, srccurs, dstcurs, col_list, cond_list)
- if stats:
- self.log.info("%s: copy finished: %d bytes, %d rows" % (
- tablename, stats[0], stats[1]))
-
def work(self):
if not self.reg_ok:
# check if needed? (table, not existing reg)