diff --git a/Cluster.pm b/Cluster.pm index 78410ccdb2..ea7860f695 100644 --- a/Cluster.pm +++ b/Cluster.pm @@ -340,7 +340,8 @@ sub add_node() { my ($self) = @_; - my $new_node = get_new_node("node@{[$#{$self->{nodes}} + 2]}"); + my $new_node = get_new_node("node@{[$#{$self->{nodes}} + 2]}", + (port => mm_get_free_port())); push(@{$self->{nodes}}, $new_node); return $#{$self->{nodes}}; diff --git a/Makefile b/Makefile index 505f5d9f57..994e04ff8a 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ OBJS = src/multimaster.o src/dmq.o src/commit.o src/bytebuf.o src/bgwpool.o \ src/pglogical_output.o src/pglogical_proto.o src/pglogical_receiver.o \ src/pglogical_apply.o src/pglogical_hooks.o src/pglogical_config.o \ src/pglogical_relid_map.o src/ddd.o src/bkb.o src/spill.o src/state.o \ -src/resolver.o src/ddl.o src/syncpoint.o src/global_tx.o +src/resolver.o src/ddl.o src/syncpoint.o src/global_tx.o src/mtm_utils.o MODULE_big = multimaster ifndef USE_PGXS # hmm, user didn't requested to use pgxs diff --git a/doc/multimaster.xml b/doc/multimaster.xml index 8e3eb2ff56..8fdec0f124 100644 --- a/doc/multimaster.xml +++ b/doc/multimaster.xml @@ -1255,19 +1255,83 @@ WARNING: mismatch in column 'b' of row 0: 256 on node0, 255 on node1 + + multimaster.break_connection + multimaster.break_connection + + + Break connection with clients connected to the node if this node disconnects + from the cluster. If this variable is set to false, the client stays + connected to the node but receives an error that the node is disabled. + + Default: false + + + + + multimaster.connect_timeout + multimaster.connect_timeout + + + + Maximum time to wait while connecting, in seconds. Zero, negative, or not specified means + wait indefinitely. The minimum allowed timeout is 2 seconds, therefore + a value of 1 is interpreted as 2. + + Default: 0 + + + + + multimaster.ignore_tables_without_pk + multimaster.ignore_tables_without_pk + + + + Do not replicate tables without primary key. 
When false, + such tables are replicated. + + Default: false + + + + + multimaster.syncpoint_interval + multimaster.syncpoint_interval + + + + Amount of WAL generated between synchronization points. + + Default: 10 MB + + + + + multimaster.binary_basetypes + multimaster.binary_basetypes + + + + Send data of built-in types in binary format. + + Default: true + + + + + multimaster.wait_peer_commits + multimaster.wait_peer_commits + + + + Wait until all peers commit the transaction before the command returns + a success indication to the client. + + Default: true + + - - multimaster.break_connection - multimaster.break_connection - - - Break connection with clients connected to the node if this node disconnects - from the cluster. If this variable is set to false, the client stays - connected to the node but receives an error that the node is disabled. - - Default: false - - diff --git a/expected/regression_ee.diff b/expected/regression_ee.diff index f75fa64e6f..a54758b41f 100644 --- a/expected/regression_ee.diff +++ b/expected/regression_ee.diff @@ -29,7 +29,7 @@ diff ../../../src/test/regress/expected/create_table.out ../tmp_check/regress_ou diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_outdir/results/create_index.out --- ../../../src/test/regress/expected/create_index.out CENSORED +++ ../tmp_check/regress_outdir/results/create_index.out CENSORED -@@ -1367,31 +1367,33 @@ +@@ -1394,31 +1394,33 @@ CREATE TABLE concur_heap (f1 text, f2 text); -- empty table CREATE INDEX CONCURRENTLY concur_index1 ON concur_heap(f2,f1); @@ -70,7 +70,7 @@ diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_ou COMMIT; -- test where predicate is able to do a transactional update during -- a concurrent build before switching pg_index state flags. 
-@@ -1403,7 +1405,9 @@ +@@ -1430,7 +1432,9 @@ END; $$; CREATE INDEX CONCURRENTLY concur_index8 ON concur_heap (f1) WHERE predicate_stable(); @@ -80,7 +80,7 @@ diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_ou DROP FUNCTION predicate_stable(); -- But you can do a regular index build in a transaction BEGIN; -@@ -1412,8 +1416,6 @@ +@@ -1439,8 +1443,6 @@ -- Failed builds are left invalid by VACUUM FULL, fixed by REINDEX VACUUM FULL concur_heap; REINDEX TABLE concur_heap; @@ -89,7 +89,7 @@ diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_ou DELETE FROM concur_heap WHERE f1 = 'b'; VACUUM FULL concur_heap; \d concur_heap -@@ -1423,12 +1425,6 @@ +@@ -1450,12 +1452,6 @@ f1 | text | | | f2 | text | | | Indexes: @@ -102,7 +102,7 @@ diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_ou "std_index" btree (f2) REINDEX TABLE concur_heap; -@@ -1439,12 +1435,6 @@ +@@ -1466,12 +1462,6 @@ f1 | text | | | f2 | text | | | Indexes: @@ -115,7 +115,7 @@ diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_ou "std_index" btree (f2) -- Temporary tables with concurrent builds and on-commit actions -@@ -1454,7 +1444,9 @@ +@@ -1481,7 +1471,9 @@ ON COMMIT PRESERVE ROWS; INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar'); CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1); @@ -125,7 +125,7 @@ diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_ou DROP TABLE concur_temp; -- ON COMMIT DROP BEGIN; -@@ -1463,34 +1455,42 @@ +@@ -1490,34 +1482,42 @@ INSERT INTO concur_temp VALUES (1, 'foo'), (2, 'bar'); -- Fails when running in a transaction. 
CREATE INDEX CONCURRENTLY concur_temp_ind ON concur_temp(f1); @@ -172,7 +172,7 @@ diff ../../../src/test/regress/expected/create_index.out ../tmp_check/regress_ou \d concur_heap Table "public.concur_heap" Column | Type | Collation | Nullable | Default -@@ -2428,46 +2428,38 @@ +@@ -2474,46 +2474,38 @@ INSERT INTO concur_reindex_tab4 VALUES (1), (1), (2); -- This trick creates an invalid index. CREATE UNIQUE INDEX CONCURRENTLY concur_reindex_ind5 ON concur_reindex_tab4 (c1); @@ -318,9 +318,9 @@ diff ../../../src/test/regress/expected/sanity_check.out ../tmp_check/regress_ou stud_emp|f student|f +syncpoints|t + tab_core_types|f tableam_parted_a_heap2|f tableam_parted_b_heap2|f - tableam_parted_c_heap2|f diff ../../../src/test/regress/expected/transactions.out ../tmp_check/regress_outdir/results/transactions.out --- ../../../src/test/regress/expected/transactions.out CENSORED +++ ../tmp_check/regress_outdir/results/transactions.out CENSORED @@ -503,6 +503,26 @@ diff ../../../src/test/regress/expected/brin.out ../tmp_check/regress_outdir/res -- vacuum the table, to discard TOAST data VACUUM brintest_3; -- retry insert with a different random-looking (but deterministic) value +diff ../../../src/test/regress/expected/privileges.out ../tmp_check/regress_outdir/results/privileges.out +--- ../../../src/test/regress/expected/privileges.out CENSORED ++++ ../tmp_check/regress_outdir/results/privileges.out CENSORED +@@ -1437,11 +1437,16 @@ + -- Do the same concurrently + CREATE INDEX CONCURRENTLY sro_idx ON sro_tab ((sro_ifun(a) + sro_ifun(0))) + WHERE sro_ifun(a + 10) > sro_ifun(10); ++ERROR: multimaster doesn't support CREATE INDEX CONCURRENTLY + -- REINDEX + REINDEX TABLE sro_tab; ++NOTICE: table "sro_tab" has no indexes to reindex + REINDEX INDEX sro_idx; ++ERROR: relation "sro_idx" does not exist + REINDEX TABLE CONCURRENTLY sro_tab; ++NOTICE: table "sro_tab" has no indexes that can be reindexed concurrently + DROP INDEX sro_idx; ++ERROR: index "sro_idx" does not exist + -- 
CLUSTER + CREATE INDEX sro_cluster_idx ON sro_tab ((sro_ifun(a) + sro_ifun(0))); + CLUSTER sro_tab USING sro_cluster_idx; diff ../../../src/test/regress/expected/rowsecurity.out ../tmp_check/regress_outdir/results/rowsecurity.out --- ../../../src/test/regress/expected/rowsecurity.out CENSORED +++ ../tmp_check/regress_outdir/results/rowsecurity.out CENSORED @@ -570,7 +590,47 @@ diff ../../../src/test/regress/expected/rowsecurity.out ../tmp_check/regress_out diff ../../../src/test/regress/expected/atx.out ../tmp_check/regress_outdir/results/atx.out --- ../../../src/test/regress/expected/atx.out CENSORED +++ ../tmp_check/regress_outdir/results/atx.out CENSORED -@@ -1143,6 +1143,7 @@ +@@ -816,11 +816,7 @@ + (1 row) + + select lo_unlink(:my_loid); +- lo_unlink +------------ +- 1 +-(1 row) +- ++ERROR: [MTM] failed to prepare transaction at peer node + -- + DO $body$ + begin +@@ -857,6 +853,7 @@ + declare c2 cursor with hold for select count_tt1_v(), count_tt1_s(); + insert into atx_tt1 values(2); + commit; ++ERROR: cannot PREPARE a transaction that has created a cursor WITH HOLD + commit; + begin; + begin autonomous; +@@ -872,6 +869,7 @@ + drop function count_tt1_s(); + drop table if exists atx_tt1; + close c2; ++ERROR: cursor "c2" does not exist + -- + create table atx_13_t(i int); + begin; +@@ -991,9 +989,7 @@ + insert into atx_tt2 values(1); + declare c2 cursor with hold for select error_function(); + commit; +-NOTICE: other exception 22012, division by zero +-ERROR: control reached end of function without RETURN +-CONTEXT: PL/pgSQL function error_function() ++ERROR: cannot PREPARE a transaction that has created a cursor WITH HOLD + commit; + drop function if exists error_function(); + drop table if exists atx_tt2; +@@ -1083,6 +1079,7 @@ RESET client_min_messages; create database regression_atx_test_database; ALTER DATABASE "regression_atx_test_database" SET lc_messages TO 'C'; @@ -581,7 +641,7 @@ diff ../../../src/test/regress/expected/atx.out 
../tmp_check/regress_outdir/resu diff ../../../src/test/regress/expected/atx5.out ../tmp_check/regress_outdir/results/atx5.out --- ../../../src/test/regress/expected/atx5.out CENSORED +++ ../tmp_check/regress_outdir/results/atx5.out CENSORED -@@ -24,10 +24,7 @@ +@@ -23,10 +23,7 @@ NOTICE: function atx_test_30_one() does not exist, skipping NOTICE: function atx_test_30_one() does not exist, skipping NOTICE: function atx_test_30_one() does not exist, skipping @@ -596,7 +656,7 @@ diff ../../../src/test/regress/expected/atx5.out ../tmp_check/regress_outdir/res diff ../../../src/test/regress/expected/atx9.out ../tmp_check/regress_outdir/results/atx9.out --- ../../../src/test/regress/expected/atx9.out CENSORED +++ ../tmp_check/regress_outdir/results/atx9.out CENSORED -@@ -29,50 +29,38 @@ +@@ -28,50 +28,38 @@ INSERT INTO abc VALUES (1); INSERT INTO abc VALUES (2); COMMIT AND CHAIN; -- TBLOCK_END @@ -656,7 +716,7 @@ diff ../../../src/test/regress/expected/atx9.out ../tmp_check/regress_outdir/res COMMIT; ROLLBACK; BEGIN; -@@ -144,24 +132,13 @@ +@@ -143,24 +131,13 @@ SAVEPOINT x; COMMIT AND CHAIN; -- TBLOCK_SUBCOMMIT @@ -685,7 +745,7 @@ diff ../../../src/test/regress/expected/atx9.out ../tmp_check/regress_outdir/res COMMIT; ROLLBACK; -- different mix of options just for fun -@@ -232,17 +209,14 @@ +@@ -231,17 +208,14 @@ COMMIT; -- not allowed outside a transaction block COMMIT AUTONOMOUS AND CHAIN; -- error diff --git a/src/bgwpool.c b/src/bgwpool.c index 7288ba3781..efc1cd24e5 100644 --- a/src/bgwpool.c +++ b/src/bgwpool.c @@ -31,6 +31,7 @@ #include "multimaster.h" #include "state.h" #include "logger.h" +#include "mtm_utils.h" /* * Store the size of tx body, position of it in the tx list and transaction @@ -324,6 +325,7 @@ BgwPoolMainLoop(BgwPool *poolDesc) void BgwPoolDynamicWorkerMainLoop(Datum arg) { + MtmDisableTimeouts(); BgwPoolMainLoop((BgwPool *) DatumGetPointer(arg)); } diff --git a/src/ddl.c b/src/ddl.c index cf9d08e2a3..49ccc2d0c4 100644 --- a/src/ddl.c +++ 
b/src/ddl.c @@ -666,7 +666,6 @@ MtmProcessUtility(PlannedStmt *pstmt, const char *queryString, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc) { - /* * Quick exit if multimaster is not enabled. * XXX it's better to do MtmIsEnabled here, but this needs cache access diff --git a/src/dmq.c b/src/dmq.c index a02c02fd95..163e0a260b 100644 --- a/src/dmq.c +++ b/src/dmq.c @@ -31,6 +31,9 @@ #include "dmq.h" #include "logger.h" #include "compat.h" +#include "mtm_utils.h" +#include "multimaster.h" +#include "state.h" #include "access/transam.h" #include "libpq/libpq.h" @@ -41,7 +44,9 @@ #include "utils/builtins.h" #include "utils/timestamp.h" #include "storage/shm_toc.h" +#include "postmaster/autovacuum.h" #include "postmaster/interrupt.h" +#include "replication/walsender.h" #include "storage/shm_mq.h" #include "storage/ipc.h" #include "tcop/tcopprot.h" @@ -51,23 +56,6 @@ #define DMQ_MQ_SIZE ((Size) 65536) #define DMQ_MQ_MAGIC 0x646d71 -/* XXX: move to common */ -#define BIT_CLEAR(mask, bit) ((mask) &= ~((uint64)1 << (bit))) -#define BIT_CHECK(mask, bit) (((mask) & ((uint64)1 << (bit))) != 0) -static int -first_set_bit(uint64 mask) -{ - int i; - - for (i = 0; i < DMQ_N_MASK_POS; i++) - { - if (BIT_CHECK(mask, i)) - return i; - } - return -1; -} - - /* * Shared data structures to hold current connections topology. 
* All that stuff can be moved to persistent tables to avoid hardcoded @@ -376,9 +364,13 @@ static Size dmq_shmem_size(void) { Size size = 0; + int maxbackends = 0; + + maxbackends = MaxConnections + autovacuum_max_workers + + max_worker_processes + max_wal_senders + 1; size = add_size(size, sizeof(struct DmqSharedState)); - size = add_size(size, hash_estimate_size(DMQ_MAX_SUBS_PER_BACKEND * MaxBackends, + size = add_size(size, hash_estimate_size(DMQ_MAX_SUBS_PER_BACKEND * maxbackends, sizeof(DmqStreamSubscription))); return MAXALIGN(size); } @@ -496,6 +488,12 @@ dmq_sender_at_exit(int status, Datum arg) } } LWLockRelease(dmq_state->lock); + + /* + * Restart the Campaigner to be sure that all critical data is reset before the + * next voting. + */ + CampaignerStop(); } void @@ -527,6 +525,8 @@ dmq_sender_main(Datum main_arg) pqsignal(SIGTERM, die); BackgroundWorkerUnblockSignals(); + MtmDisableTimeouts(); + memcpy(&heartbeat_send_timeout, MyBgworkerEntry->bgw_extra, sizeof(int)); memcpy(&connect_timeout, MyBgworkerEntry->bgw_extra + sizeof(int), sizeof(int)); @@ -796,7 +796,7 @@ dmq_sender_main(Datum main_arg) int pos = event.pos; pqtime = dmq_now(); - status = PQconnectPoll(conns[conn_id].pgconn); + status = MtmPQconnectPoll(conns[conn_id].pgconn); mtm_log(DmqPqTiming, "[DMQ] [TIMING] pqp = %f ms", dmq_now() - pqtime); mtm_log(DmqStateIntermediate, @@ -1386,6 +1386,11 @@ dmq_receiver_loop(PG_FUNCTION_ARGS) double last_message_at = dmq_now(); void *extra = NULL; + /* + * We do not call MtmDisableTimeouts() here because the connection to this + * client is made by MtmPQconnectPoll(), which sets all needed timeouts.
+ */ + sender_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); recv_timeout = PG_GETARG_INT32(1); @@ -1652,7 +1657,7 @@ dmq_push_buffer(DmqDestinationId dest_id, char *stream_name, const void *payload res = shm_mq_send(dmq_local.mq_outh, buf.len, buf.data, false); pfree(buf.data); if (res != SHM_MQ_SUCCESS) - mtm_log(WARNING, "[DMQ] dmq_push: can't send to queue"); + mtm_log(ERROR, "[DMQ] dmq_push: can't send to queue, status = %d", res); } /* @@ -1768,7 +1773,8 @@ dmq_reattach_shm_mq(int handle_id) * from which receivers caller wants to get message and filters inhandles * through it. */ -void dmq_attach_receiver(char *sender_name, int8 mask_pos) +void +dmq_attach_receiver(char *sender_name, int8 mask_pos) { int i; int handle_id = -1; diff --git a/src/include/mtm_utils.h b/src/include/mtm_utils.h new file mode 100644 index 0000000000..51eaf19c80 --- /dev/null +++ b/src/include/mtm_utils.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * mtm_utils.h + * Utility functions: + * - disable global timeouts settings; + * - libpq connect function wrappers. 
+ * + * + * Copyright (c) 2022, Postgres Professional + * + *------------------------------------------------------------------------- + */ +#ifndef MTM_UTILS_H +#define MTM_UTILS_H + +#include "libpq/pqformat.h" +#include "libpq-fe.h" + +extern void MtmDisableTimeouts(void); + +extern PostgresPollingStatusType MtmPQconnectPoll(PGconn *conn); +extern PGconn* MtmPQconnectdb(const char *conninfo); + +#endif diff --git a/src/include/state.h b/src/include/state.h index d8565374ea..f361769dff 100644 --- a/src/include/state.h +++ b/src/include/state.h @@ -149,5 +149,6 @@ extern void MtmMonitorStart(Oid db_id, Oid user_id); extern void MtmRefreshClusterStatus(void); extern nodemask_t MtmGetDisabledNodeMask(void); extern nodemask_t MtmGetEnabledNodeMask(bool ignore_disabled); +extern void CampaignerStop(void); #endif diff --git a/src/mtm_utils.c b/src/mtm_utils.c new file mode 100644 index 0000000000..e01e5ce4bf --- /dev/null +++ b/src/mtm_utils.c @@ -0,0 +1,141 @@ +/*---------------------------------------------------------------------------- + * + * mtm_utils.c + * Utility functions + * + * Copyright (c) 2022, Postgres Professional + * + *---------------------------------------------------------------------------- + */ + +#include "logger.h" +#include "mtm_utils.h" + +#include "utils/timeout.h" + +/* + * Disable timeouts for the session behind the given libpq connection: + * - statement_timeout; + * - lock_timeout; + * - idle_in_transaction_session_timeout; + * - idle_session_timeout. + * + * These timeouts, when set in the postgres config file, affect all processes. + * Multimaster needs its sessions not to be interrupted, so we disable + * these timeouts. + * + * Returns false (after logging a WARNING) if any of the SET commands fails.
+ */ +static bool +disable_client_timeouts(PGconn *conn) +{ + PGresult *res; + + res = PQexec(conn, "SET statement_timeout = 0"); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + mtm_log(WARNING, "failed to set statement_timeout: %s", + pchomp(PQerrorMessage(conn))); + PQclear(res); /* every PQexec() result must be freed, even on failure */ + return false; + } + PQclear(res); + + res = PQexec(conn, "SET lock_timeout = 0"); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + mtm_log(WARNING, "failed to set lock_timeout: %s", + pchomp(PQerrorMessage(conn))); + PQclear(res); + return false; + } + PQclear(res); + + res = PQexec(conn, "SET idle_in_transaction_session_timeout = 0"); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + mtm_log(WARNING, "failed to set idle_in_transaction_session_timeout: %s", + pchomp(PQerrorMessage(conn))); + PQclear(res); + return false; + } + PQclear(res); + + res = PQexec(conn, "SET idle_session_timeout = 0"); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + mtm_log(WARNING, "failed to set idle_session_timeout: %s", + pchomp(PQerrorMessage(conn))); + PQclear(res); + return false; + } + PQclear(res); + + return true; +} + +/* + * Disable timeouts for the current process: + * - statement_timeout; + * - lock_timeout; + * - idle_in_transaction_session_timeout; + * - idle_session_timeout.
+ * + * We disable these timeouts for the same reason as in disable_client_timeouts(). + */ +extern void +MtmDisableTimeouts(void) +{ + if (get_timeout_active(STATEMENT_TIMEOUT)) + disable_timeout(STATEMENT_TIMEOUT, false); + if (get_timeout_active(LOCK_TIMEOUT)) + disable_timeout(LOCK_TIMEOUT, false); + if (get_timeout_active(IDLE_IN_TRANSACTION_SESSION_TIMEOUT)) + disable_timeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, false); + if (get_timeout_active(IDLE_SESSION_TIMEOUT)) + disable_timeout(IDLE_SESSION_TIMEOUT, false); +} + +/* + * Wrapper around PQconnectPoll + * + * Once the connection is established, disables timeouts in the remote session + */ +PostgresPollingStatusType +MtmPQconnectPoll(PGconn *conn) +{ + PostgresPollingStatusType status; + + status = PQconnectPoll(conn); + if (status != PGRES_POLLING_OK) + return status; + + if (!disable_client_timeouts(conn)) + status = PGRES_POLLING_FAILED; + + return status; +} + +/* + * Wrapper around PQconnectdb + * + * Once the connection is established, disables timeouts in the remote session + */ +PGconn * +MtmPQconnectdb(const char *conninfo) +{ + PGconn *conn; + + conn = PQconnectdb(conninfo); + if (PQstatus(conn) != CONNECTION_OK) + return conn; + + if (!disable_client_timeouts(conn)) + { + PQfinish(conn); + return NULL; + } + + return conn; +} + diff --git a/src/multimaster.c b/src/multimaster.c index 9f4d3c6479..9165492afc 100644 --- a/src/multimaster.c +++ b/src/multimaster.c @@ -48,6 +48,7 @@ #include "commit.h" #include "messaging.h" #include "syncpoint.h" +#include "mtm_utils.h" #include "compat.h" @@ -98,7 +99,6 @@ static void MtmShmemStartup(void); static void launcher_init(void); void launcher_main(Datum main_arg); -void drop_node_entry(int node_id); MtmShared *Mtm; @@ -333,7 +333,6 @@ MtmSleep(int64 usec) } } - /* * These were once used to setup mtm state in parallel workers, but as long as * they are read-only we don't really need it (historically it imported csn @@ -933,7 +932,8 @@ mtm_init_cluster(PG_FUNCTION_ARGS) /* parse array with peer connstrings */
Assert(ARR_ELEMTYPE(peers_arr) == TEXTOID); - Assert(ARR_NDIM(peers_arr) == 1); + if (ARR_NDIM(peers_arr) != 1) + mtm_log(ERROR, "node list should not be empty"); deconstruct_array(peers_arr, TEXTOID, -1, false, 'i', &peers_datums, &peers_nulls, &n_peers); @@ -969,7 +969,7 @@ mtm_init_cluster(PG_FUNCTION_ARGS) int j; /* connect */ - peer_conns[i] = PQconnectdb(conninfo); + peer_conns[i] = MtmPQconnectdb(conninfo); if (PQstatus(peer_conns[i]) != CONNECTION_OK) { char *msg = pchomp(PQerrorMessage(peer_conns[i])); @@ -1299,7 +1299,7 @@ mtm_join_node(PG_FUNCTION_ARGS) if (new_node == NULL) mtm_log(ERROR, "new node %d not found", new_node_id); conninfo = new_node->conninfo; - conn = PQconnectdb(conninfo); + conn = MtmPQconnectdb(conninfo); if (PQstatus(conn) != CONNECTION_OK) { char *msg = pchomp(PQerrorMessage(conn)); @@ -1494,7 +1494,7 @@ mtm_ping(PG_FUNCTION_ARGS) if (!BIT_CHECK(curr_gen.members, peer->node_id - 1)) continue; - conn = PQconnectdb(peer->conninfo); + conn = MtmPQconnectdb(peer->conninfo); if (PQstatus(conn) != CONNECTION_OK) { char *msg = pchomp(PQerrorMessage(conn)); @@ -1862,6 +1862,7 @@ launcher_main(Datum main_arg) /* init this worker */ pqsignal(SIGTERM, die); BackgroundWorkerUnblockSignals(); + MtmBackgroundWorker = true; memset(&hash_info, 0, sizeof(hash_info)); hash_info.entrysize = hash_info.keysize = sizeof(Oid); @@ -2553,7 +2554,7 @@ _mtm_get_snapshots(const MtmConfig *mcfg, PGconn **conns, char **snapnames, for (i = 0; i < mcfg->n_nodes; i++) { /* Establish connection to each node */ - conns[i] = PQconnectdb(mcfg->nodes[i].conninfo); + conns[i] = MtmPQconnectdb(mcfg->nodes[i].conninfo); if (conns[i] == NULL || PQstatus(conns[i]) == CONNECTION_BAD) { @@ -2679,7 +2680,7 @@ mtm_check_query(PG_FUNCTION_ARGS) int pos = index[i]; /* Establish connection to each online node */ - conn = PQconnectdb(cfg->nodes[pos].conninfo); + conn = MtmPQconnectdb(cfg->nodes[pos].conninfo); if (conn == NULL || PQstatus(conn) == CONNECTION_BAD) { diff --git 
a/src/pglogical_apply.c b/src/pglogical_apply.c index 850810bae2..566d5acb97 100644 --- a/src/pglogical_apply.c +++ b/src/pglogical_apply.c @@ -194,9 +194,10 @@ static void process_remote_begin(StringInfo s, MtmReceiverWorkerContext *rwctx) { /* there is no need to send this, but since we do, check its sanity */ -#ifdef USE_ASSERT_CHECKING int sender_node_id = pq_getmsgint(s, 4); -#endif + + (void) sender_node_id; /* keep the compiler quiet when asserts are disabled*/ + Assert(rwctx->sender_node_id == sender_node_id); rwctx->origin_xid = pq_getmsgint64(s); mtm_log(MtmApplyTrace, "processing begin of xid " XID_FMT, rwctx->origin_xid); diff --git a/src/pglogical_output.c b/src/pglogical_output.c index b1675f1a73..95bfa00540 100644 --- a/src/pglogical_output.c +++ b/src/pglogical_output.c @@ -57,6 +57,7 @@ #include "multimaster.h" #include "logger.h" #include "state.h" +#include "mtm_utils.h" extern void _PG_output_plugin_init(OutputPluginCallbacks *cb); @@ -143,6 +144,8 @@ _PG_output_plugin_init(OutputPluginCallbacks *cb) cb->shutdown_cb = pg_decode_shutdown; cb->message_cb = pg_decode_message; cb->caughtup_cb = pg_decode_caughtup; + + MtmDisableTimeouts(); } #if 0 diff --git a/src/pglogical_receiver.c b/src/pglogical_receiver.c index b75e7be445..ef30ed1c30 100644 --- a/src/pglogical_receiver.c +++ b/src/pglogical_receiver.c @@ -56,6 +56,7 @@ #include "compat.h" #include "syncpoint.h" #include "global_tx.h" +#include "mtm_utils.h" #define ERRCODE_DUPLICATE_OBJECT_STR "42710" @@ -318,12 +319,12 @@ MtmFilterTransaction(char *record, int size, Syncpoint *spvector, } else if (msgtype == 'M') { -#ifdef USE_ASSERT_CHECKING char action = pq_getmsgbyte(&s); -#endif int messageSize; char const *messageBody; + (void) action; /* keep the compiler quiet when asserts are disabled*/ + end_lsn = pq_getmsgint64(&s); messageSize = pq_getmsgint(&s, 4); messageBody = pq_getmsgbytes(&s, messageSize); @@ -584,6 +585,8 @@ pglogical_receiver_main(Datum main_arg) */ 
on_shmem_exit(pglogical_receiver_at_exit, PointerGetDatum(rctx)); + MtmDisableTimeouts(); + MtmIsReceiver = true; /* Run as replica session replication role. */ SetConfigOption("session_replication_role", "replica", diff --git a/src/resolver.c b/src/resolver.c index f6fccea9da..659e9a2276 100644 --- a/src/resolver.c +++ b/src/resolver.c @@ -30,6 +30,7 @@ #include "commit.h" #include "global_tx.h" #include "messaging.h" +#include "mtm_utils.h" static MtmConfig *mtm_cfg = NULL; static bool send_requests; @@ -449,7 +450,7 @@ handle_response(MtmConfig *mtm_cfg, MtmMessage *raw_msg) else if (raw_msg->tag == T_Mtm2AResponse) gid = ((Mtm2AResponse *) raw_msg)->gid; else - Assert(false); + mtm_log(ERROR, "Illegal message tag %d", raw_msg->tag); mtm_log(ResolverTx, "handle_response: got '%s'", MtmMesageToString(raw_msg)); @@ -637,6 +638,8 @@ ResolverMain(Datum main_arg) Oid db_id, user_id; + MtmDisableTimeouts(); + /* init this worker */ pqsignal(SIGHUP, ResolverSigHupHandler); pqsignal(SIGTERM, die); diff --git a/src/state.c b/src/state.c index 53c8fa674b..01b502b775 100644 --- a/src/state.c +++ b/src/state.c @@ -45,6 +45,7 @@ #include "syncpoint.h" #include "logger.h" #include "messaging.h" +#include "mtm_utils.h" char const *const MtmNeighborEventMnem[] = { @@ -881,6 +882,14 @@ CampaignerWake(void) kill(mtm_state->campaigner_pid, SIGHUP); } +/* Service to restart a campaigner process. 
*/ +void +CampaignerStop(void) +{ + if (mtm_state->campaigner_pid != 0) + kill(mtm_state->campaigner_pid, SIGTERM); +} + /* campaigner never rereads PG config, but it currently it hardly needs to */ static void CampaignerSigHupHandler(SIGNAL_ARGS) @@ -1672,6 +1681,8 @@ CampaignerMain(Datum main_arg) TimestampTz last_campaign_at = 0; int rc = WL_TIMEOUT; + MtmDisableTimeouts(); + MtmBackgroundWorker = true; mtm_log(MtmStateMessage, "campaigner started"); before_shmem_exit(CampaignerOnExit, (Datum) 0); @@ -3417,6 +3428,8 @@ ReplierMain(Datum main_arg) ALLOCSET_DEFAULT_SIZES); bool job_pending; + MtmDisableTimeouts(); + MtmBackgroundWorker = true; before_shmem_exit(ReplierOnExit, (Datum) 0); mtm_log(MtmStateMessage, "replier started"); @@ -3851,6 +3864,12 @@ stop_node_workers(int node_id, MtmConfig *new_cfg, Datum arg) pfree(bgws[node_id - 1].handle); bgws[node_id - 1].handle = NULL; + /* + * Only clearing the name field guarantees that the monitor won't restart this + * receiver. + */ + bgws[node_id - 1].name[0] = '\0'; + /* delete recovery slot, was acquired by receiver */ ReplicationSlotDrop(filter_slot_name, true); @@ -4097,6 +4116,8 @@ MtmMonitor(Datum arg) * Note that if for some reason monitor wasn't running * (e.g. process killed) during node drop, cleanup in * stop_node_workers will be skipped. Very unlikely, but not nice. + * XXX: This code should be rethought, because the problem described above + * was actually caught in PGPRO-6146.
*/ mtm_cfg = MtmReloadConfig(mtm_cfg, start_node_workers, stop_node_workers, PointerGetDatum(bgws), diff --git a/t/001_regress.pl b/t/001_regress.pl index fc9397fb11..12ecdefa6d 100644 --- a/t/001_regress.pl +++ b/t/001_regress.pl @@ -89,10 +89,11 @@ $schedule =~ s/test: cfs/#test: cfs/g; $schedule =~ s/test: largeobject//; # serial schedule $schedule =~ s/largeobject//; # parallel schedule +$schedule =~ s/atx0//; # parallel schedule unlink('parallel_schedule'); TestLib::append_to_file('parallel_schedule', $schedule); -my $regress_shlib = TestLib::perl2host($ENV{REGRESS_SHLIB}); +my $regress_shlib = $ENV{REGRESS_SHLIB}; my $regress_libdir = dirname($regress_shlib); my $regress_outdir = "$ENV{TESTDIR}/tmp_check/regress_outdir"; mkdir($regress_outdir);