summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Paquier 2018-10-25 00:46:00 +0000
committer Michael Paquier 2018-10-25 00:46:00 +0000
commit 10074651e3355e2405015f6253602be8344bc829 (patch)
tree 3b53798231baca437d9c27f934e010c9d4bcf74a
parent 0a8590b2a09e7a9e10612bc38d5e872b0c234eca (diff)
Add pg_promote function
This commit adds a new SQL-callable function, pg_promote, that promotes a standby server. Execution access can be granted to non-superusers so that failover tools can observe the principle of least privilege. Catalog version is bumped.

Author: Laurenz Albe
Reviewed-by: Michael Paquier, Masahiko Sawada
Discussion: https://postgr.es/m/[email protected]
-rw-r--r-- doc/src/sgml/func.sgml 19
-rw-r--r-- doc/src/sgml/high-availability.sgml 19
-rw-r--r-- doc/src/sgml/monitoring.sgml 6
-rw-r--r-- doc/src/sgml/recovery-config.sgml 3
-rw-r--r-- src/backend/access/transam/xlog.c 6
-rw-r--r-- src/backend/access/transam/xlogfuncs.c 77
-rw-r--r-- src/backend/catalog/system_views.sql 6
-rw-r--r-- src/backend/postmaster/pgstat.c 3
-rw-r--r-- src/include/access/xlog.h 6
-rw-r--r-- src/include/catalog/catversion.h 2
-rw-r--r-- src/include/catalog/pg_proc.dat 4
-rw-r--r-- src/include/pgstat.h 1
-rw-r--r-- src/test/recovery/t/004_timeline_switch.pl 11
13 files changed, 143 insertions, 20 deletions
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5193df3366..96d45419e5 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19203,6 +19203,9 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
<primary>pg_is_wal_replay_paused</primary>
</indexterm>
<indexterm>
+ <primary>pg_promote</primary>
+ </indexterm>
+ <indexterm>
<primary>pg_wal_replay_pause</primary>
</indexterm>
<indexterm>
@@ -19234,6 +19237,22 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
</row>
<row>
<entry>
+ <literal><function>pg_promote(<parameter>wait</parameter> <type>boolean</type> DEFAULT true, <parameter>wait_seconds</parameter> <type>integer</type> DEFAULT 60)</function></literal>
+ </entry>
+ <entry><type>boolean</type></entry>
+ <entry>
+ Promotes a physical standby server. Returns <literal>true</literal>
+ if promotion is successful and <literal>false</literal> otherwise.
+ With <parameter>wait</parameter> set to <literal>true</literal>, the
+ default, the function waits until promotion is completed or
+ <parameter>wait_seconds</parameter> seconds have passed, otherwise the
+ function returns immediately after sending the promotion signal to the
+ postmaster. This function is restricted to superusers by default, but
+ other users can be granted EXECUTE to run the function.
+ </entry>
+ </row>
+ <row>
+ <entry>
<literal><function>pg_wal_replay_pause()</function></literal>
</entry>
<entry><type>void</type></entry>
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index ebcb3daaed..faf8e71854 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1471,14 +1471,17 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)'
</para>
<para>
- To trigger failover of a log-shipping standby server,
- run <command>pg_ctl promote</command> or create a trigger
- file with the file name and path specified by the <varname>trigger_file</varname>
- setting in <filename>recovery.conf</filename>. If you're planning to use
- <command>pg_ctl promote</command> to fail over, <varname>trigger_file</varname> is
- not required. If you're setting up the reporting servers that are
- only used to offload read-only queries from the primary, not for high
- availability purposes, you don't need to promote it.
+ To trigger failover of a log-shipping standby server, run
+ <command>pg_ctl promote</command>, call <function>pg_promote</function>,
+ or create a trigger file with the file name and path specified by the
+ <varname>trigger_file</varname> setting in
+ <filename>recovery.conf</filename>. If you're planning to use
+ <command>pg_ctl promote</command> or to call
+ <function>pg_promote</function> to fail over,
+ <varname>trigger_file</varname> is not required. If you're
+ setting up the reporting servers that are only used to offload read-only
+ queries from the primary, not for high availability purposes, you don't
+ need to promote it.
</para>
</sect1>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index d4285ea56a..add71458e2 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1268,7 +1268,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<entry>Waiting in an extension.</entry>
</row>
<row>
- <entry morerows="33"><literal>IPC</literal></entry>
+ <entry morerows="34"><literal>IPC</literal></entry>
<entry><literal>BgWorkerShutdown</literal></entry>
<entry>Waiting for background worker to shut down.</entry>
</row>
@@ -1389,6 +1389,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<entry>Waiting for group leader to clear transaction id at transaction end.</entry>
</row>
<row>
+ <entry><literal>Promote</literal></entry>
+ <entry>Waiting for standby promotion.</entry>
+ </row>
+ <row>
<entry><literal>ReplicationOriginDrop</literal></entry>
<entry>Waiting for a replication origin to become inactive to be dropped.</entry>
</row>
diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 92825fdf19..a2bdffda94 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -439,7 +439,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
<para>
Specifies a trigger file whose presence ends recovery in the
standby. Even if this value is not set, you can still promote
- the standby using <command>pg_ctl promote</command>.
+ the standby using <command>pg_ctl promote</command> or calling
+ <function>pg_promote</function>.
This setting has no effect if <varname>standby_mode</varname> is <literal>off</literal>.
</para>
</listitem>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7375a78ffc..62fc418893 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -78,12 +78,6 @@
extern uint32 bootstrap_data_checksum_version;
-/* File path names (all relative to $PGDATA) */
-#define RECOVERY_COMMAND_FILE "recovery.conf"
-#define RECOVERY_COMMAND_DONE "recovery.done"
-#define PROMOTE_SIGNAL_FILE "promote"
-#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
-
/* User-settable parameters */
int max_wal_size_mb = 1024; /* 1 GB */
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 9731742978..a31adcca5e 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -16,6 +16,8 @@
*/
#include "postgres.h"
+#include <unistd.h>
+
#include "access/htup_details.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
@@ -23,6 +25,7 @@
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "replication/walreceiver.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
@@ -697,3 +700,77 @@ pg_backup_start_time(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(xtime);
}
+
+/*
+ * pg_promote
+ *      Promote a standby server.
+ *
+ * SQL arguments:
+ *   wait (bool)          - wait for promotion to finish before returning
+ *   wait_seconds (int32) - maximum time to wait, in seconds; must be > 0.
+ *                          It is validated even when "wait" is false.
+ *
+ * The function creates PROMOTE_SIGNAL_FILE and then sends SIGUSR1 to the
+ * postmaster.
+ *
+ * A result of "true" means that promotion has been completed if "wait" is
+ * "true", or initiated if "wait" is false.  "false" is returned, in most
+ * cases along with a WARNING, if the postmaster could not be signalled, if
+ * the postmaster died while we were waiting, or if recovery was still in
+ * progress after "wait_seconds" seconds.
+ *
+ * An ERROR is raised if recovery is not in progress, if "wait_seconds" is
+ * not strictly positive, or if the signal file cannot be created or written.
+ */
+Datum
+pg_promote(PG_FUNCTION_ARGS)
+{
+    bool        wait = PG_GETARG_BOOL(0);
+    int         wait_seconds = PG_GETARG_INT32(1);
+    FILE       *promote_file;
+    int64       max_waits;
+    int64       i;
+
+    if (!RecoveryInProgress())
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("recovery is not in progress"),
+                 errhint("Recovery control functions can only be executed during recovery.")));
+
+    if (wait_seconds <= 0)
+        ereport(ERROR,
+                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                 errmsg("\"wait_seconds\" cannot be negative or equal zero")));
+
+    /* create the promote signal file */
+    promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
+    if (!promote_file)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not create file \"%s\": %m",
+                        PROMOTE_SIGNAL_FILE)));
+
+    if (FreeFile(promote_file))
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not write file \"%s\": %m",
+                        PROMOTE_SIGNAL_FILE)));
+
+    /* signal the postmaster */
+    if (kill(PostmasterPid, SIGUSR1) != 0)
+    {
+        ereport(WARNING,
+                (errmsg("failed to send signal to postmaster: %m")));
+        /* nobody was told about the file, so do not leave it behind */
+        (void) unlink(PROMOTE_SIGNAL_FILE);
+        PG_RETURN_BOOL(false);
+    }
+
+    /* return immediately if waiting was not requested */
+    if (!wait)
+        PG_RETURN_BOOL(true);
+
+    /*
+     * Wait for the amount of time wanted until promotion.  The number of
+     * iterations is computed in 64 bits so that a large "wait_seconds" cannot
+     * overflow the multiplication and cut the wait short.
+     */
+#define WAITS_PER_SECOND 10
+    max_waits = (int64) WAITS_PER_SECOND * wait_seconds;
+    for (i = 0; i < max_waits; i++)
+    {
+        int         rc;
+
+        ResetLatch(MyLatch);
+
+        if (!RecoveryInProgress())
+            PG_RETURN_BOOL(true);
+
+        CHECK_FOR_INTERRUPTS();
+
+        rc = WaitLatch(MyLatch,
+                       WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                       1000L / WAITS_PER_SECOND,
+                       WAIT_EVENT_PROMOTE);
+
+        /*
+         * Emergency bailout if the postmaster has died: promotion can no
+         * longer be completed, so there is no point in looping until the
+         * timeout expires.
+         */
+        if (rc & WL_POSTMASTER_DEATH)
+            PG_RETURN_BOOL(false);
+    }
+
+    ereport(WARNING,
+            (errmsg("server did not promote within %d seconds", wait_seconds)));
+    PG_RETURN_BOOL(false);
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index a03b005f73..53ddc593a8 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1027,6 +1027,11 @@ CREATE OR REPLACE FUNCTION pg_stop_backup (
RETURNS SETOF record STRICT VOLATILE LANGUAGE internal as 'pg_stop_backup_v2'
PARALLEL RESTRICTED;
+-- pg_promote: declared here with its argument defaults (wait = true,
+-- wait_seconds = 60); the body is the internal function 'pg_promote'.
+CREATE OR REPLACE FUNCTION
+ pg_promote(wait boolean DEFAULT true, wait_seconds integer DEFAULT 60)
+ RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote'
+ PARALLEL RESTRICTED;
+
-- legacy definition for compatibility with 9.3
CREATE OR REPLACE FUNCTION
json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
@@ -1138,6 +1143,7 @@ REVOKE EXECUTE ON FUNCTION pg_rotate_logfile() FROM public;
REVOKE EXECUTE ON FUNCTION pg_reload_conf() FROM public;
REVOKE EXECUTE ON FUNCTION pg_current_logfile() FROM public;
REVOKE EXECUTE ON FUNCTION pg_current_logfile(text) FROM public;
+REVOKE EXECUTE ON FUNCTION pg_promote(boolean, integer) FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset() FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset_shared(text) FROM public;
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 774f03f570..42bccce0af 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3663,6 +3663,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
case WAIT_EVENT_PROCARRAY_GROUP_UPDATE:
event_name = "ProcArrayGroupUpdate";
break;
+ case WAIT_EVENT_PROMOTE:
+ event_name = "Promote";
+ break;
case WAIT_EVENT_REPLICATION_ORIGIN_DROP:
event_name = "ReplicationOriginDrop";
break;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e01d12eb7c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -319,10 +319,16 @@ extern void do_pg_abort_backup(void);
extern SessionBackupState get_backup_status(void);
/* File path names (all relative to $PGDATA) */
+#define RECOVERY_COMMAND_FILE "recovery.conf"
+#define RECOVERY_COMMAND_DONE "recovery.done"
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
#define TABLESPACE_MAP "tablespace_map"
#define TABLESPACE_MAP_OLD "tablespace_map.old"
+/* files to signal promotion to primary */
+#define PROMOTE_SIGNAL_FILE "promote"
+#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
+
#endif /* XLOG_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index e04eabf683..1d5fe83c1a 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201810111
+#define CATALOG_VERSION_NO 201810251
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cff58ed2d8..4d7fe1b383 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5824,6 +5824,10 @@
proname => 'pg_backup_start_time', provolatile => 's',
prorettype => 'timestamptz', proargtypes => '',
prosrc => 'pg_backup_start_time' },
+{ oid => '3436', descr => 'promote standby server',
+ proname => 'pg_promote', provolatile => 'v',
+ prorettype => 'bool', proargtypes => 'bool int4', proargnames => '{wait,wait_seconds}',
+ prosrc => 'pg_promote' },
{ oid => '2848', descr => 'switch to new wal file',
proname => 'pg_switch_wal', provolatile => 'v', prorettype => 'pg_lsn',
proargtypes => '', prosrc => 'pg_switch_wal' },
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fd1d52a0c5..f1c10d16b8 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -829,6 +829,7 @@ typedef enum
WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN,
WAIT_EVENT_PARALLEL_FINISH,
WAIT_EVENT_PROCARRAY_GROUP_UPDATE,
+ WAIT_EVENT_PROMOTE,
WAIT_EVENT_REPLICATION_ORIGIN_DROP,
WAIT_EVENT_REPLICATION_SLOT_DROP,
WAIT_EVENT_SAFE_SNAPSHOT,
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
index 34ee335129..a7ccb7b4a3 100644
--- a/src/test/recovery/t/004_timeline_switch.pl
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -6,7 +6,7 @@ use warnings;
use File::Path qw(rmtree);
use PostgresNode;
use TestLib;
-use Test::More tests => 1;
+use Test::More tests => 2;
$ENV{PGDATABASE} = 'postgres';
@@ -37,9 +37,14 @@ $node_master->safe_psql('postgres',
$node_master->wait_for_catchup($node_standby_1, 'replay',
$node_master->lsn('write'));
-# Stop and remove master, and promote standby 1, switching it to a new timeline
+# Stop and remove master
$node_master->teardown_node;
-$node_standby_1->promote;
+
+# Promote standby 1 using "pg_promote", switching it to a new timeline.
+# The call passes wait_seconds => 300; psql's stdout is captured in
+# $psql_out and must be 't'.  Only stdout is inspected here, not the
+# return value of psql().
+my $psql_out = '';
+$node_standby_1->psql('postgres', "SELECT pg_promote(wait_seconds => 300)",
+ stdout => \$psql_out);
+is($psql_out, 't', "promotion of standby with pg_promote");
# Switch standby 2 to replay from standby 1
rmtree($node_standby_2->data_dir . '/recovery.conf');