Ensure that the sync slots reach a consistent state after promotion without losing...

author Amit Kapila <[email protected]>

Wed, 3 Apr 2024 08:34:59 +0000 (14:04 +0530)

committer Amit Kapila <[email protected]>

Wed, 3 Apr 2024 08:34:59 +0000 (14:04 +0530)
author Amit Kapila <[email protected]>
Wed, 3 Apr 2024 08:34:59 +0000 (14:04 +0530)
committer Amit Kapila <[email protected]>
Wed, 3 Apr 2024 08:34:59 +0000 (14:04 +0530)
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c

index 51ffb623c01e14001098f53caa1ce3cb35cc89f3..97a4d99c4e74f20f4ce97a2e96d4b536b0d50d65 100644 (file)
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -36,6 +36,7 @@
  #include "replication/decode.h"
  #include "replication/logical.h"
  #include "replication/reorderbuffer.h"
+#include "replication/slotsync.h"
  #include "replication/snapbuild.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
@@ -516,17 +517,23 @@ CreateDecodingContext(XLogRecPtr start_lsn,
                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                  errmsg("cannot use physical replication slot for logical decoding")));
  
-   if (slot->data.database != MyDatabaseId)
+   /*
+    * We need to access the system tables during decoding to build the
+    * logical changes unless we are in fast_forward mode where no changes are
+    * generated.
+    */
+   if (slot->data.database != MyDatabaseId && !fast_forward)
         ereport(ERROR,
                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                  errmsg("replication slot \"%s\" was not created in this database",
                         NameStr(slot->data.name))));
  
     /*
-    * Do not allow consumption of a "synchronized" slot until the standby
-    * gets promoted.
+    * The slots being synced from the primary can't be used for decoding as
+    * they are used after failover. However, we do allow advancing the LSNs
+    * during the synchronization of slots. See update_local_synced_slot.
      */
-   if (RecoveryInProgress() && slot->data.synced)
+   if (RecoveryInProgress() && slot->data.synced && !IsSyncingReplicationSlots())
         ereport(ERROR,
                 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("cannot use replication slot \"%s\" for logical decoding",
@@ -2034,3 +2041,135 @@ LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal)
  
     return has_pending_wal;
  }
+
+/*
+ * Helper function for advancing our logical replication slot forward.
+ *
+ * The slot's restart_lsn is used as start point for reading records, while
+ * confirmed_flush is used as base point for the decoding context.
+ *
+ * We cannot just do LogicalConfirmReceivedLocation to update confirmed_flush,
+ * because we need to digest WAL to advance restart_lsn allowing to recycle
+ * WAL and removal of old catalog tuples.  As decoding is done in fast_forward
+ * mode, no changes are generated anyway.
+ *
+ * *found_consistent_snapshot will be true if the initial decoding snapshot has
+ * been built; Otherwise, it will be false.
+ */
+XLogRecPtr
+LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto,
+                                   bool *found_consistent_snapshot)
+{
+   LogicalDecodingContext *ctx;
+   ResourceOwner old_resowner = CurrentResourceOwner;
+   XLogRecPtr  retlsn;
+
+   Assert(moveto != InvalidXLogRecPtr);
+
+   if (found_consistent_snapshot)
+       *found_consistent_snapshot = false;
+
+   PG_TRY();
+   {
+       /*
+        * Create our decoding context in fast_forward mode, passing start_lsn
+        * as InvalidXLogRecPtr, so that we start processing from my slot's
+        * confirmed_flush.
+        */
+       ctx = CreateDecodingContext(InvalidXLogRecPtr,
+                                   NIL,
+                                   true,   /* fast_forward */
+                                   XL_ROUTINE(.page_read = read_local_xlog_page,
+                                              .segment_open = wal_segment_open,
+                                              .segment_close = wal_segment_close),
+                                   NULL, NULL, NULL);
+
+       /*
+        * Wait for specified streaming replication standby servers (if any)
+        * to confirm receipt of WAL up to moveto lsn.
+        */
+       WaitForStandbyConfirmation(moveto);
+
+       /*
+        * Start reading at the slot's restart_lsn, which we know to point to
+        * a valid record.
+        */
+       XLogBeginRead(ctx->reader, MyReplicationSlot->data.restart_lsn);
+
+       /* invalidate non-timetravel entries */
+       InvalidateSystemCaches();
+
+       /* Decode records until we reach the requested target */
+       while (ctx->reader->EndRecPtr < moveto)
+       {
+           char       *errm = NULL;
+           XLogRecord *record;
+
+           /*
+            * Read records.  No changes are generated in fast_forward mode,
+            * but snapbuilder/slot statuses are updated properly.
+            */
+           record = XLogReadRecord(ctx->reader, &errm);
+           if (errm)
+               elog(ERROR, "could not find record while advancing replication slot: %s",
+                    errm);
+
+           /*
+            * Process the record.  Storage-level changes are ignored in
+            * fast_forward mode, but other modules (such as snapbuilder)
+            * might still have critical updates to do.
+            */
+           if (record)
+               LogicalDecodingProcessRecord(ctx, ctx->reader);
+
+           CHECK_FOR_INTERRUPTS();
+       }
+
+       if (found_consistent_snapshot && DecodingContextReady(ctx))
+           *found_consistent_snapshot = true;
+
+       /*
+        * Logical decoding could have clobbered CurrentResourceOwner during
+        * transaction management, so restore the executor's value.  (This is
+        * a kluge, but it's not worth cleaning up right now.)
+        */
+       CurrentResourceOwner = old_resowner;
+
+       if (ctx->reader->EndRecPtr != InvalidXLogRecPtr)
+       {
+           LogicalConfirmReceivedLocation(moveto);
+
+           /*
+            * If only the confirmed_flush LSN has changed the slot won't get
+            * marked as dirty by the above. Callers on the walsender
+            * interface are expected to keep track of their own progress and
+            * don't need it written out. But SQL-interface users cannot
+            * specify their own start positions and it's harder for them to
+            * keep track of their progress, so we should make more of an
+            * effort to save it for them.
+            *
+            * Dirty the slot so it is written out at the next checkpoint. The
+            * LSN position advanced to may still be lost on a crash but this
+            * makes the data consistent after a clean shutdown.
+            */
+           ReplicationSlotMarkDirty();
+       }
+
+       retlsn = MyReplicationSlot->data.confirmed_flush;
+
+       /* free context, call shutdown callback */
+       FreeDecodingContext(ctx);
+
+       InvalidateSystemCaches();
+   }
+   PG_CATCH();
+   {
+       /* clear all timetravel entries */
+       InvalidateSystemCaches();
+
+       PG_RE_THROW();
+   }
+   PG_END_TRY();
+
+   return retlsn;
+}
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c

index 30480960c55d0237e95972ce5e228c13c97d6068..9ac847b7806756e2fa643af5d89e8d1a0c5f7fb4 100644 (file)
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -25,6 +25,15 @@
   * which slot sync worker can perform the sync periodically or user can call
   * pg_sync_replication_slots() periodically to perform the syncs.
   *
+ * If synchronized slots fail to build a consistent snapshot from the
+ * restart_lsn before reaching confirmed_flush_lsn, they would become
+ * unreliable after promotion due to potential data loss from changes
+ * before reaching a consistent point. This can happen because the slots can
+ * be synced at some random time and we may not reach the consistent point
+ * at the same WAL location as the primary. So, we mark such slots as
+ * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
+ * consistent point, they will be marked as RS_PERSISTENT.
+ *
   * The slot sync worker waits for some time before the next synchronization,
   * with the duration varying based on whether any slots were updated during
   * the last cycle. Refer to the comments above wait_for_slot_activity() for
@@ -49,8 +58,9 @@
  #include "postmaster/fork_process.h"
  #include "postmaster/interrupt.h"
  #include "postmaster/postmaster.h"
-#include "replication/slot.h"
+#include "replication/logical.h"
  #include "replication/slotsync.h"
+#include "replication/snapbuild.h"
  #include "storage/ipc.h"
  #include "storage/lmgr.h"
  #include "storage/proc.h"
@@ -147,50 +157,85 @@ static void slotsync_failure_callback(int code, Datum arg);
   *
   * If no update was needed (the data of the remote slot is the same as the
   * local slot) return false, otherwise true.
+ *
+ * *found_consistent_snapshot will be true iff the remote slot's LSN or xmin is
+ * modified, and decoding from the corresponding LSN's can reach a
+ * consistent snapshot.
   */
  static bool
-update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
+update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
+                        bool *found_consistent_snapshot)
  {
     ReplicationSlot *slot = MyReplicationSlot;
-   bool        xmin_changed;
-   bool        restart_lsn_changed;
-   NameData    plugin_name;
+   bool        slot_updated = false;
  
     Assert(slot->data.invalidated == RS_INVAL_NONE);
  
-   xmin_changed = (remote_slot->catalog_xmin != slot->data.catalog_xmin);
-   restart_lsn_changed = (remote_slot->restart_lsn != slot->data.restart_lsn);
+   if (found_consistent_snapshot)
+       *found_consistent_snapshot = false;
  
-   if (!xmin_changed &&
-       !restart_lsn_changed &&
-       remote_dbid == slot->data.database &&
-       remote_slot->two_phase == slot->data.two_phase &&
-       remote_slot->failover == slot->data.failover &&
-       remote_slot->confirmed_lsn == slot->data.confirmed_flush &&
-       strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) == 0)
-       return false;
+   if (remote_slot->confirmed_lsn != slot->data.confirmed_flush ||
+       remote_slot->restart_lsn != slot->data.restart_lsn ||
+       remote_slot->catalog_xmin != slot->data.catalog_xmin)
+   {
+       /*
+        * We can't directly copy the remote slot's LSN or xmin unless there
+        * exists a consistent snapshot at that point. Otherwise, after
+        * promotion, the slots may not reach a consistent point before the
+        * confirmed_flush_lsn which can lead to a data loss. To avoid data
+        * loss, we let slot machinery advance the slot which ensures that
+        * snapbuilder/slot statuses are updated properly.
+        */
+       if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
+       {
+           /*
+            * Update the slot info directly if there is a serialized snapshot
+            * at the restart_lsn, as the slot can quickly reach consistency
+            * at restart_lsn by restoring the snapshot.
+            */
+           SpinLockAcquire(&slot->mutex);
+           slot->data.restart_lsn = remote_slot->restart_lsn;
+           slot->data.confirmed_flush = remote_slot->confirmed_lsn;
+           slot->data.catalog_xmin = remote_slot->catalog_xmin;
+           slot->effective_catalog_xmin = remote_slot->catalog_xmin;
+           SpinLockRelease(&slot->mutex);
  
-   /* Avoid expensive operations while holding a spinlock. */
-   namestrcpy(&plugin_name, remote_slot->plugin);
-
-   SpinLockAcquire(&slot->mutex);
-   slot->data.plugin = plugin_name;
-   slot->data.database = remote_dbid;
-   slot->data.two_phase = remote_slot->two_phase;
-   slot->data.failover = remote_slot->failover;
-   slot->data.restart_lsn = remote_slot->restart_lsn;
-   slot->data.confirmed_flush = remote_slot->confirmed_lsn;
-   slot->data.catalog_xmin = remote_slot->catalog_xmin;
-   slot->effective_catalog_xmin = remote_slot->catalog_xmin;
-   SpinLockRelease(&slot->mutex);
-
-   if (xmin_changed)
-       ReplicationSlotsComputeRequiredXmin(false);
+           if (found_consistent_snapshot)
+               *found_consistent_snapshot = true;
+       }
+       else
+       {
+           LogicalSlotAdvanceAndCheckSnapState(remote_slot->confirmed_lsn,
+                                               found_consistent_snapshot);
+       }
  
-   if (restart_lsn_changed)
+       ReplicationSlotsComputeRequiredXmin(false);
         ReplicationSlotsComputeRequiredLSN();
  
-   return true;
+       slot_updated = true;
+   }
+
+   if (remote_dbid != slot->data.database ||
+       remote_slot->two_phase != slot->data.two_phase ||
+       remote_slot->failover != slot->data.failover ||
+       strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0)
+   {
+       NameData    plugin_name;
+
+       /* Avoid expensive operations while holding a spinlock. */
+       namestrcpy(&plugin_name, remote_slot->plugin);
+
+       SpinLockAcquire(&slot->mutex);
+       slot->data.plugin = plugin_name;
+       slot->data.database = remote_dbid;
+       slot->data.two_phase = remote_slot->two_phase;
+       slot->data.failover = remote_slot->failover;
+       SpinLockRelease(&slot->mutex);
+
+       slot_updated = true;
+   }
+
+   return slot_updated;
  }
  
  /*
@@ -413,6 +458,7 @@ static bool
  update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
  {
     ReplicationSlot *slot = MyReplicationSlot;
+   bool        found_consistent_snapshot = false;
  
     /*
      * Check if the primary server has caught up. Refer to the comment atop
@@ -443,9 +489,22 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
         return false;
     }
  
-   /* First time slot update, the function must return true */
-   if (!update_local_synced_slot(remote_slot, remote_dbid))
-       elog(ERROR, "failed to update slot");
+   (void) update_local_synced_slot(remote_slot, remote_dbid,
+                                   &found_consistent_snapshot);
+
+   /*
+    * Don't persist the slot if it cannot reach the consistent point from the
+    * restart_lsn. See comments atop this file.
+    */
+   if (!found_consistent_snapshot)
+   {
+       ereport(LOG,
+               errmsg("could not sync slot \"%s\"", remote_slot->name),
+               errdetail("Logical decoding cannot find consistent point from local slot's LSN %X/%X.",
+                         LSN_FORMAT_ARGS(slot->data.restart_lsn)));
+
+       return false;
+   }
  
     ReplicationSlotPersist();
  
@@ -578,7 +637,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
                      LSN_FORMAT_ARGS(remote_slot->restart_lsn));
  
             /* Make sure the slot changes persist across server restart */
-           if (update_local_synced_slot(remote_slot, remote_dbid))
+           if (update_local_synced_slot(remote_slot, remote_dbid, NULL))
             {
                 ReplicationSlotMarkDirty();
                 ReplicationSlotSave();
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c

index ac24b51860357882a2a061d8a1487d95611f2516..e37e22f4417c557e7457347250f95f6d1872d77d 100644 (file)
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -2134,3 +2134,26 @@ CheckPointSnapBuild(void)
     }
     FreeDir(snap_dir);
  }
+
+/*
+ * Check if a logical snapshot at the specified point has been serialized.
+ */
+bool
+SnapBuildSnapshotExists(XLogRecPtr lsn)
+{
+   char        path[MAXPGPATH];
+   int         ret;
+   struct stat stat_buf;
+
+   sprintf(path, "pg_logical/snapshots/%X-%X.snap",
+           LSN_FORMAT_ARGS(lsn));
+
+   ret = stat(path, &stat_buf);
+
+   if (ret != 0 && errno != ENOENT)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not stat file \"%s\": %m", path)));
+
+   return ret == 0;
+}
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c

index da57177c25b34f8cec49f0ee2b161c063d536af7..dd6c1d5a7e3537d987e59647cdb4d23fc8271b18 100644 (file)
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -492,125 +492,13 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto)
  }
  
  /*
- * Helper function for advancing our logical replication slot forward.
- *
- * The slot's restart_lsn is used as start point for reading records, while
- * confirmed_flush is used as base point for the decoding context.
- *
- * We cannot just do LogicalConfirmReceivedLocation to update confirmed_flush,
- * because we need to digest WAL to advance restart_lsn allowing to recycle
- * WAL and removal of old catalog tuples.  As decoding is done in fast_forward
- * mode, no changes are generated anyway.
+ * Advance our logical replication slot forward. See
+ * LogicalSlotAdvanceAndCheckSnapState for details.
   */
  static XLogRecPtr
  pg_logical_replication_slot_advance(XLogRecPtr moveto)
  {
-   LogicalDecodingContext *ctx;
-   ResourceOwner old_resowner = CurrentResourceOwner;
-   XLogRecPtr  retlsn;
-
-   Assert(moveto != InvalidXLogRecPtr);
-
-   PG_TRY();
-   {
-       /*
-        * Create our decoding context in fast_forward mode, passing start_lsn
-        * as InvalidXLogRecPtr, so that we start processing from my slot's
-        * confirmed_flush.
-        */
-       ctx = CreateDecodingContext(InvalidXLogRecPtr,
-                                   NIL,
-                                   true,   /* fast_forward */
-                                   XL_ROUTINE(.page_read = read_local_xlog_page,
-                                              .segment_open = wal_segment_open,
-                                              .segment_close = wal_segment_close),
-                                   NULL, NULL, NULL);
-
-       /*
-        * Wait for specified streaming replication standby servers (if any)
-        * to confirm receipt of WAL up to moveto lsn.
-        */
-       WaitForStandbyConfirmation(moveto);
-
-       /*
-        * Start reading at the slot's restart_lsn, which we know to point to
-        * a valid record.
-        */
-       XLogBeginRead(ctx->reader, MyReplicationSlot->data.restart_lsn);
-
-       /* invalidate non-timetravel entries */
-       InvalidateSystemCaches();
-
-       /* Decode records until we reach the requested target */
-       while (ctx->reader->EndRecPtr < moveto)
-       {
-           char       *errm = NULL;
-           XLogRecord *record;
-
-           /*
-            * Read records.  No changes are generated in fast_forward mode,
-            * but snapbuilder/slot statuses are updated properly.
-            */
-           record = XLogReadRecord(ctx->reader, &errm);
-           if (errm)
-               elog(ERROR, "could not find record while advancing replication slot: %s",
-                    errm);
-
-           /*
-            * Process the record.  Storage-level changes are ignored in
-            * fast_forward mode, but other modules (such as snapbuilder)
-            * might still have critical updates to do.
-            */
-           if (record)
-               LogicalDecodingProcessRecord(ctx, ctx->reader);
-
-           CHECK_FOR_INTERRUPTS();
-       }
-
-       /*
-        * Logical decoding could have clobbered CurrentResourceOwner during
-        * transaction management, so restore the executor's value.  (This is
-        * a kluge, but it's not worth cleaning up right now.)
-        */
-       CurrentResourceOwner = old_resowner;
-
-       if (ctx->reader->EndRecPtr != InvalidXLogRecPtr)
-       {
-           LogicalConfirmReceivedLocation(moveto);
-
-           /*
-            * If only the confirmed_flush LSN has changed the slot won't get
-            * marked as dirty by the above. Callers on the walsender
-            * interface are expected to keep track of their own progress and
-            * don't need it written out. But SQL-interface users cannot
-            * specify their own start positions and it's harder for them to
-            * keep track of their progress, so we should make more of an
-            * effort to save it for them.
-            *
-            * Dirty the slot so it is written out at the next checkpoint. The
-            * LSN position advanced to may still be lost on a crash but this
-            * makes the data consistent after a clean shutdown.
-            */
-           ReplicationSlotMarkDirty();
-       }
-
-       retlsn = MyReplicationSlot->data.confirmed_flush;
-
-       /* free context, call shutdown callback */
-       FreeDecodingContext(ctx);
-
-       InvalidateSystemCaches();
-   }
-   PG_CATCH();
-   {
-       /* clear all timetravel entries */
-       InvalidateSystemCaches();
-
-       PG_RE_THROW();
-   }
-   PG_END_TRY();
-
-   return retlsn;
+   return LogicalSlotAdvanceAndCheckSnapState(moveto, NULL);
  }
  
  /*
diff --git a/src/include/replication/logical.h b/src/include/replication/logical.h

index dc2df4ce92a6495b2686d1bc02cbe691ea185f1a..aff38e8d049f236c3797752314c3bb2676326f4f 100644 (file)
--- a/src/include/replication/logical.h
+++ b/src/include/replication/logical.h
@@ -149,5 +149,7 @@ extern void ResetLogicalStreamingState(void);
  extern void UpdateDecodingStats(LogicalDecodingContext *ctx);
  
  extern bool LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal);
+extern XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto,
+                                                     bool *found_consistent_snapshot);
  
  #endif
diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h

index fbdf3623969fef9fe6100e8220fdfc0d1de4766d..a3360a1c5ea7a8d07021227d5c8c198426fc952b 100644 (file)
--- a/src/include/replication/snapbuild.h
+++ b/src/include/replication/snapbuild.h
@@ -91,4 +91,6 @@ extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn,
                                          struct xl_running_xacts *running);
  extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn);
  
+extern bool SnapBuildSnapshotExists(XLogRecPtr lsn);
+
  #endif                         /* SNAPBUILD_H */
diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl

index f47bfd78eb6ee119fd38ec79d5562412446193df..869e3d2e9146188a52957cb3ddec8ee7018bdf54 100644 (file)
--- a/src/test/recovery/t/040_standby_failover_slots_sync.pl
+++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl
@@ -365,6 +365,68 @@ ok( $stderr =~
  
  $cascading_standby->stop;
  
+##################################################
+# Create a failover slot and advance the restart_lsn to a position where a
+# running transaction exists. This setup is for testing that the synced slots
+# can achieve the consistent snapshot state starting from the restart_lsn
+# after promotion without losing any data that otherwise would have been
+# received from the primary.
+##################################################
+
+$primary->safe_psql('postgres',
+   "SELECT pg_create_logical_replication_slot('snap_test_slot', 'test_decoding', false, false, true);"
+);
+
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+# Two xl_running_xacts logs are generated here. When decoding the first log, it
+# only serializes the snapshot, without advancing the restart_lsn to the latest
+# position. This is because if a transaction is running, the restart_lsn can
+# only move to a position before that transaction. Hence, the second
+# xl_running_xacts log is needed, the decoding for which allows the restart_lsn
+# to advance to the last serialized snapshot's position (the first log).
+$primary->safe_psql(
+   'postgres', qq(
+       BEGIN;
+       SELECT txid_current();
+       SELECT pg_log_standby_snapshot();
+       COMMIT;
+       BEGIN;
+       SELECT txid_current();
+       SELECT pg_log_standby_snapshot();
+       COMMIT;
+));
+
+$primary->wait_for_replay_catchup($standby1);
+
+# Advance the restart_lsn to the position of the first xl_running_xacts log
+# generated above. Note that there might be concurrent xl_running_xacts logs
+# written by the bgwriter, which could cause the position to be advanced to an
+# unexpected point, but that would be a rare scenario and doesn't affect the
+# test results.
+$primary->safe_psql('postgres',
+   "SELECT pg_replication_slot_advance('snap_test_slot', pg_current_wal_lsn());"
+);
+
+# Log a message that will be consumed on the standby after promotion using the
+# synced slot. See the test where we promote standby (Promote the standby1 to
+# primary.)
+$primary->safe_psql('postgres',
+   "SELECT pg_logical_emit_message(false, 'test', 'test');"
+);
+
+# Get the confirmed_flush_lsn for the logical slot snap_test_slot on the primary
+my $confirmed_flush_lsn = $primary->safe_psql('postgres',
+   "SELECT confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'snap_test_slot';");
+
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+# Verify that confirmed_flush_lsn of snap_test_slot slot is synced to the standby
+ok( $standby1->poll_query_until(
+       'postgres',
+       "SELECT '$confirmed_flush_lsn' = confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'snap_test_slot' AND synced AND NOT temporary;"),
+   'confirmed_flush_lsn of slot snap_test_slot synced to standby');
+
  ##################################################
  # Test to confirm that the slot synchronization is protected from malicious
  # users.
@@ -458,8 +520,8 @@ $standby1->wait_for_log(qr/slot sync worker started/,
     $log_offset);
  
  ##################################################
-# Test to confirm that restart_lsn and confirmed_flush_lsn of the logical slot
-# on the primary is synced to the standby via the slot sync worker.
+# Test to confirm that confirmed_flush_lsn of the logical slot on the primary
+# is synced to the standby via the slot sync worker.
  ##################################################
  
  # Insert data on the primary
@@ -479,8 +541,8 @@ $subscriber1->safe_psql(
  
  $subscriber1->wait_for_subscription_sync;
  
-# Do not allow any further advancement of the restart_lsn and
-# confirmed_flush_lsn for the lsub1_slot.
+# Do not allow any further advancement of the confirmed_flush_lsn for the
+# lsub1_slot.
  $subscriber1->safe_psql('postgres', "ALTER SUBSCRIPTION regress_mysub1 DISABLE");
  
  # Wait for the replication slot to become inactive on the publisher
@@ -489,20 +551,15 @@ $primary->poll_query_until(
     "SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE slot_name = 'lsub1_slot' AND active='f'",
     1);
  
-# Get the restart_lsn for the logical slot lsub1_slot on the primary
-my $primary_restart_lsn = $primary->safe_psql('postgres',
-   "SELECT restart_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot';");
-
  # Get the confirmed_flush_lsn for the logical slot lsub1_slot on the primary
  my $primary_flush_lsn = $primary->safe_psql('postgres',
     "SELECT confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot';");
  
-# Confirm that restart_lsn and confirmed_flush_lsn of lsub1_slot slot are synced
-# to the standby
+# Confirm that confirmed_flush_lsn of lsub1_slot slot is synced to the standby
  ok( $standby1->poll_query_until(
         'postgres',
-       "SELECT '$primary_restart_lsn' = restart_lsn AND '$primary_flush_lsn' = confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot' AND synced AND NOT temporary;"),
-   'restart_lsn and confirmed_flush_lsn of slot lsub1_slot synced to standby');
+       "SELECT '$primary_flush_lsn' = confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot' AND synced AND NOT temporary;"),
+   'confirmed_flush_lsn of slot lsub1_slot synced to standby');
  
  ##################################################
  # Test that logical failover replication slots wait for the specified
@@ -744,8 +801,9 @@ $primary->reload;
  
  ##################################################
  # Promote the standby1 to primary. Confirm that:
-# a) the slot 'lsub1_slot' is retained on the new primary
+# a) the slot 'lsub1_slot' and 'snap_test_slot' are retained on the new primary
  # b) logical replication for regress_mysub1 is resumed successfully after failover
+# c) changes can be consumed from the synced slot 'snap_test_slot'
  ##################################################
  $standby1->start;
  $primary->wait_for_replay_catchup($standby1);
@@ -759,8 +817,8 @@ $subscriber1->safe_psql('postgres',
  
  # Confirm the synced slot 'lsub1_slot' is retained on the new primary
  is($standby1->safe_psql('postgres',
-   q{SELECT slot_name FROM pg_replication_slots WHERE slot_name = 'lsub1_slot' AND synced AND NOT temporary;}),
-   'lsub1_slot',
+   q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'snap_test_slot') AND synced AND NOT temporary;}),
+   't',
     'synced slot retained on the new primary');
  
  # Insert data on the new primary
@@ -773,4 +831,13 @@ is( $subscriber1->safe_psql('postgres', q{SELECT count(*) FROM tab_int;}),
     "20",
     'data replicated from the new primary');
  
+# Consume the data from the snap_test_slot. The synced slot should reach a
+# consistent point by restoring the snapshot at the restart_lsn serialized
+# during slot synchronization.
+$result = $standby1->safe_psql('postgres',
+   "SELECT count(*) FROM pg_logical_slot_get_changes('snap_test_slot', NULL, NULL) WHERE data ~ 'message*';"
+);
+
+is($result, '1', "data can be consumed using snap_test_slot");
+
  done_testing();
author	Amit Kapila <[email protected]>
	Wed, 3 Apr 2024 08:34:59 +0000 (14:04 +0530)
committer	Amit Kapila <[email protected]>
	Wed, 3 Apr 2024 08:34:59 +0000 (14:04 +0530)
src/backend/replication/logical/logical.c		patch \| blob \| blame \| history
src/backend/replication/logical/slotsync.c		patch \| blob \| blame \| history
src/backend/replication/logical/snapbuild.c		patch \| blob \| blame \| history
src/backend/replication/slotfuncs.c		patch \| blob \| blame \| history
src/include/replication/logical.h		patch \| blob \| blame \| history
src/include/replication/snapbuild.h		patch \| blob \| blame \| history
src/test/recovery/t/040_standby_failover_slots_sync.pl		patch \| blob \| blame \| history