-rwxr-xr-x  configure                                       |  17
-rw-r--r--  configure.ac                                    |   7
-rw-r--r--  meson.build                                     |  14
-rw-r--r--  src/backend/access/transam/clog.c               |  67
-rw-r--r--  src/backend/access/transam/commit_ts.c          |  66
-rw-r--r--  src/backend/access/transam/multixact.c          | 120
-rw-r--r--  src/backend/access/transam/slru.c               |  25
-rw-r--r--  src/backend/access/transam/subtrans.c           |  34
-rw-r--r--  src/backend/access/transam/xloginsert.c         |  12
-rw-r--r--  src/backend/optimizer/path/costsize.c           |  54
-rw-r--r--  src/backend/optimizer/plan/createplan.c         |  66
-rw-r--r--  src/backend/optimizer/util/pathnode.c           |  73
-rw-r--r--  src/backend/storage/aio/method_io_uring.c       | 210
-rw-r--r--  src/bin/pg_walsummary/t/002_blocks.pl           |   9
-rw-r--r--  src/include/access/slru.h                       |   1
-rw-r--r--  src/include/access/xloginsert.h                 |   1
-rw-r--r--  src/include/optimizer/cost.h                    |   2
-rw-r--r--  src/include/pg_config.h.in                      |   3
-rw-r--r--  src/pl/plpgsql/src/pl_exec.c                    |   6
-rw-r--r--  src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm  |  26
-rw-r--r--  src/test/regress/expected/incremental_sort.out  |  40
-rw-r--r--  src/test/regress/expected/inherit.out           |  10
-rw-r--r--  src/test/regress/sql/incremental_sort.sql       |  24
-rw-r--r--  src/tools/pgindent/typedefs.list                |   1
24 files changed, 566 insertions(+), 322 deletions(-)
diff --git a/configure b/configure
index 16ef5b58d1a..cfaf3757dd7 100755
--- a/configure
+++ b/configure
@@ -13309,6 +13309,23 @@ fi
fi
+if test "$with_liburing" = yes; then
+ _LIBS="$LIBS"
+ LIBS="$LIBURING_LIBS $LIBS"
+ for ac_func in io_uring_queue_init_mem
+do :
+ ac_fn_c_check_func "$LINENO" "io_uring_queue_init_mem" "ac_cv_func_io_uring_queue_init_mem"
+if test "x$ac_cv_func_io_uring_queue_init_mem" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_IO_URING_QUEUE_INIT_MEM 1
+_ACEOF
+
+fi
+done
+
+ LIBS="$_LIBS"
+fi
+
if test "$with_lz4" = yes ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LZ4_compress_default in -llz4" >&5
$as_echo_n "checking for LZ4_compress_default in -llz4... " >&6; }
diff --git a/configure.ac b/configure.ac
index b3efc49c97a..c2877e36935 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1420,6 +1420,13 @@ if test "$with_libxslt" = yes ; then
AC_CHECK_LIB(xslt, xsltCleanupGlobals, [], [AC_MSG_ERROR([library 'xslt' is required for XSLT support])])
fi
+if test "$with_liburing" = yes; then
+ _LIBS="$LIBS"
+ LIBS="$LIBURING_LIBS $LIBS"
+ AC_CHECK_FUNCS([io_uring_queue_init_mem])
+ LIBS="$_LIBS"
+fi
+
if test "$with_lz4" = yes ; then
AC_CHECK_LIB(lz4, LZ4_compress_default, [], [AC_MSG_ERROR([library 'lz4' is required for LZ4 support])])
fi
diff --git a/meson.build b/meson.build
index a97854a947d..5365aaf95e6 100644
--- a/meson.build
+++ b/meson.build
@@ -948,10 +948,10 @@ if not libcurlopt.disabled()
# libcurl and one of either epoll or kqueue.
oauth_flow_supported = (
libcurl.found()
- and (cc.check_header('sys/event.h', required: false,
- args: test_c_args, include_directories: postgres_inc)
- or cc.check_header('sys/epoll.h', required: false,
- args: test_c_args, include_directories: postgres_inc))
+ and (cc.has_header('sys/event.h',
+ args: test_c_args, include_directories: postgres_inc)
+ or cc.has_header('sys/epoll.h',
+ args: test_c_args, include_directories: postgres_inc))
)
if oauth_flow_supported
@@ -995,6 +995,12 @@ liburingopt = get_option('liburing')
liburing = dependency('liburing', required: liburingopt)
if liburing.found()
cdata.set('USE_LIBURING', 1)
+
+ if cc.has_function('io_uring_queue_init_mem',
+ dependencies: liburing, args: test_c_args)
+ cdata.set('HAVE_LIBURING_QUEUE_INIT_MEM', 1)
+ endif
+
endif
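
Both build systems probe for io_uring_queue_init_mem() while temporarily linking against liburing. The autoconf check records its result as HAVE_IO_URING_QUEUE_INIT_MEM in pg_config.h (see the pg_config.h.in hunk below), while the meson check sets HAVE_LIBURING_QUEUE_INIT_MEM, which together with liburing's IORING_SETUP_NO_MMAP define gates the new code path. A minimal sketch of that compile-time gate, condensed from the method_io_uring.c hunks later in this patch:

#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
	/* probe the kernel and, if that works, place ring memory in shmem */
#else
	/* fall back to io_uring_queue_init(), one kernel mapping per ring */
#endif
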
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 48f10bec91e..e80fbe109cf 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -110,9 +110,7 @@ static SlruCtlData XactCtlData;
#define XactCtl (&XactCtlData)
-static int ZeroCLOGPage(int64 pageno, bool writeXlog);
static bool CLOGPagePrecedes(int64 page1, int64 page2);
-static void WriteZeroPageXlogRec(int64 pageno);
static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact,
Oid oldestXactDb);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
@@ -832,41 +830,8 @@ check_transaction_buffers(int *newval, void **extra, GucSource source)
void
BootStrapCLOG(void)
{
- int slotno;
- LWLock *lock = SimpleLruGetBankLock(XactCtl, 0);
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the commit log */
- slotno = ZeroCLOGPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(XactCtl, slotno);
- Assert(!XactCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of CLOG to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCLOGPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(XactCtl, pageno);
-
- if (writeXlog)
- WriteZeroPageXlogRec(pageno);
-
- return slotno;
+ /* Zero the initial page and flush it to disk */
+ SimpleLruZeroAndWritePage(XactCtl, 0);
}
/*
@@ -974,8 +939,9 @@ ExtendCLOG(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroCLOGPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(XactCtl, pageno);
+ XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);
LWLockRelease(lock);
}
@@ -1068,17 +1034,6 @@ CLOGPagePrecedes(int64 page1, int64 page2)
/*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
-}
-
-/*
* Write a TRUNCATE xlog record
*
* We must flush the xlog record to disk before returning --- see notes
@@ -1114,19 +1069,9 @@ clog_redo(XLogReaderState *record)
if (info == CLOG_ZEROPAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(XactCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroCLOGPage(pageno, false);
- SimpleLruWritePage(XactCtl, slotno);
- Assert(!XactCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(XactCtl, pageno);
}
else if (info == CLOG_TRUNCATE)
{
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 225ff7ca9f2..370b38e048b 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -114,11 +114,9 @@ static void SetXidCommitTsInPage(TransactionId xid, int nsubxids,
static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
RepOriginId nodeid, int slotno);
static void error_commit_ts_disabled(void);
-static int ZeroCommitTsPage(int64 pageno, bool writeXlog);
static bool CommitTsPagePrecedes(int64 page1, int64 page2);
static void ActivateCommitTs(void);
static void DeactivateCommitTs(void);
-static void WriteZeroPageXlogRec(int64 pageno);
static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid);
/*
@@ -603,28 +601,6 @@ BootStrapCommitTs(void)
}
/*
- * Initialize (or reinitialize) a page of CommitTs to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCommitTsPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(CommitTsCtl, pageno);
-
- if (writeXlog)
- WriteZeroPageXlogRec(pageno);
-
- return slotno;
-}
-
-/*
* This must be called ONCE during postmaster or standalone-backend startup,
* after StartupXLOG has initialized TransamVariables->nextXid.
*/
@@ -754,16 +730,7 @@ ActivateCommitTs(void)
/* Create the current segment file, if necessary */
if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno))
- {
- LWLock *lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
- int slotno;
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
- slotno = ZeroCommitTsPage(pageno, false);
- SimpleLruWritePage(CommitTsCtl, slotno);
- Assert(!CommitTsCtl->shared->page_dirty[slotno]);
- LWLockRelease(lock);
- }
+ SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
/* Change the activation status in shared memory. */
LWLockAcquire(CommitTsLock, LW_EXCLUSIVE);
@@ -874,8 +841,12 @@ ExtendCommitTs(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroCommitTsPage(pageno, !InRecovery);
+ /* Zero the page ... */
+ SimpleLruZeroPage(CommitTsCtl, pageno);
+
+ /* and make a WAL entry about that, unless we're in REDO */
+ if (!InRecovery)
+ XLogSimpleInsertInt64(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE, pageno);
LWLockRelease(lock);
}
@@ -990,17 +961,6 @@ CommitTsPagePrecedes(int64 page1, int64 page2)
/*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE);
-}
-
-/*
* Write a TRUNCATE xlog record
*/
static void
@@ -1030,19 +990,9 @@ commit_ts_redo(XLogReaderState *record)
if (info == COMMIT_TS_ZEROPAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroCommitTsPage(pageno, false);
- SimpleLruWritePage(CommitTsCtl, slotno);
- Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
}
else if (info == COMMIT_TS_TRUNCATE)
{
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 7a7afe3edc6..3cb09c3d598 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -401,8 +401,6 @@ static void mXactCachePut(MultiXactId multi, int nmembers,
static char *mxstatus_to_string(MultiXactStatus status);
/* management of SLRU infrastructure */
-static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog);
-static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog);
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
@@ -413,7 +411,6 @@ static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
MultiXactOffset start, uint32 distance);
static bool SetOffsetVacuumLimit(bool is_startup);
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
-static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
MultiXactId startTruncOff,
MultiXactId endTruncOff,
@@ -2033,70 +2030,9 @@ check_multixact_member_buffers(int *newval, void **extra, GucSource source)
void
BootStrapMultiXact(void)
{
- int slotno;
- LWLock *lock;
-
- lock = SimpleLruGetBankLock(MultiXactOffsetCtl, 0);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the offsets log */
- slotno = ZeroMultiXactOffsetPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(MultiXactOffsetCtl, slotno);
- Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-
- lock = SimpleLruGetBankLock(MultiXactMemberCtl, 0);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the members log */
- slotno = ZeroMultiXactMemberPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(MultiXactMemberCtl, slotno);
- Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
-
- if (writeXlog)
- WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
-
- return slotno;
-}
-
-/*
- * Ditto, for MultiXactMember
- */
-static int
-ZeroMultiXactMemberPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
-
- if (writeXlog)
- WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
-
- return slotno;
+ /* Zero the initial pages and flush them to disk */
+ SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
+ SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
}
/*
@@ -2134,7 +2070,7 @@ MaybeExtendOffsetSlru(void)
* with creating a new segment file even if the page we're writing is
* not the first in it, so this is enough.
*/
- slotno = ZeroMultiXactOffsetPage(pageno, false);
+ slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
SimpleLruWritePage(MultiXactOffsetCtl, slotno);
}
@@ -2568,8 +2504,10 @@ ExtendMultiXactOffset(MultiXactId multi)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroMultiXactOffsetPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+ XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
+ pageno);
LWLockRelease(lock);
}
@@ -2611,8 +2549,10 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroMultiXactMemberPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(MultiXactMemberCtl, pageno);
+ XLogSimpleInsertInt64(RM_MULTIXACT_ID,
+ XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
LWLockRelease(lock);
}
@@ -3348,18 +3288,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
}
/*
- * Write an xlog record reflecting the zeroing of either a MEMBERs or
- * OFFSETs page (info shows which)
- */
-static void
-WriteMZeroPageXlogRec(int64 pageno, uint8 info)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_MULTIXACT_ID, info);
-}
-
-/*
* Write a TRUNCATE xlog record
*
* We must flush the xlog record to disk before returning --- see notes in
@@ -3401,36 +3329,16 @@ multixact_redo(XLogReaderState *record)
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroMultiXactOffsetPage(pageno, false);
- SimpleLruWritePage(MultiXactOffsetCtl, slotno);
- Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
}
else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroMultiXactMemberPage(pageno, false);
- SimpleLruWritePage(MultiXactMemberCtl, slotno);
- Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
}
else if (info == XLOG_MULTIXACT_CREATE_ID)
{
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index fe56286d9a9..10ec259f382 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -434,6 +434,31 @@ SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
}
/*
+ * This is a convenience wrapper for the common case of zeroing a page and
+ * immediately flushing it to disk.
+ *
+ * Control lock is acquired and released here.
+ */
+void
+SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
+{
+ int slotno;
+ LWLock *lock;
+
+ lock = SimpleLruGetBankLock(ctl, pageno);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
+
+ /* Create and zero the page */
+ slotno = SimpleLruZeroPage(ctl, pageno);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(ctl, slotno);
+ Assert(!ctl->shared->page_dirty[slotno]);
+
+ LWLockRelease(lock);
+}
+
+/*
* Wait for any active I/O on a page slot to finish. (This does not
* guarantee that new I/O hasn't been started before we return, though.
* In fact the slot might not even contain the same page anymore.)
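
To illustrate the two entry points' locking contracts with calls taken from the clog.c hunks above: SimpleLruZeroPage() still expects the caller to hold the bank lock (and to WAL-log the zeroing where needed), whereas the new SimpleLruZeroAndWritePage() acquires and releases the lock itself.

	/* caller-managed lock, zeroing WAL-logged separately (ExtendCLOG) */
	LWLockAcquire(lock, LW_EXCLUSIVE);
	SimpleLruZeroPage(XactCtl, pageno);
	XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);
	LWLockRelease(lock);

	/* self-contained zero-and-flush, used at bootstrap and during redo */
	SimpleLruZeroAndWritePage(XactCtl, pageno);
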
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 15153618fad..09aace9e09f 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -74,7 +74,6 @@ static SlruCtlData SubTransCtlData;
#define SubTransCtl (&SubTransCtlData)
-static int ZeroSUBTRANSPage(int64 pageno);
static bool SubTransPagePrecedes(int64 page1, int64 page2);
@@ -269,33 +268,8 @@ check_subtrans_buffers(int *newval, void **extra, GucSource source)
void
BootStrapSUBTRANS(void)
{
- int slotno;
- LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0);
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the subtrans log */
- slotno = ZeroSUBTRANSPage(0);
-
- /* Make sure it's written out */
- SimpleLruWritePage(SubTransCtl, slotno);
- Assert(!SubTransCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroSUBTRANSPage(int64 pageno)
-{
- return SimpleLruZeroPage(SubTransCtl, pageno);
+ /* Zero the initial page and flush it to disk */
+ SimpleLruZeroAndWritePage(SubTransCtl, 0);
}
/*
@@ -335,7 +309,7 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
prevlock = lock;
}
- (void) ZeroSUBTRANSPage(startPage);
+ (void) SimpleLruZeroPage(SubTransCtl, startPage);
if (startPage == endPage)
break;
@@ -395,7 +369,7 @@ ExtendSUBTRANS(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
/* Zero the page */
- ZeroSUBTRANSPage(pageno);
+ SimpleLruZeroPage(SubTransCtl, pageno);
LWLockRelease(lock);
}
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 5ee9d0b028e..c7571429e8e 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -530,6 +530,18 @@ XLogInsert(RmgrId rmid, uint8 info)
}
/*
+ * Simple wrapper around XLogInsert() for inserting a WAL record with
+ * elementary contents (currently only an int64 value is supported).
+ */
+XLogRecPtr
+XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
+{
+ XLogBeginInsert();
+ XLogRegisterData(&value, sizeof(value));
+ return XLogInsert(rmid, info);
+}
+
+/*
* Assemble a WAL record from the registered data and buffers into an
* XLogRecData chain, ready for insertion with XLogInsertRecord().
*
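
XLogSimpleInsertInt64() only registers the int64 payload and inserts the record; the matching redo routines recover the value with a plain memcpy(), as the clog, commit_ts and multixact redo hunks in this patch do. Condensed from the clog.c hunks, the two sides pair up like this:

	/* insert side (ExtendCLOG): WAL-log the number of the zeroed page */
	XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);

	/* redo side (clog_redo): read the payload back and replay the zeroing */
	memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
	SimpleLruZeroAndWritePage(XactCtl, pageno);
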
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 3d44815ed5a..1f04a2c182c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath)
foreach(l, apath->subpaths)
{
Path *subpath = (Path *) lfirst(l);
- Path sort_path; /* dummy for result of cost_sort */
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
/*
* We'll need to insert a Sort node, so include costs for
- * that. We can use the parent's LIMIT if any, since we
+ * that. We choose to use incremental sort if it is
+ * enabled and there are presorted keys; otherwise we use
+ * full sort.
+ *
+ * We can use the parent's LIMIT if any, since we
* certainly won't pull more than that many tuples from
* any child.
*/
- cost_sort(&sort_path,
- NULL, /* doesn't currently need root */
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- apath->limit_tuples);
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+
subpath = &sort_path;
}
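
For context, pathkeys_count_contained_in() returns true when the subpath already provides all of the required pathkeys; otherwise it reports in *presorted_keys how many leading pathkeys are already provided, and that count drives the new choice between incremental and full sort here and in the createplan.c and pathnode.c hunks below. A worked example, stated as a comment:

	/*
	 * Required pathkeys (a, b), subpath sorted on (a) only:
	 * pathkeys_count_contained_in() returns false and sets presorted_keys
	 * to 1, so with enable_incremental_sort the child is costed with
	 * cost_incremental_sort() (only b needs sorting, the existing order on
	 * a is consumed); otherwise cost_sort() prices a full sort on (a, b).
	 */
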
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 0b61aef962c..8a9f1d7a943 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/*
* Compute sort column info, and adjust subplan's tlist as needed.
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and
+ * there are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
}
@@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/* Build the child plan */
/* Must insist that all children return the same tlist */
@@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and there
+ * are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
subplans = lappend(subplans, subplan);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e0192d4a491..9cc602788ea 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root,
pathnode->path.total_cost = child->total_cost;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root,
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
/* All child paths should be unparameterized */
Assert(bms_is_empty(PATH_REQ_OUTER(subpath)));
@@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root,
pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
subpath->parallel_safe;
- if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- /* Subpath is adequately ordered, we won't need to sort it */
- input_disabled_nodes += subpath->disabled_nodes;
- input_startup_cost += subpath->startup_cost;
- input_total_cost += subpath->total_cost;
- }
- else
- {
- /* We'll need to insert a Sort node, so include cost for that */
- Path sort_path; /* dummy for result of cost_sort */
+ /*
+ * We'll need to insert a Sort node, so include costs for that. We
+ * choose to use incremental sort if it is enabled and there are
+ * presorted keys; otherwise we use full sort.
+ *
+ * We can use the parent's LIMIT if any, since we certainly won't
+ * pull more than that many tuples from any child.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
- cost_sort(&sort_path,
- root,
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- pathnode->limit_tuples);
- input_disabled_nodes += sort_path.disabled_nodes;
- input_startup_cost += sort_path.startup_cost;
- input_total_cost += sort_path.total_cost;
+ subpath = &sort_path;
}
+
+ input_disabled_nodes += subpath->disabled_nodes;
+ input_startup_cost += subpath->startup_cost;
+ input_total_cost += subpath->total_cost;
}
/*
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index b78048328e1..0a8c054162f 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -29,6 +29,9 @@
#ifdef IOMETHOD_IO_URING_ENABLED
+#include <sys/mman.h>
+#include <unistd.h>
+
#include <liburing.h>
#include "miscadmin.h"
@@ -94,12 +97,32 @@ PgAioUringContext
struct io_uring io_uring_ring;
} PgAioUringContext;
+/*
+ * Information about the capabilities that io_uring has.
+ *
+ * Depending on the liburing and kernel versions, different features are
+ * supported. At least for the kernel, a version check does not suffice, as
+ * various vendors backport features to older kernels :(.
+ */
+typedef struct PgAioUringCaps
+{
+ bool checked;
+ /* -1 if io_uring_queue_init_mem() is unsupported */
+ int mem_init_size;
+} PgAioUringCaps;
+
+
/* PgAioUringContexts for all backends */
static PgAioUringContext *pgaio_uring_contexts;
/* the current backend's context */
static PgAioUringContext *pgaio_my_uring_context;
+static PgAioUringCaps pgaio_uring_caps =
+{
+ .checked = false,
+ .mem_init_size = -1,
+};
static uint32
pgaio_uring_procs(void)
@@ -111,16 +134,145 @@ pgaio_uring_procs(void)
return MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
}
-static Size
+/*
+ * Initializes pgaio_uring_caps, unless that's already done.
+ */
+static void
+pgaio_uring_check_capabilities(void)
+{
+ if (pgaio_uring_caps.checked)
+ return;
+
+ /*
+ * By default io_uring creates a shared memory mapping for each io_uring
+ * instance, leading to a large number of memory mappings. Unfortunately a
+ * large number of memory mappings slows things down; backend exit is
+ * particularly affected. To address that, newer kernels (6.5) support
+ * using user-provided memory for the ring; by putting the relevant
+ * memory into shared memory we don't need any additional mappings.
+ *
+ * To know whether this is supported, we unfortunately need to probe the
+ * kernel by trying to create a ring with userspace-provided memory. This
+ * also has a secondary benefit: We can determine precisely how much
+ * memory we need for each io_uring instance.
+ */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+ {
+ struct io_uring test_ring;
+ size_t ring_size;
+ void *ring_ptr;
+ struct io_uring_params p = {0};
+ int ret;
+
+ /*
+ * Liburing does not yet provide an API to query how much memory a
+ * ring will need. So we over-estimate it here. As the memory is freed
+ * just below, that's a small temporary waste of memory.
+ *
+ * 1MB is more than enough for rings within io_max_concurrency's
+ * range.
+ */
+ ring_size = 1024 * 1024;
+
+ /*
+ * Hard to believe a system exists where 1MB would not be a multiple
+ * of the page size. But it's cheap to ensure...
+ */
+ ring_size -= ring_size % sysconf(_SC_PAGESIZE);
+
+ ring_ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (ring_ptr == MAP_FAILED)
+ elog(ERROR,
+ "mmap(%zu) to determine io_uring_queue_init_mem() support failed: %m",
+ ring_size);
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &test_ring, &p, ring_ptr, ring_size);
+ if (ret > 0)
+ {
+ pgaio_uring_caps.mem_init_size = ret;
+
+ elog(DEBUG1,
+ "can use combined memory mapping for io_uring, each ring needs %d bytes",
+ ret);
+
+ /* clean up the created ring, it was just for a test */
+ io_uring_queue_exit(&test_ring);
+ }
+ else
+ {
+ /*
+ * There are different reasons for ring creation to fail, but it's
+ * ok to treat that just as io_uring_queue_init_mem() not being
+ * supported. We'll report a more detailed error in
+ * pgaio_uring_shmem_init().
+ */
+ errno = -ret;
+ elog(DEBUG1,
+ "cannot use combined memory mapping for io_uring, ring creation failed: %m");
+
+ }
+
+ if (munmap(ring_ptr, ring_size) != 0)
+ elog(ERROR, "munmap() failed: %m");
+ }
+#else
+ {
+ elog(DEBUG1,
+ "can't use combined memory mapping for io_uring, kernel or liburing too old");
+ }
+#endif
+
+ pgaio_uring_caps.checked = true;
+}
+
+/*
+ * Memory for all PgAioUringContext instances
+ */
+static size_t
pgaio_uring_context_shmem_size(void)
{
return mul_size(pgaio_uring_procs(), sizeof(PgAioUringContext));
}
+/*
+ * Memory for the combined memory used by io_uring instances. Returns 0 if
+ * that is not supported by kernel/liburing.
+ */
+static size_t
+pgaio_uring_ring_shmem_size(void)
+{
+ size_t sz = 0;
+
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ /*
+ * Memory for the rings needs to be aligned to the page boundary, so
+ * reserve space for that. Luckily it does not need to be aligned to hugepage
+ * boundaries, even if huge pages are used.
+ */
+ sz = add_size(sz, sysconf(_SC_PAGESIZE));
+ sz = add_size(sz, mul_size(pgaio_uring_procs(),
+ pgaio_uring_caps.mem_init_size));
+ }
+
+ return sz;
+}
+
static size_t
pgaio_uring_shmem_size(void)
{
- return pgaio_uring_context_shmem_size();
+ size_t sz;
+
+ /*
+ * Kernel and liburing support for various features influences how much
+ * shmem we need; perform the necessary checks.
+ */
+ pgaio_uring_check_capabilities();
+
+ sz = pgaio_uring_context_shmem_size();
+ sz = add_size(sz, pgaio_uring_ring_shmem_size());
+
+ return sz;
}
static void
@@ -128,13 +280,38 @@ pgaio_uring_shmem_init(bool first_time)
{
int TotalProcs = pgaio_uring_procs();
bool found;
+ char *shmem;
+ size_t ring_mem_remain = 0;
+ char *ring_mem_next = 0;
- pgaio_uring_contexts = (PgAioUringContext *)
- ShmemInitStruct("AioUring", pgaio_uring_shmem_size(), &found);
-
+ /*
+ * We allocate memory for all PgAioUringContext instances and, if
+ * supported, the memory required for each of the io_uring instances, in
+ * one ShmemInitStruct().
+ */
+ shmem = ShmemInitStruct("AioUringContext", pgaio_uring_shmem_size(), &found);
if (found)
return;
+ pgaio_uring_contexts = (PgAioUringContext *) shmem;
+ shmem += pgaio_uring_context_shmem_size();
+
+ /* if supported, handle memory alignment / sizing for io_uring memory */
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ ring_mem_remain = pgaio_uring_ring_shmem_size();
+ ring_mem_next = (char *) shmem;
+
+ /* align to page boundary, see also pgaio_uring_ring_shmem_size() */
+ ring_mem_next = (char *) TYPEALIGN(sysconf(_SC_PAGESIZE), ring_mem_next);
+
+ /* account for alignment */
+ ring_mem_remain -= ring_mem_next - shmem;
+ shmem += ring_mem_next - shmem;
+
+ shmem += ring_mem_remain;
+ }
+
for (int contextno = 0; contextno < TotalProcs; contextno++)
{
PgAioUringContext *context = &pgaio_uring_contexts[contextno];
@@ -158,7 +335,28 @@ pgaio_uring_shmem_init(bool first_time)
* be worth using that - also need to evaluate if that causes
* noticeable additional contention?
*/
- ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+
+ /*
+ * If supported (cf. pgaio_uring_check_capabilities()), create the ring
+ * with its data in shared memory. Otherwise fall back to io_uring
+ * creating a memory mapping for each ring.
+ */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ struct io_uring_params p = {0};
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &context->io_uring_ring, &p, ring_mem_next, ring_mem_remain);
+
+ ring_mem_remain -= ret;
+ ring_mem_next += ret;
+ }
+ else
+#endif
+ {
+ ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+ }
+
if (ret < 0)
{
char *hint = NULL;
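
For orientation, the single "AioUringContext" allocation produced by the hunks above is laid out roughly as follows (illustrative sketch; each ring consumes the size reported by the io_uring_queue_init_mem() probe):

	/*
	 * [ PgAioUringContext array, one entry per backend/aux process ]
	 * [ padding up to the next page boundary                       ]
	 * [ ring 0 memory ][ ring 1 memory ] ... [ last ring's memory  ]
	 *
	 * Without io_uring_queue_init_mem() support only the context array is
	 * allocated here, and each ring keeps its own kernel-created mapping.
	 */
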
diff --git a/src/bin/pg_walsummary/t/002_blocks.pl b/src/bin/pg_walsummary/t/002_blocks.pl
index 270332780a4..0f98c7df82e 100644
--- a/src/bin/pg_walsummary/t/002_blocks.pl
+++ b/src/bin/pg_walsummary/t/002_blocks.pl
@@ -47,11 +47,12 @@ EOM
ok($result, "WAL summarization caught up after insert");
# The WAL summarizer should have generated some IO statistics.
-my $stats_reads = $node1->safe_psql(
+$node1->poll_query_until(
'postgres',
- qq{SELECT sum(reads) > 0 FROM pg_stat_io
- WHERE backend_type = 'walsummarizer' AND object = 'wal'});
-is($stats_reads, 't', "WAL summarizer generates statistics for WAL reads");
+ q{SELECT sum(reads) > 0 FROM pg_stat_io
+ WHERE backend_type = 'walsummarizer' AND object = 'wal'})
+ or die
+ "Timed out while waiting for WAL summarizer to generate statistics for WAL reads";
# Find the highest LSN that is summarized on disk.
my $summarized_lsn = $node1->safe_psql('postgres', <<EOM);
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index e142800aab2..20dbd1e0070 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -187,6 +187,7 @@ extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
int bank_tranche_id, SyncRequestHandler sync_handler,
bool long_segment_names);
extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno);
+extern void SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno);
extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok,
TransactionId xid);
extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno,
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index cf057f033a2..d6a71415d4f 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -44,6 +44,7 @@
extern void XLogBeginInsert(void);
extern void XLogSetRecordFlags(uint8 flags);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
+extern XLogRecPtr XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value);
extern void XLogEnsureRecordSpace(int max_block_id, int ndatas);
extern void XLogRegisterData(const void *data, uint32 len);
extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index d397fe27dc1..b523bcda8f3 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -118,7 +118,7 @@ extern void cost_incremental_sort(Path *path,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern void cost_append(AppendPath *apath, PlannerInfo *root);
extern void cost_merge_append(Path *path, PlannerInfo *root,
List *pathkeys, int n_streams,
int input_disabled_nodes,
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..c4dc5d72bdb 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -229,6 +229,9 @@
/* Define to 1 if you have the global variable 'int timezone'. */
#undef HAVE_INT_TIMEZONE
+/* Define to 1 if you have the `io_uring_queue_init_mem' function. */
+#undef HAVE_IO_URING_QUEUE_INIT_MEM
+
/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index bb99781c56e..b9acc790dc6 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -5703,7 +5703,7 @@ exec_eval_expr(PLpgSQL_execstate *estate,
/*
* Else do it the hard way via exec_run_select
*/
- rc = exec_run_select(estate, expr, 2, NULL);
+ rc = exec_run_select(estate, expr, 0, NULL);
if (rc != SPI_OK_SELECT)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -5757,6 +5757,10 @@ exec_eval_expr(PLpgSQL_execstate *estate,
/* ----------
* exec_run_select Execute a select query
+ *
+ * Note: passing maxtuples different from 0 ("return all tuples") is
+ * deprecated because it will prevent parallel execution of the query.
+ * However, we retain the parameter in case we need it someday.
* ----------
*/
static int
diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
index 1725fe2f948..7224c286e1d 100644
--- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
+++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
@@ -251,6 +251,32 @@ sub adjust_database_contents
'drop operator if exists public.=> (bigint, NONE)');
}
+ # Version 19 changed the output format of pg_lsn. To avoid output
+ # differences, set all pg_lsn columns to NULL if the old version is
+ # older than 19.
+ if ($old_version < 19)
+ {
+ if ($old_version >= '9.5')
+ {
+ _add_st($result, 'regression',
+ "update brintest set lsncol = NULL");
+ }
+
+ if ($old_version >= 12)
+ {
+ _add_st($result, 'regression',
+ "update tab_core_types set pg_lsn = NULL");
+ }
+
+ if ($old_version >= 14)
+ {
+ _add_st($result, 'regression',
+ "update brintest_multi set lsncol = NULL");
+ _add_st($result, 'regression',
+ "update brintest_bloom set lsncol = NULL");
+ }
+ }
+
return $result;
}
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index b00219643b9..5a1dd9fc022 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1722,3 +1722,43 @@ order by t1.four, t1.two limit 1;
-> Seq Scan on tenk1 t2
(12 rows)
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+ QUERY PLAN
+------------------------------------------------------------
+ Append
+ -> Incremental Sort
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ Presorted Key: prt_tbl_1.a
+ -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+ -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(6 rows)
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+ QUERY PLAN
+------------------------------------------------------------
+ Merge Append
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ -> Incremental Sort
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ Presorted Key: prt_tbl_1.a
+ -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+ -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(7 rows)
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 78dead65325..5b5055babdc 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1898,10 +1898,11 @@ ORDER BY thousand, tenthous;
Merge Append
Sort Key: tenk1.thousand, tenk1.tenthous
-> Index Only Scan using tenk1_thous_tenthous on tenk1
- -> Sort
+ -> Incremental Sort
Sort Key: tenk1_1.thousand, tenk1_1.thousand
+ Presorted Key: tenk1_1.thousand
-> Index Only Scan using tenk1_thous_tenthous on tenk1 tenk1_1
-(6 rows)
+(7 rows)
explain (costs off)
SELECT thousand, tenthous, thousand+tenthous AS x FROM tenk1
@@ -1982,10 +1983,11 @@ ORDER BY x, y;
Merge Append
Sort Key: a.thousand, a.tenthous
-> Index Only Scan using tenk1_thous_tenthous on tenk1 a
- -> Sort
+ -> Incremental Sort
Sort Key: b.unique2, b.unique2
+ Presorted Key: b.unique2
-> Index Only Scan using tenk1_unique2 on tenk1 b
-(6 rows)
+(7 rows)
-- exercise rescan code path via a repeatedly-evaluated subquery
explain (costs off)
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index f1f8fae5654..bbe658a7588 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -298,3 +298,27 @@ explain (costs off)
select * from
(select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
order by t1.four, t1.two limit 1;
+
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 114bdafafdf..83192038571 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2181,6 +2181,7 @@ PgAioReturn
PgAioTargetData
PgAioTargetID
PgAioTargetInfo
+PgAioUringCaps
PgAioUringContext
PgAioWaitRef
PgArchData