-rwxr-xr-x  configure                                       |  17
-rw-r--r--  configure.ac                                    |   7
-rw-r--r--  meson.build                                     |  14
-rw-r--r--  src/backend/access/transam/clog.c               |  67
-rw-r--r--  src/backend/access/transam/commit_ts.c          |  66
-rw-r--r--  src/backend/access/transam/multixact.c          | 120
-rw-r--r--  src/backend/access/transam/slru.c               |  25
-rw-r--r--  src/backend/access/transam/subtrans.c           |  34
-rw-r--r--  src/backend/access/transam/xloginsert.c         |  12
-rw-r--r--  src/backend/optimizer/path/costsize.c           |  54
-rw-r--r--  src/backend/optimizer/plan/createplan.c         |  66
-rw-r--r--  src/backend/optimizer/util/pathnode.c           |  73
-rw-r--r--  src/backend/storage/aio/method_io_uring.c       | 210
-rw-r--r--  src/bin/pg_walsummary/t/002_blocks.pl           |   9
-rw-r--r--  src/include/access/slru.h                       |   1
-rw-r--r--  src/include/access/xloginsert.h                 |   1
-rw-r--r--  src/include/optimizer/cost.h                    |   2
-rw-r--r--  src/include/pg_config.h.in                      |   3
-rw-r--r--  src/pl/plpgsql/src/pl_exec.c                    |   6
-rw-r--r--  src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm  |  26
-rw-r--r--  src/test/regress/expected/incremental_sort.out  |  40
-rw-r--r--  src/test/regress/expected/inherit.out           |  10
-rw-r--r--  src/test/regress/sql/incremental_sort.sql       |  24
-rw-r--r--  src/tools/pgindent/typedefs.list                |   1
24 files changed, 566 insertions(+), 322 deletions(-)
diff --git a/configure b/configure
index 16ef5b58d1a..cfaf3757dd7 100755
--- a/configure
+++ b/configure
@@ -13309,6 +13309,23 @@ fi
fi
+if test "$with_liburing" = yes; then
+ _LIBS="$LIBS"
+ LIBS="$LIBURING_LIBS $LIBS"
+ for ac_func in io_uring_queue_init_mem
+do :
+ ac_fn_c_check_func "$LINENO" "io_uring_queue_init_mem" "ac_cv_func_io_uring_queue_init_mem"
+if test "x$ac_cv_func_io_uring_queue_init_mem" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_IO_URING_QUEUE_INIT_MEM 1
+_ACEOF
+
+fi
+done
+
+ LIBS="$_LIBS"
+fi
+
if test "$with_lz4" = yes ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LZ4_compress_default in -llz4" >&5
$as_echo_n "checking for LZ4_compress_default in -llz4... " >&6; }
diff --git a/configure.ac b/configure.ac
index b3efc49c97a..c2877e36935 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1420,6 +1420,13 @@ if test "$with_libxslt" = yes ; then
AC_CHECK_LIB(xslt, xsltCleanupGlobals, [], [AC_MSG_ERROR([library 'xslt' is required for XSLT support])])
fi
+if test "$with_liburing" = yes; then
+ _LIBS="$LIBS"
+ LIBS="$LIBURING_LIBS $LIBS"
+ AC_CHECK_FUNCS([io_uring_queue_init_mem])
+ LIBS="$_LIBS"
+fi
+
if test "$with_lz4" = yes ; then
AC_CHECK_LIB(lz4, LZ4_compress_default, [], [AC_MSG_ERROR([library 'lz4' is required for LZ4 support])])
fi
diff --git a/meson.build b/meson.build
index a97854a947d..5365aaf95e6 100644
--- a/meson.build
+++ b/meson.build
@@ -948,10 +948,10 @@ if not libcurlopt.disabled()
# libcurl and one of either epoll or kqueue.
oauth_flow_supported = (
libcurl.found()
- and (cc.check_header('sys/event.h', required: false,
- args: test_c_args, include_directories: postgres_inc)
- or cc.check_header('sys/epoll.h', required: false,
- args: test_c_args, include_directories: postgres_inc))
+ and (cc.has_header('sys/event.h',
+ args: test_c_args, include_directories: postgres_inc)
+ or cc.has_header('sys/epoll.h',
+ args: test_c_args, include_directories: postgres_inc))
)
if oauth_flow_supported
@@ -995,6 +995,12 @@ liburingopt = get_option('liburing')
liburing = dependency('liburing', required: liburingopt)
if liburing.found()
cdata.set('USE_LIBURING', 1)
+
+ if cc.has_function('io_uring_queue_init_mem',
+ dependencies: liburing, args: test_c_args)
+ cdata.set('HAVE_LIBURING_QUEUE_INIT_MEM', 1)
+ endif
+
endif
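
Both build systems probe for io_uring_queue_init_mem() while temporarily linking against liburing. The autoconf check records its result as HAVE_IO_URING_QUEUE_INIT_MEM in pg_config.h (see the pg_config.h.in hunk below), while the meson check sets HAVE_LIBURING_QUEUE_INIT_MEM, which together with liburing's IORING_SETUP_NO_MMAP define gates the new code path. A minimal sketch of that compile-time gate, condensed from the method_io_uring.c hunks later in this patch:

#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
	/* probe the kernel and, if that works, place ring memory in shmem */
#else
	/* fall back to io_uring_queue_init(), one kernel mapping per ring */
#endif
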
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 48f10bec91e..e80fbe109cf 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -110,9 +110,7 @@ static SlruCtlData XactCtlData;
#define XactCtl (&XactCtlData)
-static int ZeroCLOGPage(int64 pageno, bool writeXlog);
static bool CLOGPagePrecedes(int64 page1, int64 page2);
-static void WriteZeroPageXlogRec(int64 pageno);
static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact,
Oid oldestXactDb);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
@@ -832,41 +830,8 @@ check_transaction_buffers(int *newval, void **extra, GucSource source)
void
BootStrapCLOG(void)
{
- int slotno;
- LWLock *lock = SimpleLruGetBankLock(XactCtl, 0);
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the commit log */
- slotno = ZeroCLOGPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(XactCtl, slotno);
- Assert(!XactCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of CLOG to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCLOGPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(XactCtl, pageno);
-
- if (writeXlog)
- WriteZeroPageXlogRec(pageno);
-
- return slotno;
+ /* Zero the initial page and flush it to disk */
+ SimpleLruZeroAndWritePage(XactCtl, 0);
}
/*
@@ -974,8 +939,9 @@ ExtendCLOG(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroCLOGPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(XactCtl, pageno);
+ XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);
LWLockRelease(lock);
}
@@ -1068,17 +1034,6 @@ CLOGPagePrecedes(int64 page1, int64 page2)
/*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
-}
-
-/*
* Write a TRUNCATE xlog record
*
* We must flush the xlog record to disk before returning --- see notes
@@ -1114,19 +1069,9 @@ clog_redo(XLogReaderState *record)
if (info == CLOG_ZEROPAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(XactCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroCLOGPage(pageno, false);
- SimpleLruWritePage(XactCtl, slotno);
- Assert(!XactCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(XactCtl, pageno);
}
else if (info == CLOG_TRUNCATE)
{
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 225ff7ca9f2..370b38e048b 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -114,11 +114,9 @@ static void SetXidCommitTsInPage(TransactionId xid, int nsubxids,
static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
RepOriginId nodeid, int slotno);
static void error_commit_ts_disabled(void);
-static int ZeroCommitTsPage(int64 pageno, bool writeXlog);
static bool CommitTsPagePrecedes(int64 page1, int64 page2);
static void ActivateCommitTs(void);
static void DeactivateCommitTs(void);
-static void WriteZeroPageXlogRec(int64 pageno);
static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid);
/*
@@ -603,28 +601,6 @@ BootStrapCommitTs(void)
}
/*
- * Initialize (or reinitialize) a page of CommitTs to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCommitTsPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(CommitTsCtl, pageno);
-
- if (writeXlog)
- WriteZeroPageXlogRec(pageno);
-
- return slotno;
-}
-
-/*
* This must be called ONCE during postmaster or standalone-backend startup,
* after StartupXLOG has initialized TransamVariables->nextXid.
*/
@@ -754,16 +730,7 @@ ActivateCommitTs(void)
/* Create the current segment file, if necessary */
if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno))
- {
- LWLock *lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
- int slotno;
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
- slotno = ZeroCommitTsPage(pageno, false);
- SimpleLruWritePage(CommitTsCtl, slotno);
- Assert(!CommitTsCtl->shared->page_dirty[slotno]);
- LWLockRelease(lock);
- }
+ SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
/* Change the activation status in shared memory. */
LWLockAcquire(CommitTsLock, LW_EXCLUSIVE);
@@ -874,8 +841,12 @@ ExtendCommitTs(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroCommitTsPage(pageno, !InRecovery);
+ /* Zero the page ... */
+ SimpleLruZeroPage(CommitTsCtl, pageno);
+
+ /* and make a WAL entry about that, unless we're in REDO */
+ if (!InRecovery)
+ XLogSimpleInsertInt64(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE, pageno);
LWLockRelease(lock);
}
@@ -990,17 +961,6 @@ CommitTsPagePrecedes(int64 page1, int64 page2)
/*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE);
-}
-
-/*
* Write a TRUNCATE xlog record
*/
static void
@@ -1030,19 +990,9 @@ commit_ts_redo(XLogReaderState *record)
if (info == COMMIT_TS_ZEROPAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroCommitTsPage(pageno, false);
- SimpleLruWritePage(CommitTsCtl, slotno);
- Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
}
else if (info == COMMIT_TS_TRUNCATE)
{
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 7a7afe3edc6..3cb09c3d598 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -401,8 +401,6 @@ static void mXactCachePut(MultiXactId multi, int nmembers,
static char *mxstatus_to_string(MultiXactStatus status);
/* management of SLRU infrastructure */
-static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog);
-static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog);
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
@@ -413,7 +411,6 @@ static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
MultiXactOffset start, uint32 distance);
static bool SetOffsetVacuumLimit(bool is_startup);
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
-static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
MultiXactId startTruncOff,
MultiXactId endTruncOff,
@@ -2033,70 +2030,9 @@ check_multixact_member_buffers(int *newval, void **extra, GucSource source)
void
BootStrapMultiXact(void)
{
- int slotno;
- LWLock *lock;
-
- lock = SimpleLruGetBankLock(MultiXactOffsetCtl, 0);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the offsets log */
- slotno = ZeroMultiXactOffsetPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(MultiXactOffsetCtl, slotno);
- Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-
- lock = SimpleLruGetBankLock(MultiXactMemberCtl, 0);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the members log */
- slotno = ZeroMultiXactMemberPage(0, false);
-
- /* Make sure it's written out */
- SimpleLruWritePage(MultiXactMemberCtl, slotno);
- Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
-
- if (writeXlog)
- WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
-
- return slotno;
-}
-
-/*
- * Ditto, for MultiXactMember
- */
-static int
-ZeroMultiXactMemberPage(int64 pageno, bool writeXlog)
-{
- int slotno;
-
- slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
-
- if (writeXlog)
- WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
-
- return slotno;
+ /* Zero the initial pages and flush them to disk */
+ SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
+ SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
}
/*
@@ -2134,7 +2070,7 @@ MaybeExtendOffsetSlru(void)
* with creating a new segment file even if the page we're writing is
* not the first in it, so this is enough.
*/
- slotno = ZeroMultiXactOffsetPage(pageno, false);
+ slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
SimpleLruWritePage(MultiXactOffsetCtl, slotno);
}
@@ -2568,8 +2504,10 @@ ExtendMultiXactOffset(MultiXactId multi)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroMultiXactOffsetPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+ XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
+ pageno);
LWLockRelease(lock);
}
@@ -2611,8 +2549,10 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
LWLockAcquire(lock, LW_EXCLUSIVE);
- /* Zero the page and make an XLOG entry about it */
- ZeroMultiXactMemberPage(pageno, true);
+ /* Zero the page and make a WAL entry about it */
+ SimpleLruZeroPage(MultiXactMemberCtl, pageno);
+ XLogSimpleInsertInt64(RM_MULTIXACT_ID,
+ XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
LWLockRelease(lock);
}
@@ -3348,18 +3288,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
}
/*
- * Write an xlog record reflecting the zeroing of either a MEMBERs or
- * OFFSETs page (info shows which)
- */
-static void
-WriteMZeroPageXlogRec(int64 pageno, uint8 info)
-{
- XLogBeginInsert();
- XLogRegisterData(&pageno, sizeof(pageno));
- (void) XLogInsert(RM_MULTIXACT_ID, info);
-}
-
-/*
* Write a TRUNCATE xlog record
*
* We must flush the xlog record to disk before returning --- see notes in
@@ -3401,36 +3329,16 @@ multixact_redo(XLogReaderState *record)
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroMultiXactOffsetPage(pageno, false);
- SimpleLruWritePage(MultiXactOffsetCtl, slotno);
- Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
}
else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
{
int64 pageno;
- int slotno;
- LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
- lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- slotno = ZeroMultiXactMemberPage(pageno, false);
- SimpleLruWritePage(MultiXactMemberCtl, slotno);
- Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
+ SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
}
else if (info == XLOG_MULTIXACT_CREATE_ID)
{
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index fe56286d9a9..10ec259f382 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -434,6 +434,31 @@ SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
}
/*
+ * This is a convenience wrapper for the common case of zeroing a page and
+ * immediately flushing it to disk.
+ *
+ * Control lock is acquired and released here.
+ */
+void
+SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
+{
+ int slotno;
+ LWLock *lock;
+
+ lock = SimpleLruGetBankLock(ctl, pageno);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
+
+ /* Create and zero the page */
+ slotno = SimpleLruZeroPage(ctl, pageno);
+
+ /* Make sure it's written out */
+ SimpleLruWritePage(ctl, slotno);
+ Assert(!ctl->shared->page_dirty[slotno]);
+
+ LWLockRelease(lock);
+}
+
+/*
* Wait for any active I/O on a page slot to finish. (This does not
* guarantee that new I/O hasn't been started before we return, though.
* In fact the slot might not even contain the same page anymore.)
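
To illustrate the two entry points' locking contracts with calls taken from the clog.c hunks above: SimpleLruZeroPage() still expects the caller to hold the bank lock (and to WAL-log the zeroing where needed), whereas the new SimpleLruZeroAndWritePage() acquires and releases the lock itself.

	/* caller-managed lock, zeroing WAL-logged separately (ExtendCLOG) */
	LWLockAcquire(lock, LW_EXCLUSIVE);
	SimpleLruZeroPage(XactCtl, pageno);
	XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);
	LWLockRelease(lock);

	/* self-contained zero-and-flush, used at bootstrap and during redo */
	SimpleLruZeroAndWritePage(XactCtl, pageno);
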
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 15153618fad..09aace9e09f 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -74,7 +74,6 @@ static SlruCtlData SubTransCtlData;
#define SubTransCtl (&SubTransCtlData)
-static int ZeroSUBTRANSPage(int64 pageno);
static bool SubTransPagePrecedes(int64 page1, int64 page2);
@@ -269,33 +268,8 @@ check_subtrans_buffers(int *newval, void **extra, GucSource source)
void
BootStrapSUBTRANS(void)
{
- int slotno;
- LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0);
-
- LWLockAcquire(lock, LW_EXCLUSIVE);
-
- /* Create and zero the first page of the subtrans log */
- slotno = ZeroSUBTRANSPage(0);
-
- /* Make sure it's written out */
- SimpleLruWritePage(SubTransCtl, slotno);
- Assert(!SubTransCtl->shared->page_dirty[slotno]);
-
- LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroSUBTRANSPage(int64 pageno)
-{
- return SimpleLruZeroPage(SubTransCtl, pageno);
+ /* Zero the initial page and flush it to disk */
+ SimpleLruZeroAndWritePage(SubTransCtl, 0);
}
/*
@@ -335,7 +309,7 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
prevlock = lock;
}
- (void) ZeroSUBTRANSPage(startPage);
+ (void) SimpleLruZeroPage(SubTransCtl, startPage);
if (startPage == endPage)
break;
@@ -395,7 +369,7 @@ ExtendSUBTRANS(TransactionId newestXact)
LWLockAcquire(lock, LW_EXCLUSIVE);
/* Zero the page */
- ZeroSUBTRANSPage(pageno);
+ SimpleLruZeroPage(SubTransCtl, pageno);
LWLockRelease(lock);
}
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 5ee9d0b028e..c7571429e8e 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -530,6 +530,18 @@ XLogInsert(RmgrId rmid, uint8 info)
}
/*
+ * Simple wrapper around XLogInsert() for inserting a WAL record with
+ * elementary contents (currently only an int64 value is supported).
+ */
+XLogRecPtr
+XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
+{
+ XLogBeginInsert();
+ XLogRegisterData(&value, sizeof(value));
+ return XLogInsert(rmid, info);
+}
+
+/*
* Assemble a WAL record from the registered data and buffers into an
* XLogRecData chain, ready for insertion with XLogInsertRecord().
*
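
XLogSimpleInsertInt64() only registers the int64 payload and inserts the record; the matching redo routines recover the value with a plain memcpy(), as the clog, commit_ts and multixact redo hunks in this patch do. Condensed from the clog.c hunks, the two sides pair up like this:

	/* insert side (ExtendCLOG): WAL-log the number of the zeroed page */
	XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);

	/* redo side (clog_redo): read the payload back and replay the zeroing */
	memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
	SimpleLruZeroAndWritePage(XactCtl, pageno);
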
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 3d44815ed5a..1f04a2c182c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath)
foreach(l, apath->subpaths)
{
Path *subpath = (Path *) lfirst(l);
- Path sort_path; /* dummy for result of cost_sort */
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
/*
* We'll need to insert a Sort node, so include costs for
- * that. We can use the parent's LIMIT if any, since we
+ * that. We choose to use incremental sort if it is
+ * enabled and there are presorted keys; otherwise we use
+ * full sort.
+ *
+ * We can use the parent's LIMIT if any, since we
* certainly won't pull more than that many tuples from
* any child.
*/
- cost_sort(&sort_path,
- NULL, /* doesn't currently need root */
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- apath->limit_tuples);
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+
subpath = &sort_path;
}
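
For context, pathkeys_count_contained_in() returns true when the subpath already provides all of the required pathkeys; otherwise it reports in *presorted_keys how many leading pathkeys are already provided, and that count drives the new choice between incremental and full sort here and in the createplan.c and pathnode.c hunks below. A worked example, stated as a comment:

	/*
	 * Required pathkeys (a, b), subpath sorted on (a) only:
	 * pathkeys_count_contained_in() returns false and sets presorted_keys
	 * to 1, so with enable_incremental_sort the child is costed with
	 * cost_incremental_sort() (only b needs sorting, the existing order on
	 * a is consumed); otherwise cost_sort() prices a full sort on (a, b).
	 */
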
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 0b61aef962c..8a9f1d7a943 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/*
* Compute sort column info, and adjust subplan's tlist as needed.
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and
+ * there are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
}
@@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/* Build the child plan */
/* Must insist that all children return the same tlist */
@@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and there
+ * are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
subplans = lappend(subplans, subplan);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e0192d4a491..9cc602788ea 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root,
pathnode->path.total_cost = child->total_cost;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root,
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
/* All child paths should be unparameterized */
Assert(bms_is_empty(PATH_REQ_OUTER(subpath)));
@@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root,
pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
subpath->parallel_safe;
- if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- /* Subpath is adequately ordered, we won't need to sort it */
- input_disabled_nodes += subpath->disabled_nodes;
- input_startup_cost += subpath->startup_cost;
- input_total_cost += subpath->total_cost;
- }
- else
- {
- /* We'll need to insert a Sort node, so include cost for that */
- Path sort_path; /* dummy for result of cost_sort */
+ /*
+ * We'll need to insert a Sort node, so include costs for that. We
+ * choose to use incremental sort if it is enabled and there are
+ * presorted keys; otherwise we use full sort.
+ *
+ * We can use the parent's LIMIT if any, since we certainly won't
+ * pull more than that many tuples from any child.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
- cost_sort(&sort_path,
- root,
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- pathnode->limit_tuples);
- input_disabled_nodes += sort_path.disabled_nodes;
- input_startup_cost += sort_path.startup_cost;
- input_total_cost += sort_path.total_cost;
+ subpath = &sort_path;
}
+
+ input_disabled_nodes += subpath->disabled_nodes;
+ input_startup_cost += subpath->startup_cost;
+ input_total_cost += subpath->total_cost;
}
/*
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index b78048328e1..0a8c054162f 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -29,6 +29,9 @@
#ifdef IOMETHOD_IO_URING_ENABLED
+#include <sys/mman.h>
+#include <unistd.h>
+
#include <liburing.h>
#include "miscadmin.h"
@@ -94,12 +97,32 @@ PgAioUringContext
struct io_uring io_uring_ring;
} PgAioUringContext;
+/*
+ * Information about the capabilities that io_uring has.
+ *
+ * Depending on the liburing and kernel versions, different features are
+ * supported. At least for the kernel, a version check does not suffice, as
+ * various vendors backport features to older kernels :(.
+ */
+typedef struct PgAioUringCaps
+{
+ bool checked;
+ /* -1 if io_uring_queue_init_mem() is unsupported */
+ int mem_init_size;
+} PgAioUringCaps;
+
+
/* PgAioUringContexts for all backends */
static PgAioUringContext *pgaio_uring_contexts;
/* the current backend's context */
static PgAioUringContext *pgaio_my_uring_context;
+static PgAioUringCaps pgaio_uring_caps =
+{
+ .checked = false,
+ .mem_init_size = -1,
+};
static uint32
pgaio_uring_procs(void)
@@ -111,16 +134,145 @@ pgaio_uring_procs(void)
return MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
}
-static Size
+/*
+ * Initializes pgaio_uring_caps, unless that's already done.
+ */
+static void
+pgaio_uring_check_capabilities(void)
+{
+ if (pgaio_uring_caps.checked)
+ return;
+
+ /*
+ * By default io_uring creates a shared memory mapping for each io_uring
+ * instance, leading to a large number of memory mappings. Unfortunately a
+ * large number of memory mappings slows things down; backend exit is
+ * particularly affected. To address that, newer kernels (6.5) support
+ * using user-provided memory for the ring; by putting the relevant
+ * memory into shared memory we don't need any additional mappings.
+ *
+ * To know whether this is supported, we unfortunately need to probe the
+ * kernel by trying to create a ring with userspace-provided memory. This
+ * also has a secondary benefit: We can determine precisely how much
+ * memory we need for each io_uring instance.
+ */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+ {
+ struct io_uring test_ring;
+ size_t ring_size;
+ void *ring_ptr;
+ struct io_uring_params p = {0};
+ int ret;
+
+ /*
+ * Liburing does not yet provide an API to query how much memory a
+ * ring will need. So we over-estimate it here. As the memory is freed
+ * just below, that's a small temporary waste of memory.
+ *
+ * 1MB is more than enough for rings within io_max_concurrency's
+ * range.
+ */
+ ring_size = 1024 * 1024;
+
+ /*
+ * Hard to believe a system exists where 1MB would not be a multiple
+ * of the page size. But it's cheap to ensure...
+ */
+ ring_size -= ring_size % sysconf(_SC_PAGESIZE);
+
+ ring_ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (ring_ptr == MAP_FAILED)
+ elog(ERROR,
+ "mmap(%zu) to determine io_uring_queue_init_mem() support failed: %m",
+ ring_size);
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &test_ring, &p, ring_ptr, ring_size);
+ if (ret > 0)
+ {
+ pgaio_uring_caps.mem_init_size = ret;
+
+ elog(DEBUG1,
+ "can use combined memory mapping for io_uring, each ring needs %d bytes",
+ ret);
+
+ /* clean up the created ring, it was just for a test */
+ io_uring_queue_exit(&test_ring);
+ }
+ else
+ {
+ /*
+ * There are different reasons for ring creation to fail, but it's
+ * ok to treat that just as io_uring_queue_init_mem() not being
+ * supported. We'll report a more detailed error in
+ * pgaio_uring_shmem_init().
+ */
+ errno = -ret;
+ elog(DEBUG1,
+ "cannot use combined memory mapping for io_uring, ring creation failed: %m");
+
+ }
+
+ if (munmap(ring_ptr, ring_size) != 0)
+ elog(ERROR, "munmap() failed: %m");
+ }
+#else
+ {
+ elog(DEBUG1,
+ "can't use combined memory mapping for io_uring, kernel or liburing too old");
+ }
+#endif
+
+ pgaio_uring_caps.checked = true;
+}
+
+/*
+ * Memory for all PgAioUringContext instances
+ */
+static size_t
pgaio_uring_context_shmem_size(void)
{
return mul_size(pgaio_uring_procs(), sizeof(PgAioUringContext));
}
+/*
+ * Memory for the combined memory used by io_uring instances. Returns 0 if
+ * that is not supported by kernel/liburing.
+ */
+static size_t
+pgaio_uring_ring_shmem_size(void)
+{
+ size_t sz = 0;
+
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ /*
+ * Memory for the rings needs to be aligned to the page boundary, so
+ * reserve space for that. Luckily it does not need to be aligned to hugepage
+ * boundaries, even if huge pages are used.
+ */
+ sz = add_size(sz, sysconf(_SC_PAGESIZE));
+ sz = add_size(sz, mul_size(pgaio_uring_procs(),
+ pgaio_uring_caps.mem_init_size));
+ }
+
+ return sz;
+}
+
static size_t
pgaio_uring_shmem_size(void)
{
- return pgaio_uring_context_shmem_size();
+ size_t sz;
+
+ /*
+ * Kernel and liburing support for various features influences how much
+ * shmem we need; perform the necessary checks.
+ */
+ pgaio_uring_check_capabilities();
+
+ sz = pgaio_uring_context_shmem_size();
+ sz = add_size(sz, pgaio_uring_ring_shmem_size());
+
+ return sz;
}
static void
@@ -128,13 +280,38 @@ pgaio_uring_shmem_init(bool first_time)
{
int TotalProcs = pgaio_uring_procs();
bool found;
+ char *shmem;
+ size_t ring_mem_remain = 0;
+ char *ring_mem_next = 0;
- pgaio_uring_contexts = (PgAioUringContext *)
- ShmemInitStruct("AioUring", pgaio_uring_shmem_size(), &found);
-
+ /*
+ * We allocate memory for all PgAioUringContext instances and, if
+ * supported, the memory required for each of the io_uring instances, in
+ * one ShmemInitStruct().
+ */
+ shmem = ShmemInitStruct("AioUringContext", pgaio_uring_shmem_size(), &found);
if (found)
return;
+ pgaio_uring_contexts = (PgAioUringContext *) shmem;
+ shmem += pgaio_uring_context_shmem_size();
+
+ /* if supported, handle memory alignment / sizing for io_uring memory */
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ ring_mem_remain = pgaio_uring_ring_shmem_size();
+ ring_mem_next = (char *) shmem;
+
+ /* align to page boundary, see also pgaio_uring_ring_shmem_size() */
+ ring_mem_next = (char *) TYPEALIGN(sysconf(_SC_PAGESIZE), ring_mem_next);
+
+ /* account for alignment */
+ ring_mem_remain -= ring_mem_next - shmem;
+ shmem += ring_mem_next - shmem;
+
+ shmem += ring_mem_remain;
+ }
+
for (int contextno = 0; contextno < TotalProcs; contextno++)
{
PgAioUringContext *context = &pgaio_uring_contexts[contextno];
@@ -158,7 +335,28 @@ pgaio_uring_shmem_init(bool first_time)
* be worth using that - also need to evaluate if that causes
* noticeable additional contention?
*/
- ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+
+ /*
+ * If supported (cf. pgaio_uring_check_capabilities()), create the ring
+ * with its data in shared memory. Otherwise fall back to io_uring
+ * creating a memory mapping for each ring.
+ */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ struct io_uring_params p = {0};
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &context->io_uring_ring, &p, ring_mem_next, ring_mem_remain);
+
+ ring_mem_remain -= ret;
+ ring_mem_next += ret;
+ }
+ else
+#endif
+ {
+ ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+ }
+
if (ret < 0)
{
char *hint = NULL;
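
For orientation, the single "AioUringContext" allocation produced by the hunks above is laid out roughly as follows (illustrative sketch; each ring consumes the size reported by the io_uring_queue_init_mem() probe):

	/*
	 * [ PgAioUringContext array, one entry per backend/aux process ]
	 * [ padding up to the next page boundary                       ]
	 * [ ring 0 memory ][ ring 1 memory ] ... [ last ring's memory  ]
	 *
	 * Without io_uring_queue_init_mem() support only the context array is
	 * allocated here, and each ring keeps its own kernel-created mapping.
	 */
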
diff --git a/src/bin/pg_walsummary/t/002_blocks.pl b/src/bin/pg_walsummary/t/002_blocks.pl
index 270332780a4..0f98c7df82e 100644
--- a/src/bin/pg_walsummary/t/002_blocks.pl
+++ b/src/bin/pg_walsummary/t/002_blocks.pl
@@ -47,11 +47,12 @@ EOM
ok($result, "WAL summarization caught up after insert");
# The WAL summarizer should have generated some IO statistics.
-my $stats_reads = $node1->safe_psql(
+$node1->poll_query_until(
'postgres',
- qq{SELECT sum(reads) > 0 FROM pg_stat_io
- WHERE backend_type = 'walsummarizer' AND object = 'wal'});
-is($stats_reads, 't', "WAL summarizer generates statistics for WAL reads");
+ q{SELECT sum(reads) > 0 FROM pg_stat_io
+ WHERE backend_type = 'walsummarizer' AND object = 'wal'})
+ or die
+ "Timed out while waiting for WAL summarizer to generate statistics for WAL reads";
# Find the highest LSN that is summarized on disk.
my $summarized_lsn = $node1->safe_psql('postgres', <<EOM);
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index e142800aab2..20dbd1e0070 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -187,6 +187,7 @@ extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
int bank_tranche_id, SyncRequestHandler sync_handler,
bool long_segment_names);
extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno);
+extern void SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno);
extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok,
TransactionId xid);
extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno,
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index cf057f033a2..d6a71415d4f 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -44,6 +44,7 @@
extern void XLogBeginInsert(void);
extern void XLogSetRecordFlags(uint8 flags);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
+extern XLogRecPtr XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value);
extern void XLogEnsureRecordSpace(int max_block_id, int ndatas);
extern void XLogRegisterData(const void *data, uint32 len);
extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index d397fe27dc1..b523bcda8f3 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -118,7 +118,7 @@ extern void cost_incremental_sort(Path *path,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern void cost_append(AppendPath *apath, PlannerInfo *root);
extern void cost_merge_append(Path *path, PlannerInfo *root,
List *pathkeys, int n_streams,
int input_disabled_nodes,
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..c4dc5d72bdb 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -229,6 +229,9 @@
/* Define to 1 if you have the global variable 'int timezone'. */
#undef HAVE_INT_TIMEZONE
+/* Define to 1 if you have the `io_uring_queue_init_mem' function. */
+#undef HAVE_IO_URING_QUEUE_INIT_MEM
+
/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index bb99781c56e..b9acc790dc6 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -5703,7 +5703,7 @@ exec_eval_expr(PLpgSQL_execstate *estate,
/*
* Else do it the hard way via exec_run_select
*/
- rc = exec_run_select(estate, expr, 2, NULL);
+ rc = exec_run_select(estate, expr, 0, NULL);
if (rc != SPI_OK_SELECT)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -5757,6 +5757,10 @@ exec_eval_expr(PLpgSQL_execstate *estate,
/* ----------
* exec_run_select Execute a select query
+ *
+ * Note: passing maxtuples different from 0 ("return all tuples") is
+ * deprecated because it will prevent parallel execution of the query.
+ * However, we retain the parameter in case we need it someday.
* ----------
*/
static int
diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
index 1725fe2f948..7224c286e1d 100644
--- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
+++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
@@ -251,6 +251,32 @@ sub adjust_database_contents
'drop operator if exists public.=> (bigint, NONE)');
}
+ # Version 19 changed the output format of pg_lsn. To avoid output
+ # differences, set all pg_lsn columns to NULL if the old version is
+ # older than 19.
+ if ($old_version < 19)
+ {
+ if ($old_version >= '9.5')
+ {
+ _add_st($result, 'regression',
+ "update brintest set lsncol = NULL");
+ }
+
+ if ($old_version >= 12)
+ {
+ _add_st($result, 'regression',
+ "update tab_core_types set pg_lsn = NULL");
+ }
+
+ if ($old_version >= 14)
+ {
+ _add_st($result, 'regression',
+ "update brintest_multi set lsncol = NULL");
+ _add_st($result, 'regression',
+ "update brintest_bloom set lsncol = NULL");
+ }
+ }
+
return $result;
}
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index b00219643b9..5a1dd9fc022 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1722,3 +1722,43 @@ order by t1.four, t1.two limit 1;
-> Seq Scan on tenk1 t2
(12 rows)
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+ QUERY PLAN
+------------------------------------------------------------
+ Append
+ -> Incremental Sort
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ Presorted Key: prt_tbl_1.a
+ -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+ -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(6 rows)
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+ QUERY PLAN
+------------------------------------------------------------
+ Merge Append
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ -> Incremental Sort
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ Presorted Key: prt_tbl_1.a
+ -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+ -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(7 rows)
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 78dead65325..5b5055babdc 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1898,10 +1898,11 @@ ORDER BY thousand, tenthous;
Merge Append
Sort Key: tenk1.thousand, tenk1.tenthous
-> Index Only Scan using tenk1_thous_tenthous on tenk1
- -> Sort
+ -> Incremental Sort
Sort Key: tenk1_1.thousand, tenk1_1.thousand
+ Presorted Key: tenk1_1.thousand
-> Index Only Scan using tenk1_thous_tenthous on tenk1 tenk1_1
-(6 rows)
+(7 rows)
explain (costs off)
SELECT thousand, tenthous, thousand+tenthous AS x FROM tenk1
@@ -1982,10 +1983,11 @@ ORDER BY x, y;
Merge Append
Sort Key: a.thousand, a.tenthous
-> Index Only Scan using tenk1_thous_tenthous on tenk1 a
- -> Sort
+ -> Incremental Sort
Sort Key: b.unique2, b.unique2
+ Presorted Key: b.unique2
-> Index Only Scan using tenk1_unique2 on tenk1 b
-(6 rows)
+(7 rows)
-- exercise rescan code path via a repeatedly-evaluated subquery
explain (costs off)
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index f1f8fae5654..bbe658a7588 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -298,3 +298,27 @@ explain (costs off)
select * from
(select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
order by t1.four, t1.two limit 1;
+
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 114bdafafdf..83192038571 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2181,6 +2181,7 @@ PgAioReturn
PgAioTargetData
PgAioTargetID
PgAioTargetInfo
+PgAioUringCaps
PgAioUringContext
PgAioWaitRef
PgArchData