summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/path/costsize.c54
-rw-r--r--src/backend/optimizer/plan/createplan.c66
-rw-r--r--src/backend/optimizer/util/pathnode.c73
-rw-r--r--src/backend/storage/aio/method_io_uring.c210
-rw-r--r--src/backend/utils/adt/xml.c21
-rw-r--r--src/bin/pg_test_timing/pg_test_timing.c172
-rw-r--r--src/bin/pg_test_timing/t/001_basic.pl17
-rw-r--r--src/bin/pg_walsummary/t/002_blocks.pl9
-rw-r--r--src/include/optimizer/cost.h2
-rw-r--r--src/include/pg_config.h.in3
-rw-r--r--src/pl/plpgsql/src/pl_exec.c6
-rw-r--r--src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm26
-rw-r--r--src/test/regress/expected/incremental_sort.out40
-rw-r--r--src/test/regress/expected/inherit.out10
-rw-r--r--src/test/regress/sql/incremental_sort.sql24
-rw-r--r--src/tools/pgindent/typedefs.list1
16 files changed, 632 insertions, 102 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 3d44815ed5a..1f04a2c182c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath)
foreach(l, apath->subpaths)
{
Path *subpath = (Path *) lfirst(l);
- Path sort_path; /* dummy for result of cost_sort */
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
/*
* We'll need to insert a Sort node, so include costs for
- * that. We can use the parent's LIMIT if any, since we
+ * that. We choose to use incremental sort if it is
+ * enabled and there are presorted keys; otherwise we use
+ * full sort.
+ *
+ * We can use the parent's LIMIT if any, since we
* certainly won't pull more than that many tuples from
* any child.
*/
- cost_sort(&sort_path,
- NULL, /* doesn't currently need root */
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- apath->limit_tuples);
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ apath->limit_tuples);
+ }
+
subpath = &sort_path;
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 0b61aef962c..8a9f1d7a943 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/*
* Compute sort column info, and adjust subplan's tlist as needed.
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and
+ * there are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
}
@@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
Oid *sortOperators;
Oid *collations;
bool *nullsFirst;
+ int presorted_keys;
/* Build the child plan */
/* Must insist that all children return the same tlist */
@@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- Sort *sort = make_sort(subplan, numsortkeys,
+ Plan *sort_plan;
+
+ /*
+ * We choose to use incremental sort if it is enabled and there
+ * are presorted keys; otherwise we use full sort.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ sort_plan = (Plan *)
+ make_incrementalsort(subplan, numsortkeys, presorted_keys,
sortColIdx, sortOperators,
collations, nullsFirst);
- label_sort_with_costsize(root, sort, best_path->limit_tuples);
- subplan = (Plan *) sort;
+ label_incrementalsort_with_costsize(root,
+ (IncrementalSort *) sort_plan,
+ pathkeys,
+ best_path->limit_tuples);
+ }
+ else
+ {
+ sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+ sortColIdx, sortOperators,
+ collations, nullsFirst);
+
+ label_sort_with_costsize(root, (Sort *) sort_plan,
+ best_path->limit_tuples);
+ }
+
+ subplan = sort_plan;
}
subplans = lappend(subplans, subplan);
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e0192d4a491..9cc602788ea 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root,
pathnode->path.total_cost = child->total_cost;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
- cost_append(pathnode);
+ cost_append(pathnode, root);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root,
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ int presorted_keys;
+ Path sort_path; /* dummy for result of
+ * cost_sort/cost_incremental_sort */
/* All child paths should be unparameterized */
Assert(bms_is_empty(PATH_REQ_OUTER(subpath)));
@@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root,
pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
subpath->parallel_safe;
- if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+ &presorted_keys))
{
- /* Subpath is adequately ordered, we won't need to sort it */
- input_disabled_nodes += subpath->disabled_nodes;
- input_startup_cost += subpath->startup_cost;
- input_total_cost += subpath->total_cost;
- }
- else
- {
- /* We'll need to insert a Sort node, so include cost for that */
- Path sort_path; /* dummy for result of cost_sort */
+ /*
+ * We'll need to insert a Sort node, so include costs for that. We
+ * choose to use incremental sort if it is enabled and there are
+ * presorted keys; otherwise we use full sort.
+ *
+ * We can use the parent's LIMIT if any, since we certainly won't
+ * pull more than that many tuples from any child.
+ */
+ if (enable_incremental_sort && presorted_keys > 0)
+ {
+ cost_incremental_sort(&sort_path,
+ root,
+ pathkeys,
+ presorted_keys,
+ subpath->disabled_nodes,
+ subpath->startup_cost,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
+ else
+ {
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->disabled_nodes,
+ subpath->total_cost,
+ subpath->rows,
+ subpath->pathtarget->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ }
- cost_sort(&sort_path,
- root,
- pathkeys,
- subpath->disabled_nodes,
- subpath->total_cost,
- subpath->rows,
- subpath->pathtarget->width,
- 0.0,
- work_mem,
- pathnode->limit_tuples);
- input_disabled_nodes += sort_path.disabled_nodes;
- input_startup_cost += sort_path.startup_cost;
- input_total_cost += sort_path.total_cost;
+ subpath = &sort_path;
}
+
+ input_disabled_nodes += subpath->disabled_nodes;
+ input_startup_cost += subpath->startup_cost;
+ input_total_cost += subpath->total_cost;
}
/*
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index b78048328e1..0a8c054162f 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -29,6 +29,9 @@
#ifdef IOMETHOD_IO_URING_ENABLED
+#include <sys/mman.h>
+#include <unistd.h>
+
#include <liburing.h>
#include "miscadmin.h"
@@ -94,12 +97,32 @@ PgAioUringContext
struct io_uring io_uring_ring;
} PgAioUringContext;
+/*
+ * Information about the capabilities that io_uring has.
+ *
+ * Depending on the liburing and kernel versions, different features are
+ * supported. For the kernel, a version check alone does not suffice, as
+ * various vendors do backport features to older kernels :(.
+ */
+typedef struct PgAioUringCaps
+{
+ bool checked; /* set once pgaio_uring_check_capabilities() has run */
+ /* bytes needed per ring; -1 if io_uring_queue_init_mem() is unsupported */
+ int mem_init_size;
+} PgAioUringCaps;
+
+
/* PgAioUringContexts for all backends */
static PgAioUringContext *pgaio_uring_contexts;
/* the current backend's context */
static PgAioUringContext *pgaio_my_uring_context;
+static PgAioUringCaps pgaio_uring_caps =
+{
+ .checked = false,
+ .mem_init_size = -1,
+};
static uint32
pgaio_uring_procs(void)
@@ -111,16 +134,145 @@ pgaio_uring_procs(void)
return MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
}
-static Size
+/*
+ * Probes io_uring capabilities and caches them in pgaio_uring_caps (once).
+ */
+static void
+pgaio_uring_check_capabilities(void)
+{
+ if (pgaio_uring_caps.checked)
+ return;
+
+ /*
+ * By default io_uring creates a shared memory mapping for each io_uring
+ * instance. A large number of mappings slows things down, backend exit
+ * is particularly affected. Newer kernels (6.5) support user-provided
+ * ring memory; by placing it in shared memory we avoid extra mappings.
+ *
+ * To know whether this is supported we need to probe the kernel by
+ * creating a ring with userspace-provided memory. As a secondary benefit
+ * this tells us precisely how much memory each io_uring instance needs.
+ *
+ * pg_config.h.in provides HAVE_IO_URING_QUEUE_INIT_MEM for the function
+ * check; the HAVE_LIBURING_... spelling is accepted as well.
+ */
+#if (defined(HAVE_IO_URING_QUEUE_INIT_MEM) || defined(HAVE_LIBURING_QUEUE_INIT_MEM)) && defined(IORING_SETUP_NO_MMAP)
+ {
+ struct io_uring test_ring;
+ size_t ring_size;
+ void *ring_ptr;
+ struct io_uring_params p = {0};
+ int ret;
+
+ /*
+ * Liburing does not yet provide an API to query how much memory a
+ * ring will need. So we over-estimate it here. As the memory is freed
+ * just below, that's only a small temporary waste of memory.
+ *
+ * 1MB is more than enough for rings within io_max_concurrency's
+ * range.
+ */
+ ring_size = 1024 * 1024;
+
+ /*
+ * Hard to believe a system exists where 1MB would not be a multiple
+ * of the page size. But it's cheap to ensure...
+ */
+ ring_size -= ring_size % sysconf(_SC_PAGESIZE);
+
+ ring_ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (ring_ptr == MAP_FAILED)
+ elog(ERROR,
+ "mmap(%zu) to determine io_uring_queue_init_mem() support failed: %m",
+ ring_size);
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &test_ring, &p, ring_ptr, ring_size);
+ if (ret > 0)
+ {
+ pgaio_uring_caps.mem_init_size = ret;
+
+ elog(DEBUG1,
+ "can use combined memory mapping for io_uring, each ring needs %d bytes",
+ ret);
+
+ /* clean up the created ring, it was just for a test */
+ io_uring_queue_exit(&test_ring);
+ }
+ else
+ {
+ /*
+ * There are different reasons for ring creation to fail, but it's
+ * ok to treat that just as io_uring_queue_init_mem() not being
+ * supported. We'll report a more detailed error in
+ * pgaio_uring_shmem_init().
+ */
+ errno = -ret;
+ elog(DEBUG1,
+ "cannot use combined memory mapping for io_uring, ring creation failed: %m");
+
+ }
+
+ if (munmap(ring_ptr, ring_size) != 0)
+ elog(ERROR, "munmap() failed: %m");
+ }
+#else
+ {
+ elog(DEBUG1,
+ "can't use combined memory mapping for io_uring, kernel or liburing too old");
+ }
+#endif
+
+ pgaio_uring_caps.checked = true;
+}
+
+/*
+ * Memory for all PgAioUringContext instances
+ */
+static size_t
pgaio_uring_context_shmem_size(void)
{
return mul_size(pgaio_uring_procs(), sizeof(PgAioUringContext));
}
+/*
+ * Size of the shared memory that backs all io_uring rings. Returns 0 if
+ * creating rings in caller-provided memory is not supported.
+ */
+static size_t
+pgaio_uring_ring_shmem_size(void)
+{
+ size_t sz = 0;
+
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ /*
+ * Ring memory needs to start on a page boundary, so reserve one
+ * extra page for alignment. Luckily it does not need to be aligned
+ * to hugepage boundaries, even if huge pages are used.
+ */
+ sz = add_size(sz, sysconf(_SC_PAGESIZE));
+ sz = add_size(sz, mul_size(pgaio_uring_procs(),
+ pgaio_uring_caps.mem_init_size));
+ }
+
+ return sz;
+}
+
static size_t
pgaio_uring_shmem_size(void)
{
- return pgaio_uring_context_shmem_size();
+ size_t sz;
+
+ /*
+ * Kernel and liburing support for various features influences how much
+ * shmem we need, perform the necessary checks.
+ */
+ pgaio_uring_check_capabilities();
+
+ sz = pgaio_uring_context_shmem_size();
+ sz = add_size(sz, pgaio_uring_ring_shmem_size());
+
+ return sz;
}
static void
@@ -128,13 +280,38 @@ pgaio_uring_shmem_init(bool first_time)
{
int TotalProcs = pgaio_uring_procs();
bool found;
+ char *shmem;
+ size_t ring_mem_remain = 0;
+ char *ring_mem_next = 0;
- pgaio_uring_contexts = (PgAioUringContext *)
- ShmemInitStruct("AioUring", pgaio_uring_shmem_size(), &found);
-
+ /*
+ * We allocate memory for all PgAioUringContext instances and, if
+ * supported, the memory required for each of the io_uring instances, in
+ * one ShmemInitStruct().
+ */
+ shmem = ShmemInitStruct("AioUringContext", pgaio_uring_shmem_size(), &found);
if (found)
return;
+ pgaio_uring_contexts = (PgAioUringContext *) shmem;
+ shmem += pgaio_uring_context_shmem_size();
+
+ /* if supported, handle memory alignment / sizing for io_uring memory */
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ ring_mem_remain = pgaio_uring_ring_shmem_size();
+ ring_mem_next = (char *) shmem;
+
+ /* align to page boundary, see also pgaio_uring_ring_shmem_size() */
+ ring_mem_next = (char *) TYPEALIGN(sysconf(_SC_PAGESIZE), ring_mem_next);
+
+ /* account for alignment */
+ ring_mem_remain -= ring_mem_next - shmem;
+ shmem += ring_mem_next - shmem;
+
+ shmem += ring_mem_remain;
+ }
+
for (int contextno = 0; contextno < TotalProcs; contextno++)
{
PgAioUringContext *context = &pgaio_uring_contexts[contextno];
@@ -158,7 +335,28 @@ pgaio_uring_shmem_init(bool first_time)
* be worth using that - also need to evaluate if that causes
* noticeable additional contention?
*/
- ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+
+ /*
+ * If supported (c.f. pgaio_uring_check_capabilities()), create the ring
+ * in shared memory; otherwise fall back to io_uring creating a mapping
+ * per ring. HAVE_IO_URING_QUEUE_INIT_MEM is the pg_config.h.in name.
+ */
+#if (defined(HAVE_IO_URING_QUEUE_INIT_MEM) || defined(HAVE_LIBURING_QUEUE_INIT_MEM)) && defined(IORING_SETUP_NO_MMAP)
+ if (pgaio_uring_caps.mem_init_size > 0)
+ {
+ struct io_uring_params p = {0};
+
+ ret = io_uring_queue_init_mem(io_max_concurrency, &context->io_uring_ring, &p, ring_mem_next, ring_mem_remain);
+ /* step past this ring's memory; failure (ret < 0) is handled below */
+ ring_mem_remain -= ret;
+ ring_mem_next += ret;
+ }
+ else
+#endif
+ {
+ ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+ }
+
if (ret < 0)
{
char *hint = NULL;
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 2bd39b6ac4b..f7b731825fc 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -532,7 +532,7 @@ xmltext(PG_FUNCTION_ARGS)
volatile xmlChar *xmlbuf = NULL;
PgXmlErrorContext *xmlerrcxt;
- /* Otherwise, we gotta spin up some error handling. */
+ /* First we gotta spin up some error handling. */
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
PG_TRY();
@@ -685,7 +685,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
volatile xmlBufferPtr buf = NULL;
volatile xmlSaveCtxtPtr ctxt = NULL;
ErrorSaveContext escontext = {T_ErrorSaveContext};
- PgXmlErrorContext *xmlerrcxt;
+ PgXmlErrorContext *volatile xmlerrcxt = NULL;
#endif
if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
@@ -726,13 +726,18 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
return (text *) data;
}
- /* Otherwise, we gotta spin up some error handling. */
- xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
-
+ /*
+ * Otherwise, we gotta spin up some error handling. Unlike most other
+ * routines in this module, we already have a libxml "doc" structure to
+ * free, so we need to call pg_xml_init() inside the PG_TRY and be
+ * prepared for it to fail (typically due to palloc OOM).
+ */
PG_TRY();
{
size_t decl_len = 0;
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
/* The serialized data will go into this buffer. */
buf = xmlBufferCreate();
@@ -863,10 +868,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
xmlSaveClose(ctxt);
if (buf)
xmlBufferFree(buf);
- if (doc)
- xmlFreeDoc(doc);
+ xmlFreeDoc(doc);
- pg_xml_done(xmlerrcxt, true);
+ if (xmlerrcxt)
+ pg_xml_done(xmlerrcxt, true);
PG_RE_THROW();
}
diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
index ce7aad4b25a..64d080335eb 100644
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -9,19 +9,30 @@
#include <limits.h>
#include "getopt_long.h"
+#include "port/pg_bitutils.h"
#include "portability/instr_time.h"
static const char *progname;
static unsigned int test_duration = 3;
+static double max_rprct = 99.99;
+
+/* record duration in powers of 2 nanoseconds */
+static long long int histogram[32];
+
+/* record counts of first 1024 durations directly */
+#define NUM_DIRECT 1024
+static long long int direct_histogram[NUM_DIRECT];
+
+/* separately record highest observed duration */
+static int32 largest_diff;
+static long long int largest_diff_count;
+
static void handle_args(int argc, char *argv[]);
static uint64 test_timing(unsigned int duration);
static void output(uint64 loop_count);
-/* record duration in powers of 2 microseconds */
-static long long int histogram[32];
-
int
main(int argc, char *argv[])
{
@@ -44,6 +55,7 @@ handle_args(int argc, char *argv[])
{
static struct option long_options[] = {
{"duration", required_argument, NULL, 'd'},
+ {"cutoff", required_argument, NULL, 'c'},
{NULL, 0, NULL, 0}
};
@@ -56,7 +68,7 @@ handle_args(int argc, char *argv[])
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
- printf(_("Usage: %s [-d DURATION]\n"), progname);
+ printf(_("Usage: %s [-d DURATION] [-c CUTOFF]\n"), progname);
exit(0);
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
@@ -66,7 +78,7 @@ handle_args(int argc, char *argv[])
}
}
- while ((option = getopt_long(argc, argv, "d:",
+ while ((option = getopt_long(argc, argv, "d:c:",
long_options, &optindex)) != -1)
{
switch (option)
@@ -93,6 +105,26 @@ handle_args(int argc, char *argv[])
}
break;
+ case 'c':
+ errno = 0;
+ max_rprct = strtod(optarg, &endptr);
+
+ if (endptr == optarg || *endptr != '\0' || errno != 0)
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"),
+ progname, "--cutoff");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+
+ if (max_rprct < 0 || max_rprct > 100)
+ {
+ fprintf(stderr, _("%s: %s must be in range %u..%u\n"),
+ progname, "--cutoff", 0, 100);
+ exit(1);
+ }
+ break;
+
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
@@ -111,7 +143,6 @@ handle_args(int argc, char *argv[])
exit(1);
}
-
printf(ngettext("Testing timing overhead for %u second.\n",
"Testing timing overhead for %u seconds.\n",
test_duration),
@@ -130,19 +161,19 @@ test_timing(unsigned int duration)
end_time,
temp;
- total_time = duration > 0 ? duration * INT64CONST(1000000) : 0;
+ total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0;
INSTR_TIME_SET_CURRENT(start_time);
- cur = INSTR_TIME_GET_MICROSEC(start_time);
+ cur = INSTR_TIME_GET_NANOSEC(start_time);
while (time_elapsed < total_time)
{
int32 diff,
- bits = 0;
+ bits;
prev = cur;
INSTR_TIME_SET_CURRENT(temp);
- cur = INSTR_TIME_GET_MICROSEC(temp);
+ cur = INSTR_TIME_GET_NANOSEC(temp);
diff = cur - prev;
/* Did time go backwards? */
@@ -154,18 +185,30 @@ test_timing(unsigned int duration)
}
/* What is the highest bit in the time diff? */
- while (diff)
- {
- diff >>= 1;
- bits++;
- }
+ if (diff > 0)
+ bits = pg_leftmost_one_pos32(diff) + 1;
+ else
+ bits = 0;
/* Update appropriate duration bucket */
histogram[bits]++;
+ /* Update direct histogram of time diffs */
+ if (diff < NUM_DIRECT)
+ direct_histogram[diff]++;
+
+ /* Also track the largest observed duration, even if >= NUM_DIRECT */
+ if (diff > largest_diff)
+ {
+ largest_diff = diff;
+ largest_diff_count = 1;
+ }
+ else if (diff == largest_diff)
+ largest_diff_count++;
+
loop_count++;
INSTR_TIME_SUBTRACT(temp, start_time);
- time_elapsed = INSTR_TIME_GET_MICROSEC(temp);
+ time_elapsed = INSTR_TIME_GET_NANOSEC(temp);
}
INSTR_TIME_SET_CURRENT(end_time);
@@ -181,28 +224,95 @@ test_timing(unsigned int duration)
static void
output(uint64 loop_count)
{
- int64 max_bit = 31,
- i;
- char *header1 = _("< us");
- char *header2 = /* xgettext:no-c-format */ _("% of total");
- char *header3 = _("count");
+ int max_bit = 31;
+ const char *header1 = _("<= ns");
+ const char *header1b = _("ns");
+ const char *header2 = /* xgettext:no-c-format */ _("% of total");
+ const char *header3 = /* xgettext:no-c-format */ _("running %");
+ const char *header4 = _("count");
int len1 = strlen(header1);
int len2 = strlen(header2);
int len3 = strlen(header3);
+ int len4 = strlen(header4);
+ double rprct;
+ bool stopped = false;
/* find highest bit value */
while (max_bit > 0 && histogram[max_bit] == 0)
max_bit--;
+ /* set minimum column widths */
+ len1 = Max(8, len1);
+ len2 = Max(10, len2);
+ len3 = Max(10, len3);
+ len4 = Max(10, len4);
+
printf(_("Histogram of timing durations:\n"));
- printf("%*s %*s %*s\n",
- Max(6, len1), header1,
- Max(10, len2), header2,
- Max(10, len3), header3);
-
- for (i = 0; i <= max_bit; i++)
- printf("%*ld %*.5f %*lld\n",
- Max(6, len1), 1l << i,
- Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count,
- Max(10, len3), histogram[i]);
+ printf("%*s %*s %*s %*s\n",
+ len1, header1,
+ len2, header2,
+ len3, header3,
+ len4, header4);
+
+ rprct = 0;
+ for (int i = 0; i <= max_bit; i++)
+ {
+ double prct = (double) histogram[i] * 100 / loop_count;
+
+ rprct += prct;
+ printf("%*ld %*.4f %*.4f %*lld\n",
+ len1, (1L << i) - 1,
+ len2, prct,
+ len3, rprct,
+ len4, histogram[i]);
+ }
+
+ printf(_("\nObserved timing durations up to %.4f%%:\n"), max_rprct);
+ printf("%*s %*s %*s %*s\n",
+ len1, header1b,
+ len2, header2,
+ len3, header3,
+ len4, header4);
+
+ rprct = 0;
+ for (int i = 0; i < NUM_DIRECT; i++)
+ {
+ if (direct_histogram[i])
+ {
+ double prct = (double) direct_histogram[i] * 100 / loop_count;
+ bool print_it = !stopped;
+
+ rprct += prct;
+
+ /* if largest diff is < NUM_DIRECT, be sure we print it */
+ if (i == largest_diff)
+ {
+ if (stopped)
+ printf("...\n");
+ print_it = true;
+ }
+
+ if (print_it)
+ printf("%*d %*.4f %*.4f %*lld\n",
+ len1, i,
+ len2, prct,
+ len3, rprct,
+ len4, direct_histogram[i]);
+ if (rprct >= max_rprct)
+ stopped = true;
+ }
+ }
+
+ /* print largest diff when it's outside the array range */
+ if (largest_diff >= NUM_DIRECT)
+ {
+ double prct = (double) largest_diff_count * 100 / loop_count;
+
+ printf("...\n");
+ printf("%*d %*.4f %*.4f %*lld\n",
+ len1, largest_diff,
+ len2, prct,
+ len3, 100.0,
+ len4, largest_diff_count);
+ }
}
diff --git a/src/bin/pg_test_timing/t/001_basic.pl b/src/bin/pg_test_timing/t/001_basic.pl
index 6554cd981af..9912acc052a 100644
--- a/src/bin/pg_test_timing/t/001_basic.pl
+++ b/src/bin/pg_test_timing/t/001_basic.pl
@@ -25,5 +25,22 @@ command_fails_like(
[ 'pg_test_timing', '--duration' => '0' ],
qr/\Qpg_test_timing: --duration must be in range 1..4294967295\E/,
'pg_test_timing: --duration must be in range');
+command_fails_like(
+ [ 'pg_test_timing', '--cutoff' => '101' ],
+ qr/\Qpg_test_timing: --cutoff must be in range 0..100\E/,
+ 'pg_test_timing: --cutoff must be in range');
+
+#########################################
+# We obviously can't check for specific output, but we can
+# do a simple run and make sure it produces something.
+
+command_like(
+ [ 'pg_test_timing', '--duration' => '1' ],
+ qr/
+\QTesting timing overhead for 1 second.\E.*
+\QHistogram of timing durations:\E.*
+\QObserved timing durations up to 99.9900%:\E
+/sx,
+ 'pg_test_timing: sanity check');
done_testing();
diff --git a/src/bin/pg_walsummary/t/002_blocks.pl b/src/bin/pg_walsummary/t/002_blocks.pl
index 270332780a4..0f98c7df82e 100644
--- a/src/bin/pg_walsummary/t/002_blocks.pl
+++ b/src/bin/pg_walsummary/t/002_blocks.pl
@@ -47,11 +47,12 @@ EOM
ok($result, "WAL summarization caught up after insert");
# The WAL summarizer should have generated some IO statistics.
-my $stats_reads = $node1->safe_psql(
+$node1->poll_query_until(
'postgres',
- qq{SELECT sum(reads) > 0 FROM pg_stat_io
- WHERE backend_type = 'walsummarizer' AND object = 'wal'});
-is($stats_reads, 't', "WAL summarizer generates statistics for WAL reads");
+ q{SELECT sum(reads) > 0 FROM pg_stat_io
+ WHERE backend_type = 'walsummarizer' AND object = 'wal'})
+ or die
+ "Timed out while waiting for WAL summarizer to generate statistics for WAL reads";
# Find the highest LSN that is summarized on disk.
my $summarized_lsn = $node1->safe_psql('postgres', <<EOM);
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index d397fe27dc1..b523bcda8f3 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -118,7 +118,7 @@ extern void cost_incremental_sort(Path *path,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern void cost_append(AppendPath *apath, PlannerInfo *root);
extern void cost_merge_append(Path *path, PlannerInfo *root,
List *pathkeys, int n_streams,
int input_disabled_nodes,
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 726a7c1be1f..c4dc5d72bdb 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -229,6 +229,9 @@
/* Define to 1 if you have the global variable 'int timezone'. */
#undef HAVE_INT_TIMEZONE
+/* Define to 1 if you have the `io_uring_queue_init_mem' function. */
+#undef HAVE_IO_URING_QUEUE_INIT_MEM
+
/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index bb99781c56e..b9acc790dc6 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -5703,7 +5703,7 @@ exec_eval_expr(PLpgSQL_execstate *estate,
/*
* Else do it the hard way via exec_run_select
*/
- rc = exec_run_select(estate, expr, 2, NULL);
+ rc = exec_run_select(estate, expr, 0, NULL);
if (rc != SPI_OK_SELECT)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -5757,6 +5757,10 @@ exec_eval_expr(PLpgSQL_execstate *estate,
/* ----------
* exec_run_select Execute a select query
+ *
+ * Note: passing maxtuples different from 0 ("return all tuples") is
+ * deprecated because it will prevent parallel execution of the query.
+ * However, we retain the parameter in case we need it someday.
* ----------
*/
static int
diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
index 1725fe2f948..7224c286e1d 100644
--- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
+++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
@@ -251,6 +251,32 @@ sub adjust_database_contents
'drop operator if exists public.=> (bigint, NONE)');
}
+ # Version 19 changed the output format of pg_lsn. To avoid output
+ # differences, set all pg_lsn columns to NULL if the old version is
+ # older than 19.
+ if ($old_version < 19)
+ {
+ if ($old_version >= '9.5')
+ {
+ _add_st($result, 'regression',
+ "update brintest set lsncol = NULL");
+ }
+
+ if ($old_version >= 12)
+ {
+ _add_st($result, 'regression',
+ "update tab_core_types set pg_lsn = NULL");
+ }
+
+ if ($old_version >= 14)
+ {
+ _add_st($result, 'regression',
+ "update brintest_multi set lsncol = NULL");
+ _add_st($result, 'regression',
+ "update brintest_bloom set lsncol = NULL");
+ }
+ }
+
return $result;
}
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index b00219643b9..5a1dd9fc022 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1722,3 +1722,43 @@ order by t1.four, t1.two limit 1;
-> Seq Scan on tenk1 t2
(12 rows)
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+ QUERY PLAN
+------------------------------------------------------------
+ Append
+ -> Incremental Sort
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ Presorted Key: prt_tbl_1.a
+ -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+ -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(6 rows)
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+ QUERY PLAN
+------------------------------------------------------------
+ Merge Append
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ -> Incremental Sort
+ Sort Key: prt_tbl_1.a, prt_tbl_1.b
+ Presorted Key: prt_tbl_1.a
+ -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1
+ -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2
+(7 rows)
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 78dead65325..5b5055babdc 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1898,10 +1898,11 @@ ORDER BY thousand, tenthous;
Merge Append
Sort Key: tenk1.thousand, tenk1.tenthous
-> Index Only Scan using tenk1_thous_tenthous on tenk1
- -> Sort
+ -> Incremental Sort
Sort Key: tenk1_1.thousand, tenk1_1.thousand
+ Presorted Key: tenk1_1.thousand
-> Index Only Scan using tenk1_thous_tenthous on tenk1 tenk1_1
-(6 rows)
+(7 rows)
explain (costs off)
SELECT thousand, tenthous, thousand+tenthous AS x FROM tenk1
@@ -1982,10 +1983,11 @@ ORDER BY x, y;
Merge Append
Sort Key: a.thousand, a.tenthous
-> Index Only Scan using tenk1_thous_tenthous on tenk1 a
- -> Sort
+ -> Incremental Sort
Sort Key: b.unique2, b.unique2
+ Presorted Key: b.unique2
-> Index Only Scan using tenk1_unique2 on tenk1 b
-(6 rows)
+(7 rows)
-- exercise rescan code path via a repeatedly-evaluated subquery
explain (costs off)
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index f1f8fae5654..bbe658a7588 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -298,3 +298,27 @@ explain (costs off)
select * from
(select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
order by t1.four, t1.two limit 1;
+
+--
+-- Test incremental sort for Append/MergeAppend
+--
+create table prt_tbl (a int, b int) partition by range (a);
+create table prt_tbl_1 partition of prt_tbl for values from (0) to (100);
+create table prt_tbl_2 partition of prt_tbl for values from (100) to (200);
+insert into prt_tbl select i%200, i from generate_series(1,1000)i;
+create index on prt_tbl_1(a);
+create index on prt_tbl_2(a, b);
+analyze prt_tbl;
+
+set enable_seqscan to off;
+set enable_bitmapscan to off;
+
+-- Ensure we get an incremental sort for the subpath of Append
+explain (costs off) select * from prt_tbl order by a, b;
+
+-- Ensure we get an incremental sort for the subpath of MergeAppend
+explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b;
+
+reset enable_bitmapscan;
+reset enable_seqscan;
+drop table prt_tbl;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 114bdafafdf..83192038571 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2181,6 +2181,7 @@ PgAioReturn
PgAioTargetData
PgAioTargetID
PgAioTargetInfo
+PgAioUringCaps
PgAioUringContext
PgAioWaitRef
PgArchData