diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 54 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 66 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 73 | ||||
-rw-r--r-- | src/backend/storage/aio/method_io_uring.c | 210 | ||||
-rw-r--r-- | src/backend/utils/adt/xml.c | 21 | ||||
-rw-r--r-- | src/bin/pg_test_timing/pg_test_timing.c | 172 | ||||
-rw-r--r-- | src/bin/pg_test_timing/t/001_basic.pl | 17 | ||||
-rw-r--r-- | src/bin/pg_walsummary/t/002_blocks.pl | 9 | ||||
-rw-r--r-- | src/include/optimizer/cost.h | 2 | ||||
-rw-r--r-- | src/include/pg_config.h.in | 3 | ||||
-rw-r--r-- | src/pl/plpgsql/src/pl_exec.c | 6 | ||||
-rw-r--r-- | src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm | 26 | ||||
-rw-r--r-- | src/test/regress/expected/incremental_sort.out | 40 | ||||
-rw-r--r-- | src/test/regress/expected/inherit.out | 10 | ||||
-rw-r--r-- | src/test/regress/sql/incremental_sort.sql | 24 | ||||
-rw-r--r-- | src/tools/pgindent/typedefs.list | 1 |
16 files changed, 632 insertions, 102 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 3d44815ed5a..1f04a2c182c 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) * Determines and returns the cost of an Append node. */ void -cost_append(AppendPath *apath) +cost_append(AppendPath *apath, PlannerInfo *root) { ListCell *l; @@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath) foreach(l, apath->subpaths) { Path *subpath = (Path *) lfirst(l); - Path sort_path; /* dummy for result of cost_sort */ + int presorted_keys; + Path sort_path; /* dummy for result of + * cost_sort/cost_incremental_sort */ - if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { /* * We'll need to insert a Sort node, so include costs for - * that. We can use the parent's LIMIT if any, since we + * that. We choose to use incremental sort if it is + * enabled and there are presorted keys; otherwise we use + * full sort. + * + * We can use the parent's LIMIT if any, since we * certainly won't pull more than that many tuples from * any child. */ - cost_sort(&sort_path, - NULL, /* doesn't currently need root */ - pathkeys, - subpath->disabled_nodes, - subpath->total_cost, - subpath->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - apath->limit_tuples); + if (enable_incremental_sort && presorted_keys > 0) + { + cost_incremental_sort(&sort_path, + root, + pathkeys, + presorted_keys, + subpath->disabled_nodes, + subpath->startup_cost, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + } + else + { + cost_sort(&sort_path, + root, + pathkeys, + subpath->disabled_nodes, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + } + subpath = &sort_path; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 0b61aef962c..8a9f1d7a943 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) Oid *sortOperators; Oid *collations; bool *nullsFirst; + int presorted_keys; /* * Compute sort column info, and adjust subplan's tlist as needed. @@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) numsortkeys * sizeof(bool)) == 0); /* Now, insert a Sort node if subplan isn't sufficiently ordered */ - if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { - Sort *sort = make_sort(subplan, numsortkeys, + Plan *sort_plan; + + /* + * We choose to use incremental sort if it is enabled and + * there are presorted keys; otherwise we use full sort. + */ + if (enable_incremental_sort && presorted_keys > 0) + { + sort_plan = (Plan *) + make_incrementalsort(subplan, numsortkeys, presorted_keys, sortColIdx, sortOperators, collations, nullsFirst); - label_sort_with_costsize(root, sort, best_path->limit_tuples); - subplan = (Plan *) sort; + label_incrementalsort_with_costsize(root, + (IncrementalSort *) sort_plan, + pathkeys, + best_path->limit_tuples); + } + else + { + sort_plan = (Plan *) make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, (Sort *) sort_plan, + best_path->limit_tuples); + } + + subplan = sort_plan; } } @@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, Oid *sortOperators; Oid *collations; bool *nullsFirst; + int presorted_keys; /* Build the child plan */ /* Must insist that all children return the same tlist */ @@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, numsortkeys * sizeof(bool)) == 0); /* Now, insert a Sort node if subplan isn't sufficiently ordered */ - if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { - Sort *sort = make_sort(subplan, numsortkeys, + Plan *sort_plan; + + /* + * We choose to use incremental sort if it is enabled and there + * are presorted keys; otherwise we use full sort. + */ + if (enable_incremental_sort && presorted_keys > 0) + { + sort_plan = (Plan *) + make_incrementalsort(subplan, numsortkeys, presorted_keys, sortColIdx, sortOperators, collations, nullsFirst); - label_sort_with_costsize(root, sort, best_path->limit_tuples); - subplan = (Plan *) sort; + label_incrementalsort_with_costsize(root, + (IncrementalSort *) sort_plan, + pathkeys, + best_path->limit_tuples); + } + else + { + sort_plan = (Plan *) make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, (Sort *) sort_plan, + best_path->limit_tuples); + } + + subplan = sort_plan; } subplans = lappend(subplans, subplan); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e0192d4a491..9cc602788ea 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root, pathnode->path.total_cost = child->total_cost; } else - cost_append(pathnode); + cost_append(pathnode, root); /* Must do this last, else cost_append complains */ pathnode->path.pathkeys = child->pathkeys; } else - cost_append(pathnode); + cost_append(pathnode, root); /* If the caller provided a row estimate, override the computed value. */ if (rows >= 0) @@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root, foreach(l, subpaths) { Path *subpath = (Path *) lfirst(l); + int presorted_keys; + Path sort_path; /* dummy for result of + * cost_sort/cost_incremental_sort */ /* All child paths should be unparameterized */ Assert(bms_is_empty(PATH_REQ_OUTER(subpath))); @@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root, pathnode->path.parallel_safe = pathnode->path.parallel_safe && subpath->parallel_safe; - if (pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { - /* Subpath is adequately ordered, we won't need to sort it */ - input_disabled_nodes += subpath->disabled_nodes; - input_startup_cost += subpath->startup_cost; - input_total_cost += subpath->total_cost; - } - else - { - /* We'll need to insert a Sort node, so include cost for that */ - Path sort_path; /* dummy for result of cost_sort */ + /* + * We'll need to insert a Sort node, so include costs for that. We + * choose to use incremental sort if it is enabled and there are + * presorted keys; otherwise we use full sort. + * + * We can use the parent's LIMIT if any, since we certainly won't + * pull more than that many tuples from any child. + */ + if (enable_incremental_sort && presorted_keys > 0) + { + cost_incremental_sort(&sort_path, + root, + pathkeys, + presorted_keys, + subpath->disabled_nodes, + subpath->startup_cost, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + pathnode->limit_tuples); + } + else + { + cost_sort(&sort_path, + root, + pathkeys, + subpath->disabled_nodes, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + pathnode->limit_tuples); + } - cost_sort(&sort_path, - root, - pathkeys, - subpath->disabled_nodes, - subpath->total_cost, - subpath->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - pathnode->limit_tuples); - input_disabled_nodes += sort_path.disabled_nodes; - input_startup_cost += sort_path.startup_cost; - input_total_cost += sort_path.total_cost; + subpath = &sort_path; } + + input_disabled_nodes += subpath->disabled_nodes; + input_startup_cost += subpath->startup_cost; + input_total_cost += subpath->total_cost; } /* diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c index b78048328e1..0a8c054162f 100644 --- a/src/backend/storage/aio/method_io_uring.c +++ b/src/backend/storage/aio/method_io_uring.c @@ -29,6 +29,9 @@ #ifdef IOMETHOD_IO_URING_ENABLED +#include <sys/mman.h> +#include <unistd.h> + #include <liburing.h> #include "miscadmin.h" @@ -94,12 +97,32 @@ PgAioUringContext struct io_uring io_uring_ring; } PgAioUringContext; +/* + * Information about the capabilities that io_uring has. + * + * Depending on liburing and kernel version different features are + * supported. At least for the kernel a kernel version check does not suffice + * as various vendors do backport features to older kernels :(. + */ +typedef struct PgAioUringCaps +{ + bool checked; + /* -1 if io_uring_queue_init_mem() is unsupported */ + int mem_init_size; +} PgAioUringCaps; + + /* PgAioUringContexts for all backends */ static PgAioUringContext *pgaio_uring_contexts; /* the current backend's context */ static PgAioUringContext *pgaio_my_uring_context; +static PgAioUringCaps pgaio_uring_caps = +{ + .checked = false, + .mem_init_size = -1, +}; static uint32 pgaio_uring_procs(void) @@ -111,16 +134,145 @@ pgaio_uring_procs(void) return MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS; } -static Size +/* + * Initializes pgaio_uring_caps, unless that's already done. + */ +static void +pgaio_uring_check_capabilities(void) +{ + if (pgaio_uring_caps.checked) + return; + + /* + * By default io_uring creates a shared memory mapping for each io_uring + * instance, leading to a large number of memory mappings. Unfortunately a + * large number of memory mappings slows things down, backend exit is + * particularly affected. To address that, newer kernels (6.5) support + * using user-provided memory for the memory, by putting the relevant + * memory into shared memory we don't need any additional mappings. + * + * To know whether this is supported, we unfortunately need to probe the + * kernel by trying to create a ring with userspace-provided memory. This + * also has a secondary benefit: We can determine precisely how much + * memory we need for each io_uring instance. + */ +#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP) + { + struct io_uring test_ring; + size_t ring_size; + void *ring_ptr; + struct io_uring_params p = {0}; + int ret; + + /* + * Liburing does not yet provide an API to query how much memory a + * ring will need. So we over-estimate it here. As the memory is freed + * just below that's small temporary waste of memory. + * + * 1MB is more than enough for rings within io_max_concurrency's + * range. + */ + ring_size = 1024 * 1024; + + /* + * Hard to believe a system exists where 1MB would not be a multiple + * of the page size. But it's cheap to ensure... + */ + ring_size -= ring_size % sysconf(_SC_PAGESIZE); + + ring_ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (ring_ptr == MAP_FAILED) + elog(ERROR, + "mmap(%zu) to determine io_uring_queue_init_mem() support failed: %m", + ring_size); + + ret = io_uring_queue_init_mem(io_max_concurrency, &test_ring, &p, ring_ptr, ring_size); + if (ret > 0) + { + pgaio_uring_caps.mem_init_size = ret; + + elog(DEBUG1, + "can use combined memory mapping for io_uring, each ring needs %d bytes", + ret); + + /* clean up the created ring, it was just for a test */ + io_uring_queue_exit(&test_ring); + } + else + { + /* + * There are different reasons for ring creation to fail, but it's + * ok to treat that just as io_uring_queue_init_mem() not being + * supported. We'll report a more detailed error in + * pgaio_uring_shmem_init(). + */ + errno = -ret; + elog(DEBUG1, + "cannot use combined memory mapping for io_uring, ring creation failed: %m"); + + } + + if (munmap(ring_ptr, ring_size) != 0) + elog(ERROR, "munmap() failed: %m"); + } +#else + { + elog(DEBUG1, + "can't use combined memory mapping for io_uring, kernel or liburing too old"); + } +#endif + + pgaio_uring_caps.checked = true; +} + +/* + * Memory for all PgAioUringContext instances + */ +static size_t pgaio_uring_context_shmem_size(void) { return mul_size(pgaio_uring_procs(), sizeof(PgAioUringContext)); } +/* + * Memory for the combined memory used by io_uring instances. Returns 0 if + * that is not supported by kernel/liburing. + */ +static size_t +pgaio_uring_ring_shmem_size(void) +{ + size_t sz = 0; + + if (pgaio_uring_caps.mem_init_size > 0) + { + /* + * Memory for rings needs to be allocated to the page boundary, + * reserve space. Luckily it does not need to be aligned to hugepage + * boundaries, even if huge pages are used. + */ + sz = add_size(sz, sysconf(_SC_PAGESIZE)); + sz = add_size(sz, mul_size(pgaio_uring_procs(), + pgaio_uring_caps.mem_init_size)); + } + + return sz; +} + static size_t pgaio_uring_shmem_size(void) { - return pgaio_uring_context_shmem_size(); + size_t sz; + + /* + * Kernel and liburing support for various features influences how much + * shmem we need, perform the necessary checks. + */ + pgaio_uring_check_capabilities(); + + sz = pgaio_uring_context_shmem_size(); + sz = add_size(sz, pgaio_uring_ring_shmem_size()); + + return sz; } static void @@ -128,13 +280,38 @@ pgaio_uring_shmem_init(bool first_time) { int TotalProcs = pgaio_uring_procs(); bool found; + char *shmem; + size_t ring_mem_remain = 0; + char *ring_mem_next = 0; - pgaio_uring_contexts = (PgAioUringContext *) - ShmemInitStruct("AioUring", pgaio_uring_shmem_size(), &found); - + /* + * We allocate memory for all PgAioUringContext instances and, if + * supported, the memory required for each of the io_uring instances, in + * one ShmemInitStruct(). + */ + shmem = ShmemInitStruct("AioUringContext", pgaio_uring_shmem_size(), &found); if (found) return; + pgaio_uring_contexts = (PgAioUringContext *) shmem; + shmem += pgaio_uring_context_shmem_size(); + + /* if supported, handle memory alignment / sizing for io_uring memory */ + if (pgaio_uring_caps.mem_init_size > 0) + { + ring_mem_remain = pgaio_uring_ring_shmem_size(); + ring_mem_next = (char *) shmem; + + /* align to page boundary, see also pgaio_uring_ring_shmem_size() */ + ring_mem_next = (char *) TYPEALIGN(sysconf(_SC_PAGESIZE), ring_mem_next); + + /* account for alignment */ + ring_mem_remain -= ring_mem_next - shmem; + shmem += ring_mem_next - shmem; + + shmem += ring_mem_remain; + } + for (int contextno = 0; contextno < TotalProcs; contextno++) { PgAioUringContext *context = &pgaio_uring_contexts[contextno]; @@ -158,7 +335,28 @@ pgaio_uring_shmem_init(bool first_time) * be worth using that - also need to evaluate if that causes * noticeable additional contention? */ - ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0); + + /* + * If supported (c.f. pgaio_uring_check_capabilities()), create ring + * with its data in shared memory. Otherwise fall back io_uring + * creating a memory mapping for each ring. + */ +#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP) + if (pgaio_uring_caps.mem_init_size > 0) + { + struct io_uring_params p = {0}; + + ret = io_uring_queue_init_mem(io_max_concurrency, &context->io_uring_ring, &p, ring_mem_next, ring_mem_remain); + + ring_mem_remain -= ret; + ring_mem_next += ret; + } + else +#endif + { + ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0); + } + if (ret < 0) { char *hint = NULL; diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 2bd39b6ac4b..f7b731825fc 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -532,7 +532,7 @@ xmltext(PG_FUNCTION_ARGS) volatile xmlChar *xmlbuf = NULL; PgXmlErrorContext *xmlerrcxt; - /* Otherwise, we gotta spin up some error handling. */ + /* First we gotta spin up some error handling. */ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); @@ -685,7 +685,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) volatile xmlBufferPtr buf = NULL; volatile xmlSaveCtxtPtr ctxt = NULL; ErrorSaveContext escontext = {T_ErrorSaveContext}; - PgXmlErrorContext *xmlerrcxt; + PgXmlErrorContext *volatile xmlerrcxt = NULL; #endif if (xmloption_arg != XMLOPTION_DOCUMENT && !indent) @@ -726,13 +726,18 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) return (text *) data; } - /* Otherwise, we gotta spin up some error handling. */ - xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); - + /* + * Otherwise, we gotta spin up some error handling. Unlike most other + * routines in this module, we already have a libxml "doc" structure to + * free, so we need to call pg_xml_init() inside the PG_TRY and be + * prepared for it to fail (typically due to palloc OOM). + */ PG_TRY(); { size_t decl_len = 0; + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + /* The serialized data will go into this buffer. */ buf = xmlBufferCreate(); @@ -863,10 +868,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) xmlSaveClose(ctxt); if (buf) xmlBufferFree(buf); - if (doc) - xmlFreeDoc(doc); + xmlFreeDoc(doc); - pg_xml_done(xmlerrcxt, true); + if (xmlerrcxt) + pg_xml_done(xmlerrcxt, true); PG_RE_THROW(); } diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c index ce7aad4b25a..64d080335eb 100644 --- a/src/bin/pg_test_timing/pg_test_timing.c +++ b/src/bin/pg_test_timing/pg_test_timing.c @@ -9,19 +9,30 @@ #include <limits.h> #include "getopt_long.h" +#include "port/pg_bitutils.h" #include "portability/instr_time.h" static const char *progname; static unsigned int test_duration = 3; +static double max_rprct = 99.99; + +/* record duration in powers of 2 nanoseconds */ +static long long int histogram[32]; + +/* record counts of first 1024 durations directly */ +#define NUM_DIRECT 1024 +static long long int direct_histogram[NUM_DIRECT]; + +/* separately record highest observed duration */ +static int32 largest_diff; +static long long int largest_diff_count; + static void handle_args(int argc, char *argv[]); static uint64 test_timing(unsigned int duration); static void output(uint64 loop_count); -/* record duration in powers of 2 microseconds */ -static long long int histogram[32]; - int main(int argc, char *argv[]) { @@ -44,6 +55,7 @@ handle_args(int argc, char *argv[]) { static struct option long_options[] = { {"duration", required_argument, NULL, 'd'}, + {"cutoff", required_argument, NULL, 'c'}, {NULL, 0, NULL, 0} }; @@ -56,7 +68,7 @@ handle_args(int argc, char *argv[]) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { - printf(_("Usage: %s [-d DURATION]\n"), progname); + printf(_("Usage: %s [-d DURATION] [-c CUTOFF]\n"), progname); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) @@ -66,7 +78,7 @@ handle_args(int argc, char *argv[]) } } - while ((option = getopt_long(argc, argv, "d:", + while ((option = getopt_long(argc, argv, "d:c:", long_options, &optindex)) != -1) { switch (option) @@ -93,6 +105,26 @@ handle_args(int argc, char *argv[]) } break; + case 'c': + errno = 0; + max_rprct = strtod(optarg, &endptr); + + if (endptr == optarg || *endptr != '\0' || errno != 0) + { + fprintf(stderr, _("%s: invalid argument for option %s\n"), + progname, "--cutoff"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit(1); + } + + if (max_rprct < 0 || max_rprct > 100) + { + fprintf(stderr, _("%s: %s must be in range %u..%u\n"), + progname, "--cutoff", 0, 100); + exit(1); + } + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); @@ -111,7 +143,6 @@ handle_args(int argc, char *argv[]) exit(1); } - printf(ngettext("Testing timing overhead for %u second.\n", "Testing timing overhead for %u seconds.\n", test_duration), @@ -130,19 +161,19 @@ test_timing(unsigned int duration) end_time, temp; - total_time = duration > 0 ? duration * INT64CONST(1000000) : 0; + total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0; INSTR_TIME_SET_CURRENT(start_time); - cur = INSTR_TIME_GET_MICROSEC(start_time); + cur = INSTR_TIME_GET_NANOSEC(start_time); while (time_elapsed < total_time) { int32 diff, - bits = 0; + bits; prev = cur; INSTR_TIME_SET_CURRENT(temp); - cur = INSTR_TIME_GET_MICROSEC(temp); + cur = INSTR_TIME_GET_NANOSEC(temp); diff = cur - prev; /* Did time go backwards? */ @@ -154,18 +185,30 @@ test_timing(unsigned int duration) } /* What is the highest bit in the time diff? */ - while (diff) - { - diff >>= 1; - bits++; - } + if (diff > 0) + bits = pg_leftmost_one_pos32(diff) + 1; + else + bits = 0; /* Update appropriate duration bucket */ histogram[bits]++; + /* Update direct histogram of time diffs */ + if (diff < NUM_DIRECT) + direct_histogram[diff]++; + + /* Also track the largest observed duration, even if >= NUM_DIRECT */ + if (diff > largest_diff) + { + largest_diff = diff; + largest_diff_count = 1; + } + else if (diff == largest_diff) + largest_diff_count++; + loop_count++; INSTR_TIME_SUBTRACT(temp, start_time); - time_elapsed = INSTR_TIME_GET_MICROSEC(temp); + time_elapsed = INSTR_TIME_GET_NANOSEC(temp); } INSTR_TIME_SET_CURRENT(end_time); @@ -181,28 +224,95 @@ test_timing(unsigned int duration) static void output(uint64 loop_count) { - int64 max_bit = 31, - i; - char *header1 = _("< us"); - char *header2 = /* xgettext:no-c-format */ _("% of total"); - char *header3 = _("count"); + int max_bit = 31; + const char *header1 = _("<= ns"); + const char *header1b = _("ns"); + const char *header2 = /* xgettext:no-c-format */ _("% of total"); + const char *header3 = /* xgettext:no-c-format */ _("running %"); + const char *header4 = _("count"); int len1 = strlen(header1); int len2 = strlen(header2); int len3 = strlen(header3); + int len4 = strlen(header4); + double rprct; + bool stopped = false; /* find highest bit value */ while (max_bit > 0 && histogram[max_bit] == 0) max_bit--; + /* set minimum column widths */ + len1 = Max(8, len1); + len2 = Max(10, len2); + len3 = Max(10, len3); + len4 = Max(10, len4); + printf(_("Histogram of timing durations:\n")); - printf("%*s %*s %*s\n", - Max(6, len1), header1, - Max(10, len2), header2, - Max(10, len3), header3); - - for (i = 0; i <= max_bit; i++) - printf("%*ld %*.5f %*lld\n", - Max(6, len1), 1l << i, - Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count, - Max(10, len3), histogram[i]); + printf("%*s %*s %*s %*s\n", + len1, header1, + len2, header2, + len3, header3, + len4, header4); + + rprct = 0; + for (int i = 0; i <= max_bit; i++) + { + double prct = (double) histogram[i] * 100 / loop_count; + + rprct += prct; + printf("%*ld %*.4f %*.4f %*lld\n", + len1, (1L << i) - 1, + len2, prct, + len3, rprct, + len4, histogram[i]); + } + + printf(_("\nObserved timing durations up to %.4f%%:\n"), max_rprct); + printf("%*s %*s %*s %*s\n", + len1, header1b, + len2, header2, + len3, header3, + len4, header4); + + rprct = 0; + for (int i = 0; i < NUM_DIRECT; i++) + { + if (direct_histogram[i]) + { + double prct = (double) direct_histogram[i] * 100 / loop_count; + bool print_it = !stopped; + + rprct += prct; + + /* if largest diff is < NUM_DIRECT, be sure we print it */ + if (i == largest_diff) + { + if (stopped) + printf("...\n"); + print_it = true; + } + + if (print_it) + printf("%*d %*.4f %*.4f %*lld\n", + len1, i, + len2, prct, + len3, rprct, + len4, direct_histogram[i]); + if (rprct >= max_rprct) + stopped = true; + } + } + + /* print largest diff when it's outside the array range */ + if (largest_diff >= NUM_DIRECT) + { + double prct = (double) largest_diff_count * 100 / loop_count; + + printf("...\n"); + printf("%*d %*.4f %*.4f %*lld\n", + len1, largest_diff, + len2, prct, + len3, 100.0, + len4, largest_diff_count); + } } diff --git a/src/bin/pg_test_timing/t/001_basic.pl b/src/bin/pg_test_timing/t/001_basic.pl index 6554cd981af..9912acc052a 100644 --- a/src/bin/pg_test_timing/t/001_basic.pl +++ b/src/bin/pg_test_timing/t/001_basic.pl @@ -25,5 +25,22 @@ command_fails_like( [ 'pg_test_timing', '--duration' => '0' ], qr/\Qpg_test_timing: --duration must be in range 1..4294967295\E/, 'pg_test_timing: --duration must be in range'); +command_fails_like( + [ 'pg_test_timing', '--cutoff' => '101' ], + qr/\Qpg_test_timing: --cutoff must be in range 0..100\E/, + 'pg_test_timing: --cutoff must be in range'); + +######################################### +# We obviously can't check for specific output, but we can +# do a simple run and make sure it produces something. + +command_like( + [ 'pg_test_timing', '--duration' => '1' ], + qr/ +\QTesting timing overhead for 1 second.\E.* +\QHistogram of timing durations:\E.* +\QObserved timing durations up to 99.9900%:\E +/sx, + 'pg_test_timing: sanity check'); done_testing(); diff --git a/src/bin/pg_walsummary/t/002_blocks.pl b/src/bin/pg_walsummary/t/002_blocks.pl index 270332780a4..0f98c7df82e 100644 --- a/src/bin/pg_walsummary/t/002_blocks.pl +++ b/src/bin/pg_walsummary/t/002_blocks.pl @@ -47,11 +47,12 @@ EOM ok($result, "WAL summarization caught up after insert"); # The WAL summarizer should have generated some IO statistics. -my $stats_reads = $node1->safe_psql( +$node1->poll_query_until( 'postgres', - qq{SELECT sum(reads) > 0 FROM pg_stat_io - WHERE backend_type = 'walsummarizer' AND object = 'wal'}); -is($stats_reads, 't', "WAL summarizer generates statistics for WAL reads"); + q{SELECT sum(reads) > 0 FROM pg_stat_io + WHERE backend_type = 'walsummarizer' AND object = 'wal'}) + or die + "Timed out while waiting for WAL summarizer to generate statistics for WAL reads"; # Find the highest LSN that is summarized on disk. my $summarized_lsn = $node1->safe_psql('postgres', <<EOM); diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index d397fe27dc1..b523bcda8f3 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -118,7 +118,7 @@ extern void cost_incremental_sort(Path *path, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); -extern void cost_append(AppendPath *apath); +extern void cost_append(AppendPath *apath, PlannerInfo *root); extern void cost_merge_append(Path *path, PlannerInfo *root, List *pathkeys, int n_streams, int input_disabled_nodes, diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 726a7c1be1f..c4dc5d72bdb 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -229,6 +229,9 @@ /* Define to 1 if you have the global variable 'int timezone'. */ #undef HAVE_INT_TIMEZONE +/* Define to 1 if you have the `io_uring_queue_init_mem' function. */ +#undef HAVE_IO_URING_QUEUE_INIT_MEM + /* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */ #undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index bb99781c56e..b9acc790dc6 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -5703,7 +5703,7 @@ exec_eval_expr(PLpgSQL_execstate *estate, /* * Else do it the hard way via exec_run_select */ - rc = exec_run_select(estate, expr, 2, NULL); + rc = exec_run_select(estate, expr, 0, NULL); if (rc != SPI_OK_SELECT) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -5757,6 +5757,10 @@ exec_eval_expr(PLpgSQL_execstate *estate, /* ---------- * exec_run_select Execute a select query + * + * Note: passing maxtuples different from 0 ("return all tuples") is + * deprecated because it will prevent parallel execution of the query. + * However, we retain the parameter in case we need it someday. * ---------- */ static int diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm index 1725fe2f948..7224c286e1d 100644 --- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm +++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm @@ -251,6 +251,32 @@ sub adjust_database_contents 'drop operator if exists public.=> (bigint, NONE)'); } + # Version 19 changed the output format of pg_lsn. To avoid output + # differences, set all pg_lsn columns to NULL if the old version is + # older than 19. + if ($old_version < 19) + { + if ($old_version >= '9.5') + { + _add_st($result, 'regression', + "update brintest set lsncol = NULL"); + } + + if ($old_version >= 12) + { + _add_st($result, 'regression', + "update tab_core_types set pg_lsn = NULL"); + } + + if ($old_version >= 14) + { + _add_st($result, 'regression', + "update brintest_multi set lsncol = NULL"); + _add_st($result, 'regression', + "update brintest_bloom set lsncol = NULL"); + } + } + return $result; } diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index b00219643b9..5a1dd9fc022 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1722,3 +1722,43 @@ order by t1.four, t1.two limit 1; -> Seq Scan on tenk1 t2 (12 rows) +-- +-- Test incremental sort for Append/MergeAppend +-- +create table prt_tbl (a int, b int) partition by range (a); +create table prt_tbl_1 partition of prt_tbl for values from (0) to (100); +create table prt_tbl_2 partition of prt_tbl for values from (100) to (200); +insert into prt_tbl select i%200, i from generate_series(1,1000)i; +create index on prt_tbl_1(a); +create index on prt_tbl_2(a, b); +analyze prt_tbl; +set enable_seqscan to off; +set enable_bitmapscan to off; +-- Ensure we get an incremental sort for the subpath of Append +explain (costs off) select * from prt_tbl order by a, b; + QUERY PLAN +------------------------------------------------------------ + Append + -> Incremental Sort + Sort Key: prt_tbl_1.a, prt_tbl_1.b + Presorted Key: prt_tbl_1.a + -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1 + -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2 +(6 rows) + +-- Ensure we get an incremental sort for the subpath of MergeAppend +explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b; + QUERY PLAN +------------------------------------------------------------ + Merge Append + Sort Key: prt_tbl_1.a, prt_tbl_1.b + -> Incremental Sort + Sort Key: prt_tbl_1.a, prt_tbl_1.b + Presorted Key: prt_tbl_1.a + -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1 + -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2 +(7 rows) + +reset enable_bitmapscan; +reset enable_seqscan; +drop table prt_tbl; diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index 78dead65325..5b5055babdc 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1898,10 +1898,11 @@ ORDER BY thousand, tenthous; Merge Append Sort Key: tenk1.thousand, tenk1.tenthous -> Index Only Scan using tenk1_thous_tenthous on tenk1 - -> Sort + -> Incremental Sort Sort Key: tenk1_1.thousand, tenk1_1.thousand + Presorted Key: tenk1_1.thousand -> Index Only Scan using tenk1_thous_tenthous on tenk1 tenk1_1 -(6 rows) +(7 rows) explain (costs off) SELECT thousand, tenthous, thousand+tenthous AS x FROM tenk1 @@ -1982,10 +1983,11 @@ ORDER BY x, y; Merge Append Sort Key: a.thousand, a.tenthous -> Index Only Scan using tenk1_thous_tenthous on tenk1 a - -> Sort + -> Incremental Sort Sort Key: b.unique2, b.unique2 + Presorted Key: b.unique2 -> Index Only Scan using tenk1_unique2 on tenk1 b -(6 rows) +(7 rows) -- exercise rescan code path via a repeatedly-evaluated subquery explain (costs off) diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index f1f8fae5654..bbe658a7588 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -298,3 +298,27 @@ explain (costs off) select * from (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two order by t1.four, t1.two limit 1; + +-- +-- Test incremental sort for Append/MergeAppend +-- +create table prt_tbl (a int, b int) partition by range (a); +create table prt_tbl_1 partition of prt_tbl for values from (0) to (100); +create table prt_tbl_2 partition of prt_tbl for values from (100) to (200); +insert into prt_tbl select i%200, i from generate_series(1,1000)i; +create index on prt_tbl_1(a); +create index on prt_tbl_2(a, b); +analyze prt_tbl; + +set enable_seqscan to off; +set enable_bitmapscan to off; + +-- Ensure we get an incremental sort for the subpath of Append +explain (costs off) select * from prt_tbl order by a, b; + +-- Ensure we get an incremental sort for the subpath of MergeAppend +explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b; + +reset enable_bitmapscan; +reset enable_seqscan; +drop table prt_tbl; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 114bdafafdf..83192038571 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2181,6 +2181,7 @@ PgAioReturn PgAioTargetData PgAioTargetID PgAioTargetInfo +PgAioUringCaps PgAioUringContext PgAioWaitRef PgArchData |