#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "optimizer/optimizer.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "parser/scansup.h"
return generate_series_timestamptz_internal(fcinfo);
}
+/*
+ * Planner support function for generate_series(timestamp, timestamp, interval)
+ */
+Datum
+generate_series_timestamp_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ arg3 = estimate_expression_value(req->root, lthird(args));
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned.
+ */
+ if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) && ((Const *) arg2)->constisnull) ||
+ (IsA(arg3, Const) && ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const))
+ {
+ Timestamp start,
+ finish;
+ Interval *step;
+ Datum diff;
+ double dstep;
+ int64 dummy;
+
+ start = DatumGetTimestamp(((Const *) arg1)->constvalue);
+ finish = DatumGetTimestamp(((Const *) arg2)->constvalue);
+ step = DatumGetIntervalP(((Const *) arg3)->constvalue);
+
+ /*
+ * Perform some prechecks which could cause timestamp_mi to
+ * raise an ERROR. It's much better to just return some
+ * default estimate than error out in a support function.
+ */
+ if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
+ !pg_sub_s64_overflow(finish, start, &dummy))
+ {
+ diff = DirectFunctionCall2(timestamp_mi,
+ TimestampGetDatum(finish),
+ TimestampGetDatum(start));
+
+#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
+
+ dstep = INTERVAL_TO_MICROSECONDS(step);
+
+ /* This equation works for either sign of step */
+ if (dstep != 0.0)
+ {
+ Interval *idiff = DatumGetIntervalP(diff);
+ double ddiff = INTERVAL_TO_MICROSECONDS(idiff);
+
+ req->rows = floor(ddiff / dstep + 1.0);
+ ret = (Node *) req;
+ }
+#undef INTERVAL_TO_MICROSECONDS
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+
/* timestamp_at_local()
* timestamptz_at_local()
*
prorettype => 'numeric', proargtypes => 'numeric numeric',
prosrc => 'generate_series_numeric' },
{ oid => '938', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
prosrc => 'generate_series_timestamp' },
{ oid => '939', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
provolatile => 's', prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval',
prosrc => 'generate_series_timestamptz' },
{ oid => '6274', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_timestamp_support', proretset => 't',
prorettype => 'timestamptz',
proargtypes => 'timestamptz timestamptz interval text',
prosrc => 'generate_series_timestamptz_at_zone' },
+{ oid => '8402', descr => 'planner support for generate_series',
+ proname => 'generate_series_timestamp_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
# boolean aggregates
{ oid => '2515', descr => 'aggregate transition function',
\getenv libdir PG_LIBDIR
\getenv dlsuffix PG_DLSUFFIX
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
--
-- num_nulls()
--
Index Cond: (unique1 = g.g)
(4 rows)
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- As above but with generate_series_timestamp
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- As above but with generate_series_timestamptz_at_zone()
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+ERROR: step size cannot equal zero
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
ok
(1 row)
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
\set regresslib :libdir '/regress' :dlsuffix
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+ hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+ ln text;
+ analyze_str text;
+BEGIN
+ IF do_analyze = true THEN
+ analyze_str := 'on';
+ ELSE
+ analyze_str := 'off';
+ END IF;
+
+ FOR ln IN
+ EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
+ analyze_str, query)
+ LOOP
+ IF hide_costs = true THEN
+ ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+ END IF;
+
+ IF hide_row_est = true THEN
+ -- don't use 'g' so that we leave the actual rows intact
+ ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+ END IF;
+
+ IF hide_width = true THEN
+ ln := regexp_replace(ln, 'width=\d+', 'width=N');
+ END IF;
+
+ RETURN NEXT ln;
+ END LOOP;
+END;
+$$;
+
--
-- num_nulls()
--
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamp
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamptz_at_zone()
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
SELECT count(*) > 0 AS ok FROM pg_control_init();
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
FROM test_chunk_id;
DROP TABLE test_chunk_id;
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);