summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Lane 2008-12-28 18:54:01 +0000
committer Tom Lane 2008-12-28 18:54:01 +0000
commit 95b07bc7f5010233f52f9d11da74e2e5b653b0a7 (patch)
tree 48f5858bf4eca1bfb316ef02bb959ca85f568e0a
parent 38e9348282e9d078487147ba8a85aebec54e3a08 (diff)
Support window functions a la SQL:2008.
Hitoshi Harada, with some kibitzing from Heikki and Tom.
-rw-r--r--contrib/tsearch2/tsearch2.c12
-rw-r--r--doc/src/sgml/advanced.sgml250
-rw-r--r--doc/src/sgml/errcodes.sgml20
-rw-r--r--doc/src/sgml/func.sgml274
-rw-r--r--doc/src/sgml/queries.sgml53
-rw-r--r--doc/src/sgml/query.sgml10
-rw-r--r--doc/src/sgml/ref/select.sgml79
-rw-r--r--doc/src/sgml/ref/select_into.sgml3
-rw-r--r--doc/src/sgml/syntax.sgml97
-rw-r--r--doc/src/sgml/xaggr.sgml9
-rw-r--r--src/backend/catalog/dependency.c11
-rw-r--r--src/backend/catalog/heap.c10
-rw-r--r--src/backend/catalog/pg_proc.c26
-rw-r--r--src/backend/commands/explain.c5
-rw-r--r--src/backend/commands/functioncmds.c10
-rw-r--r--src/backend/commands/prepare.c6
-rw-r--r--src/backend/commands/tablecmds.c6
-rw-r--r--src/backend/commands/typecmds.c6
-rw-r--r--src/backend/executor/Makefile4
-rw-r--r--src/backend/executor/execAmi.c9
-rw-r--r--src/backend/executor/execProcnode.c22
-rw-r--r--src/backend/executor/execQual.c78
-rw-r--r--src/backend/executor/nodeWindowAgg.c1854
-rw-r--r--src/backend/nodes/copyfuncs.c92
-rw-r--r--src/backend/nodes/equalfuncs.c54
-rw-r--r--src/backend/nodes/nodeFuncs.c69
-rw-r--r--src/backend/nodes/outfuncs.c88
-rw-r--r--src/backend/nodes/readfuncs.c45
-rw-r--r--src/backend/optimizer/path/allpaths.c19
-rw-r--r--src/backend/optimizer/path/costsize.c41
-rw-r--r--src/backend/optimizer/path/equivclass.c10
-rw-r--r--src/backend/optimizer/plan/createplan.c57
-rw-r--r--src/backend/optimizer/plan/planagg.c10
-rw-r--r--src/backend/optimizer/plan/planmain.c21
-rw-r--r--src/backend/optimizer/plan/planner.c457
-rw-r--r--src/backend/optimizer/plan/setrefs.c8
-rw-r--r--src/backend/optimizer/plan/subselect.c16
-rw-r--r--src/backend/optimizer/prep/prepjointree.c8
-rw-r--r--src/backend/optimizer/prep/prepunion.c3
-rw-r--r--src/backend/optimizer/util/clauses.c124
-rw-r--r--src/backend/optimizer/util/tlist.c18
-rw-r--r--src/backend/parser/analyze.c55
-rw-r--r--src/backend/parser/gram.y158
-rw-r--r--src/backend/parser/keywords.c5
-rw-r--r--src/backend/parser/parse_agg.c198
-rw-r--r--src/backend/parser/parse_clause.c203
-rw-r--r--src/backend/parser/parse_coerce.c3
-rw-r--r--src/backend/parser/parse_expr.c12
-rw-r--r--src/backend/parser/parse_func.c81
-rw-r--r--src/backend/parser/parse_type.c3
-rw-r--r--src/backend/parser/parse_utilcmd.c7
-rw-r--r--src/backend/rewrite/rewriteManip.c92
-rw-r--r--src/backend/utils/adt/Makefile4
-rw-r--r--src/backend/utils/adt/array_userfuncs.c35
-rw-r--r--src/backend/utils/adt/arrayfuncs.c11
-rw-r--r--src/backend/utils/adt/float.c14
-rw-r--r--src/backend/utils/adt/int8.c6
-rw-r--r--src/backend/utils/adt/numeric.c18
-rw-r--r--src/backend/utils/adt/ruleutils.c298
-rw-r--r--src/backend/utils/adt/windowfuncs.c475
-rw-r--r--src/backend/utils/fmgr/fmgr.c75
-rw-r--r--src/backend/utils/sort/tuplestore.c15
-rw-r--r--src/include/catalog/catversion.h4
-rw-r--r--src/include/catalog/pg_proc.h34
-rw-r--r--src/include/executor/nodeWindowAgg.h25
-rw-r--r--src/include/fmgr.h4
-rw-r--r--src/include/nodes/execnodes.h69
-rw-r--r--src/include/nodes/nodes.h9
-rw-r--r--src/include/nodes/parsenodes.h51
-rw-r--r--src/include/nodes/plannodes.h17
-rw-r--r--src/include/nodes/primnodes.h17
-rw-r--r--src/include/nodes/relation.h3
-rw-r--r--src/include/optimizer/clauses.h13
-rw-r--r--src/include/optimizer/cost.h6
-rw-r--r--src/include/optimizer/planmain.h7
-rw-r--r--src/include/optimizer/tlist.h4
-rw-r--r--src/include/parser/parse_agg.h7
-rw-r--r--src/include/parser/parse_clause.h10
-rw-r--r--src/include/parser/parse_func.h5
-rw-r--r--src/include/parser/parse_node.h10
-rw-r--r--src/include/rewrite/rewriteManip.h4
-rw-r--r--src/include/utils/array.h4
-rw-r--r--src/include/utils/builtins.h19
-rw-r--r--src/include/utils/errcodes.h5
-rw-r--r--src/include/utils/tuplestore.h4
-rw-r--r--src/include/windowapi.h64
-rw-r--r--src/pl/plpgsql/src/plerrcodes.h14
-rw-r--r--src/test/regress/expected/window.out672
-rw-r--r--src/test/regress/expected/with.out4
-rw-r--r--src/test/regress/parallel_schedule4
-rw-r--r--src/test/regress/serial_schedule3
-rw-r--r--src/test/regress/sql/window.sql179
92 files changed, 6716 insertions, 317 deletions
diff --git a/contrib/tsearch2/tsearch2.c b/contrib/tsearch2/tsearch2.c
index 7754f574026..bdccba787a9 100644
--- a/contrib/tsearch2/tsearch2.c
+++ b/contrib/tsearch2/tsearch2.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.c,v 1.6 2008/03/25 22:42:42 tgl Exp $
+ * $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.c,v 1.7 2008/12/28 18:53:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -410,7 +410,15 @@ tsa_rewrite_accum(PG_FUNCTION_ARGS)
MemoryContext aggcontext;
MemoryContext oldcontext;
- aggcontext = ((AggState *) fcinfo->context)->aggcontext;
+ if (fcinfo->context && IsA(fcinfo->context, AggState))
+ aggcontext = ((AggState *) fcinfo->context)->aggcontext;
+ else if (fcinfo->context && IsA(fcinfo->context, WindowAggState))
+ aggcontext = ((WindowAggState *) fcinfo->context)->wincontext;
+ else
+ {
+ elog(ERROR, "tsa_rewrite_accum called in non-aggregate context");
+ aggcontext = NULL; /* keep compiler quiet */
+ }
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
{
diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml
index 2ecb2da5c56..ce8ef535dba 100644
--- a/doc/src/sgml/advanced.sgml
+++ b/doc/src/sgml/advanced.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/advanced.sgml,v 1.54 2007/02/01 00:28:16 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/advanced.sgml,v 1.55 2008/12/28 18:53:53 tgl Exp $ -->
<chapter id="tutorial-advanced">
<title>Advanced Features</title>
@@ -240,7 +240,7 @@ COMMIT;
<para>
<productname>PostgreSQL</> actually treats every SQL statement as being
executed within a transaction. If you do not issue a <command>BEGIN</>
- command,
+ command,
then each individual statement has an implicit <command>BEGIN</> and
(if successful) <command>COMMIT</> wrapped around it. A group of
statements surrounded by <command>BEGIN</> and <command>COMMIT</>
@@ -265,7 +265,7 @@ COMMIT;
with <command>ROLLBACK TO</>. All the transaction's database changes
between defining the savepoint and rolling back to it are discarded, but
changes earlier than the savepoint are kept.
- </para>
+ </para>
<para>
After rolling back to a savepoint, it continues to be defined, so you can
@@ -274,7 +274,7 @@ COMMIT;
system can free some resources. Keep in mind that either releasing or
rolling back to a savepoint
will automatically release all savepoints that were defined after it.
- </para>
+ </para>
<para>
All this is happening within the transaction block, so none of it
@@ -282,7 +282,7 @@ COMMIT;
transaction block, the committed actions become visible as a unit
to other sessions, while the rolled-back actions never become visible
at all.
- </para>
+ </para>
<para>
Remembering the bank database, suppose we debit $100.00 from Alice's
@@ -317,6 +317,242 @@ COMMIT;
</sect1>
+ <sect1 id="tutorial-window">
+ <title id="tutorial-window-title">Window Functions</title>
+
+ <indexterm zone="tutorial-window">
+ <primary>window function</primary>
+ </indexterm>
+
+ <para>
+ A <firstterm>window function</> performs a calculation across a set of
+ table rows that are somehow related to the current row. This is comparable
+ to the type of calculation that can be done with an aggregate function.
+ But unlike regular aggregate functions, use of a window function does not
+ cause rows to become grouped into a single output row &mdash; the
+ rows retain their separate identities. Behind the scenes, the window
+ function is able to access more than just the current row of the query
+ result.
+ </para>
+
+ <para>
+ Here is an example that shows how to compare each employee's salary
+ with the average salary in his or her department:
+
+<programlisting>
+SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary;
+</programlisting>
+
+<screen>
+ depname | empno | salary | avg
+-----------+-------+--------+-----------------------
+ develop | 11 | 5200 | 5020.0000000000000000
+ develop | 7 | 4200 | 5020.0000000000000000
+ develop | 9 | 4500 | 5020.0000000000000000
+ develop | 8 | 6000 | 5020.0000000000000000
+ develop | 10 | 5200 | 5020.0000000000000000
+ personnel | 5 | 3500 | 3700.0000000000000000
+ personnel | 2 | 3900 | 3700.0000000000000000
+ sales | 3 | 4800 | 4866.6666666666666667
+ sales | 1 | 5000 | 4866.6666666666666667
+ sales | 4 | 4800 | 4866.6666666666666667
+(10 rows)
+</screen>
+
+ The first three output columns come directly from the table
+ <structname>empsalary</>, and there is one output row for each row in the
+ table. The fourth column represents an average taken across all the table
+ rows that have the same <structfield>depname</> value as the current row.
+ (This actually is the same function as the regular <function>avg</>
+ aggregate function, but the <literal>OVER</> clause causes it to be
+ treated as a window function and computed across an appropriate set of
+ rows.)
+ </para>
+
+ <para>
+ A window function call always contains an <literal>OVER</> clause
+ following the window function's name and argument(s). This is what
+ syntactically distinguishes it from a regular function or aggregate
+ function. The <literal>OVER</> clause determines exactly how the
+ rows of the query are split up for processing by the window function.
+ The <literal>PARTITION BY</> list within <literal>OVER</> specifies
+ dividing the rows into groups, or partitions, that share the same
+ values of the <literal>PARTITION BY</> expression(s). For each row,
+ the window function is computed across the rows that fall into the
+ same partition as the current row.
+ </para>
+
+ <para>
+ Although <function>avg</> will produce the same result no matter
+ what order it processes the partition's rows in, this is not true of all
+ window functions. When needed, you can control that order using
+ <literal>ORDER BY</> within <literal>OVER</>. Here is an example:
+
+<programlisting>
+SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary DESC) FROM empsalary;
+</programlisting>
+
+<screen>
+ depname | empno | salary | rank
+-----------+-------+--------+------
+ develop | 8 | 6000 | 1
+ develop | 10 | 5200 | 2
+ develop | 11 | 5200 | 2
+ develop | 9 | 4500 | 4
+ develop | 7 | 4200 | 5
+ personnel | 2 | 3900 | 1
+ personnel | 5 | 3500 | 2
+ sales | 1 | 5000 | 1
+ sales | 4 | 4800 | 2
+ sales | 3 | 4800 | 2
+(10 rows)
+</screen>
+
+ As shown here, the <function>rank</> function produces a numerical rank
+ within the current row's partition for each distinct <literal>ORDER BY</>
+ value, in the order defined by the <literal>ORDER BY</> clause.
+ <function>rank</> needs no explicit parameter, because its behavior
+ is entirely determined by the <literal>OVER</> clause.
+ </para>
+
+ <para>
+ The rows considered by a window function are those of the <quote>virtual
+ table</> produced by the query's <literal>FROM</> clause as filtered by its
+ <literal>WHERE</>, <literal>GROUP BY</>, and <literal>HAVING</> clauses
+ if any. For example, a row removed because it does not meet the
+ <literal>WHERE</> condition is not seen by any window function.
+ A query can contain multiple window functions that slice up the data
+ in different ways by means of different <literal>OVER</> clauses, but
+ they all act on the same collection of rows defined by this virtual table.
+ </para>
+
+ <para>
+ We already saw that <literal>ORDER BY</> can be omitted if the ordering
+ of rows is not important. It is also possible to omit <literal>PARTITION
+ BY</>, in which case the window function is computed over all rows of the
+ virtual table; that is, there is one partition containing all the rows.
+ </para>
+
+ <para>
+ There is another important concept associated with window functions:
+ for each row, there is a set of rows within its partition called its
+ <firstterm>window frame</>. When <literal>ORDER BY</> is omitted the
+ frame is always the same as the partition. If <literal>ORDER BY</> is
+ supplied, the frame consists of all rows from the start of the partition
+ up to the current row, plus any following rows that are equal to the
+ current row according to the <literal>ORDER BY</> clause.
+ <footnote>
+ <para>
+ The SQL standard includes options to define the window frame in
+ other ways, but this definition is the only one currently supported
+ by <productname>PostgreSQL</productname>.
+ </para>
+ </footnote>
+ Many window functions act only on the rows of the window frame, rather
+ than of the whole partition. Here is an example using <function>sum</>:
+ </para>
+
+<programlisting>
+SELECT salary, sum(salary) OVER () FROM empsalary;
+</programlisting>
+
+<screen>
+ salary | sum
+--------+-------
+ 5200 | 47100
+ 5000 | 47100
+ 3500 | 47100
+ 4800 | 47100
+ 3900 | 47100
+ 4200 | 47100
+ 4500 | 47100
+ 4800 | 47100
+ 6000 | 47100
+ 5200 | 47100
+(10 rows)
+</screen>
+
+ <para>
+ Above, since there is no <literal>ORDER BY</> in the <literal>OVER</>
+ clause, the window frame is the same as the partition, which for lack of
+ <literal>PARTITION BY</> is the whole table; in other words each sum is
+ taken over the whole table and so we get the same result for each output
+ row. But if we add an <literal>ORDER BY</> clause, we get very different
+ results:
+ </para>
+
+<programlisting>
+SELECT salary, sum(salary) OVER (ORDER BY salary) FROM empsalary;
+</programlisting>
+
+<screen>
+ salary | sum
+--------+-------
+ 3500 | 3500
+ 3900 | 7400
+ 4200 | 11600
+ 4500 | 16100
+ 4800 | 25700
+ 4800 | 25700
+ 5000 | 30700
+ 5200 | 41100
+ 5200 | 41100
+ 6000 | 47100
+(10 rows)
+</screen>
+
+ <para>
+ Here the sum is taken from the first (lowest) salary up through the
+ current one, including any duplicates of the current one (notice the
+ results for the duplicated salaries).
+ </para>
+
+ <para>
+ Window functions are permitted only in the <literal>SELECT</literal> list
+ and the <literal>ORDER BY</> clause of the query. They are forbidden
+ elsewhere, such as in <literal>GROUP BY</>, <literal>HAVING</>
+ and <literal>WHERE</literal> clauses. This is because they logically
+ execute after the processing of those clauses. Also, window functions
+ execute after regular aggregate functions. This means it is valid to
+ include an aggregate function call in the arguments of a window function,
+ but not vice versa.
+ </para>
+
+ <para>
+ If there is a need to filter or group rows after the window calculations
+ are performed, you can use a sub-select. For example:
+
+<programlisting>
+SELECT depname, empno, salary, enroll_date
+FROM
+ (SELECT depname, empno, salary, enroll_date,
+ rank() OVER (PARTITION BY depname ORDER BY salary DESC, empno) AS pos
+ FROM empsalary
+ ) AS ss
+WHERE pos < 3;
+</programlisting>
+
+ The above query only shows the rows from the inner query having
+ <literal>rank</> less than <literal>3</>.
+ </para>
+
+ <para>
+ When a query involves multiple window functions, it is possible to write
+ out each one with a separate <literal>OVER</> clause, but this is
+ duplicative and error-prone if the same windowing behavior is wanted
+ for several functions. Instead, each windowing behavior can be named
+ in a <literal>WINDOW</> clause and then referenced in <literal>OVER</>.
+ For example:
+
+<programlisting>
+SELECT sum(salary) OVER w, avg(salary) OVER w
+ FROM empsalary
+ WINDOW w AS (PARTITION BY depname ORDER BY salary DESC);
+</programlisting>
+ </para>
+ </sect1>
+
+
<sect1 id="tutorial-inheritance">
<title>Inheritance</title>
@@ -391,7 +627,7 @@ CREATE TABLE capitals (
<para>
For example, the following query finds the names of all cities,
- including state capitals, that are located at an altitude
+ including state capitals, that are located at an altitude
over 500 feet:
<programlisting>
@@ -455,7 +691,7 @@ SELECT name, altitude
<sect1 id="tutorial-conclusion">
<title>Conclusion</title>
-
+
<para>
<productname>PostgreSQL</productname> has many features not
touched upon in this tutorial introduction, which has been
diff --git a/doc/src/sgml/errcodes.sgml b/doc/src/sgml/errcodes.sgml
index 574e7f5fbad..e792a74e286 100644
--- a/doc/src/sgml/errcodes.sgml
+++ b/doc/src/sgml/errcodes.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.25 2008/10/04 21:56:52 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.26 2008/12/28 18:53:53 tgl Exp $ -->
<appendix id="errcodes-appendix">
<title><productname>PostgreSQL</productname> Error Codes</title>
@@ -379,6 +379,18 @@
</row>
<row>
+<entry><literal>22014</literal></entry>
+<entry>INVALID ARGUMENT FOR NTILE FUNCTION</entry>
+<entry>invalid_argument_for_ntile_function</entry>
+</row>
+
+<row>
+<entry><literal>22016</literal></entry>
+<entry>INVALID ARGUMENT FOR NTH_VALUE FUNCTION</entry>
+<entry>invalid_argument_for_nth_value_function</entry>
+</row>
+
+<row>
<entry><literal>2201F</literal></entry>
<entry>INVALID ARGUMENT FOR POWER FUNCTION</entry>
<entry>invalid_argument_for_power_function</entry>
@@ -991,6 +1003,12 @@
</row>
<row>
+<entry><literal>42P20</literal></entry>
+<entry>WINDOWING ERROR</entry>
+<entry>windowing_error</entry>
+</row>
+
+<row>
<entry><literal>42P19</literal></entry>
<entry>INVALID RECURSION</entry>
<entry>invalid_recursion</entry>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index de50c0e1d56..205b71e9c9e 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.463 2008/12/19 16:25:16 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.464 2008/12/28 18:53:53 tgl Exp $ -->
<chapter id="functions">
<title>Functions and Operators</title>
@@ -10149,6 +10149,278 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab;
</sect1>
+ <sect1 id="functions-window">
+ <title>Window Functions</title>
+
+ <indexterm zone="functions-window">
+ <primary>window function</primary>
+ <secondary>built-in</secondary>
+ </indexterm>
+
+ <para>
+ <firstterm>Window functions</firstterm> provide the ability to perform
+ calculations across sets of rows that are related to the current query
+ row. For information about this feature see
+ <xref linkend="tutorial-window"> and
+ <xref linkend="syntax-window-functions">.
+ </para>
+
+ <para>
+ The built-in window functions are listed in
+ <xref linkend="functions-window-table">. Note that these functions
+ <emphasis>must</> be invoked using window function syntax; that is, an
+ <literal>OVER</> clause is required.
+ </para>
+
+ <para>
+ In addition to these functions, any built-in or user-defined aggregate
+ function can be used as a window function (see
+ <xref linkend="functions-aggregate"> for a list of the built-in aggregates).
+ Aggregate functions act as window functions only when an <literal>OVER</>
+ clause follows the call; otherwise they act as regular aggregates.
+ </para>
+
+ <table id="functions-window-table">
+ <title>General-Purpose Window Functions</title>
+
+ <tgroup cols="3">
+ <thead>
+ <row>
+ <entry>Function</entry>
+ <entry>Return Type</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry>
+ <indexterm>
+ <primary>row_number</primary>
+ </indexterm>
+ <function>row_number()</function>
+ </entry>
+ <entry>
+ <type>bigint</type>
+ </entry>
+ <entry>number of the current row within its partition, counting from 1</entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>rank</primary>
+ </indexterm>
+ <function>rank()</function>
+ </entry>
+ <entry>
+ <type>bigint</type>
+ </entry>
+ <entry>rank of the current row with gaps; same as <function>row_number</> of its first peer</entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>dense_rank</primary>
+ </indexterm>
+ <function>dense_rank()</function>
+ </entry>
+ <entry>
+ <type>bigint</type>
+ </entry>
+ <entry>rank of the current row without gaps; this function counts peer groups</entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>percent_rank</primary>
+ </indexterm>
+ <function>percent_rank()</function>
+ </entry>
+ <entry>
+ <type>double precision</type>
+ </entry>
+ <entry>relative rank of the current row: (<function>rank</> - 1) / (total rows - 1)</entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>cume_dist</primary>
+ </indexterm>
+ <function>cume_dist()</function>
+ </entry>
+ <entry>
+ <type>double precision</type>
+ </entry>
+ <entry>relative rank of the current row: (number of rows preceding or peer with current row) / (total rows)</entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>ntile</primary>
+ </indexterm>
+ <function>ntile(<replaceable class="parameter">num_buckets</replaceable> <type>integer</>)</function>
+ </entry>
+ <entry>
+ <type>integer</type>
+ </entry>
+ <entry>integer ranging from 1 to the argument value, dividing the
+ partition as equally as possible</entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>lag</primary>
+ </indexterm>
+ <function>
+ lag(<replaceable class="parameter">value</replaceable> <type>any</>
+ [, <replaceable class="parameter">offset</replaceable> <type>integer</>
+ [, <replaceable class="parameter">default</replaceable> <type>any</> ]])
+ </function>
+ </entry>
+ <entry>
+ <type>same type as <replaceable class="parameter">value</replaceable></type>
+ </entry>
+ <entry>
+ returns <replaceable class="parameter">value</replaceable> evaluated at
+ the row that is <replaceable class="parameter">offset</replaceable>
+ rows before the current row within the partition; if there is no such
+ row, instead return <replaceable class="parameter">default</replaceable>.
+ Both <replaceable class="parameter">offset</replaceable> and
+ <replaceable class="parameter">default</replaceable> are evaluated
+ with respect to the current row. If omitted,
+ <replaceable class="parameter">offset</replaceable> defaults to 1 and
+ <replaceable class="parameter">default</replaceable> to null
+ </entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>lead</primary>
+ </indexterm>
+ <function>
+ lead(<replaceable class="parameter">value</replaceable> <type>any</>
+ [, <replaceable class="parameter">offset</replaceable> <type>integer</>
+ [, <replaceable class="parameter">default</replaceable> <type>any</> ]])
+ </function>
+ </entry>
+ <entry>
+ <type>same type as <replaceable class="parameter">value</replaceable></type>
+ </entry>
+ <entry>
+ returns <replaceable class="parameter">value</replaceable> evaluated at
+ the row that is <replaceable class="parameter">offset</replaceable>
+ rows after the current row within the partition; if there is no such
+ row, instead return <replaceable class="parameter">default</replaceable>.
+ Both <replaceable class="parameter">offset</replaceable> and
+ <replaceable class="parameter">default</replaceable> are evaluated
+ with respect to the current row. If omitted,
+ <replaceable class="parameter">offset</replaceable> defaults to 1 and
+ <replaceable class="parameter">default</replaceable> to null
+ </entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>first_value</primary>
+ </indexterm>
+ <function>first_value(<replaceable class="parameter">value</replaceable> <type>any</>)</function>
+ </entry>
+ <entry>
+ <type>same type as <replaceable class="parameter">value</replaceable></type>
+ </entry>
+ <entry>
+ returns <replaceable class="parameter">value</replaceable> evaluated
+ at the row that is the first row of the window frame
+ </entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>last_value</primary>
+ </indexterm>
+ <function>last_value(<replaceable class="parameter">value</replaceable> <type>any</>)</function>
+ </entry>
+ <entry>
+ <type>same type as <replaceable class="parameter">value</replaceable></type>
+ </entry>
+ <entry>
+ returns <replaceable class="parameter">value</replaceable> evaluated
+ at the row that is the last row of the window frame
+ </entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
+ <primary>nth_value</primary>
+ </indexterm>
+ <function>
+ nth_value(<replaceable class="parameter">value</replaceable> <type>any</>, <replaceable class="parameter">nth</replaceable> <type>integer</>)
+ </function>
+ </entry>
+ <entry>
+ <type>same type as <replaceable class="parameter">value</replaceable></type>
+ </entry>
+ <entry>
+ returns <replaceable class="parameter">value</replaceable> evaluated
+ at the row that is the <replaceable class="parameter">nth</replaceable>
+ row of the window frame (counting from 1); null if no such row
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ All of the functions listed in
+ <xref linkend="functions-window-table"> depend on the sort ordering
+ specified by the <literal>ORDER BY</> clause of the associated window
+ definition. Rows that are not distinct in the <literal>ORDER BY</>
+ ordering are said to be <firstterm>peers</>; the four ranking functions
+ are defined so that they give the same answer for any two peer rows.
+ </para>
+
+ <para>
+ Note that <function>first_value</>, <function>last_value</>, and
+ <function>nth_value</> consider only the rows within the <quote>window
+ frame</>, that is, the rows from the start of the partition through the
+ last peer of the current row. This is particularly likely to give
+ unintuitive results for <function>last_value</>.
+ </para>
+
+ <para>
+ When an aggregate function is used as a window function, it aggregates
+ over the rows within the current row's window frame. To obtain
+ aggregation over the whole partition, be sure to omit <literal>ORDER BY</>
+ from the window definition. An aggregate used with <literal>ORDER BY</>
+ produces a <quote>running sum</> type of behavior, which may or may not
+ be what's wanted.
+ </para>
+
+ <note>
+ <para>
+ The SQL standard defines a <literal>RESPECT NULLS</> or
+ <literal>IGNORE NULLS</> option for <function>lead</>, <function>lag</>,
+ <function>first_value</>, <function>last_value</>, and
+ <function>nth_value</>. This is not implemented in
+ <productname>PostgreSQL</productname>: the behavior is always the
+ same as the standard's default, namely <literal>RESPECT NULLS</>.
+ Likewise, the standard's <literal>FROM FIRST</> or <literal>FROM LAST</>
+ option for <function>nth_value</> is not implemented: only the
+ default <literal>FROM FIRST</> behavior is supported.
+ </para>
+ </note>
+
+ </sect1>
<sect1 id="functions-subquery">
<title>Subquery Expressions</title>
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index 283dd0a73dd..f1db64b273a 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.50 2008/10/14 00:41:34 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.51 2008/12/28 18:53:54 tgl Exp $ -->
<chapter id="queries">
<title>Queries</title>
@@ -949,6 +949,57 @@ SELECT product_id, p.name, (sum(s.units) * (p.price - p.cost)) AS profit
5000. Note that the aggregate expressions do not necessarily need
to be the same in all parts of the query.
</para>
+
+ <para>
+ If a query contains aggregate function calls, but no <literal>GROUP BY</>
+ clause, grouping still occurs: the result is a single group row (or
+ perhaps no rows at all, if the single row is then eliminated by
+ <literal>HAVING</>).
+ The same is true if it contains a <literal>HAVING</> clause, even
+ without any aggregate function calls or <literal>GROUP BY</> clause.
+ </para>
+ </sect2>
+
+ <sect2 id="queries-window">
+ <title>Window Function Processing</>
+
+ <indexterm zone="queries-window">
+ <primary>window function</primary>
+ <secondary>order of execution</>
+ </indexterm>
+
+ <para>
+ If the query contains any window functions (see
+ <xref linkend="tutorial-window"> and
+ <xref linkend="syntax-window-functions">), these functions are evaluated
+ after any grouping, aggregation, and <literal>HAVING</> filtering is
+ performed. That is, if the query uses any aggregates, <literal>GROUP
+ BY</>, or <literal>HAVING</>, then the rows seen by the window functions
+ are the group rows instead of the original table rows from
+ <literal>FROM</>/<literal>WHERE</>.
+ </para>
+
+ <para>
+ When multiple window functions are used, all the window functions having
+ syntactically equivalent <literal>PARTITION BY</> and <literal>ORDER BY</>
+ clauses in their window definitions are guaranteed to be evaluated in a
+ single pass over the data. Therefore they will see the same sort ordering,
+ even if the <literal>ORDER BY</> does not uniquely determine an ordering.
+ However, no guarantees are made about the evaluation of functions having
+ different <literal>PARTITION BY</> or <literal>ORDER BY</> specifications.
+ (In such cases a sort step is typically required between the passes of
+ window function evaluations, and the sort is not guaranteed to preserve
+ ordering of rows that its <literal>ORDER BY</> sees as equivalent.)
+ </para>
+
+ <para>
+ Currently, use of window functions always forces sorting, and so the
+ query output will be ordered according to one or another of the window
+ functions' <literal>PARTITION BY</>/<literal>ORDER BY</> clauses.
+ It is not recommended to rely on this, however. Use an explicit
+ top-level <literal>ORDER BY</> clause if you want to be sure the
+ results are sorted in a particular way.
+ </para>
</sect2>
</sect1>
diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml
index 442f9ad0068..ffc641b03ad 100644
--- a/doc/src/sgml/query.sgml
+++ b/doc/src/sgml/query.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/query.sgml,v 1.50 2007/02/01 00:28:17 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/query.sgml,v 1.51 2008/12/28 18:53:54 tgl Exp $ -->
<chapter id="tutorial-sql">
<title>The <acronym>SQL</acronym> Language</title>
@@ -621,7 +621,7 @@ SELECT W1.city, W1.temp_lo AS low, W1.temp_hi AS high,
San Francisco | 43 | 57 | San Francisco | 46 | 50
Hayward | 37 | 54 | San Francisco | 46 | 50
(2 rows)
-</programlisting>
+</programlisting>
Here we have relabeled the weather table as <literal>W1</> and
<literal>W2</> to be able to distinguish the left and right side
@@ -651,9 +651,9 @@ SELECT *
<indexterm><primary>min</primary></indexterm>
<indexterm><primary>sum</primary></indexterm>
- Like most other relational database products,
+ Like most other relational database products,
<productname>PostgreSQL</productname> supports
- aggregate functions.
+ <firstterm>aggregate functions</>.
An aggregate function computes a single result from multiple input rows.
For example, there are aggregates to compute the
<function>count</function>, <function>sum</function>,
@@ -815,7 +815,7 @@ SELECT city, max(temp_lo)
<para>
You can update existing rows using the
- <command>UPDATE</command> command.
+ <command>UPDATE</command> command.
Suppose you discover the temperature readings are
all off by 2 degrees after November 28. You can correct the
data as follows:
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index 814a6708f00..c9a386f24f3 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.112 2008/12/01 09:38:08 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.113 2008/12/28 18:53:54 tgl Exp $
PostgreSQL documentation
-->
@@ -39,6 +39,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
[ WHERE <replaceable class="parameter">condition</replaceable> ]
[ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...] ]
[ HAVING <replaceable class="parameter">condition</replaceable> [, ...] ]
+ [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ]
[ { UNION | INTERSECT | EXCEPT } [ ALL ] <replaceable class="parameter">select</replaceable> ]
[ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ]
[ LIMIT { <replaceable class="parameter">count</replaceable> | ALL } ]
@@ -566,6 +567,67 @@ HAVING <replaceable class="parameter">condition</replaceable>
</para>
</refsect2>
+ <refsect2 id="SQL-WINDOW">
+ <title id="sql-window-title"><literal>WINDOW</literal> Clause</title>
+
+ <para>
+ The optional <literal>WINDOW</literal> clause has the general form
+<synopsis>
+WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...]
+</synopsis>
+ where <replaceable class="parameter">window_name</replaceable> is
+ a name that can be referenced from subsequent window definitions or
+ <literal>OVER</> clauses, and
+ <replaceable class="parameter">window_definition</replaceable> is
+<synopsis>
+[ <replaceable class="parameter">existing_window_name</replaceable> ]
+[ PARTITION BY <replaceable class="parameter">expression</replaceable> [, ...] ]
+[ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ]
+</synopsis>
+ The elements of the <literal>PARTITION BY</> list are interpreted in
+ the same fashion as elements of a
+ <xref linkend="sql-groupby" endterm="sql-groupby-title">, and
+ the elements of the <literal>ORDER BY</> list are interpreted in the
+ same fashion as elements of an
+ <xref linkend="sql-orderby" endterm="sql-orderby-title">.
+ The only difference is that these expressions can contain aggregate
+ function calls, which are not allowed in a regular <literal>GROUP BY</>
+ clause. They are allowed here because windowing occurs after grouping
+ and aggregation.
+ </para>
+
+ <para>
+ If an <replaceable class="parameter">existing_window_name</replaceable>
+ is specified it must refer to an earlier entry in the <literal>WINDOW</>
+ list; the new window copies its partitioning clause from that entry,
+ as well as its ordering clause if any. In this case the new window cannot
+ specify its own <literal>PARTITION BY</> clause, and it can specify
+ <literal>ORDER BY</> only if the copied window does not have one.
+ </para>
+
+ <para>
+ The purpose of a <literal>WINDOW</literal> clause is to specify the
+ behavior of <firstterm>window functions</> appearing in the query's
+ <xref linkend="sql-select-list" endterm="sql-select-list-title"> or
+ <xref linkend="sql-orderby" endterm="sql-orderby-title">. These functions
+ can reference the <literal>WINDOW</literal> clause entries by name
+ in their <literal>OVER</> clauses. A <literal>WINDOW</literal> clause
+ entry does not have to be referenced anywhere, however; if it is not
+ used in the query it is simply ignored. It is possible to use window
+ functions without any <literal>WINDOW</literal> clause at all, since
+ a window function call can specify its window definition directly in
+ its <literal>OVER</> clause. However, the <literal>WINDOW</literal>
+ clause saves typing when the same window definition is needed for more
+ than one window function.
+ </para>
+
+ <para>
+ Window functions are described in detail in
+ <xref linkend="tutorial-window"> and
+ <xref linkend="syntax-window-functions">.
+ </para>
+ </refsect2>
+
<refsect2 id="sql-select-list">
<title id="sql-select-list-title"><command>SELECT</command> List</title>
@@ -922,7 +984,7 @@ FETCH { FIRST | NEXT } [ <replaceable class="parameter">count</replaceable> ] {
constants for the offset or fetch count, parentheses will be
necessary in most cases. If the fetch count is omitted, it
defaults to 1.
- </para>
+ </para>
<para>
When using <literal>LIMIT</>, it is a good idea to use an
@@ -1388,6 +1450,19 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
</refsect2>
<refsect2>
+ <title><literal>WINDOW</literal> Clause Restrictions</title>
+
+ <para>
+ The SQL standard provides for an optional <quote>framing clause</>,
+ introduced by the key word <literal>RANGE</> or <literal>ROWS</>,
+ in window definitions. <productname>PostgreSQL</productname> does
+ not yet implement framing clauses, and always follows the
+ default framing behavior, which is equivalent to the framing clause
+ <literal>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</>.
+ </para>
+ </refsect2>
+
+ <refsect2>
<title><literal>LIMIT</literal> and <literal>OFFSET</literal></title>
<para>
diff --git a/doc/src/sgml/ref/select_into.sgml b/doc/src/sgml/ref/select_into.sgml
index 038ae1b333c..057bfb2a9d7 100644
--- a/doc/src/sgml/ref/select_into.sgml
+++ b/doc/src/sgml/ref/select_into.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/select_into.sgml,v 1.43 2008/11/14 10:22:47 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/select_into.sgml,v 1.44 2008/12/28 18:53:54 tgl Exp $
PostgreSQL documentation
-->
@@ -29,6 +29,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
[ WHERE <replaceable class="parameter">condition</replaceable> ]
[ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...] ]
[ HAVING <replaceable class="parameter">condition</replaceable> [, ...] ]
+ [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ]
[ { UNION | INTERSECT | EXCEPT } [ ALL ] <replaceable class="parameter">select</replaceable> ]
[ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ]
[ LIMIT { <replaceable class="parameter">count</replaceable> | ALL } ]
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index cca44794340..9d0833c2035 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.126 2008/12/09 20:52:03 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.127 2008/12/28 18:53:54 tgl Exp $ -->
<chapter id="sql-syntax">
<title>SQL Syntax</title>
@@ -1203,6 +1203,12 @@ SELECT 3 OPERATOR(pg_catalog.+) 4;
<listitem>
<para>
+ A window function call.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
A type cast.
</para>
</listitem>
@@ -1445,7 +1451,7 @@ $1.somecolumn
enclosed in parentheses:
<synopsis>
-<replaceable>function</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional> )
+<replaceable>function_name</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional> )
</synopsis>
</para>
@@ -1480,7 +1486,7 @@ sqrt(2)
<synopsis>
<replaceable>aggregate_name</replaceable> (<replaceable>expression</replaceable> [ , ... ] )
<replaceable>aggregate_name</replaceable> (ALL <replaceable>expression</replaceable> [ , ... ] )
-<replaceable>aggregate_name</replaceable> (DISTINCT <replaceable>expression</replaceable> [ , ... ] )
+<replaceable>aggregate_name</replaceable> (DISTINCT <replaceable>expression</replaceable>)
<replaceable>aggregate_name</replaceable> ( * )
</synopsis>
@@ -1488,7 +1494,7 @@ sqrt(2)
defined aggregate (possibly qualified with a schema name), and
<replaceable>expression</replaceable> is
any value expression that does not itself contain an aggregate
- expression.
+ expression or a window function call.
</para>
<para>
@@ -1550,6 +1556,89 @@ sqrt(2)
</note>
</sect2>
+ <sect2 id="syntax-window-functions">
+ <title>Window Function Calls</title>
+
+ <indexterm zone="syntax-window-functions">
+ <primary>window function</primary>
+ <secondary>invocation</secondary>
+ </indexterm>
+
+ <indexterm zone="syntax-window-functions">
+ <primary>OVER clause</primary>
+ </indexterm>
+
+ <para>
+ A <firstterm>window function call</firstterm> represents the application
+ of an aggregate-like function over some portion of the rows selected
+ by a query. Unlike regular aggregate function calls, this is not tied
+ to grouping of the selected rows into a single output row &mdash; each
+ row remains separate in the query output. However, the window function
+ is able to scan all the rows that would be part of the current row's
+ group according to the grouping specification (<literal>PARTITION BY</>
+ list) of the window function call.
+ The syntax of a window function call is one of the following:
+
+<synopsis>
+<replaceable>function_name</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional>) OVER ( <replaceable class="parameter">window_definition</replaceable> )
+<replaceable>function_name</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional>) OVER <replaceable>window_name</replaceable>
+<replaceable>function_name</replaceable> ( * ) OVER ( <replaceable class="parameter">window_definition</replaceable> )
+<replaceable>function_name</replaceable> ( * ) OVER <replaceable>window_name</replaceable>
+</synopsis>
+ where <replaceable class="parameter">window_definition</replaceable>
+ has the syntax
+<synopsis>
+[ <replaceable class="parameter">window_name</replaceable> ]
+[ PARTITION BY <replaceable class="parameter">expression</replaceable> [, ...] ]
+[ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ]
+</synopsis>
+
+ Here, <replaceable>expression</replaceable> represents any value
+ expression that does not itself contain window function calls.
+ The <literal>PARTITION BY</> and <literal>ORDER BY</> lists have
+ essentially the same syntax and semantics as <literal>GROUP BY</>
+ and <literal>ORDER BY</> clauses of the whole query.
+ <replaceable>window_name</replaceable> is a reference to a named window
+ specification defined in the query's <literal>WINDOW</literal> clause.
+ Named window specifications are usually referenced with just
+ <literal>OVER</> <replaceable>window_name</replaceable>, but it is
+ also possible to write a window name inside the parentheses and then
+ optionally supply an ordering clause with <literal>ORDER BY</>.
+ This latter syntax follows the same rules as modifying an existing
+ window name within the <literal>WINDOW</literal> clause; see the
+ <xref linkend="sql-select" endterm="sql-select-title"> reference
+ page for details.
+ </para>
+
+ <para>
+ The built-in window functions are described in <xref
+ linkend="functions-window-table">. Also, any built-in or
+ user-defined aggregate function can be used as a window function.
+ Currently, there is no provision for user-defined window functions
+ other than aggregates.
+ </para>
+
+ <para>
+ The syntaxes using <literal>*</> are used for calling parameter-less
+ aggregate functions as window functions, for example
+ <literal>count(*) OVER (PARTITION BY x ORDER BY y)</>.
+ <literal>*</> is customarily not used for non-aggregate window functions.
+ Aggregate window functions, unlike normal aggregate functions, do not
+ allow <literal>DISTINCT</> to be used within the function argument list.
+ </para>
+
+ <para>
+ Window function calls are permitted only in the <literal>SELECT</literal>
+ list and the <literal>ORDER BY</> clause of the query.
+ </para>
+
+ <para>
+ More information about window functions can be found in
+ <xref linkend="tutorial-window"> and
+ <xref linkend="queries-window">.
+ </para>
+ </sect2>
+
<sect2 id="sql-syntax-type-casts">
<title>Type Casts</title>
diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml
index 3c4ce19258e..b223888f9ed 100644
--- a/doc/src/sgml/xaggr.sgml
+++ b/doc/src/sgml/xaggr.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.36 2008/11/20 21:10:44 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.37 2008/12/28 18:53:54 tgl Exp $ -->
<sect1 id="xaggr">
<title>User-Defined Aggregates</title>
@@ -167,10 +167,13 @@ SELECT attrelid::regclass, array_accum(atttypid::regtype)
<para>
A function written in C can detect that it is being called as an
aggregate transition or final function by seeing if it was passed
- an <structname>AggState</> node as the function call <quote>context</>,
+ an <structname>AggState</> or <structname>WindowAggState</> node
+ as the function call <quote>context</>,
for example by:
<programlisting>
- if (fcinfo->context &amp;&amp; IsA(fcinfo->context, AggState))
+ if (fcinfo-&gt;context &amp;&amp;
+ (IsA(fcinfo-&gt;context, AggState) ||
+ IsA(fcinfo-&gt;context, WindowAggState)))
</programlisting>
One reason for checking this is that when it is true, the first input
must be a temporary transition value and can therefore safely be modified
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 2cbc19f5a06..b78bebf506f 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.83 2008/12/19 16:25:17 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.84 2008/12/28 18:53:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1480,6 +1480,14 @@ find_expr_references_walker(Node *node,
context->addrs);
/* fall through to examine arguments */
}
+ else if (IsA(node, WindowFunc))
+ {
+ WindowFunc *wfunc = (WindowFunc *) node;
+
+ add_object_address(OCLASS_PROC, wfunc->winfnoid, 0,
+ context->addrs);
+ /* fall through to examine arguments */
+ }
else if (IsA(node, SubPlan))
{
/* Extra work needed here if we ever need this case */
@@ -1602,6 +1610,7 @@ find_expr_references_walker(Node *node,
/* query_tree_walker ignores ORDER BY etc, but we need those opers */
find_expr_references_walker((Node *) query->sortClause, context);
find_expr_references_walker((Node *) query->groupClause, context);
+ find_expr_references_walker((Node *) query->windowClause, context);
find_expr_references_walker((Node *) query->distinctClause, context);
/* Examine substructure of query */
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index a711143f86c..af200afaac8 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.347 2008/11/29 00:13:21 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.348 2008/12/28 18:53:54 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -2138,6 +2138,10 @@ cookDefault(ParseState *pstate,
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("cannot use aggregate function in default expression")));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in default expression")));
/*
* Coerce the expression to the correct type and typmod, if given. This
@@ -2211,6 +2215,10 @@ cookConstraint(ParseState *pstate,
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("cannot use aggregate function in check constraint")));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in check constraint")));
return expr;
}
diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
index 345df0c6a95..8ff22c23c9e 100644
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/pg_proc.c,v 1.157 2008/12/19 18:25:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/pg_proc.c,v 1.158 2008/12/28 18:53:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -80,6 +80,8 @@ ProcedureCreate(const char *procedureName,
float4 prorows)
{
Oid retval;
+ /* XXX we don't currently have a way to make new window functions */
+ bool isWindowFunc = false;
int parameterCount;
int allParamCount;
Oid *allParams;
@@ -292,8 +294,7 @@ ProcedureCreate(const char *procedureName,
values[Anum_pg_proc_prorows - 1] = Float4GetDatum(prorows);
values[Anum_pg_proc_provariadic - 1] = ObjectIdGetDatum(variadicType);
values[Anum_pg_proc_proisagg - 1] = BoolGetDatum(isAgg);
- /* XXX we don't currently have a way to make new window functions */
- values[Anum_pg_proc_proiswindow - 1] = BoolGetDatum(false);
+ values[Anum_pg_proc_proiswindow - 1] = BoolGetDatum(isWindowFunc);
values[Anum_pg_proc_prosecdef - 1] = BoolGetDatum(security_definer);
values[Anum_pg_proc_proisstrict - 1] = BoolGetDatum(isStrict);
values[Anum_pg_proc_proretset - 1] = BoolGetDatum(returnsSet);
@@ -440,18 +441,31 @@ ProcedureCreate(const char *procedureName,
}
}
- /* Can't change aggregate status, either */
+ /* Can't change aggregate or window-function status, either */
if (oldproc->proisagg != isAgg)
{
if (oldproc->proisagg)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("function \"%s\" is an aggregate",
+ errmsg("function \"%s\" is an aggregate function",
+ procedureName)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("function \"%s\" is not an aggregate function",
+ procedureName)));
+ }
+ if (oldproc->proiswindow != isWindowFunc)
+ {
+ if (oldproc->proiswindow)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("function \"%s\" is a window function",
procedureName)));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("function \"%s\" is not an aggregate",
+ errmsg("function \"%s\" is not a window function",
procedureName)));
}
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index e5f1b313076..d829cb19235 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994-5, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.181 2008/11/19 01:10:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.182 2008/12/28 18:53:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -590,6 +590,9 @@ explain_outNode(StringInfo str,
break;
}
break;
+ case T_WindowAgg:
+ pname = "WindowAgg";
+ break;
case T_Unique:
pname = "Unique";
break;
diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 0a3de53e1e5..8963f981178 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.103 2008/12/18 18:20:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.104 2008/12/28 18:53:55 tgl Exp $
*
* DESCRIPTION
* These routines take the parse tree and pick out the
@@ -321,6 +321,10 @@ examine_parameter_list(List *parameters, Oid languageOid,
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("cannot use aggregate function in parameter default value")));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in parameter default value")));
*parameterDefaults = lappend(*parameterDefaults, def);
have_defaults = true;
@@ -1538,6 +1542,10 @@ CreateCast(CreateCastStmt *stmt)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("cast function must not be an aggregate function")));
+ if (procstruct->proiswindow)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cast function must not be a window function")));
if (procstruct->proretset)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 920b328bb30..f1f87abe227 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -10,7 +10,7 @@
* Copyright (c) 2002-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.93 2008/12/13 02:29:21 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.94 2008/12/28 18:53:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -347,6 +347,10 @@ EvaluateParams(PreparedStatement *pstmt, List *params,
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("cannot use aggregate function in EXECUTE parameter")));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in EXECUTE parameter")));
given_type_id = exprType(expr);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 9f34c735028..173b24dab82 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.274 2008/12/15 21:35:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.275 2008/12/28 18:53:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -5506,6 +5506,10 @@ ATPrepAlterColumnType(List **wqueue,
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("cannot use aggregate function in transform expression")));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in transform expression")));
}
else
{
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 38416fa67f2..f99ed813954 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/typecmds.c,v 1.127 2008/11/30 19:01:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/typecmds.c,v 1.128 2008/12/28 18:53:55 tgl Exp $
*
* DESCRIPTION
* The "DefineFoo" routines take the parse tree and pick out the
@@ -2255,6 +2255,10 @@ domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid,
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("cannot use aggregate function in check constraint")));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in check constraint")));
/*
* Convert to string form for storage.
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index b4a0492751c..63c86107782 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -4,7 +4,7 @@
# Makefile for executor
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/executor/Makefile,v 1.28 2008/10/04 21:56:52 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/executor/Makefile,v 1.29 2008/12/28 18:53:55 tgl Exp $
#
#-------------------------------------------------------------------------
@@ -22,6 +22,6 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \
nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \
nodeLimit.o nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \
- tstoreReceiver.o spi.o
+ nodeWindowAgg.o tstoreReceiver.o spi.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index ef4f6853899..d406a0cec9a 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/executor/execAmi.c,v 1.101 2008/10/28 17:13:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/execAmi.c,v 1.102 2008/12/28 18:53:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,6 +20,7 @@
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeBitmapIndexscan.h"
#include "executor/nodeBitmapOr.h"
+#include "executor/nodeCtescan.h"
#include "executor/nodeFunctionscan.h"
#include "executor/nodeGroup.h"
#include "executor/nodeGroup.h"
@@ -40,7 +41,7 @@
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
-#include "executor/nodeCtescan.h"
+#include "executor/nodeWindowAgg.h"
#include "executor/nodeWorktablescan.h"
#include "nodes/nodeFuncs.h"
#include "utils/syscache.h"
@@ -210,6 +211,10 @@ ExecReScan(PlanState *node, ExprContext *exprCtxt)
ExecReScanAgg((AggState *) node, exprCtxt);
break;
+ case T_WindowAggState:
+ ExecReScanWindowAgg((WindowAggState *) node, exprCtxt);
+ break;
+
case T_UniqueState:
ExecReScanUnique((UniqueState *) node, exprCtxt);
break;
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index e689ec00f8c..cd610c895c1 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -12,7 +12,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.63 2008/10/04 21:56:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.64 2008/12/28 18:53:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -85,6 +85,7 @@
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeBitmapIndexscan.h"
#include "executor/nodeBitmapOr.h"
+#include "executor/nodeCtescan.h"
#include "executor/nodeFunctionscan.h"
#include "executor/nodeGroup.h"
#include "executor/nodeHash.h"
@@ -104,7 +105,7 @@
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
-#include "executor/nodeCtescan.h"
+#include "executor/nodeWindowAgg.h"
#include "executor/nodeWorktablescan.h"
#include "miscadmin.h"
@@ -260,6 +261,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_WindowAgg:
+ result = (PlanState *) ExecInitWindowAgg((WindowAgg *) node,
+ estate, eflags);
+ break;
+
case T_Unique:
result = (PlanState *) ExecInitUnique((Unique *) node,
estate, eflags);
@@ -425,6 +431,10 @@ ExecProcNode(PlanState *node)
result = ExecAgg((AggState *) node);
break;
+ case T_WindowAggState:
+ result = ExecWindowAgg((WindowAggState *) node);
+ break;
+
case T_UniqueState:
result = ExecUnique((UniqueState *) node);
break;
@@ -601,6 +611,10 @@ ExecCountSlotsNode(Plan *node)
case T_Agg:
return ExecCountSlotsAgg((Agg *) node);
+ case T_WindowAgg:
+ return ExecCountSlotsWindowAgg((WindowAgg *) node);
+ break;
+
case T_Unique:
return ExecCountSlotsUnique((Unique *) node);
@@ -749,6 +763,10 @@ ExecEndNode(PlanState *node)
ExecEndAgg((AggState *) node);
break;
+ case T_WindowAggState:
+ ExecEndWindowAgg((WindowAggState *) node);
+ break;
+
case T_UniqueState:
ExecEndUnique((UniqueState *) node);
break;
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index 71aad49647d..17606f5204e 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.238 2008/12/18 19:38:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.239 2008/12/28 18:53:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -62,6 +62,9 @@ static Datum ExecEvalArrayRef(ArrayRefExprState *astate,
static Datum ExecEvalAggref(AggrefExprState *aggref,
ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
+static Datum ExecEvalWindowFunc(WindowFuncExprState *wfunc,
+ ExprContext *econtext,
+ bool *isNull, ExprDoneCond *isDone);
static Datum ExecEvalVar(ExprState *exprstate, ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
static Datum ExecEvalScalarVar(ExprState *exprstate, ExprContext *econtext,
@@ -444,6 +447,27 @@ ExecEvalAggref(AggrefExprState *aggref, ExprContext *econtext,
}
/* ----------------------------------------------------------------
+ * ExecEvalWindowFunc
+ *
+ * Returns a Datum whose value is the value of the precomputed
+ * window function found in the given expression context.
+ * ----------------------------------------------------------------
+ */
+static Datum
+ExecEvalWindowFunc(WindowFuncExprState *wfunc, ExprContext *econtext,
+ bool *isNull, ExprDoneCond *isDone)
+{
+ if (isDone)
+ *isDone = ExprSingleResult;
+
+ if (econtext->ecxt_aggvalues == NULL) /* safety check */
+ elog(ERROR, "no window functions in this expression context");
+
+ *isNull = econtext->ecxt_aggnulls[wfunc->wfuncno];
+ return econtext->ecxt_aggvalues[wfunc->wfuncno];
+}
+
+/* ----------------------------------------------------------------
* ExecEvalVar
*
* Returns a Datum whose value is the value of a range
@@ -4062,12 +4086,12 @@ ExecEvalExprSwitchContext(ExprState *expression,
* executions of the expression are needed. Typically the context will be
* the same as the per-query context of the associated ExprContext.
*
- * Any Aggref and SubPlan nodes found in the tree are added to the lists
- * of such nodes held by the parent PlanState. Otherwise, we do very little
- * initialization here other than building the state-node tree. Any nontrivial
- * work associated with initializing runtime info for a node should happen
- * during the first actual evaluation of that node. (This policy lets us
- * avoid work if the node is never actually evaluated.)
+ * Any Aggref, WindowFunc, or SubPlan nodes found in the tree are added to the
+ * lists of such nodes held by the parent PlanState. Otherwise, we do very
+ * little initialization here other than building the state-node tree. Any
+ * nontrivial work associated with initializing runtime info for a node should
+ * happen during the first actual evaluation of that node. (This policy lets
+ * us avoid work if the node is never actually evaluated.)
*
* Note: there is no ExecEndExpr function; we assume that any resource
* cleanup needed will be handled by just releasing the memory context
@@ -4145,11 +4169,49 @@ ExecInitExpr(Expr *node, PlanState *parent)
else
{
/* planner messed up */
- elog(ERROR, "aggref found in non-Agg plan node");
+ elog(ERROR, "Aggref found in non-Agg plan node");
}
state = (ExprState *) astate;
}
break;
+ case T_WindowFunc:
+ {
+ WindowFunc *wfunc = (WindowFunc *) node;
+ WindowFuncExprState *wfstate = makeNode(WindowFuncExprState);
+
+ wfstate->xprstate.evalfunc = (ExprStateEvalFunc) ExecEvalWindowFunc;
+ if (parent && IsA(parent, WindowAggState))
+ {
+ WindowAggState *winstate = (WindowAggState *) parent;
+ int nfuncs;
+
+ winstate->funcs = lcons(wfstate, winstate->funcs);
+ nfuncs = ++winstate->numfuncs;
+ if (wfunc->winagg)
+ winstate->numaggs++;
+
+ wfstate->args = (List *) ExecInitExpr((Expr *) wfunc->args,
+ parent);
+
+ /*
+ * Complain if the windowfunc's arguments contain any
+ * windowfuncs; nested window functions are semantically
+ * nonsensical. (This should have been caught earlier,
+ * but we defend against it here anyway.)
+ */
+ if (nfuncs != winstate->numfuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window function calls cannot be nested")));
+ }
+ else
+ {
+ /* planner messed up */
+ elog(ERROR, "WindowFunc found in non-WindowAgg plan node");
+ }
+ state = (ExprState *) wfstate;
+ }
+ break;
case T_ArrayRef:
{
ArrayRef *aref = (ArrayRef *) node;
diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c
new file mode 100644
index 00000000000..37ef9a5e830
--- /dev/null
+++ b/src/backend/executor/nodeWindowAgg.c
@@ -0,0 +1,1854 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeWindowAgg.c
+ * routines to handle WindowAgg nodes.
+ *
+ * A WindowAgg node evaluates "window functions" across suitable partitions
+ * of the input tuple set. Any one WindowAgg works for just a single window
+ * specification, though it can evaluate multiple window functions sharing
+ * identical window specifications. The input tuples are required to be
+ * delivered in sorted order, with the PARTITION BY columns (if any) as
+ * major sort keys and the ORDER BY columns (if any) as minor sort keys.
+ * (The planner generates a stack of WindowAggs with intervening Sort nodes
+ * as needed, if a query involves more than one window specification.)
+ *
+ * Since window functions can require access to any or all of the rows in
+ * the current partition, we accumulate rows of the partition into a
+ * tuplestore. The window functions are called using the WindowObject API
+ * so that they can access those rows as needed.
+ *
+ * We also support using plain aggregate functions as window functions.
+ * For these, the regular Agg-node environment is emulated for each partition.
+ * As required by the SQL spec, the output represents the value of the
+ * aggregate function over all rows in the current row's window frame.
+ *
+ *
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/executor/nodeWindowAgg.c,v 1.1 2008/12/28 18:53:55 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "executor/nodeWindowAgg.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "parser/parse_agg.h"
+#include "parser/parse_coerce.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+#include "windowapi.h"
+
+/*
+ * All the window function APIs are called with this object, which is passed
+ * to window functions as fcinfo->context.
+ *
+ * Each WindowStatePerFuncData carries one of these in its winobj field.
+ * begin_partition() allocates fresh markptr/readptr tuplestore read pointers
+ * for every function that is not a plain aggregate and resets markpos and
+ * seekpos to -1; release_partition() sets localmem back to NULL.
+ */
+typedef struct WindowObjectData
+{
+ NodeTag type;
+ WindowAggState *winstate; /* parent WindowAggState */
+ List *argstates; /* ExprState trees for fn's arguments */
+ void *localmem; /* WinGetPartitionLocalMemory's chunk */
+ int markptr; /* tuplestore mark pointer for this fn */
+ int readptr; /* tuplestore read pointer for this fn */
+ int64 markpos; /* row that markptr is positioned on */
+ int64 seekpos; /* row that readptr is positioned on */
+} WindowObjectData;
+
+/*
+ * We have one WindowStatePerFunc struct for each window function and
+ * window aggregate handled by this node.
+ *
+ * The structs live in the winstate->perfunc array. ExecInitWindowAgg
+ * assigns each distinct WindowFunc an index (wfuncno) into that array, and
+ * the same index is used for the function's result in
+ * ecxt_aggvalues/ecxt_aggnulls.
+ */
+typedef struct WindowStatePerFuncData
+{
+ /* Links to WindowFunc expr and state nodes this working state is for */
+ WindowFuncExprState *wfuncstate;
+ WindowFunc *wfunc;
+
+ int numArguments; /* number of arguments */
+
+ FmgrInfo flinfo; /* fmgr lookup data for window function */
+
+ /*
+ * We need the len and byval info for the result of each function
+ * in order to know how to copy/delete values.
+ */
+ int16 resulttypeLen;
+ bool resulttypeByVal;
+
+ bool plain_agg; /* is it just a plain aggregate function? */
+ int aggno; /* if so, index of its PerAggData */
+
+ WindowObject winobj; /* object used in window function API */
+} WindowStatePerFuncData;
+
+/*
+ * For plain aggregate window functions, we also have one of these.
+ *
+ * The structs live in the winstate->peragg array; wfuncno links each one
+ * back to its WindowStatePerFuncData, whose aggno field points here.
+ */
+typedef struct WindowStatePerAggData
+{
+ /* Oids of transition functions */
+ Oid transfn_oid;
+ Oid finalfn_oid; /* may be InvalidOid */
+
+ /*
+ * fmgr lookup data for transition functions --- only valid when
+ * corresponding oid is not InvalidOid. Note in particular that fn_strict
+ * flags are kept here.
+ */
+ FmgrInfo transfn;
+ FmgrInfo finalfn;
+
+ /*
+ * initial value from pg_aggregate entry
+ */
+ Datum initValue;
+ bool initValueIsNull;
+
+ /*
+ * cached value for non-moving frame
+ *
+ * eval_windowaggregates() stores the finalized result here and sets
+ * hasResult, so that later rows of the same peer group can reuse it;
+ * release_partition() clears hasResult again at the end of a partition.
+ */
+ Datum resultValue;
+ bool resultValueIsNull;
+ bool hasResult;
+
+ /*
+ * We need the len and byval info for the agg's input, result, and
+ * transition data types in order to know how to copy/delete values.
+ */
+ int16 inputtypeLen,
+ resulttypeLen,
+ transtypeLen;
+ bool inputtypeByVal,
+ resulttypeByVal,
+ transtypeByVal;
+
+ int wfuncno; /* index of associated PerFuncData */
+
+ /* Current transition value */
+ Datum transValue; /* current transition value */
+ bool transValueIsNull;
+
+ bool noTransValue; /* true if transValue not set yet */
+} WindowStatePerAggData;
+
+static void initialize_windowaggregate(WindowAggState *winstate,
+ WindowStatePerFunc perfuncstate,
+ WindowStatePerAgg peraggstate);
+static void advance_windowaggregate(WindowAggState *winstate,
+ WindowStatePerFunc perfuncstate,
+ WindowStatePerAgg peraggstate);
+static void finalize_windowaggregate(WindowAggState *winstate,
+ WindowStatePerFunc perfuncstate,
+ WindowStatePerAgg peraggstate,
+ Datum *result, bool *isnull);
+
+static void eval_windowaggregates(WindowAggState *winstate);
+static void eval_windowfunction(WindowAggState *winstate,
+ WindowStatePerFunc perfuncstate,
+ Datum *result, bool *isnull);
+
+static void begin_partition(WindowAggState *winstate);
+static void spool_tuples(WindowAggState *winstate, int64 pos);
+static void release_partition(WindowAggState *winstate);
+
+static WindowStatePerAggData *initialize_peragg(WindowAggState *winstate,
+ WindowFunc *wfunc,
+ WindowStatePerAgg peraggstate);
+static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
+
+static bool are_peers(WindowAggState *winstate, TupleTableSlot *slot1,
+ TupleTableSlot *slot2);
+static bool window_gettupleslot(WindowObject winobj, int64 pos,
+ TupleTableSlot *slot);
+
+
+/*
+ * initialize_windowaggregate
+ * parallel to initialize_aggregate in nodeAgg.c
+ *
+ * Resets one aggregate's transition state to its initial value. A non-NULL
+ * initValue is copied into winstate->wincontext so that the transition
+ * value is owned by that context; a NULL initValue is assigned as-is.
+ * noTransValue is set exactly when the initial value is NULL, which tells
+ * advance_windowaggregate that no transition value exists yet.
+ *
+ * perfuncstate is accepted for symmetry with the other per-aggregate
+ * routines but is not used here.
+ */
+static void
+initialize_windowaggregate(WindowAggState *winstate,
+ WindowStatePerFunc perfuncstate,
+ WindowStatePerAgg peraggstate)
+{
+ MemoryContext oldContext;
+
+ if (peraggstate->initValueIsNull)
+ peraggstate->transValue = peraggstate->initValue;
+ else
+ {
+ oldContext = MemoryContextSwitchTo(winstate->wincontext);
+ peraggstate->transValue = datumCopy(peraggstate->initValue,
+ peraggstate->transtypeByVal,
+ peraggstate->transtypeLen);
+ MemoryContextSwitchTo(oldContext);
+ }
+ peraggstate->transValueIsNull = peraggstate->initValueIsNull;
+ peraggstate->noTransValue = peraggstate->initValueIsNull;
+}
+
+/*
+ * advance_windowaggregate
+ * parallel to advance_aggregate in nodeAgg.c
+ *
+ * Evaluates the aggregate's argument expressions in winstate->tmpcontext
+ * (the caller installs the input row there) and applies the transition
+ * function, updating peraggstate->transValue and transValueIsNull.
+ * Argument evaluation and the transition call run in tmpcontext's per-tuple
+ * memory; a new pass-by-reference transition value is copied into
+ * winstate->wincontext and the previous one is freed.
+ *
+ * For a strict transition function the call is skipped when any argument is
+ * NULL or when the running transValue is already NULL, and the first
+ * non-NULL input is adopted directly as transValue when noTransValue is set.
+ *
+ * NOTE(review): the arguments are stored into fcinfo->arg[]/argnull[]
+ * before InitFunctionCallInfoData() is invoked, so this presumably relies on
+ * that macro leaving the argument arrays untouched --- confirm.
+ */
+static void
+advance_windowaggregate(WindowAggState *winstate,
+ WindowStatePerFunc perfuncstate,
+ WindowStatePerAgg peraggstate)
+{
+ WindowFuncExprState *wfuncstate = perfuncstate->wfuncstate;
+ int numArguments = perfuncstate->numArguments;
+ FunctionCallInfoData fcinfodata;
+ FunctionCallInfo fcinfo = &fcinfodata;
+ Datum newVal;
+ ListCell *arg;
+ int i;
+ MemoryContext oldContext;
+ ExprContext *econtext = winstate->tmpcontext;
+
+ oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
+
+ /* We start from 1, since the 0th arg will be the transition value */
+ i = 1;
+ foreach(arg, wfuncstate->args)
+ {
+ ExprState *argstate = (ExprState *) lfirst(arg);
+
+ fcinfo->arg[i] = ExecEvalExpr(argstate, econtext,
+ &fcinfo->argnull[i], NULL);
+ i++;
+ }
+
+ if (peraggstate->transfn.fn_strict)
+ {
+ /*
+ * For a strict transfn, nothing happens when there's a NULL input; we
+ * just keep the prior transValue.
+ */
+ for (i = 1; i <= numArguments; i++)
+ {
+ if (fcinfo->argnull[i])
+ {
+ MemoryContextSwitchTo(oldContext);
+ return;
+ }
+ }
+ if (peraggstate->noTransValue)
+ {
+ /*
+ * transValue has not been initialized. This is the first non-NULL
+ * input value. We use it as the initial value for transValue. (We
+ * already checked that the agg's input type is binary-compatible
+ * with its transtype, so straight copy here is OK.)
+ *
+ * We must copy the datum into wincontext if it is pass-by-ref. We
+ * do not need to pfree the old transValue, since it's NULL.
+ */
+ MemoryContextSwitchTo(winstate->wincontext);
+ peraggstate->transValue = datumCopy(fcinfo->arg[1],
+ peraggstate->transtypeByVal,
+ peraggstate->transtypeLen);
+ peraggstate->transValueIsNull = false;
+ peraggstate->noTransValue = false;
+ MemoryContextSwitchTo(oldContext);
+ return;
+ }
+ if (peraggstate->transValueIsNull)
+ {
+ /*
+ * Don't call a strict function with NULL inputs. Note it is
+ * possible to get here despite the above tests, if the transfn is
+ * strict *and* returned a NULL on a prior cycle. If that happens
+ * we will propagate the NULL all the way to the end.
+ */
+ MemoryContextSwitchTo(oldContext);
+ return;
+ }
+ }
+
+ /*
+ * OK to call the transition function
+ */
+ InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn),
+ numArguments + 1,
+ (void *) winstate, NULL);
+ fcinfo->arg[0] = peraggstate->transValue;
+ fcinfo->argnull[0] = peraggstate->transValueIsNull;
+ newVal = FunctionCallInvoke(fcinfo);
+
+ /*
+ * If pass-by-ref datatype, must copy the new value into wincontext and
+ * pfree the prior transValue. But if transfn returned a pointer to its
+ * first input, we don't need to do anything.
+ */
+ if (!peraggstate->transtypeByVal &&
+ DatumGetPointer(newVal) != DatumGetPointer(peraggstate->transValue))
+ {
+ if (!fcinfo->isnull)
+ {
+ MemoryContextSwitchTo(winstate->wincontext);
+ newVal = datumCopy(newVal,
+ peraggstate->transtypeByVal,
+ peraggstate->transtypeLen);
+ }
+ if (!peraggstate->transValueIsNull)
+ pfree(DatumGetPointer(peraggstate->transValue));
+ }
+
+ MemoryContextSwitchTo(oldContext); /* undoes either context switch above */
+ peraggstate->transValue = newVal;
+ peraggstate->transValueIsNull = fcinfo->isnull;
+}
+
+/*
+ * finalize_windowaggregate
+ * parallel to finalize_aggregate in nodeAgg.c
+ *
+ * Computes the aggregate's current output from peraggstate's transition
+ * value and returns it through *result / *isnull. If the aggregate has a
+ * final function it is applied (a strict final function is not called on a
+ * NULL transValue; the result is NULL instead); otherwise transValue itself
+ * is the result.
+ *
+ * The work is done in the per-tuple memory of the node's ps_ExprContext,
+ * and a non-NULL pass-by-reference result that is not already in that
+ * context is copied into it. peraggstate is only read here, never updated.
+ * perfuncstate is not used.
+ */
+static void
+finalize_windowaggregate(WindowAggState *winstate,
+ WindowStatePerFunc perfuncstate,
+ WindowStatePerAgg peraggstate,
+ Datum *result, bool *isnull)
+{
+ MemoryContext oldContext;
+
+ oldContext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
+
+ /*
+ * Apply the agg's finalfn if one is provided, else return transValue.
+ */
+ if (OidIsValid(peraggstate->finalfn_oid))
+ {
+ FunctionCallInfoData fcinfo;
+
+ InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), 1,
+ (void *) winstate, NULL);
+ fcinfo.arg[0] = peraggstate->transValue;
+ fcinfo.argnull[0] = peraggstate->transValueIsNull;
+ if (fcinfo.flinfo->fn_strict && peraggstate->transValueIsNull)
+ {
+ /* don't call a strict function with NULL inputs */
+ *result = (Datum) 0;
+ *isnull = true;
+ }
+ else
+ {
+ *result = FunctionCallInvoke(&fcinfo);
+ *isnull = fcinfo.isnull;
+ }
+ }
+ else
+ {
+ *result = peraggstate->transValue;
+ *isnull = peraggstate->transValueIsNull;
+ }
+
+ /*
+ * If result is pass-by-ref, make sure it is in the right context.
+ */
+ if (!peraggstate->resulttypeByVal && !*isnull &&
+ !MemoryContextContains(CurrentMemoryContext,
+ DatumGetPointer(*result)))
+ *result = datumCopy(*result,
+ peraggstate->resulttypeByVal,
+ peraggstate->resulttypeLen);
+ MemoryContextSwitchTo(oldContext);
+}
+
+/*
+ * eval_windowaggregates
+ * evaluate plain aggregates being used as window functions
+ *
+ * Much of this is duplicated from nodeAgg.c. But NOTE that we expect to be
+ * able to call aggregate final functions repeatedly after aggregating more
+ * data onto the same transition value. This is not a behavior required by
+ * nodeAgg.c.
+ *
+ * On return, ecxt_aggvalues[]/ecxt_aggnulls[] of the node's ps_ExprContext
+ * hold the result of every plain aggregate for the current row, and each
+ * aggregate's result is also cached in its WindowStatePerAggData
+ * (resultValue, resultValueIsNull, hasResult) for reuse by the remaining
+ * rows of the same peer group. Does nothing if the node has no plain
+ * aggregates.
+ */
+static void
+eval_windowaggregates(WindowAggState *winstate)
+{
+ WindowStatePerAgg peraggstate;
+ int wfuncno, numaggs;
+ int i;
+ MemoryContext oldContext;
+ ExprContext *econtext;
+ TupleTableSlot *first_peer_slot = winstate->first_peer_slot;
+ TupleTableSlot *slot;
+ bool first;
+
+ numaggs = winstate->numaggs;
+ if (numaggs == 0)
+ return; /* nothing to do */
+
+ /* final output execution is in ps_ExprContext */
+ econtext = winstate->ss.ps.ps_ExprContext;
+
+ /*
+ * We don't currently support explicitly-specified window frames. That
+ * means that the window frame always includes all the rows in the
+ * partition preceding and including the current row, and all its
+ * peers. As a special case, if there's no ORDER BY, all rows are peers,
+ * so the window frame includes all rows in the partition.
+ *
+ * When there's peer rows, all rows in a peer group will have the same
+ * aggregate values. The values will be calculated when current position
+ * reaches the first peer row, and on all the following peer rows we will
+ * just return the saved results.
+ *
+ * 'aggregatedupto' keeps track of the last row that has already been
+ * accumulated for the aggregates. When the current row has no peers,
+ * aggregatedupto will be the same as the current row after this
+ * function. If there are peer rows, all peers will be accumulated in one
+ * call of this function, and aggregatedupto will be ahead of the current
+ * position. If there's no ORDER BY, and thus all rows are peers, the
+ * first call will aggregate all rows in the partition.
+ *
+ * TODO: In the future, we could implement sliding frames by recalculating
+ * the aggregate whenever a row exits the frame. That would be pretty
+ * slow, though. For aggregates like SUM and COUNT we could implement a
+ * "negative transition function" that would be called for all the rows
+ * that exit the frame.
+ */
+
+ /*
+ * If we've already aggregated up through current row, reuse the
+ * saved result values
+ */
+ if (winstate->aggregatedupto > winstate->currentpos)
+ {
+ for (i = 0; i < numaggs; i++)
+ {
+ peraggstate = &winstate->peragg[i];
+ wfuncno = peraggstate->wfuncno;
+ econtext->ecxt_aggvalues[wfuncno] = peraggstate->resultValue;
+ econtext->ecxt_aggnulls[wfuncno] = peraggstate->resultValueIsNull;
+ }
+ return;
+ }
+
+ /* Initialize aggregates on first call for partition */
+ for (i = 0; i < numaggs; i++)
+ {
+ peraggstate = &winstate->peragg[i];
+ wfuncno = peraggstate->wfuncno;
+ if (!peraggstate->hasResult)
+ initialize_windowaggregate(winstate,
+ &winstate->perfunc[wfuncno],
+ &winstate->peragg[i]);
+ }
+
+ /*
+ * If this is the first call for this partition, fetch the first row
+ * for comparing peer rows. On subsequent calls, we'll always read
+ * ahead until we reach the first non-peer row, and store that row in
+ * first_peer_slot, for use in the next call.
+ */
+ if (TupIsNull(first_peer_slot))
+ {
+ spool_tuples(winstate, winstate->aggregatedupto);
+ tuplestore_select_read_pointer(winstate->buffer, winstate->agg_ptr);
+ if (!tuplestore_gettupleslot(winstate->buffer, true, first_peer_slot))
+ elog(ERROR, "unexpected end of tuplestore");
+ }
+
+ /*
+ * Advance until we reach the next non-peer row
+ */
+ first = true;
+ for (;;)
+ {
+ if (!first)
+ {
+ /* Fetch the next row, and see if it's a peer */
+ spool_tuples(winstate, winstate->aggregatedupto);
+ tuplestore_select_read_pointer(winstate->buffer,
+ winstate->agg_ptr);
+ slot = winstate->temp_slot_1;
+ if (!tuplestore_gettupleslot(winstate->buffer, true, slot))
+ break;
+ if (!are_peers(winstate, first_peer_slot, slot))
+ {
+ ExecCopySlot(first_peer_slot, slot);
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * On first iteration, just accumulate the tuple saved from
+ * last call
+ */
+ slot = first_peer_slot;
+ first = false;
+ }
+
+ /* set tuple context for evaluation of aggregate arguments */
+ winstate->tmpcontext->ecxt_outertuple = slot;
+
+ for (i = 0; i < numaggs; i++)
+ {
+ wfuncno = winstate->peragg[i].wfuncno;
+
+ advance_windowaggregate(winstate,
+ &winstate->perfunc[wfuncno],
+ &winstate->peragg[i]);
+
+ }
+ /* Reset per-input-tuple context after each tuple */
+ ResetExprContext(winstate->tmpcontext);
+ winstate->aggregatedupto++;
+ }
+
+ /*
+ * finalize aggregates and fill result/isnull fields.
+ */
+ for (i = 0; i < numaggs; i++)
+ {
+ Datum *result;
+ bool *isnull;
+
+ peraggstate = &winstate->peragg[i];
+ wfuncno = peraggstate->wfuncno;
+ result = &econtext->ecxt_aggvalues[wfuncno];
+ isnull = &econtext->ecxt_aggnulls[wfuncno];
+ finalize_windowaggregate(winstate,
+ &winstate->perfunc[wfuncno],
+ peraggstate, result, isnull);
+
+ /*
+ * save the result for the next (non-shrinking frame) call.
+ */
+ if (!peraggstate->resulttypeByVal)
+ {
+ /*
+ * clear old resultValue in order not to leak memory. This has
+ * to happen even when the new result is NULL: the old copy lives
+ * in wincontext and would otherwise stay allocated until the
+ * partition is released.
+ */
+ if (peraggstate->hasResult &&
+ !peraggstate->resultValueIsNull &&
+ (DatumGetPointer(peraggstate->resultValue) !=
+ DatumGetPointer(*result)))
+ pfree(DatumGetPointer(peraggstate->resultValue));
+
+ if (!*isnull)
+ {
+ /*
+ * If pass-by-ref, copy it into our global context.
+ */
+ oldContext = MemoryContextSwitchTo(winstate->wincontext);
+ peraggstate->resultValue = datumCopy(*result,
+ peraggstate->resulttypeByVal,
+ peraggstate->resulttypeLen);
+ MemoryContextSwitchTo(oldContext);
+ }
+ else
+ {
+ /* nothing to copy; don't keep a stale pointer around */
+ peraggstate->resultValue = (Datum) 0;
+ }
+ }
+ else
+ {
+ peraggstate->resultValue = *result;
+ }
+ peraggstate->resultValueIsNull = *isnull;
+ peraggstate->hasResult = true;
+ }
+}
+
+/*
+ * eval_windowfunction
+ *
+ * Arguments of window functions are not evaluated here, because a window
+ * function can need random access to arbitrary rows in the partition.
+ * The window function uses the special WinGetFuncArgInPartition and
+ * WinGetFuncArgInFrame functions to evaluate the arguments for the rows
+ * it wants.
+ *
+ * The function is invoked in the per-tuple memory of the node's
+ * ps_ExprContext with perfuncstate->winobj as fcinfo->context. Its value
+ * is returned through *result / *isnull; a non-NULL pass-by-reference
+ * result that is not already in that per-tuple context is copied into it.
+ */
+static void
+eval_windowfunction(WindowAggState *winstate, WindowStatePerFunc perfuncstate,
+ Datum *result, bool *isnull)
+{
+ FunctionCallInfoData fcinfo;
+ MemoryContext oldContext;
+
+ oldContext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
+
+ /*
+ * We don't pass any normal arguments to a window function, but we do
+ * pass it the number of arguments, in order to permit window function
+ * implementations to support varying numbers of arguments. The real
+ * info goes through the WindowObject, which is passed via fcinfo->context.
+ */
+ InitFunctionCallInfoData(fcinfo, &(perfuncstate->flinfo),
+ perfuncstate->numArguments,
+ (void *) perfuncstate->winobj, NULL);
+ /* Just in case, make all the regular argument slots be null */
+ memset(fcinfo.argnull, true, perfuncstate->numArguments);
+
+ *result = FunctionCallInvoke(&fcinfo);
+ *isnull = fcinfo.isnull;
+
+ /*
+ * Make sure pass-by-ref data is allocated in the appropriate context.
+ * (We need this in case the function returns a pointer into some
+ * short-lived tuple, as is entirely possible.)
+ */
+ if (!perfuncstate->resulttypeByVal && !fcinfo.isnull &&
+ !MemoryContextContains(CurrentMemoryContext,
+ DatumGetPointer(*result)))
+ *result = datumCopy(*result,
+ perfuncstate->resulttypeByVal,
+ perfuncstate->resulttypeLen);
+
+ MemoryContextSwitchTo(oldContext);
+}
+
+/*
+ * begin_partition
+ * Start buffering rows of the next partition.
+ *
+ * Resets the per-partition position counters, makes sure first_part_slot
+ * holds the partition's first row (pulling it from the outer plan if the
+ * slot is empty), creates a new tuplestore with its read pointers, and
+ * stores that first row into it.
+ *
+ * If the outer plan yields no row at all, no tuplestore is created:
+ * partition_spooled is set and more_partitions is cleared, and the function
+ * returns with winstate->buffer left as it was.
+ */
+static void
+begin_partition(WindowAggState *winstate)
+{
+ PlanState *outerPlan = outerPlanState(winstate);
+ int numfuncs = winstate->numfuncs;
+ int i;
+
+ winstate->partition_spooled = false;
+ winstate->spooled_rows = 0;
+ winstate->currentpos = 0;
+ winstate->frametailpos = -1;
+ winstate->aggregatedupto = 0;
+
+ /*
+ * If this is the very first partition, we need to fetch the first
+ * input row to store in it.
+ */
+ if (TupIsNull(winstate->first_part_slot))
+ {
+ TupleTableSlot *outerslot = ExecProcNode(outerPlan);
+
+ if (!TupIsNull(outerslot))
+ ExecCopySlot(winstate->first_part_slot, outerslot);
+ else
+ {
+ /* outer plan is empty, so we have nothing to do */
+ winstate->partition_spooled = true;
+ winstate->more_partitions = false;
+ return;
+ }
+ }
+
+ /* Create new tuplestore for this partition */
+ winstate->buffer = tuplestore_begin_heap(false, false, work_mem);
+
+ /*
+ * Set up read pointers for the tuplestore. The current and agg pointers
+ * don't need BACKWARD capability, but the per-window-function read
+ * pointers do.
+ */
+ winstate->current_ptr = 0; /* read pointer 0 is pre-allocated */
+
+ /* reset default REWIND capability bit for current ptr */
+ tuplestore_set_eflags(winstate->buffer, 0);
+
+ /* create a read pointer for aggregates, if needed */
+ if (winstate->numaggs > 0)
+ winstate->agg_ptr = tuplestore_alloc_read_pointer(winstate->buffer, 0);
+
+ /* create mark and read pointers for each real window function */
+ for (i = 0; i < numfuncs; i++)
+ {
+ WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
+
+ if (!perfuncstate->plain_agg)
+ {
+ WindowObject winobj = perfuncstate->winobj;
+
+ winobj->markptr = tuplestore_alloc_read_pointer(winstate->buffer,
+ 0);
+ winobj->readptr = tuplestore_alloc_read_pointer(winstate->buffer,
+ EXEC_FLAG_BACKWARD);
+ winobj->markpos = -1; /* -1: not positioned on any row yet */
+ winobj->seekpos = -1;
+ }
+ }
+
+ /*
+ * Store the first tuple into the tuplestore (it's always available now;
+ * we either read it above, or saved it at the end of previous partition)
+ */
+ tuplestore_puttupleslot(winstate->buffer, winstate->first_part_slot);
+ winstate->spooled_rows++;
+}
+
+/*
+ * Read tuples from the outer node, up to position 'pos', and store them
+ * into the tuplestore. If pos is -1, reads the whole partition.
+ *
+ * Returns at once if there is no tuplestore or the partition has already
+ * been spooled completely. Reading stops early when the outer plan is
+ * exhausted (more_partitions is cleared) or when a row's PARTITION BY
+ * columns differ from those of first_part_slot; that row is then saved in
+ * first_part_slot for the next partition and more_partitions is set. In
+ * both cases partition_spooled is set. With no PARTITION BY columns
+ * (partNumCols == 0) every input row is treated as part of the current
+ * partition.
+ */
+static void
+spool_tuples(WindowAggState *winstate, int64 pos)
+{
+ WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan;
+ PlanState *outerPlan;
+ TupleTableSlot *outerslot;
+ MemoryContext oldcontext;
+
+ if (!winstate->buffer)
+ return; /* just a safety check */
+ if (winstate->partition_spooled)
+ return; /* whole partition done already */
+
+ /*
+ * If the tuplestore has spilled to disk, alternate reading and writing
+ * becomes quite expensive due to frequent buffer flushes. It's cheaper
+ * to force the entire partition to get spooled in one go.
+ *
+ * XXX this is a horrid kluge --- it'd be better to fix the performance
+ * problem inside tuplestore. FIXME
+ */
+ if (!tuplestore_in_memory(winstate->buffer))
+ pos = -1;
+
+ outerPlan = outerPlanState(winstate);
+
+ /* Must be in query context to call outerplan or touch tuplestore */
+ oldcontext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory);
+
+ while (winstate->spooled_rows <= pos || pos == -1)
+ {
+ outerslot = ExecProcNode(outerPlan);
+ if (TupIsNull(outerslot))
+ {
+ /* reached the end of the last partition */
+ winstate->partition_spooled = true;
+ winstate->more_partitions = false;
+ break;
+ }
+
+ if (node->partNumCols > 0)
+ {
+ /* Check if this tuple still belongs to the current partition */
+ if (!execTuplesMatch(winstate->first_part_slot,
+ outerslot,
+ node->partNumCols, node->partColIdx,
+ winstate->partEqfunctions,
+ winstate->tmpcontext->ecxt_per_tuple_memory))
+ {
+ /*
+ * end of partition; copy the tuple for the next cycle.
+ */
+ ExecCopySlot(winstate->first_part_slot, outerslot);
+ winstate->partition_spooled = true;
+ winstate->more_partitions = true;
+ break;
+ }
+ }
+
+ /* Still in partition, so save it into the tuplestore */
+ tuplestore_puttupleslot(winstate->buffer, outerslot);
+ winstate->spooled_rows++;
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * release_partition
+ * clear information kept within a partition, including
+ * tuplestore and aggregate results.
+ *
+ * For every function the WindowObject's localmem pointer is forgotten, and
+ * for plain aggregates the cached result is invalidated (hasResult = false).
+ * The memory itself is reclaimed by resetting wincontext. first_peer_slot
+ * is cleared, the tuplestore (if any) is ended, and buffer is set to NULL
+ * so that ExecWindowAgg starts a new partition on its next pass.
+ */
+static void
+release_partition(WindowAggState *winstate)
+{
+ int i;
+
+ for (i = 0; i < winstate->numfuncs; i++)
+ {
+ WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
+
+ /* Release any partition-local state of this window function */
+ if (perfuncstate->winobj)
+ perfuncstate->winobj->localmem = NULL;
+
+ /* Reset agg result cache */
+ if (perfuncstate->plain_agg)
+ {
+ int aggno = perfuncstate->aggno;
+ WindowStatePerAggData *peraggstate = &winstate->peragg[aggno];
+
+ peraggstate->resultValueIsNull = true;
+ peraggstate->hasResult = false;
+ }
+ }
+
+ /*
+ * Release all partition-local memory (in particular, any partition-local
+ * state or aggregate temp data that we might have trashed our pointers
+ * to in the above loop). We don't rely on retail pfree because some
+ * aggregates might have allocated data we don't have direct pointers to.
+ */
+ MemoryContextResetAndDeleteChildren(winstate->wincontext);
+
+ /* Ensure eval_windowaggregates will see next call as partition start */
+ ExecClearTuple(winstate->first_peer_slot);
+
+ if (winstate->buffer)
+ tuplestore_end(winstate->buffer);
+ winstate->buffer = NULL;
+ winstate->partition_spooled = false;
+}
+
+
+/* -----------------
+ * ExecWindowAgg
+ *
+ * ExecWindowAgg receives tuples from its outer subplan and
+ * stores them into a tuplestore, then processes window functions.
+ * This node doesn't reduce nor qualify any row so the number of
+ * returned rows is exactly the same as its outer subplan's result
+ * (ignoring the case of SRFs in the targetlist, that is).
+ *
+ * Each call returns one projected row, or NULL once all partitions
+ * have been processed (all_done). Per call: continue a pending
+ * set-returning projection if there is one; otherwise advance to the
+ * next row (starting a partition if no tuplestore exists), spool input
+ * up to that row, move on to the next partition if the current one is
+ * exhausted, load the current row into ss_ScanTupleSlot, evaluate the
+ * true window functions and then the plain aggregates into
+ * ecxt_aggvalues/ecxt_aggnulls, trim the tuplestore, and project.
+ * -----------------
+ */
+TupleTableSlot *
+ExecWindowAgg(WindowAggState *winstate)
+{
+ TupleTableSlot *result;
+ ExprDoneCond isDone;
+ ExprContext *econtext;
+ int i;
+ int numfuncs;
+
+ if (winstate->all_done)
+ return NULL;
+
+ /*
+ * Check to see if we're still projecting out tuples from a previous output
+ * tuple (because there is a function-returning-set in the projection
+ * expressions). If so, try to project another one.
+ */
+ if (winstate->ss.ps.ps_TupFromTlist)
+ {
+ TupleTableSlot *result; /* local to this block; shadows outer result */
+ ExprDoneCond isDone; /* likewise shadows outer isDone */
+
+ result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone);
+ if (isDone == ExprMultipleResult)
+ return result;
+ /* Done with that source tuple... */
+ winstate->ss.ps.ps_TupFromTlist = false;
+ }
+
+restart:
+ if (winstate->buffer == NULL)
+ {
+ /* Initialize for first partition and set current row = 0 */
+ begin_partition(winstate);
+ }
+ else
+ {
+ /* Advance current row within partition */
+ winstate->currentpos++;
+ }
+
+ /*
+ * Spool all tuples up to and including the current row, if we haven't
+ * already
+ */
+ spool_tuples(winstate, winstate->currentpos);
+
+ /* Move to the next partition if we reached the end of this partition */
+ if (winstate->partition_spooled &&
+ winstate->currentpos >= winstate->spooled_rows)
+ {
+ release_partition(winstate);
+
+ if (winstate->more_partitions)
+ {
+ begin_partition(winstate);
+ Assert(winstate->spooled_rows > 0);
+ }
+ else
+ {
+ winstate->all_done = true;
+ return NULL;
+ }
+ }
+
+ /* final output execution is in ps_ExprContext */
+ econtext = winstate->ss.ps.ps_ExprContext;
+
+ /* Clear the per-output-tuple context for current row */
+ ResetExprContext(econtext);
+
+ /*
+ * Read the current row from the tuplestore, and save in ScanTupleSlot
+ * for possible use by WinGetFuncArgCurrent or the final projection step.
+ * (We can't rely on the outerplan's output slot because we may have to
+ * read beyond the current row.)
+ *
+ * Current row must be in the tuplestore, since we spooled it above.
+ */
+ tuplestore_select_read_pointer(winstate->buffer, winstate->current_ptr);
+ if (!tuplestore_gettupleslot(winstate->buffer, true,
+ winstate->ss.ss_ScanTupleSlot))
+ elog(ERROR, "unexpected end of tuplestore");
+
+ /*
+ * Evaluate true window functions
+ */
+ numfuncs = winstate->numfuncs;
+ for (i = 0; i < numfuncs; i++)
+ {
+ WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]);
+
+ if (perfuncstate->plain_agg)
+ continue; /* handled by eval_windowaggregates below */
+ eval_windowfunction(winstate, perfuncstate,
+ &(econtext->ecxt_aggvalues[perfuncstate->wfuncstate->wfuncno]),
+ &(econtext->ecxt_aggnulls[perfuncstate->wfuncstate->wfuncno]));
+ }
+
+ /*
+ * Evaluate aggregates
+ */
+ if (winstate->numaggs > 0)
+ eval_windowaggregates(winstate);
+
+ /*
+ * Truncate any no-longer-needed rows from the tuplestore.
+ */
+ tuplestore_trim(winstate->buffer);
+
+ /*
+ * Form and return a projection tuple using the windowfunc results
+ * and the current row. Setting ecxt_outertuple arranges that any
+ * Vars will be evaluated with respect to that row.
+ */
+ econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot;
+ result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone);
+
+ if (isDone == ExprEndResult)
+ {
+ /* SRF in tlist returned no rows, so advance to next input tuple */
+ goto restart;
+ }
+
+ winstate->ss.ps.ps_TupFromTlist =
+ (isDone == ExprMultipleResult);
+ return result;
+}
+
+/* -----------------
+ * ExecInitWindowAgg
+ *
+ * Creates the run-time information for the WindowAgg node produced by the
+ * planner and initializes its outer subtree
+ * -----------------
+ */
+WindowAggState *
+ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
+{
+ WindowAggState *winstate;
+ Plan *outerPlan;
+ ExprContext *econtext;
+ ExprContext *tmpcontext;
+ WindowStatePerFunc perfunc;
+ WindowStatePerAgg peragg;
+ int numfuncs,
+ wfuncno,
+ numaggs,
+ aggno;
+ ListCell *l;
+
+ /* check for unsupported flags */
+ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
+
+ /*
+ * create state structure
+ */
+ winstate = makeNode(WindowAggState);
+ winstate->ss.ps.plan = (Plan *) node;
+ winstate->ss.ps.state = estate;
+
+ /*
+ * Create expression contexts. We need two, one for per-input-tuple
+ * processing and one for per-output-tuple processing. We cheat a little
+ * by using ExecAssignExprContext() to build both.
+ */
+ ExecAssignExprContext(estate, &winstate->ss.ps);
+ tmpcontext = winstate->ss.ps.ps_ExprContext;
+ winstate->tmpcontext = tmpcontext;
+ ExecAssignExprContext(estate, &winstate->ss.ps);
+
+ /* Create long-lived context for storage of aggregate transvalues etc */
+ winstate->wincontext =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "WindowAggContext",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+#define WINDOWAGG_NSLOTS 6
+
+ /*
+ * tuple table initialization
+ */
+ ExecInitScanTupleSlot(estate, &winstate->ss);
+ ExecInitResultTupleSlot(estate, &winstate->ss.ps);
+ winstate->first_part_slot = ExecInitExtraTupleSlot(estate);
+ winstate->first_peer_slot = ExecInitExtraTupleSlot(estate);
+ winstate->temp_slot_1 = ExecInitExtraTupleSlot(estate);
+ winstate->temp_slot_2 = ExecInitExtraTupleSlot(estate);
+
+ winstate->ss.ps.targetlist = (List *)
+ ExecInitExpr((Expr *) node->plan.targetlist,
+ (PlanState *) winstate);
+
+ /*
+ * WindowAgg nodes never have quals, since they can only occur at the
+ * logical top level of a query (ie, after any WHERE or HAVING filters)
+ */
+ Assert(node->plan.qual == NIL);
+ winstate->ss.ps.qual = NIL;
+
+ /*
+ * initialize child nodes
+ */
+ outerPlan = outerPlan(node);
+ outerPlanState(winstate) = ExecInitNode(outerPlan, estate, eflags);
+
+ /*
+ * initialize source tuple type (which is also the tuple type that we'll
+ * store in the tuplestore and use in all our working slots).
+ */
+ ExecAssignScanTypeFromOuterPlan(&winstate->ss);
+
+ ExecSetSlotDescriptor(winstate->first_part_slot,
+ winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
+ ExecSetSlotDescriptor(winstate->first_peer_slot,
+ winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
+ ExecSetSlotDescriptor(winstate->temp_slot_1,
+ winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
+ ExecSetSlotDescriptor(winstate->temp_slot_2,
+ winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor);
+
+ /*
+ * Initialize result tuple type and projection info.
+ */
+ ExecAssignResultTypeFromTL(&winstate->ss.ps);
+ ExecAssignProjectionInfo(&winstate->ss.ps, NULL);
+
+ winstate->ss.ps.ps_TupFromTlist = false;
+
+ /* Set up data for comparing tuples */
+ if (node->partNumCols > 0)
+ winstate->partEqfunctions = execTuplesMatchPrepare(node->partNumCols,
+ node->partOperators);
+ if (node->ordNumCols > 0)
+ winstate->ordEqfunctions = execTuplesMatchPrepare(node->ordNumCols,
+ node->ordOperators);
+
+ /*
+ * WindowAgg nodes use aggvalues and aggnulls as well as Agg nodes.
+ */
+ numfuncs = winstate->numfuncs;
+ numaggs = winstate->numaggs;
+ econtext = winstate->ss.ps.ps_ExprContext;
+ econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numfuncs);
+ econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numfuncs);
+
+ /*
+ * allocate per-wfunc/per-agg state information.
+ */
+ perfunc = (WindowStatePerFunc) palloc0(sizeof(WindowStatePerFuncData) * numfuncs);
+ peragg = (WindowStatePerAgg) palloc0(sizeof(WindowStatePerAggData) * numaggs);
+ winstate->perfunc = perfunc;
+ winstate->peragg = peragg;
+
+ wfuncno = -1;
+ aggno = -1;
+ foreach(l, winstate->funcs)
+ {
+ WindowFuncExprState *wfuncstate = (WindowFuncExprState *) lfirst(l);
+ WindowFunc *wfunc = (WindowFunc *) wfuncstate->xprstate.expr;
+ WindowStatePerFunc perfuncstate;
+ AclResult aclresult;
+ int i;
+
+ /* Look for a previous duplicate window function */
+ for (i = 0; i <= wfuncno; i++)
+ {
+ if (equal(wfunc, perfunc[i].wfunc) &&
+ !contain_volatile_functions((Node *) wfunc))
+ break;
+ }
+ if (i <= wfuncno)
+ {
+ /* Found a match to an existing entry, so just mark it */
+ wfuncstate->wfuncno = i;
+ continue;
+ }
+
+ /* Nope, so assign a new PerFunc record */
+ perfuncstate = &perfunc[++wfuncno];
+
+ /* Mark WindowFunc state node with assigned index in the result array */
+ wfuncstate->wfuncno = wfuncno;
+
+ /* Check permission to call window function */
+ aclresult = pg_proc_aclcheck(wfunc->winfnoid, GetUserId(),
+ ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, ACL_KIND_PROC,
+ get_func_name(wfunc->winfnoid));
+
+ /* Fill in the perfuncstate data */
+ perfuncstate->wfuncstate = wfuncstate;
+ perfuncstate->wfunc = wfunc;
+ perfuncstate->numArguments = list_length(wfuncstate->args);
+
+ fmgr_info_cxt(wfunc->winfnoid, &perfuncstate->flinfo,
+ tmpcontext->ecxt_per_query_memory);
+ perfuncstate->flinfo.fn_expr = (Node *) wfunc;
+ get_typlenbyval(wfunc->wintype,
+ &perfuncstate->resulttypeLen,
+ &perfuncstate->resulttypeByVal);
+
+ /*
+ * If it's really just a plain aggregate function,
+ * we'll emulate the Agg environment for it.
+ */
+ perfuncstate->plain_agg = wfunc->winagg;
+ if (wfunc->winagg)
+ {
+ WindowStatePerAgg peraggstate;
+
+ perfuncstate->aggno = ++aggno;
+ peraggstate = &winstate->peragg[aggno];
+ initialize_peragg(winstate, wfunc, peraggstate);
+ peraggstate->wfuncno = wfuncno;
+ }
+ else
+ {
+ WindowObject winobj = makeNode(WindowObjectData);
+
+ winobj->winstate = winstate;
+ winobj->argstates = wfuncstate->args;
+ winobj->localmem = NULL;
+ perfuncstate->winobj = winobj;
+ }
+ }
+
+ /* Update numfuncs, numaggs to match number of unique functions found */
+ winstate->numfuncs = wfuncno + 1;
+ winstate->numaggs = aggno + 1;
+
+ winstate->partition_spooled = false;
+ winstate->more_partitions = false;
+
+ return winstate;
+}
+
+/* -----------------
+ * ExecCountSlotsWindowAgg
+ *
+ *	Report the number of tuple table slots needed by this plan subtree:
+ *	whatever the outer and inner subplans need, plus WINDOWAGG_NSLOTS
+ *	for the WindowAgg node itself.
+ * -----------------
+ */
+int
+ExecCountSlotsWindowAgg(WindowAgg *node)
+{
+	int			nslots;
+
+	nslots = ExecCountSlotsNode(outerPlan(node));
+	nslots += ExecCountSlotsNode(innerPlan(node));
+	nslots += WINDOWAGG_NSLOTS;
+
+	return nslots;
+}
+
+/* -----------------
+ * ExecEndWindowAgg
+ *
+ *	Shut down a WindowAgg node: release the current partition's data,
+ *	free the per-function and per-aggregate arrays, empty the working
+ *	slots, free both expression contexts, delete the node's private
+ *	memory context, and finally shut down the outer subplan.
+ * -----------------
+ */
+void
+ExecEndWindowAgg(WindowAggState *node)
+{
+	TupleTableSlot *workslots[5];
+	int			slotno;
+
+	release_partition(node);
+
+	pfree(node->perfunc);
+	pfree(node->peragg);
+
+	/* Empty every slot this node works with, in the usual order */
+	workslots[0] = node->ss.ss_ScanTupleSlot;
+	workslots[1] = node->first_part_slot;
+	workslots[2] = node->first_peer_slot;
+	workslots[3] = node->temp_slot_1;
+	workslots[4] = node->temp_slot_2;
+	for (slotno = 0; slotno < 5; slotno++)
+		ExecClearTuple(workslots[slotno]);
+
+	/*
+	 * Free both the expr contexts.  The second one is released by pointing
+	 * ps_ExprContext at tmpcontext and calling ExecFreeExprContext again.
+	 */
+	ExecFreeExprContext(&node->ss.ps);
+	node->ss.ps.ps_ExprContext = node->tmpcontext;
+	ExecFreeExprContext(&node->ss.ps);
+
+	MemoryContextDelete(node->wincontext);
+
+	/* Lastly, shut down the subplan */
+	ExecEndNode(outerPlanState(node));
+}
+
+/* -----------------
+ * ExecReScanWindowAgg
+ *
+ *	Reset the node so that the next fetch starts over: clear the "all
+ *	done" and "tuple from tlist" flags, drop the spooled partition,
+ *	empty the working slots, zero the stored window function results,
+ *	and rescan the subplan unless its chgParam is set.
+ * -----------------
+ */
+void
+ExecReScanWindowAgg(WindowAggState *node, ExprContext *exprCtxt)
+{
+	ExprContext *resultcxt = node->ss.ps.ps_ExprContext;
+	PlanState  *subnode;
+
+	node->all_done = false;
+	node->ss.ps.ps_TupFromTlist = false;
+
+	/* release tuplestore et al */
+	release_partition(node);
+
+	/* release all temp tuples, but especially first_part_slot */
+	ExecClearTuple(node->ss.ss_ScanTupleSlot);
+	ExecClearTuple(node->first_part_slot);
+	ExecClearTuple(node->first_peer_slot);
+	ExecClearTuple(node->temp_slot_1);
+	ExecClearTuple(node->temp_slot_2);
+
+	/* Forget current wfunc values */
+	MemSet(resultcxt->ecxt_aggvalues, 0, sizeof(Datum) * node->numfuncs);
+	MemSet(resultcxt->ecxt_aggnulls, 0, sizeof(bool) * node->numfuncs);
+
+	/*
+	 * A subnode whose chgParam is not null will be re-scanned by the first
+	 * ExecProcNode anyway, so only rescan it here when chgParam is null.
+	 */
+	subnode = outerPlanState(node);
+	if (subnode->chgParam == NULL)
+		ExecReScan(subnode, exprCtxt);
+}
+
+/*
+ * initialize_peragg
+ *
+ * Almost same as in nodeAgg.c, except we don't support DISTINCT currently.
+ *
+ * Fill in the per-aggregate working state for a window function that is
+ * really a plain aggregate.
+ *
+ * winstate: the WindowAgg node's state (not referenced by the code below)
+ * wfunc: the WindowFunc expression; supplies the aggregate's OID
+ * (winfnoid), its argument list and its result type (wintype)
+ * peraggstate: caller-provided state record to be filled in
+ *
+ * Returns peraggstate. Raises an error if the pg_aggregate or pg_proc
+ * entry cannot be found, if the aggregate's owner lacks EXECUTE permission
+ * on the transition or final function, or if a strict transition function
+ * with a null initial value has a first input type that is not
+ * binary-coercible to the transition type.
+ */
+static WindowStatePerAggData *
+initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
+ WindowStatePerAgg peraggstate)
+{
+ Oid inputTypes[FUNC_MAX_ARGS];
+ int numArguments;
+ HeapTuple aggTuple;
+ Form_pg_aggregate aggform;
+ Oid aggtranstype;
+ AclResult aclresult;
+ Oid transfn_oid,
+ finalfn_oid;
+ Expr *transfnexpr,
+ *finalfnexpr;
+ Datum textInitVal;
+ int i;
+ ListCell *lc;
+
+ numArguments = list_length(wfunc->args);
+
+ i = 0;
+ foreach(lc, wfunc->args)
+ {
+ inputTypes[i++] = exprType((Node *) lfirst(lc));
+ }
+
+ aggTuple = SearchSysCache(AGGFNOID,
+ ObjectIdGetDatum(wfunc->winfnoid),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(aggTuple))
+ elog(ERROR, "cache lookup failed for aggregate %u",
+ wfunc->winfnoid);
+ aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+ /*
+ * ExecInitWindowAgg already checked permission to call aggregate function
+ * ... but we still need to check the component functions
+ */
+
+ peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
+ peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
+
+ /*
+ * Check that aggregate owner has permission to call component fns.
+ * The owner is read from the aggregate's own pg_proc entry; the final
+ * function is checked only if the aggregate has one.
+ */
+ {
+ HeapTuple procTuple;
+ Oid aggOwner;
+
+ procTuple = SearchSysCache(PROCOID,
+ ObjectIdGetDatum(wfunc->winfnoid),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(procTuple))
+ elog(ERROR, "cache lookup failed for function %u",
+ wfunc->winfnoid);
+ aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
+ ReleaseSysCache(procTuple);
+
+ aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
+ ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, ACL_KIND_PROC,
+ get_func_name(transfn_oid));
+ if (OidIsValid(finalfn_oid))
+ {
+ aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
+ ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, ACL_KIND_PROC,
+ get_func_name(finalfn_oid));
+ }
+ }
+
+ /* resolve actual type of transition state, if polymorphic */
+ aggtranstype = aggform->aggtranstype;
+ if (IsPolymorphicType(aggtranstype))
+ {
+ /* have to fetch the agg's declared input types... */
+ Oid *declaredArgTypes;
+ int agg_nargs;
+
+ get_func_signature(wfunc->winfnoid,
+ &declaredArgTypes, &agg_nargs);
+ Assert(agg_nargs == numArguments);
+ aggtranstype = enforce_generic_type_consistency(inputTypes,
+ declaredArgTypes,
+ agg_nargs,
+ aggtranstype,
+ false);
+ pfree(declaredArgTypes);
+ }
+
+ /* build expression trees using actual argument & result types */
+ build_aggregate_fnexprs(inputTypes,
+ numArguments,
+ aggtranstype,
+ wfunc->wintype,
+ transfn_oid,
+ finalfn_oid,
+ &transfnexpr,
+ &finalfnexpr);
+
+ fmgr_info(transfn_oid, &peraggstate->transfn);
+ peraggstate->transfn.fn_expr = (Node *) transfnexpr;
+
+ if (OidIsValid(finalfn_oid))
+ {
+ fmgr_info(finalfn_oid, &peraggstate->finalfn);
+ peraggstate->finalfn.fn_expr = (Node *) finalfnexpr;
+ }
+
+ get_typlenbyval(wfunc->wintype,
+ &peraggstate->resulttypeLen,
+ &peraggstate->resulttypeByVal);
+ get_typlenbyval(aggtranstype,
+ &peraggstate->transtypeLen,
+ &peraggstate->transtypeByVal);
+
+ /*
+ * initval is potentially null, so don't try to access it as a struct
+ * field. Must do it the hard way with SysCacheGetAttr.
+ */
+ textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
+ Anum_pg_aggregate_agginitval,
+ &peraggstate->initValueIsNull);
+
+ if (peraggstate->initValueIsNull)
+ peraggstate->initValue = (Datum) 0;
+ else
+ peraggstate->initValue = GetAggInitVal(textInitVal,
+ aggtranstype);
+
+ /*
+ * If the transfn is strict and the initval is NULL, make sure input
+ * type and transtype are the same (or at least binary-compatible), so
+ * that it's OK to use the first input value as the initial
+ * transValue. This should have been checked at agg definition time,
+ * but just in case...
+ */
+ if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
+ {
+ if (numArguments < 1 ||
+ !IsBinaryCoercible(inputTypes[0], aggtranstype))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("aggregate %u needs to have compatible input type and transition type",
+ wfunc->winfnoid)));
+ }
+
+ ReleaseSysCache(aggTuple);
+
+ return peraggstate;
+}
+
+/*
+ * GetAggInitVal
+ *		Turn an aggregate's initial value, supplied as a text datum, into a
+ *		datum of the transition type by running that type's input function
+ *		on the text's C-string form.
+ */
+static Datum
+GetAggInitVal(Datum textInitVal, Oid transtype)
+{
+	Oid			infuncoid;
+	Oid			ioparam;
+	char	   *initstr;
+	Datum		result;
+
+	getTypeInputInfo(transtype, &infuncoid, &ioparam);
+
+	initstr = TextDatumGetCString(textInitVal);
+	result = OidInputFunctionCall(infuncoid, initstr, ioparam, -1);
+	pfree(initstr);
+
+	return result;
+}
+
+/*
+ * are_peers
+ *		Decide whether two rows are peers, i.e. compare equal on every
+ *		ORDER BY column of the window.
+ *
+ * The comparison runs in the node's temporary per-tuple memory context.
+ */
+static bool
+are_peers(WindowAggState *winstate, TupleTableSlot *slot1,
+		  TupleTableSlot *slot2)
+{
+	WindowAgg  *plan = (WindowAgg *) winstate->ss.ps.plan;
+
+	if (plan->ordNumCols != 0)
+		return execTuplesMatch(slot1, slot2,
+							   plan->ordNumCols, plan->ordColIdx,
+							   winstate->ordEqfunctions,
+							   winstate->tmpcontext->ecxt_per_tuple_memory);
+
+	/* Without an ORDER BY, every row is a peer of every other row */
+	return true;
+}
+
+/*
+ * window_gettupleslot
+ * Fetch the pos'th tuple of the current partition into the slot
+ *
+ * Returns true if successful, false if no such row
+ *
+ * A negative pos yields false, as does a pos that is still at or beyond
+ * the number of spooled rows after spool_tuples() has been asked for it.
+ * Asking for a row before the WindowObject's mark position is an error.
+ * The tuple is read through the WindowObject's own read pointer, and
+ * winobj->seekpos is kept in step with every move of that pointer.
+ */
+static bool
+window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
+{
+ WindowAggState *winstate = winobj->winstate;
+ MemoryContext oldcontext;
+
+ /* Don't allow passing -1 to spool_tuples here */
+ if (pos < 0)
+ return false;
+
+ /* If necessary, fetch the tuple into the spool */
+ spool_tuples(winstate, pos);
+
+ if (pos >= winstate->spooled_rows)
+ return false;
+
+ if (pos < winobj->markpos)
+ elog(ERROR, "cannot fetch row before WindowObject's mark position");
+
+ oldcontext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory);
+
+ tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
+
+ /*
+ * There's no API to refetch the tuple at the current position. We
+ * have to move one tuple forward, and then one backward. (We don't
+ * do it the other way because we might try to fetch the row before
+ * our mark, which isn't allowed.)
+ *
+ * After this step seekpos is never equal to pos, so exactly one of the
+ * two loops below runs, and it fetches at least one tuple into the slot.
+ */
+ if (winobj->seekpos == pos)
+ {
+ tuplestore_advance(winstate->buffer, true);
+ winobj->seekpos++;
+ }
+
+ while (winobj->seekpos > pos)
+ {
+ if (!tuplestore_gettupleslot(winstate->buffer, false, slot))
+ elog(ERROR, "unexpected end of tuplestore");
+ winobj->seekpos--;
+ }
+
+ while (winobj->seekpos < pos)
+ {
+ if (!tuplestore_gettupleslot(winstate->buffer, true, slot))
+ elog(ERROR, "unexpected end of tuplestore");
+ winobj->seekpos++;
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return true;
+}
+
+
+/***********************************************************************
+ * API exposed to window functions
+ ***********************************************************************/
+
+
+/*
+ * WinGetPartitionLocalMemory
+ *		Hand out the window object's partition-local working memory.
+ *
+ * If the object has no chunk yet, one of sz bytes is allocated, zeroed, in
+ * the node's wincontext and remembered in winobj->localmem.  Once a chunk
+ * exists it is returned as-is, whatever sz is passed.
+ *
+ * Memory obtained this way is normally used to hold state that should be
+ * automatically reset for each new partition.  State that must survive for
+ * the whole query can be kept in fcinfo->fn_extra in the usual way.
+ */
+void *
+WinGetPartitionLocalMemory(WindowObject winobj, Size sz)
+{
+	Assert(WindowObjectIsValid(winobj));
+
+	/* Already allocated?  Then just hand the same chunk back. */
+	if (winobj->localmem != NULL)
+		return winobj->localmem;
+
+	winobj->localmem = MemoryContextAllocZero(winobj->winstate->wincontext,
+											  sz);
+	return winobj->localmem;
+}
+
+/*
+ * WinGetCurrentPosition
+ *		Report where the current row sits within the current partition;
+ *		positions count from 0.
+ */
+int64
+WinGetCurrentPosition(WindowObject winobj)
+{
+	WindowAggState *winstate;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	return winstate->currentpos;
+}
+
+/*
+ * WinGetPartitionRowCount
+ *		Report how many rows the current partition contains.
+ *
+ * Note: this is relatively expensive, because the whole partition has to
+ * be "spooled" into the tuplestore before the count is known.  After that
+ * has happened once, further calls within the same partition are cheap.
+ */
+int64
+WinGetPartitionRowCount(WindowObject winobj)
+{
+	WindowAggState *winstate;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	/* -1 asks for the whole partition to be spooled */
+	spool_tuples(winstate, -1);
+
+	return winstate->spooled_rows;
+}
+
+/*
+ * WinSetMarkPosition
+ *		Advance the window object's "mark": the oldest row number (counting
+ *		from 0) that it may still fetch during the rest of the current
+ *		partition.
+ *
+ * Moving the mark backward is an error.  The object's read pointer is
+ * also advanced if it would otherwise be left behind the new mark.
+ *
+ * Window functions do not have to call this, but are encouraged to move the
+ * mark forward when possible to keep the tuplestore size down and prevent
+ * having to spill rows to disk.
+ */
+void
+WinSetMarkPosition(WindowObject winobj, int64 markpos)
+{
+	WindowAggState *winstate;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	if (markpos < winobj->markpos)
+		elog(ERROR, "cannot move WindowObject's mark position backward");
+
+	/* Step the mark pointer forward until it reaches the new position */
+	tuplestore_select_read_pointer(winstate->buffer, winobj->markptr);
+	for (; winobj->markpos < markpos; winobj->markpos++)
+		tuplestore_advance(winstate->buffer, true);
+
+	/* Likewise drag the read pointer along if it lags behind the mark */
+	tuplestore_select_read_pointer(winstate->buffer, winobj->readptr);
+	for (; winobj->seekpos < markpos; winobj->seekpos++)
+		tuplestore_advance(winstate->buffer, true);
+}
+
+/*
+ * WinRowsArePeers
+ *		Tell whether the rows at two absolute positions of the partition
+ *		are peers, i.e. equal according to the ORDER BY clause.
+ *
+ * Either position being unavailable is an error.  Both of the node's
+ * temporary slots are used for the comparison and cleared afterwards.
+ */
+bool
+WinRowsArePeers(WindowObject winobj, int64 pos1, int64 pos2)
+{
+	WindowAggState *winstate;
+	TupleTableSlot *first;
+	TupleTableSlot *second;
+	bool		peers;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	/* If no ORDER BY, all rows are peers; don't bother to fetch them */
+	if (((WindowAgg *) winstate->ss.ps.plan)->ordNumCols == 0)
+		return true;
+
+	first = winstate->temp_slot_1;
+	second = winstate->temp_slot_2;
+
+	if (!window_gettupleslot(winobj, pos1, first))
+		elog(ERROR, "specified position is out of window: " INT64_FORMAT,
+			 pos1);
+	if (!window_gettupleslot(winobj, pos2, second))
+		elog(ERROR, "specified position is out of window: " INT64_FORMAT,
+			 pos2);
+
+	peers = are_peers(winstate, first, second);
+
+	ExecClearTuple(first);
+	ExecClearTuple(second);
+
+	return peers;
+}
+
+/*
+ * WinGetFuncArgInPartition
+ *		Evaluate one of a window function's argument expressions against a
+ *		chosen row of the partition.  The row is addressed lseek(2)-style:
+ *		an offset from the current, first, or last row.
+ *
+ * argno: argument number to evaluate (counted from 0)
+ * relpos: signed rowcount offset from the seek position
+ * seektype: WINDOW_SEEK_CURRENT, WINDOW_SEEK_HEAD, or WINDOW_SEEK_TAIL
+ * set_mark: if the row is found and set_mark is true, the mark is moved to
+ *		the row as a side-effect
+ * isnull: output argument, receives isnull status of result
+ * isout: output argument, set to indicate whether the target row position
+ *		is out of partition (may be NULL if the caller doesn't care)
+ *
+ * A nonexistent row is not an error: the result is null, and *isout is set
+ * true when isout isn't NULL.
+ */
+Datum
+WinGetFuncArgInPartition(WindowObject winobj, int argno,
+						 int relpos, int seektype, bool set_mark,
+						 bool *isnull, bool *isout)
+{
+	WindowAggState *winstate;
+	ExprContext *econtext;
+	TupleTableSlot *slot;
+	int64		abs_pos;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	econtext = winstate->ss.ps.ps_ExprContext;
+	slot = winstate->temp_slot_1;
+
+	/* Translate the relative request into an absolute row number */
+	if (seektype == WINDOW_SEEK_CURRENT)
+		abs_pos = winstate->currentpos + relpos;
+	else if (seektype == WINDOW_SEEK_HEAD)
+		abs_pos = relpos;
+	else if (seektype == WINDOW_SEEK_TAIL)
+	{
+		/* need the whole partition to know where its last row is */
+		spool_tuples(winstate, -1);
+		abs_pos = winstate->spooled_rows - 1 + relpos;
+	}
+	else
+	{
+		elog(ERROR, "unrecognized window seek type: %d", seektype);
+		abs_pos = 0;			/* keep compiler quiet */
+	}
+
+	/* No such row?  Then report null (and out-of-partition, if asked) */
+	if (abs_pos < 0 || !window_gettupleslot(winobj, abs_pos, slot))
+	{
+		if (isout)
+			*isout = true;
+		*isnull = true;
+		return (Datum) 0;
+	}
+
+	if (isout)
+		*isout = false;
+	if (set_mark)
+		WinSetMarkPosition(winobj, abs_pos);
+
+	/* Evaluate the argument with the fetched row as the outer tuple */
+	econtext->ecxt_outertuple = slot;
+	return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno),
+						econtext, isnull, NULL);
+}
+
+/*
+ * WinGetFuncArgInFrame
+ *		Evaluate a window function's argument expression on a specified
+ *		row of the window frame.  The row is identified in lseek(2) style,
+ *		i.e. relative to the current, first, or last row.
+ *
+ * argno: argument number to evaluate (counted from 0)
+ * relpos: signed rowcount offset from the seek position
+ * seektype: WINDOW_SEEK_CURRENT, WINDOW_SEEK_HEAD, or WINDOW_SEEK_TAIL
+ * set_mark: If the row is found and set_mark is true, the mark is moved to
+ *		the row as a side-effect.
+ * isnull: output argument, receives isnull status of result
+ * isout: output argument, set to indicate whether target row position
+ *		is out of frame (can pass NULL if caller doesn't care about this)
+ *
+ * Specifying a nonexistent row is not an error, it just causes a null result
+ * (plus setting *isout true, if isout isn't NULL).
+ *
+ * The frame tail found here is cached in winstate->frametailpos and reused
+ * by later calls for as long as it lies beyond the current row, so the
+ * search below must always run to the true tail.
+ */
+Datum
+WinGetFuncArgInFrame(WindowObject winobj, int argno,
+					 int relpos, int seektype, bool set_mark,
+					 bool *isnull, bool *isout)
+{
+	WindowAggState *winstate;
+	ExprContext *econtext;
+	TupleTableSlot *slot;
+	bool		gottuple;
+	int64		abs_pos;
+	int64		frametailpos;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	/* if no ordering columns, partition and frame are the same thing */
+	if (((WindowAgg *) winstate->ss.ps.plan)->ordNumCols == 0)
+		return WinGetFuncArgInPartition(winobj, argno, relpos, seektype,
+										set_mark, isnull, isout);
+
+	econtext = winstate->ss.ps.ps_ExprContext;
+	slot = winstate->temp_slot_1;
+	frametailpos = winstate->frametailpos;
+
+	switch (seektype)
+	{
+		case WINDOW_SEEK_CURRENT:
+			abs_pos = winstate->currentpos + relpos;
+			break;
+		case WINDOW_SEEK_HEAD:
+			abs_pos = relpos;
+			break;
+		case WINDOW_SEEK_TAIL:
+			/* abs_pos is calculated later */
+			abs_pos = 0;		/* keep compiler quiet */
+			break;
+		default:
+			elog(ERROR, "unrecognized window seek type: %d", seektype);
+			abs_pos = 0;		/* keep compiler quiet */
+			break;
+	}
+
+	/*
+	 * Seek for frame tail.  If the remembered tail position is not beyond
+	 * the current row, it is stale (or was never set), so search forward
+	 * from the current row for the last of its peers.
+	 *
+	 * The search always runs until it meets a row that is definitely not a
+	 * peer, or the end of the partition.  An earlier version tried to stop
+	 * sooner for non-TAIL seeks, but its test was inverted: it gave up on
+	 * the first step whenever the target lay more than one row ahead, so
+	 * such a target was reported as out of frame even when it was a peer
+	 * of the current row.
+	 */
+	if (frametailpos <= winstate->currentpos)
+	{
+		int64		add = 1;
+
+		for (;;)
+		{
+			spool_tuples(winstate, winstate->currentpos + add);
+
+			/* If hit the partition end, the last row is the frame tail */
+			if (winstate->spooled_rows <= winstate->currentpos + add)
+				break;
+
+			/* The first non-peer ends the frame just before itself */
+			if (!WinRowsArePeers(winobj, winstate->currentpos,
+								 winstate->currentpos + add))
+				break;
+
+			add++;
+		}
+		frametailpos = winstate->currentpos + add - 1;
+		winstate->frametailpos = frametailpos;
+	}
+
+	if (seektype == WINDOW_SEEK_TAIL)
+		abs_pos = frametailpos + relpos;
+
+	/*
+	 * If there is an ORDER BY (we don't support other window frame
+	 * specifications yet), the frame runs from first row of the partition
+	 * to the last peer of the current row.  Otherwise the frame is the
+	 * whole partition.
+	 */
+	if (abs_pos < 0 || abs_pos > frametailpos)
+		gottuple = false;
+	else
+		gottuple = window_gettupleslot(winobj, abs_pos, slot);
+
+	if (!gottuple)
+	{
+		if (isout)
+			*isout = true;
+		*isnull = true;
+		return (Datum) 0;
+	}
+	else
+	{
+		if (isout)
+			*isout = false;
+		if (set_mark)
+			WinSetMarkPosition(winobj, abs_pos);
+		econtext->ecxt_outertuple = slot;
+		return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno),
+							econtext, isnull, NULL);
+	}
+}
+
+/*
+ * WinGetFuncArgCurrent
+ *		Evaluate one of a window function's argument expressions against
+ *		the current row, taken from the node's scan tuple slot.
+ *
+ * argno: argument number to evaluate (counted from 0)
+ * isnull: output argument, receives isnull status of result
+ *
+ * Note: this isn't quite equivalent to WinGetFuncArgInPartition or
+ * WinGetFuncArgInFrame targeting the current row, because it will succeed
+ * even if the WindowObject's mark has been set beyond the current row.
+ * This should generally be used for "ordinary" arguments of a window
+ * function, such as the offset argument of lead() or lag().
+ */
+Datum
+WinGetFuncArgCurrent(WindowObject winobj, int argno, bool *isnull)
+{
+	ExprContext *econtext;
+	ExprState  *argstate;
+
+	Assert(WindowObjectIsValid(winobj));
+
+	econtext = winobj->winstate->ss.ps.ps_ExprContext;
+	econtext->ecxt_outertuple = winobj->winstate->ss.ss_ScanTupleSlot;
+
+	argstate = (ExprState *) list_nth(winobj->argstates, argno);
+	return ExecEvalExpr(argstate, econtext, isnull, NULL);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 86f555a03a6..412fd96e5bf 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.416 2008/12/19 16:25:17 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.417 2008/12/28 18:53:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -669,6 +669,32 @@ _copyAgg(Agg *from)
}
/*
+ * _copyWindowAgg
+ *
+ * Copy a WindowAgg plan node. The generic Plan fields are handled by
+ * CopyPlanFields(). The partitioning and ordering column-index and
+ * operator arrays are copied only when the matching column count is
+ * positive, each sized by that count.
+ */
+static WindowAgg *
+_copyWindowAgg(WindowAgg *from)
+{
+ WindowAgg *newnode = makeNode(WindowAgg);
+
+ CopyPlanFields((Plan *) from, (Plan *) newnode);
+
+ COPY_SCALAR_FIELD(partNumCols);
+ if (from->partNumCols > 0)
+ {
+ COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber));
+ COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid));
+ }
+ COPY_SCALAR_FIELD(ordNumCols);
+ if (from->ordNumCols > 0)
+ {
+ COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber));
+ COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid));
+ }
+
+ return newnode;
+}
+
+/*
* _copyUnique
*/
static Unique *
@@ -932,6 +958,25 @@ _copyAggref(Aggref *from)
}
/*
+ * _copyWindowFunc
+ *
+ * Copy a WindowFunc expression node: the function OID, result type,
+ * argument list, window reference, the winstar and winagg flags, and the
+ * parse location.
+ */
+static WindowFunc *
+_copyWindowFunc(WindowFunc *from)
+{
+ WindowFunc *newnode = makeNode(WindowFunc);
+
+ COPY_SCALAR_FIELD(winfnoid);
+ COPY_SCALAR_FIELD(wintype);
+ COPY_NODE_FIELD(args);
+ COPY_SCALAR_FIELD(winref);
+ COPY_SCALAR_FIELD(winstar);
+ COPY_SCALAR_FIELD(winagg);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
+/*
* _copyArrayRef
*/
static ArrayRef *
@@ -1729,6 +1774,21 @@ _copySortGroupClause(SortGroupClause *from)
return newnode;
}
+/*
+ * _copyWindowClause
+ *
+ * Copy a WindowClause node: its name and refname strings, the partition
+ * and order clause lists, the winref number and the copiedOrder flag.
+ */
+static WindowClause *
+_copyWindowClause(WindowClause *from)
+{
+ WindowClause *newnode = makeNode(WindowClause);
+
+ COPY_STRING_FIELD(name);
+ COPY_STRING_FIELD(refname);
+ COPY_NODE_FIELD(partitionClause);
+ COPY_NODE_FIELD(orderClause);
+ COPY_SCALAR_FIELD(winref);
+ COPY_SCALAR_FIELD(copiedOrder);
+
+ return newnode;
+}
+
static RowMarkClause *
_copyRowMarkClause(RowMarkClause *from)
{
@@ -1850,6 +1910,7 @@ _copyFuncCall(FuncCall *from)
COPY_SCALAR_FIELD(agg_star);
COPY_SCALAR_FIELD(agg_distinct);
COPY_SCALAR_FIELD(func_variadic);
+ COPY_NODE_FIELD(over);
COPY_LOCATION_FIELD(location);
return newnode;
@@ -1940,6 +2001,20 @@ _copySortBy(SortBy *from)
return newnode;
}
+/*
+ * _copyWindowDef
+ *
+ * Copy a WindowDef node: its name and refname strings, the partition and
+ * order clause lists, and the parse location.
+ */
+static WindowDef *
+_copyWindowDef(WindowDef *from)
+{
+ WindowDef *newnode = makeNode(WindowDef);
+
+ COPY_STRING_FIELD(name);
+ COPY_STRING_FIELD(refname);
+ COPY_NODE_FIELD(partitionClause);
+ COPY_NODE_FIELD(orderClause);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
static RangeSubselect *
_copyRangeSubselect(RangeSubselect *from)
{
@@ -2081,6 +2156,7 @@ _copyQuery(Query *from)
COPY_SCALAR_FIELD(resultRelation);
COPY_NODE_FIELD(intoClause);
COPY_SCALAR_FIELD(hasAggs);
+ COPY_SCALAR_FIELD(hasWindowFuncs);
COPY_SCALAR_FIELD(hasSubLinks);
COPY_SCALAR_FIELD(hasDistinctOn);
COPY_SCALAR_FIELD(hasRecursive);
@@ -2091,6 +2167,7 @@ _copyQuery(Query *from)
COPY_NODE_FIELD(returningList);
COPY_NODE_FIELD(groupClause);
COPY_NODE_FIELD(havingQual);
+ COPY_NODE_FIELD(windowClause);
COPY_NODE_FIELD(distinctClause);
COPY_NODE_FIELD(sortClause);
COPY_NODE_FIELD(limitOffset);
@@ -2153,6 +2230,7 @@ _copySelectStmt(SelectStmt *from)
COPY_NODE_FIELD(whereClause);
COPY_NODE_FIELD(groupClause);
COPY_NODE_FIELD(havingClause);
+ COPY_NODE_FIELD(windowClause);
COPY_NODE_FIELD(withClause);
COPY_NODE_FIELD(valuesLists);
COPY_NODE_FIELD(sortClause);
@@ -3440,6 +3518,9 @@ copyObject(void *from)
case T_Agg:
retval = _copyAgg(from);
break;
+ case T_WindowAgg:
+ retval = _copyWindowAgg(from);
+ break;
case T_Unique:
retval = _copyUnique(from);
break;
@@ -3480,6 +3561,9 @@ copyObject(void *from)
case T_Aggref:
retval = _copyAggref(from);
break;
+ case T_WindowFunc:
+ retval = _copyWindowFunc(from);
+ break;
case T_ArrayRef:
retval = _copyArrayRef(from);
break;
@@ -3951,6 +4035,9 @@ copyObject(void *from)
case T_SortBy:
retval = _copySortBy(from);
break;
+ case T_WindowDef:
+ retval = _copyWindowDef(from);
+ break;
case T_RangeSubselect:
retval = _copyRangeSubselect(from);
break;
@@ -3984,6 +4071,9 @@ copyObject(void *from)
case T_SortGroupClause:
retval = _copySortGroupClause(from);
break;
+ case T_WindowClause:
+ retval = _copyWindowClause(from);
+ break;
case T_RowMarkClause:
retval = _copyRowMarkClause(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index e5e2bc44226..e96c66152e8 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -22,7 +22,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.341 2008/12/19 16:25:17 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.342 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -192,6 +192,20 @@ _equalAggref(Aggref *a, Aggref *b)
}
+/*
+ * _equalWindowFunc
+ *
+ * Compare two WindowFunc nodes field by field; reaching the end means no
+ * difference was found. NOTE(review): the COMPARE_* macros are defined
+ * outside this view; presumably each one returns false from this function
+ * on a mismatch -- confirm against their definitions.
+ */
static bool
+_equalWindowFunc(WindowFunc *a, WindowFunc *b)
+{
+ COMPARE_SCALAR_FIELD(winfnoid);
+ COMPARE_SCALAR_FIELD(wintype);
+ COMPARE_NODE_FIELD(args);
+ COMPARE_SCALAR_FIELD(winref);
+ COMPARE_SCALAR_FIELD(winstar);
+ COMPARE_SCALAR_FIELD(winagg);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+static bool
_equalArrayRef(ArrayRef *a, ArrayRef *b)
{
COMPARE_SCALAR_FIELD(refarraytype);
@@ -839,6 +853,7 @@ _equalQuery(Query *a, Query *b)
COMPARE_SCALAR_FIELD(resultRelation);
COMPARE_NODE_FIELD(intoClause);
COMPARE_SCALAR_FIELD(hasAggs);
+ COMPARE_SCALAR_FIELD(hasWindowFuncs);
COMPARE_SCALAR_FIELD(hasSubLinks);
COMPARE_SCALAR_FIELD(hasDistinctOn);
COMPARE_SCALAR_FIELD(hasRecursive);
@@ -849,6 +864,7 @@ _equalQuery(Query *a, Query *b)
COMPARE_NODE_FIELD(returningList);
COMPARE_NODE_FIELD(groupClause);
COMPARE_NODE_FIELD(havingQual);
+ COMPARE_NODE_FIELD(windowClause);
COMPARE_NODE_FIELD(distinctClause);
COMPARE_NODE_FIELD(sortClause);
COMPARE_NODE_FIELD(limitOffset);
@@ -903,6 +919,7 @@ _equalSelectStmt(SelectStmt *a, SelectStmt *b)
COMPARE_NODE_FIELD(whereClause);
COMPARE_NODE_FIELD(groupClause);
COMPARE_NODE_FIELD(havingClause);
+ COMPARE_NODE_FIELD(windowClause);
COMPARE_NODE_FIELD(withClause);
COMPARE_NODE_FIELD(valuesLists);
COMPARE_NODE_FIELD(sortClause);
@@ -1894,6 +1911,7 @@ _equalFuncCall(FuncCall *a, FuncCall *b)
COMPARE_SCALAR_FIELD(agg_star);
COMPARE_SCALAR_FIELD(agg_distinct);
COMPARE_SCALAR_FIELD(func_variadic);
+ COMPARE_NODE_FIELD(over);
COMPARE_LOCATION_FIELD(location);
return true;
@@ -1981,6 +1999,18 @@ _equalSortBy(SortBy *a, SortBy *b)
}
+/*
+ * _equalWindowDef
+ *
+ * Compare two WindowDef nodes field by field; reaching the end means no
+ * difference was found. NOTE(review): the COMPARE_* macros are defined
+ * outside this view; presumably each one returns false from this function
+ * on a mismatch -- confirm against their definitions.
+ */
static bool
+_equalWindowDef(WindowDef *a, WindowDef *b)
+{
+ COMPARE_STRING_FIELD(name);
+ COMPARE_STRING_FIELD(refname);
+ COMPARE_NODE_FIELD(partitionClause);
+ COMPARE_NODE_FIELD(orderClause);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+static bool
_equalRangeSubselect(RangeSubselect *a, RangeSubselect *b)
{
COMPARE_NODE_FIELD(subquery);
@@ -2107,6 +2137,19 @@ _equalSortGroupClause(SortGroupClause *a, SortGroupClause *b)
}
+/*
+ * _equalWindowClause
+ *
+ * Compare two WindowClause nodes field by field; reaching the end means
+ * no difference was found. NOTE(review): the COMPARE_* macros are defined
+ * outside this view; presumably each one returns false from this function
+ * on a mismatch -- confirm against their definitions.
+ */
static bool
+_equalWindowClause(WindowClause *a, WindowClause *b)
+{
+ COMPARE_STRING_FIELD(name);
+ COMPARE_STRING_FIELD(refname);
+ COMPARE_NODE_FIELD(partitionClause);
+ COMPARE_NODE_FIELD(orderClause);
+ COMPARE_SCALAR_FIELD(winref);
+ COMPARE_SCALAR_FIELD(copiedOrder);
+
+ return true;
+}
+
+static bool
_equalRowMarkClause(RowMarkClause *a, RowMarkClause *b)
{
COMPARE_SCALAR_FIELD(rti);
@@ -2311,6 +2354,9 @@ equal(void *a, void *b)
case T_Aggref:
retval = _equalAggref(a, b);
break;
+ case T_WindowFunc:
+ retval = _equalWindowFunc(a, b);
+ break;
case T_ArrayRef:
retval = _equalArrayRef(a, b);
break;
@@ -2769,6 +2815,9 @@ equal(void *a, void *b)
case T_SortBy:
retval = _equalSortBy(a, b);
break;
+ case T_WindowDef:
+ retval = _equalWindowDef(a, b);
+ break;
case T_RangeSubselect:
retval = _equalRangeSubselect(a, b);
break;
@@ -2802,6 +2851,9 @@ equal(void *a, void *b)
case T_SortGroupClause:
retval = _equalSortGroupClause(a, b);
break;
+ case T_WindowClause:
+ retval = _equalWindowClause(a, b);
+ break;
case T_RowMarkClause:
retval = _equalRowMarkClause(a, b);
break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 7236360347c..0284ce4edca 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/nodeFuncs.c,v 1.35 2008/10/21 20:42:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/nodeFuncs.c,v 1.36 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -52,6 +52,9 @@ exprType(Node *expr)
case T_Aggref:
type = ((Aggref *) expr)->aggtype;
break;
+ case T_WindowFunc:
+ type = ((WindowFunc *) expr)->wintype;
+ break;
case T_ArrayRef:
{
ArrayRef *arrayref = (ArrayRef *) expr;
@@ -548,6 +551,8 @@ expression_returns_set_walker(Node *node, void *context)
/* Avoid recursion for some cases that can't return a set */
if (IsA(node, Aggref))
return false;
+ if (IsA(node, WindowFunc))
+ return false;
if (IsA(node, DistinctExpr))
return false;
if (IsA(node, ScalarArrayOpExpr))
@@ -634,6 +639,10 @@ exprLocation(Node *expr)
/* function name should always be the first thing */
loc = ((Aggref *) expr)->location;
break;
+ case T_WindowFunc:
+ /* function name should always be the first thing */
+ loc = ((WindowFunc *) expr)->location;
+ break;
case T_ArrayRef:
/* just use array argument's location */
loc = exprLocation((Node *) ((ArrayRef *) expr)->refexpr);
@@ -868,6 +877,9 @@ exprLocation(Node *expr)
/* just use argument's location (ignore operator, if any) */
loc = exprLocation(((SortBy *) expr)->node);
break;
+ case T_WindowDef:
+ loc = ((WindowDef *) expr)->location;
+ break;
case T_TypeName:
loc = ((TypeName *) expr)->location;
break;
@@ -1045,6 +1057,16 @@ expression_tree_walker(Node *node,
return true;
}
break;
+ case T_WindowFunc:
+ {
+ WindowFunc *expr = (WindowFunc *) node;
+
+ /* recurse directly on List */
+ if (expression_tree_walker((Node *) expr->args,
+ walker, context))
+ return true;
+ }
+ break;
case T_ArrayRef:
{
ArrayRef *aref = (ArrayRef *) node;
@@ -1221,6 +1243,16 @@ expression_tree_walker(Node *node,
case T_Query:
/* Do nothing with a sub-Query, per discussion above */
break;
+ case T_WindowClause:
+ {
+ WindowClause *wc = (WindowClause *) node;
+
+ if (walker(wc->partitionClause, context))
+ return true;
+ if (walker(wc->orderClause, context))
+ return true;
+ }
+ break;
case T_CommonTableExpr:
{
CommonTableExpr *cte = (CommonTableExpr *) node;
@@ -1539,6 +1571,16 @@ expression_tree_mutator(Node *node,
return (Node *) newnode;
}
break;
+ case T_WindowFunc:
+ {
+ WindowFunc *wfunc = (WindowFunc *) node;
+ WindowFunc *newnode;
+
+ FLATCOPY(newnode, wfunc, WindowFunc);
+ MUTATE(newnode->args, wfunc->args, List *);
+ return (Node *) newnode;
+ }
+ break;
case T_ArrayRef:
{
ArrayRef *arrayref = (ArrayRef *) node;
@@ -1848,6 +1890,17 @@ expression_tree_mutator(Node *node,
case T_Query:
/* Do nothing with a sub-Query, per discussion above */
return node;
+ case T_WindowClause:
+ {
+ WindowClause *wc = (WindowClause *) node;
+ WindowClause *newnode;
+
+ FLATCOPY(newnode, wc, WindowClause);
+ MUTATE(newnode->partitionClause, wc->partitionClause, List *);
+ MUTATE(newnode->orderClause, wc->orderClause, List *);
+ return (Node *) newnode;
+ }
+ break;
case T_CommonTableExpr:
{
CommonTableExpr *cte = (CommonTableExpr *) node;
@@ -2280,6 +2333,8 @@ raw_expression_tree_walker(Node *node, bool (*walker) (), void *context)
return true;
if (walker(stmt->havingClause, context))
return true;
+ if (walker(stmt->windowClause, context))
+ return true;
if (walker(stmt->withClause, context))
return true;
if (walker(stmt->valuesLists, context))
@@ -2318,6 +2373,8 @@ raw_expression_tree_walker(Node *node, bool (*walker) (), void *context)
if (walker(fcall->args, context))
return true;
+ if (walker(fcall->over, context))
+ return true;
/* function name is deemed uninteresting */
}
break;
@@ -2365,6 +2422,16 @@ raw_expression_tree_walker(Node *node, bool (*walker) (), void *context)
break;
case T_SortBy:
return walker(((SortBy *) node)->node, context);
+ case T_WindowDef:
+ {
+ WindowDef *wd = (WindowDef *) node;
+
+ if (walker(wd->partitionClause, context))
+ return true;
+ if (walker(wd->orderClause, context))
+ return true;
+ }
+ break;
case T_RangeSubselect:
{
RangeSubselect *rs = (RangeSubselect *) node;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 2477a17cfa3..f926f1314cd 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.346 2008/12/01 21:06:12 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.347 2008/12/28 18:53:56 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@@ -567,6 +567,36 @@ _outAgg(StringInfo str, Agg *node)
}
static void
+_outWindowAgg(StringInfo str, WindowAgg *node)
+{
+ int i;
+ /*
+ * Append a text dump of a WindowAgg plan node to str: the node tag,
+ * whatever _outPlanInfo emits for the node viewed as a Plan, and then
+ * the partitioning and ordering column arrays. Each array is printed
+ * by hand as a label followed by one value per column.
+ */
+ WRITE_NODE_TYPE("WINDOWAGG");
+
+ _outPlanInfo(str, (Plan *) node);
+
+ WRITE_INT_FIELD(partNumCols); /* entry count used for both partition arrays */
+
+ appendStringInfo(str, " :partColIdx");
+ for (i = 0; i < node->partNumCols; i++)
+ appendStringInfo(str, " %d", node->partColIdx[i]);
+ /*
+ * NOTE(review): the label below reads partOperations although the field
+ * being dumped is partOperators (likewise ordOperations vs ordOperators
+ * further down) -- confirm the spelling is intended.
+ */
+ appendStringInfo(str, " :partOperations");
+ for (i = 0; i < node->partNumCols; i++)
+ appendStringInfo(str, " %u", node->partOperators[i]);
+
+ WRITE_INT_FIELD(ordNumCols); /* entry count used for both ordering arrays */
+
+ appendStringInfo(str, " :ordColIdx");
+ for (i = 0; i< node->ordNumCols; i++)
+ appendStringInfo(str, " %d", node->ordColIdx[i]);
+
+ appendStringInfo(str, " :ordOperations");
+ for (i = 0; i < node->ordNumCols; i++)
+ appendStringInfo(str, " %u", node->ordOperators[i]);
+}
+
+static void
_outGroup(StringInfo str, Group *node)
{
int i;
@@ -799,6 +829,20 @@ _outAggref(StringInfo str, Aggref *node)
}
static void
+_outWindowFunc(StringInfo str, WindowFunc *node)
+{
+ WRITE_NODE_TYPE("WINDOWFUNC");
+ /*
+ * Dump a WindowFunc expression node into str. The fields are written
+ * in the same order in which _readWindowFunc reads them back.
+ */
+ WRITE_OID_FIELD(winfnoid);
+ WRITE_OID_FIELD(wintype); /* the type exprType reports for this node */
+ WRITE_NODE_FIELD(args); /* argument List */
+ WRITE_UINT_FIELD(winref); /* presumably pairs with WindowClause.winref -- TODO confirm */
+ WRITE_BOOL_FIELD(winstar);
+ WRITE_BOOL_FIELD(winagg);
+ WRITE_LOCATION_FIELD(location); /* the location exprLocation reports for this node */
+}
+
+static void
_outArrayRef(StringInfo str, ArrayRef *node)
{
WRITE_NODE_TYPE("ARRAYREF");
@@ -1440,6 +1484,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(query_pathkeys);
WRITE_NODE_FIELD(group_pathkeys);
+ WRITE_NODE_FIELD(window_pathkeys);
WRITE_NODE_FIELD(distinct_pathkeys);
WRITE_NODE_FIELD(sort_pathkeys);
WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
@@ -1722,6 +1767,7 @@ _outSelectStmt(StringInfo str, SelectStmt *node)
WRITE_NODE_FIELD(whereClause);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(havingClause);
+ WRITE_NODE_FIELD(windowClause);
WRITE_NODE_FIELD(withClause);
WRITE_NODE_FIELD(valuesLists);
WRITE_NODE_FIELD(sortClause);
@@ -1744,6 +1790,7 @@ _outFuncCall(StringInfo str, FuncCall *node)
WRITE_BOOL_FIELD(agg_star);
WRITE_BOOL_FIELD(agg_distinct);
WRITE_BOOL_FIELD(func_variadic);
+ WRITE_NODE_FIELD(over);
WRITE_LOCATION_FIELD(location);
}
@@ -1866,6 +1913,7 @@ _outQuery(StringInfo str, Query *node)
WRITE_INT_FIELD(resultRelation);
WRITE_NODE_FIELD(intoClause);
WRITE_BOOL_FIELD(hasAggs);
+ WRITE_BOOL_FIELD(hasWindowFuncs);
WRITE_BOOL_FIELD(hasSubLinks);
WRITE_BOOL_FIELD(hasDistinctOn);
WRITE_BOOL_FIELD(hasRecursive);
@@ -1876,6 +1924,7 @@ _outQuery(StringInfo str, Query *node)
WRITE_NODE_FIELD(returningList);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(havingQual);
+ WRITE_NODE_FIELD(windowClause);
WRITE_NODE_FIELD(distinctClause);
WRITE_NODE_FIELD(sortClause);
WRITE_NODE_FIELD(limitOffset);
@@ -1896,6 +1945,19 @@ _outSortGroupClause(StringInfo str, SortGroupClause *node)
}
static void
+_outWindowClause(StringInfo str, WindowClause *node)
+{
+ WRITE_NODE_TYPE("WINDOWCLAUSE");
+ /*
+ * Dump a WindowClause node into str. The fields are written in the
+ * same order in which _readWindowClause reads them back.
+ */
+ WRITE_STRING_FIELD(name);
+ WRITE_STRING_FIELD(refname);
+ WRITE_NODE_FIELD(partitionClause); /* visited by expression_tree_walker/mutator */
+ WRITE_NODE_FIELD(orderClause); /* visited by expression_tree_walker/mutator */
+ WRITE_UINT_FIELD(winref); /* the planner indexes wflists->windowFuncs[] with this */
+ WRITE_BOOL_FIELD(copiedOrder);
+}
+
+static void
_outRowMarkClause(StringInfo str, RowMarkClause *node)
{
WRITE_NODE_TYPE("ROWMARKCLAUSE");
@@ -2172,6 +2234,18 @@ _outSortBy(StringInfo str, SortBy *node)
}
static void
+_outWindowDef(StringInfo str, WindowDef *node)
+{
+ WRITE_NODE_TYPE("WINDOWDEF");
+ /*
+ * Dump a WindowDef node into str. WindowDef is the node type that
+ * raw_expression_tree_walker handles under T_WindowDef; it carries the
+ * same name/refname/partitionClause/orderClause fields as WindowClause,
+ * plus a location in place of winref and copiedOrder.
+ */
+ WRITE_STRING_FIELD(name);
+ WRITE_STRING_FIELD(refname);
+ WRITE_NODE_FIELD(partitionClause);
+ WRITE_NODE_FIELD(orderClause);
+ WRITE_LOCATION_FIELD(location); /* the value exprLocation returns for a WindowDef */
+}
+
+static void
_outRangeSubselect(StringInfo str, RangeSubselect *node)
{
WRITE_NODE_TYPE("RANGESUBSELECT");
@@ -2347,6 +2421,9 @@ _outNode(StringInfo str, void *obj)
case T_Agg:
_outAgg(str, obj);
break;
+ case T_WindowAgg:
+ _outWindowAgg(str, obj);
+ break;
case T_Group:
_outGroup(str, obj);
break;
@@ -2392,6 +2469,9 @@ _outNode(StringInfo str, void *obj)
case T_Aggref:
_outAggref(str, obj);
break;
+ case T_WindowFunc:
+ _outWindowFunc(str, obj);
+ break;
case T_ArrayRef:
_outArrayRef(str, obj);
break;
@@ -2616,6 +2696,9 @@ _outNode(StringInfo str, void *obj)
case T_SortGroupClause:
_outSortGroupClause(str, obj);
break;
+ case T_WindowClause:
+ _outWindowClause(str, obj);
+ break;
case T_RowMarkClause:
_outRowMarkClause(str, obj);
break;
@@ -2661,6 +2744,9 @@ _outNode(StringInfo str, void *obj)
case T_SortBy:
_outSortBy(str, obj);
break;
+ case T_WindowDef:
+ _outWindowDef(str, obj);
+ break;
case T_RangeSubselect:
_outRangeSubselect(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index ed5b55fb571..7bcc8e8047d 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.217 2008/11/15 19:43:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.218 2008/12/28 18:53:56 tgl Exp $
*
* NOTES
* Path and Plan nodes do not have any readfuncs support, because we
@@ -153,6 +153,7 @@ _readQuery(void)
READ_INT_FIELD(resultRelation);
READ_NODE_FIELD(intoClause);
READ_BOOL_FIELD(hasAggs);
+ READ_BOOL_FIELD(hasWindowFuncs);
READ_BOOL_FIELD(hasSubLinks);
READ_BOOL_FIELD(hasDistinctOn);
READ_BOOL_FIELD(hasRecursive);
@@ -163,6 +164,7 @@ _readQuery(void)
READ_NODE_FIELD(returningList);
READ_NODE_FIELD(groupClause);
READ_NODE_FIELD(havingQual);
+ READ_NODE_FIELD(windowClause);
READ_NODE_FIELD(distinctClause);
READ_NODE_FIELD(sortClause);
READ_NODE_FIELD(limitOffset);
@@ -218,6 +220,24 @@ _readSortGroupClause(void)
}
/*
+ * _readWindowClause
+ *
+ * Reader counterpart of _outWindowClause; parseNodeString calls it when
+ * the node tag matches WINDOWCLAUSE. The fields are read in the same
+ * order in which _outWindowClause writes them: name, refname,
+ * partitionClause, orderClause, winref, copiedOrder.
+ */
+static WindowClause *
+_readWindowClause(void)
+{
+ READ_LOCALS(WindowClause);
+
+ READ_STRING_FIELD(name);
+ READ_STRING_FIELD(refname);
+ READ_NODE_FIELD(partitionClause);
+ READ_NODE_FIELD(orderClause);
+ READ_UINT_FIELD(winref);
+ READ_BOOL_FIELD(copiedOrder);
+
+ READ_DONE();
+}
+
+/*
* _readRowMarkClause
*/
static RowMarkClause *
@@ -403,6 +423,25 @@ _readAggref(void)
}
/*
+ * _readWindowFunc
+ *
+ * Reader counterpart of _outWindowFunc; parseNodeString calls it when
+ * the node tag matches WINDOWFUNC. The fields are read in the same
+ * order in which _outWindowFunc writes them: winfnoid, wintype, args,
+ * winref, winstar, winagg, location.
+ */
+static WindowFunc *
+_readWindowFunc(void)
+{
+ READ_LOCALS(WindowFunc);
+
+ READ_OID_FIELD(winfnoid);
+ READ_OID_FIELD(wintype);
+ READ_NODE_FIELD(args);
+ READ_UINT_FIELD(winref);
+ READ_BOOL_FIELD(winstar);
+ READ_BOOL_FIELD(winagg);
+ READ_LOCATION_FIELD(location);
+
+ READ_DONE();
+}
+
+/*
* _readArrayRef
*/
static ArrayRef *
@@ -1091,6 +1130,8 @@ parseNodeString(void)
return_value = _readQuery();
else if (MATCH("SORTGROUPCLAUSE", 15))
return_value = _readSortGroupClause();
+ else if (MATCH("WINDOWCLAUSE", 12))
+ return_value = _readWindowClause();
else if (MATCH("ROWMARKCLAUSE", 13))
return_value = _readRowMarkClause();
else if (MATCH("COMMONTABLEEXPR", 15))
@@ -1111,6 +1152,8 @@ parseNodeString(void)
return_value = _readParam();
else if (MATCH("AGGREF", 6))
return_value = _readAggref();
+ else if (MATCH("WINDOWFUNC", 10))
+ return_value = _readWindowFunc();
else if (MATCH("ARRAYREF", 8))
return_value = _readArrayRef();
else if (MATCH("FUNCEXPR", 8))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index b0553894c24..17eebc67647 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.177 2008/11/15 19:43:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.178 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -929,10 +929,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
* 1. If the subquery has a LIMIT clause, we must not push down any quals,
* since that could change the set of rows returned.
*
- * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
+ * 2. If the subquery contains any window functions, we can't push quals
+ * into it, because that would change the results.
+ *
+ * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that would change the results.
*
- * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
+ * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
* push quals into each component query, but the quals can only reference
* subquery columns that suffer no type coercions in the set operation.
* Otherwise there are possible semantic gotchas. So, we check the
@@ -950,6 +953,10 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
return false;
+ /* Check point 2 */
+ if (subquery->hasWindowFuncs)
+ return false;
+
/* Are we at top level, or looking at a setop component? */
if (subquery == topquery)
{
@@ -1093,6 +1100,12 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
return false;
/*
+ * It would be unsafe to push down window function calls, but at least
+ * for the moment we could never see any in a qual anyhow.
+ */
+ Assert(!contain_window_function(qual));
+
+ /*
* Examine all Vars used in clause; since it's a restriction clause, all
* such Vars must refer to subselect output columns.
*/
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 0b9c5819820..7f30dde869f 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.201 2008/11/22 22:47:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.202 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1284,6 +1284,40 @@ cost_agg(Path *path, PlannerInfo *root,
}
/*
+ * cost_windowagg
+ * Determines the cost of performing a WindowAgg plan node, including
+ * the cost of its input. The function itself returns void; the
+ * estimates are stored into path->startup_cost and path->total_cost.
+ *
+ * Input is assumed already properly sorted.
+ *
+ * path: receives the computed startup and total cost
+ * root: not referenced by this function body
+ * numWindowFuncs: number of window functions charged per input tuple
+ * numPartCols, numOrderCols: column counts charged per input tuple for
+ * grouping comparisons
+ * input_startup_cost, input_total_cost: costs of the input, used as the
+ * starting point
+ * input_tuples: number of input tuples all per-tuple charges scale with
+ */
+void
+cost_windowagg(Path *path, PlannerInfo *root,
+ int numWindowFuncs, int numPartCols, int numOrderCols,
+ Cost input_startup_cost, Cost input_total_cost,
+ double input_tuples)
+{
+ Cost startup_cost;
+ Cost total_cost;
+
+ startup_cost = input_startup_cost; /* nothing is added to startup cost here */
+ total_cost = input_total_cost;
+
+ /*
+ * We charge one cpu_operator_cost per window function per tuple (often a
+ * drastic underestimate, but without a way to gauge how many tuples the
+ * window function will fetch, it's hard to do better). We also charge
+ * cpu_operator_cost per grouping column per tuple for grouping
+ * comparisons, plus cpu_tuple_cost per tuple for general overhead.
+ */
+ total_cost += cpu_operator_cost * input_tuples * numWindowFuncs;
+ total_cost += cpu_operator_cost * input_tuples * (numPartCols + numOrderCols);
+ total_cost += cpu_tuple_cost * input_tuples;
+ /* hand the results back through the Path */
+ path->startup_cost = startup_cost;
+ path->total_cost = total_cost;
+}
+
+/*
* cost_group
* Determines and returns the cost of performing a Group plan node,
* including the cost of its input.
@@ -2155,6 +2189,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
* Vars and Consts are charged zero, and so are boolean operators (AND,
* OR, NOT). Simplistic, but a lot better than no model at all.
*
+ * Note that Aggref and WindowFunc nodes are (and should be) treated
+ * like Vars --- whatever execution cost they have is absorbed into
+ * plan-node-specific costing. As far as expression evaluation is
+ * concerned they're just like Vars.
+ *
* Should we try to account for the possibility of short-circuit
* evaluation of AND/OR? Probably *not*, because that would make the
* results depend on the clause ordering, and we are not in any position
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 3d35eb605d9..5f6d219a01a 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -10,7 +10,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.14 2008/12/01 21:06:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.15 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -438,14 +438,16 @@ get_eclass_for_sort_expr(PlannerInfo *root,
/*
* add_eq_member doesn't check for volatile functions, set-returning
- * functions, or aggregates, but such could appear in sort expressions; so
- * we have to check whether its const-marking was correct.
+ * functions, aggregates, or window functions, but such could appear
+ * in sort expressions; so we have to check whether its const-marking
+ * was correct.
*/
if (newec->ec_has_const)
{
if (newec->ec_has_volatile ||
expression_returns_set((Node *) expr) ||
- contain_agg_clause((Node *) expr))
+ contain_agg_clause((Node *) expr) ||
+ contain_window_function((Node *) expr))
{
newec->ec_has_const = false;
newem->em_is_const = false;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index f5d4f41c032..b53b5e1470e 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.252 2008/11/20 19:52:54 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.253 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -3237,8 +3237,8 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
* anything for Aggref nodes; this is okay since they are really
* comparable to Vars.
*
- * See notes in grouping_planner about why this routine and make_group are
- * the only ones in this file that worry about tlist eval cost.
+ * See notes in grouping_planner about why only make_agg, make_windowagg
+ * and make_group worry about tlist eval cost.
*/
if (qual)
{
@@ -3260,6 +3260,53 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
return node;
}
+WindowAgg *
+make_windowagg(PlannerInfo *root, List *tlist, int numWindowFuncs,
+ int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
+ int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+ Plan *lefttree)
+{
+ WindowAgg *node = makeNode(WindowAgg);
+ Plan *plan = &node->plan;
+ Path windowagg_path; /* dummy for result of cost_windowagg */
+ QualCost qual_cost;
+ /*
+ * Build a WindowAgg plan node on top of lefttree, with tlist as its
+ * target list and no qual. The partition/order column arrays are
+ * stored by pointer, not copied. Costs are the cost_windowagg estimate
+ * plus the cost of evaluating tlist.
+ */
+ node->partNumCols = partNumCols;
+ node->partColIdx = partColIdx;
+ node->partOperators = partOperators;
+ node->ordNumCols = ordNumCols;
+ node->ordColIdx = ordColIdx;
+ node->ordOperators = ordOperators;
+
+ copy_plan_costsize(plan, lefttree); /* only care about copying size */
+ cost_windowagg(&windowagg_path, root,
+ numWindowFuncs, partNumCols, ordNumCols,
+ lefttree->startup_cost,
+ lefttree->total_cost,
+ lefttree->plan_rows);
+ plan->startup_cost = windowagg_path.startup_cost; /* overwrite the copied costs */
+ plan->total_cost = windowagg_path.total_cost;
+
+ /*
+ * We also need to account for the cost of evaluation of the tlist.
+ *
+ * See notes in grouping_planner about why only make_agg, make_windowagg
+ * and make_group worry about tlist eval cost.
+ *
+ * plan_rows is not assigned in this function; presumably it was copied
+ * from lefttree by copy_plan_costsize above -- TODO confirm.
+ */
+ cost_qual_eval(&qual_cost, tlist, root);
+ plan->startup_cost += qual_cost.startup;
+ plan->total_cost += qual_cost.startup; /* one-time cost counts toward the total too */
+ plan->total_cost += qual_cost.per_tuple * plan->plan_rows;
+
+ plan->targetlist = tlist;
+ plan->lefttree = lefttree;
+ plan->righttree = NULL;
+ /* WindowAgg nodes never have a qual clause */
+ plan->qual = NIL;
+
+ return node;
+}
+
Group *
make_group(PlannerInfo *root,
List *tlist,
@@ -3300,8 +3347,8 @@ make_group(PlannerInfo *root,
* lower plan level and will only be copied by the Group node. Worth
* fixing?
*
- * See notes in grouping_planner about why this routine and make_agg are
- * the only ones in this file that worry about tlist eval cost.
+ * See notes in grouping_planner about why only make_agg, make_windowagg
+ * and make_group worry about tlist eval cost.
*/
if (qual)
{
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 8a6b2ad0345..f0f17d5f950 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.43 2008/08/25 22:42:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.44 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -95,11 +95,11 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
/*
* Reject unoptimizable cases.
*
- * We don't handle GROUP BY, because our current implementations of
- * grouping require looking at all the rows anyway, and so there's not
- * much point in optimizing MIN/MAX.
+ * We don't handle GROUP BY or windowing, because our current
+ * implementations of grouping require looking at all the rows anyway,
+ * and so there's not much point in optimizing MIN/MAX.
*/
- if (parse->groupClause)
+ if (parse->groupClause || parse->hasWindowFuncs)
return NULL;
/*
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 0a1d1d1559f..a8ea043a697 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.112 2008/10/22 20:17:51 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.113 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -67,9 +67,9 @@
* PlannerInfo field and not a passed parameter is that the low-level routines
* in indxpath.c need to see it.)
*
- * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys,
- * and sort_pathkeys, which like query_pathkeys need to be canonicalized once
- * the info is available.
+ * Note: the PlannerInfo node also includes group_pathkeys, window_pathkeys,
+ * distinct_pathkeys, and sort_pathkeys, which like query_pathkeys need to be
+ * canonicalized once the info is available.
*
* tuple_fraction is interpreted as follows:
* 0: expect all tuples to be retrieved (normal case)
@@ -121,6 +121,8 @@ query_planner(PlannerInfo *root, List *tlist,
root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root,
root->group_pathkeys);
+ root->window_pathkeys = canonicalize_pathkeys(root,
+ root->window_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root,
root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root,
@@ -228,11 +230,12 @@ query_planner(PlannerInfo *root, List *tlist,
/*
* We have completed merging equivalence sets, so it's now possible to
* convert the requested query_pathkeys to canonical form. Also
- * canonicalize the groupClause, distinctClause and sortClause pathkeys
- * for use later.
+ * canonicalize the groupClause, windowClause, distinctClause and
+ * sortClause pathkeys for use later.
*/
root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
+ root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
@@ -287,10 +290,12 @@ query_planner(PlannerInfo *root, List *tlist,
* If both GROUP BY and ORDER BY are specified, we will need two
* levels of sort --- and, therefore, certainly need to read all the
* tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if
- * we have both DISTINCT and GROUP BY.
+ * we have both DISTINCT and GROUP BY, or if we have a window
+ * specification not compatible with the GROUP BY.
*/
if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
- !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys))
+ !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) ||
+ !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0;
}
else if (parse->hasAggs || root->hasHavingQual)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 7f91309032a..b4b578d5973 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.247 2008/12/18 18:20:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.248 2008/12/28 18:53:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -82,6 +82,18 @@ static void locate_grouping_columns(PlannerInfo *root,
List *sub_tlist,
AttrNumber *groupColIdx);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
+static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
+static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
+ List *tlist, bool canonicalize);
+static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
+ List *tlist,
+ int numSortCols, AttrNumber *sortColIdx,
+ int *partNumCols,
+ AttrNumber **partColIdx,
+ Oid **partOperators,
+ int *ordNumCols,
+ AttrNumber **ordColIdx,
+ Oid **ordOperators);
/*****************************************************************************
@@ -852,6 +864,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
AggClauseCounts agg_counts;
int numGroupCols;
bool use_hashed_grouping = false;
+ WindowFuncLists *wflists = NULL;
+ List *activeWindows = NIL;
MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
@@ -867,6 +881,22 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
tlist = preprocess_targetlist(root, tlist);
/*
+ * Locate any window functions in the tlist. (We don't need to look
+ * anywhere else, since expressions used in ORDER BY will be in there
+ * too.) Note that they could all have been eliminated by constant
+ * folding, in which case we don't need to do any more work.
+ */
+ if (parse->hasWindowFuncs)
+ {
+ wflists = find_window_functions((Node *) tlist,
+ list_length(parse->windowClause));
+ if (wflists->numWindowFuncs > 0)
+ activeWindows = select_active_windows(root, wflists);
+ else
+ parse->hasWindowFuncs = false;
+ }
+
+ /*
* Generate appropriate target list for subplan; may be different from
* tlist if grouping or aggregation is needed.
*/
@@ -890,6 +920,19 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
else
root->group_pathkeys = NIL;
+ /* We consider only the first (bottom) window in pathkeys logic */
+ if (activeWindows != NIL)
+ {
+ WindowClause *wc = (WindowClause *) linitial(activeWindows);
+
+ root->window_pathkeys = make_pathkeys_for_window(root,
+ wc,
+ tlist,
+ false);
+ }
+ else
+ root->window_pathkeys = NIL;
+
if (parse->distinctClause &&
grouping_is_sortable(parse->distinctClause))
root->distinct_pathkeys =
@@ -927,11 +970,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
* Figure out whether we want a sorted result from query_planner.
*
* If we have a sortable GROUP BY clause, then we want a result sorted
- * properly for grouping. Otherwise, if there's a sortable DISTINCT
- * clause that's more rigorous than the ORDER BY clause, we try to
- * produce output that's sufficiently well sorted for the DISTINCT.
- * Otherwise, if there is an ORDER BY clause, we want to sort by the
- * ORDER BY clause.
+ * properly for grouping. Otherwise, if we have window functions to
+ * evaluate, we try to sort for the first window. Otherwise, if
+ * there's a sortable DISTINCT clause that's more rigorous than the
+ * ORDER BY clause, we try to produce output that's sufficiently well
+ * sorted for the DISTINCT. Otherwise, if there is an ORDER BY
+ * clause, we want to sort by the ORDER BY clause.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
* superset of GROUP BY, it would be tempting to request sort by ORDER
@@ -942,6 +986,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
*/
if (root->group_pathkeys)
root->query_pathkeys = root->group_pathkeys;
+ else if (root->window_pathkeys)
+ root->query_pathkeys = root->window_pathkeys;
else if (list_length(root->distinct_pathkeys) >
list_length(root->sort_pathkeys))
root->query_pathkeys = root->distinct_pathkeys;
@@ -1092,10 +1138,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
- * Presently, of the node types we can add on, only Agg and
- * Group project new tlists (the rest just copy their input
- * tuples) --- so make_agg() and make_group() are responsible
- * for computing the added cost.
+ * Presently, of the node types we can add on, only Agg,
+ * WindowAgg, and Group project new tlists (the rest just copy
+ * their input tuples) --- so make_agg(), make_windowagg() and
+ * make_group() are responsible for computing the added cost.
*/
cost_qual_eval(&tlist_cost, sub_tlist, root);
result_plan->startup_cost += tlist_cost.startup;
@@ -1225,6 +1271,142 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
NULL);
}
} /* end of non-minmax-aggregate case */
+
+ /*
+ * Since each window function could require a different sort order,
+ * we stack up a WindowAgg node for each window, with sort steps
+ * between them as needed.
+ */
+ if (activeWindows)
+ {
+ List *window_tlist;
+ ListCell *l;
+
+ /*
+ * If the top-level plan node is one that cannot do expression
+ * evaluation, we must insert a Result node to project the
+ * desired tlist. (In some cases this might not really be
+ * required, but it's not worth trying to avoid it.) Note that
+ * on second and subsequent passes through the following loop,
+ * the top-level node will be a WindowAgg which we know can
+ * project; so we only need to check once.
+ */
+ if (!is_projection_capable_plan(result_plan))
+ {
+ result_plan = (Plan *) make_result(root,
+ NIL,
+ NULL,
+ result_plan);
+ }
+
+ /*
+ * The "base" targetlist for all steps of the windowing process
+ * is a flat tlist of all Vars and Aggs needed in the result.
+ * (In some cases we wouldn't need to propagate all of these
+ * all the way to the top, since they might only be needed as
+ * inputs to WindowFuncs. It's probably not worth trying to
+ * optimize that though.) As we climb up the stack, we add
+ * outputs for the WindowFuncs computed at each level. Also,
+ * each input tlist has to present all the columns needed to
+ * sort the data for the next WindowAgg step. That's handled
+ * internally by make_sort_from_pathkeys, but we need the
+ * copyObject steps here to ensure that each plan node has
+ * a separately modifiable tlist.
+ */
+ window_tlist = flatten_tlist(tlist);
+ if (parse->hasAggs)
+ window_tlist = add_to_flat_tlist(window_tlist,
+ pull_agg_clause((Node *) tlist));
+ result_plan->targetlist = (List *) copyObject(window_tlist);
+
+ foreach(l, activeWindows)
+ {
+ WindowClause *wc = (WindowClause *) lfirst(l);
+ List *window_pathkeys;
+ int partNumCols;
+ AttrNumber *partColIdx;
+ Oid *partOperators;
+ int ordNumCols;
+ AttrNumber *ordColIdx;
+ Oid *ordOperators;
+
+ window_pathkeys = make_pathkeys_for_window(root,
+ wc,
+ tlist,
+ true);
+
+ /*
+ * This is a bit tricky: we build a sort node even if we don't
+ * really have to sort. Even when no explicit sort is needed,
+ * we need to have suitable resjunk items added to the input
+ * plan's tlist for any partitioning or ordering columns that
+ * aren't plain Vars. Furthermore, this way we can use
+ * existing infrastructure to identify which input columns are
+ * the interesting ones.
+ */
+ if (window_pathkeys)
+ {
+ Sort *sort_plan;
+
+ sort_plan = make_sort_from_pathkeys(root,
+ result_plan,
+ window_pathkeys,
+ -1.0);
+ if (!pathkeys_contained_in(window_pathkeys,
+ current_pathkeys))
+ {
+ /* we do indeed need to sort */
+ result_plan = (Plan *) sort_plan;
+ current_pathkeys = window_pathkeys;
+ }
+ /* In either case, extract the per-column information */
+ get_column_info_for_window(root, wc, tlist,
+ sort_plan->numCols,
+ sort_plan->sortColIdx,
+ &partNumCols,
+ &partColIdx,
+ &partOperators,
+ &ordNumCols,
+ &ordColIdx,
+ &ordOperators);
+ }
+ else
+ {
+ /* empty window specification, nothing to sort */
+ partNumCols = 0;
+ partColIdx = NULL;
+ partOperators = NULL;
+ ordNumCols = 0;
+ ordColIdx = NULL;
+ ordOperators = NULL;
+ }
+
+ if (lnext(l))
+ {
+ /* Add the current WindowFuncs to the running tlist */
+ window_tlist = add_to_flat_tlist(window_tlist,
+ wflists->windowFuncs[wc->winref]);
+ }
+ else
+ {
+ /* Install the original tlist in the topmost WindowAgg */
+ window_tlist = tlist;
+ }
+
+ /* ... and make the WindowAgg plan node */
+ result_plan = (Plan *)
+ make_windowagg(root,
+ (List *) copyObject(window_tlist),
+ list_length(wflists->windowFuncs[wc->winref]),
+ partNumCols,
+ partColIdx,
+ partOperators,
+ ordNumCols,
+ ordColIdx,
+ ordOperators,
+ result_plan);
+ }
+ }
} /* end of if (setOperations) */
/*
@@ -2030,7 +2212,8 @@ make_subplanTargetList(PlannerInfo *root,
* If we're not grouping or aggregating, there's nothing to do here;
* query_planner should receive the unmodified target list.
*/
- if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual)
+ if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual &&
+ !parse->hasWindowFuncs)
{
*need_tlist_eval = true;
return tlist;
@@ -2039,7 +2222,9 @@ make_subplanTargetList(PlannerInfo *root,
/*
* Otherwise, start with a "flattened" tlist (having just the vars
* mentioned in the targetlist and HAVING qual --- but not upper-level
- * Vars; they will be replaced by Params later on).
+ * Vars; they will be replaced by Params later on). Note this includes
+ * vars used in resjunk items, so we are covering the needs of ORDER BY
+ * and window specifications.
*/
sub_tlist = flatten_tlist(tlist);
extravars = pull_var_clause(parse->havingQual, true);
@@ -2066,7 +2251,7 @@ make_subplanTargetList(PlannerInfo *root,
{
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
- TargetEntry *te = NULL;
+ TargetEntry *te;
/*
* Find or make a matching sub_tlist entry. If the groupexpr
@@ -2074,20 +2259,10 @@ make_subplanTargetList(PlannerInfo *root,
* won't make multiple groupClause entries for the same TLE.)
*/
if (groupexpr && IsA(groupexpr, Var))
- {
- ListCell *sl;
-
- foreach(sl, sub_tlist)
- {
- TargetEntry *lte = (TargetEntry *) lfirst(sl);
+ te = tlist_member(groupexpr, sub_tlist);
+ else
+ te = NULL;
- if (equal(groupexpr, lte->expr))
- {
- te = lte;
- break;
- }
- }
- }
if (!te)
{
te = makeTargetEntry((Expr *) groupexpr,
@@ -2112,7 +2287,7 @@ make_subplanTargetList(PlannerInfo *root,
*
* This is only needed if we don't use the sub_tlist chosen by
* make_subplanTargetList. We have to forget the column indexes found
- * by that routine and re-locate the grouping vars in the real sub_tlist.
+ * by that routine and re-locate the grouping exprs in the real sub_tlist.
*/
static void
locate_grouping_columns(PlannerInfo *root,
@@ -2137,18 +2312,10 @@ locate_grouping_columns(PlannerInfo *root,
{
SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
- TargetEntry *te = NULL;
- ListCell *sl;
+ TargetEntry *te = tlist_member(groupexpr, sub_tlist);
- foreach(sl, sub_tlist)
- {
- te = (TargetEntry *) lfirst(sl);
- if (equal(groupexpr, te->expr))
- break;
- }
- if (!sl)
+ if (!te)
elog(ERROR, "failed to locate grouping columns");
-
groupColIdx[keyno++] = te->resno;
}
}
@@ -2190,3 +2357,219 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
elog(ERROR, "resjunk output columns are not implemented");
return new_tlist;
}
+
+/*
+ * select_active_windows
+ *    Build the execution-ordered list of "active" window clauses, i.e. the
+ *    entries of the query's windowClause list for which wflists still
+ *    records at least one WindowFunc.
+ *
+ * The returned list holds the same WindowClause pointers found in
+ * root->parse->windowClause; clauses are not copied.
+ */
+static List *
+select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
+{
+    List       *pending = NIL;
+    List       *ordered = NIL;
+    ListCell   *cell;
+
+    /* Pass 1: collect every window that has WindowFuncs attached to it */
+    foreach(cell, root->parse->windowClause)
+    {
+        WindowClause *clause = (WindowClause *) lfirst(cell);
+
+        Assert(clause->winref <= wflists->maxWinRef);
+        if (wflists->windowFuncs[clause->winref] == NIL)
+            continue;           /* no functions reference it: inactive */
+        pending = lappend(pending, clause);
+    }
+
+    /*
+     * Pass 2: regroup so that windows whose PARTITION BY and ORDER BY lists
+     * are identical sit next to each other.  The SQL standard requires such
+     * windows to share a single sort, even when they differ in other
+     * respects (for instance name or framing clause).
+     *
+     * Much more could be done here: noticing that one window's sort keys
+     * are a prefix of another's (so the longer sort serves both), or
+     * scheduling first the windows that match an ordering the underlying
+     * query already delivers.  For now, satisfying the spec is enough.
+     */
+    while (pending != NIL)
+    {
+        WindowClause *leader = (WindowClause *) linitial(pending);
+        ListCell   *before = NULL;
+        ListCell   *after;
+
+        /* The head of the pending list starts a new group in the output */
+        pending = list_delete_first(pending);
+        ordered = lappend(ordered, leader);
+
+        /* Pull every remaining window with the same sort spec into the group */
+        cell = list_head(pending);
+        while (cell != NULL)
+        {
+            WindowClause *candidate = (WindowClause *) lfirst(cell);
+            bool        same_sort;
+
+            /* fetch the successor first; the current cell may be deleted */
+            after = lnext(cell);
+
+            same_sort = equal(leader->partitionClause,
+                              candidate->partitionClause) &&
+                equal(leader->orderClause, candidate->orderClause);
+
+            if (!same_sort)
+                before = cell;
+            else
+            {
+                pending = list_delete_cell(pending, cell, before);
+                ordered = lappend(ordered, candidate);
+            }
+            cell = after;
+        }
+    }
+
+    return ordered;
+}
+
+/*
+ * make_pathkeys_for_window
+ *    Produce the pathkeys list that describes the input ordering a given
+ *    WindowClause needs.
+ *
+ * The ordering consists of the PARTITION BY keys followed by the ORDER BY
+ * keys.  A hash-based implementation of windowing might someday allow a
+ * looser requirement, but at present the input is always sorted.
+ *
+ * An error is raised if either clause list is not sortable.
+ */
+static List *
+make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
+                         List *tlist, bool canonicalize)
+{
+    List       *combined;
+    List       *keys;
+
+    /* Both clause lists must be sortable; check partitioning first */
+    if (!grouping_is_sortable(wc->partitionClause))
+        ereport(ERROR,
+                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                 errmsg("could not implement window PARTITION BY"),
+                 errdetail("Window partitioning columns must be of sortable datatypes.")));
+    if (!grouping_is_sortable(wc->orderClause))
+        ereport(ERROR,
+                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                 errmsg("could not implement window ORDER BY"),
+                 errdetail("Window ordering columns must be of sortable datatypes.")));
+
+    /*
+     * Work on copies of the two clause lists so that concatenating them
+     * leaves the WindowClause's own lists untouched.
+     */
+    combined = list_copy(wc->partitionClause);
+    combined = list_concat(combined, list_copy(wc->orderClause));
+
+    keys = make_pathkeys_for_sortclauses(root,
+                                         combined,
+                                         tlist,
+                                         canonicalize);
+
+    /* the temporary combined list is no longer needed */
+    list_free(combined);
+
+    return keys;
+}
+
+/*----------
+ * get_column_info_for_window
+ *    Get the partitioning/ordering column numbers and equality operators
+ *    for a WindowAgg node.
+ *
+ * This depends on the behavior of make_pathkeys_for_window()!
+ *
+ * We are given the target WindowClause and an array of the input column
+ * numbers associated with the resulting pathkeys.  In the easy case, there
+ * are the same number of pathkey columns as partitioning + ordering columns
+ * and we just have to copy some data around.  However, it's possible that
+ * some of the original partitioning + ordering columns were eliminated as
+ * redundant during the transformation to pathkeys.  (This can happen even
+ * though the parser gets rid of obvious duplicates.  A typical scenario is a
+ * window specification "PARTITION BY x ORDER BY y" coupled with a clause
+ * "WHERE x = y" that causes the two sort columns to be recognized as
+ * redundant.)  In that unusual case, we have to work a lot harder to
+ * determine which keys are significant.
+ *
+ * The method used here is a bit brute-force: add the sort columns to a list
+ * one at a time and note when the resulting pathkey list gets longer.  But
+ * it's a sufficiently uncommon case that a faster way doesn't seem worth
+ * the amount of code refactoring that'd be needed.
+ *
+ * Inputs:
+ *    wc, tlist     window clause and the targetlist its clauses refer to
+ *    numSortCols   number of entries in sortColIdx
+ *    sortColIdx    input column numbers, one per surviving pathkey
+ *
+ * Outputs (all returned through pointers):
+ *    *partNumCols, *partColIdx, *partOperators   partitioning columns
+ *    *ordNumCols,  *ordColIdx,  *ordOperators    ordering columns
+ *
+ * In the easy case the returned column-index arrays point into sortColIdx
+ * itself (no copy is made) and the operator arrays come from
+ * extract_grouping_ops().  In the hard case all four arrays are freshly
+ * palloc'd.  An error is raised if the significant sort clauses found do
+ * not account for exactly numSortCols columns.
+ *----------
+ */
+static void
+get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
+                           int numSortCols, AttrNumber *sortColIdx,
+                           int *partNumCols,
+                           AttrNumber **partColIdx,
+                           Oid **partOperators,
+                           int *ordNumCols,
+                           AttrNumber **ordColIdx,
+                           Oid **ordOperators)
+{
+    int         numPart = list_length(wc->partitionClause);
+    int         numOrder = list_length(wc->orderClause);
+
+    if (numSortCols == numPart + numOrder)
+    {
+        /* easy case: partition columns come first, ordering columns after */
+        *partNumCols = numPart;
+        *partColIdx = sortColIdx;
+        *partOperators = extract_grouping_ops(wc->partitionClause);
+        *ordNumCols = numOrder;
+        *ordColIdx = sortColIdx + numPart;
+        *ordOperators = extract_grouping_ops(wc->orderClause);
+    }
+    else
+    {
+        List       *sortclauses;
+        List       *pathkeys;
+        int         scidx;
+        ListCell   *lc;
+
+        /* first, allocate what's certainly enough space for the arrays */
+        *partNumCols = 0;
+        *partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber));
+        *partOperators = (Oid *) palloc(numPart * sizeof(Oid));
+        *ordNumCols = 0;
+        *ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber));
+        *ordOperators = (Oid *) palloc(numOrder * sizeof(Oid));
+        sortclauses = NIL;
+        pathkeys = NIL;
+        scidx = 0;
+
+        /*
+         * NB: the output parameters are pointers to array pointers, so they
+         * must be dereferenced *before* subscripting: (*arr)[i].  Writing
+         * *arr[i] would parse as *(arr[i]) and index the caller's pointer
+         * variable rather than the array allocated above.
+         */
+        foreach(lc, wc->partitionClause)
+        {
+            SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+            List       *new_pathkeys;
+
+            sortclauses = lappend(sortclauses, sgc);
+            new_pathkeys = make_pathkeys_for_sortclauses(root,
+                                                         sortclauses,
+                                                         tlist,
+                                                         true);
+            if (list_length(new_pathkeys) > list_length(pathkeys))
+            {
+                /* this sort clause is actually significant */
+                (*partColIdx)[*partNumCols] = sortColIdx[scidx++];
+                (*partOperators)[*partNumCols] = sgc->eqop;
+                (*partNumCols)++;
+                pathkeys = new_pathkeys;
+            }
+        }
+        /* keep extending the same clause list with the ORDER BY entries */
+        foreach(lc, wc->orderClause)
+        {
+            SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+            List       *new_pathkeys;
+
+            sortclauses = lappend(sortclauses, sgc);
+            new_pathkeys = make_pathkeys_for_sortclauses(root,
+                                                         sortclauses,
+                                                         tlist,
+                                                         true);
+            if (list_length(new_pathkeys) > list_length(pathkeys))
+            {
+                /* this sort clause is actually significant */
+                (*ordColIdx)[*ordNumCols] = sortColIdx[scidx++];
+                (*ordOperators)[*ordNumCols] = sgc->eqop;
+                (*ordNumCols)++;
+                pathkeys = new_pathkeys;
+            }
+        }
+        /* complain if we didn't eat exactly the right number of sort cols */
+        if (scidx != numSortCols)
+            elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators");
+    }
+}
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 9bec109f6f5..83447082f5b 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.146 2008/10/21 20:42:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.147 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -415,6 +415,7 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
}
break;
case T_Agg:
+ case T_WindowAgg:
case T_Group:
set_upper_references(glob, plan, rtoffset);
break;
@@ -679,6 +680,11 @@ fix_expr_common(PlannerGlobal *glob, Node *node)
record_plan_function_dependency(glob,
((Aggref *) node)->aggfnoid);
}
+ else if (IsA(node, WindowFunc))
+ {
+ record_plan_function_dependency(glob,
+ ((WindowFunc *) node)->winfnoid);
+ }
else if (IsA(node, FuncExpr))
{
record_plan_function_dependency(glob,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c999fb6419c..a38f8c09ae7 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.143 2008/12/08 00:16:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.144 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1243,6 +1243,7 @@ simplify_EXISTS_query(Query *query)
query->intoClause ||
query->setOperations ||
query->hasAggs ||
+ query->hasWindowFuncs ||
query->havingQual ||
query->limitOffset ||
query->limitCount ||
@@ -1258,13 +1259,14 @@ simplify_EXISTS_query(Query *query)
/*
* Otherwise, we can throw away the targetlist, as well as any GROUP,
- * DISTINCT, and ORDER BY clauses; none of those clauses will change
- * a nonzero-rows result to zero rows or vice versa. (Furthermore,
+ * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
+ * change a nonzero-rows result to zero rows or vice versa. (Furthermore,
* since our parsetree representation of these clauses depends on the
* targetlist, we'd better throw them away if we drop the targetlist.)
*/
query->targetList = NIL;
query->groupClause = NIL;
+ query->windowClause = NIL;
query->distinctClause = NIL;
query->sortClause = NIL;
query->hasDistinctOn = false;
@@ -1321,8 +1323,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
* The rest of the sub-select must not refer to any Vars of the parent
* query. (Vars of higher levels should be okay, though.)
*
- * Note: we need not check for Aggs separately because we know the
- * sub-select is as yet unoptimized; any uplevel Agg must therefore
+ * Note: we need not check for Aggrefs separately because we know the
+ * sub-select is as yet unoptimized; any uplevel Aggref must therefore
* contain an uplevel Var reference. This is not the case below ...
*/
if (contain_vars_of_level((Node *) subselect, 1))
@@ -1432,7 +1434,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
/*
* And there can't be any child Vars in the stuff we intend to pull up.
* (Note: we'd need to check for child Aggs too, except we know the
- * child has no aggs at all because of simplify_EXISTS_query's check.)
+ * child has no aggs at all because of simplify_EXISTS_query's check.
+ * The same goes for window functions.)
*/
if (contain_vars_of_level((Node *) leftargs, 0))
return NULL;
@@ -1955,6 +1958,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
case T_RecursiveUnion:
case T_Hash:
case T_Agg:
+ case T_WindowAgg:
case T_SeqScan:
case T_Material:
case T_Sort:
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index e4d508523e1..80a51d80786 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -16,7 +16,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.60 2008/11/11 19:05:21 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.61 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -742,7 +742,10 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
* Miscellaneous housekeeping.
*/
parse->hasSubLinks |= subquery->hasSubLinks;
- /* subquery won't be pulled up if it hasAggs, so no work there */
+ /*
+ * subquery won't be pulled up if it hasAggs or hasWindowFuncs, so no
+ * work needed on those flags
+ */
/*
* Return the adjusted subquery jointree to replace the RangeTblRef entry
@@ -931,6 +934,7 @@ is_simple_subquery(Query *subquery)
* limiting, or WITH. (XXX WITH could possibly be allowed later)
*/
if (subquery->hasAggs ||
+ subquery->hasWindowFuncs ||
subquery->groupClause ||
subquery->havingQual ||
subquery->sortClause ||
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index bd7c05cc53d..f3a49cf9dee 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.162 2008/11/15 19:43:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.163 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -136,6 +136,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction,
Assert(parse->jointree->quals == NULL);
Assert(parse->groupClause == NIL);
Assert(parse->havingQual == NULL);
+ Assert(parse->windowClause == NIL);
Assert(parse->distinctClause == NIL);
/*
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 3c74831f4da..ee45f32abbb 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.271 2008/12/18 18:20:34 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.272 2008/12/28 18:53:57 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -72,7 +72,9 @@ typedef struct
} substitute_actual_srf_parameters_context;
static bool contain_agg_clause_walker(Node *node, void *context);
+static bool pull_agg_clause_walker(Node *node, List **context);
static bool count_agg_clauses_walker(Node *node, AggClauseCounts *counts);
+static bool find_window_functions_walker(Node *node, WindowFuncLists *lists);
static bool expression_returns_set_rows_walker(Node *node, double *count);
static bool contain_subplans_walker(Node *node, void *context);
static bool contain_mutable_functions_walker(Node *node, void *context);
@@ -389,6 +391,41 @@ contain_agg_clause_walker(Node *node, void *context)
}
/*
+ * pull_agg_clause
+ * Recursively search for Aggref nodes within a clause.
+ *
+ * Returns a List of all Aggrefs found.
+ *
+ * This does not descend into subqueries, and so should be used only after
+ * reduction of sublinks to subplans, or in contexts where it's known there
+ * are no subqueries. There mustn't be outer-aggregate references either.
+ */
+List *
+pull_agg_clause(Node *clause)
+{
+    List       *aggrefs = NIL;
+
+    /*
+     * The walker appends each Aggref it meets to the list passed by
+     * address; its boolean result is of no interest here.
+     */
+    (void) pull_agg_clause_walker(clause, &aggrefs);
+
+    return aggrefs;
+}
+
+static bool
+pull_agg_clause_walker(Node *node, List **context)
+{
+    if (node == NULL)
+        return false;
+
+    if (!IsA(node, Aggref))
+    {
+        /* anything else: keep walking, but SubLinks are not expected here */
+        Assert(!IsA(node, SubLink));
+        return expression_tree_walker(node, pull_agg_clause_walker,
+                                      (void *) context);
+    }
+
+    /* Found an Aggref: it must belong to the current query level */
+    Assert(((Aggref *) node)->agglevelsup == 0);
+    *context = lappend(*context, node);
+
+    /* its arguments are deliberately not examined */
+    return false;
+}
+
+/*
* count_agg_clauses
* Recursively count the Aggref nodes in an expression tree.
*
@@ -520,6 +557,79 @@ count_agg_clauses_walker(Node *node, AggClauseCounts *counts)
/*****************************************************************************
+ * Window-function clause manipulation
+ *****************************************************************************/
+
+/*
+ * contain_window_function
+ *    Report whether a clause contains any WindowFunc node.
+ *
+ * Window functions carry no level field; they are hard-wired to belong to
+ * the current query level.  This test is therefore exactly the one that
+ * rewriteManip.c already provides, and we simply defer to it.
+ */
+bool
+contain_window_function(Node *clause)
+{
+    bool        found = checkExprHasWindowFuncs(clause);
+
+    return found;
+}
+
+/*
+ * find_window_functions
+ *    Collect every WindowFunc node in an expression tree, sorted into
+ *    per-window lists indexed by winref ID number.
+ *
+ * The caller supplies maxWinRef, an upper bound on the winref IDs that can
+ * appear in the tree.  The result is a palloc'd WindowFuncLists whose
+ * windowFuncs array has maxWinRef + 1 slots, all initially zeroed.
+ */
+WindowFuncLists *
+find_window_functions(Node *clause, Index maxWinRef)
+{
+    WindowFuncLists *wfl;
+
+    wfl = (WindowFuncLists *) palloc(sizeof(WindowFuncLists));
+
+    /* one list slot per possible winref value, 0 .. maxWinRef */
+    wfl->windowFuncs = (List **) palloc0((maxWinRef + 1) * sizeof(List *));
+    wfl->maxWinRef = maxWinRef;
+    wfl->numWindowFuncs = 0;
+
+    /* the walker fills in the lists and the count; its result is unused */
+    (void) find_window_functions_walker(clause, wfl);
+
+    return wfl;
+}
+
+static bool
+find_window_functions_walker(Node *node, WindowFuncLists *lists)
+{
+    WindowFunc *wf;
+
+    if (node == NULL)
+        return false;
+
+    if (!IsA(node, WindowFunc))
+    {
+        /* not a window function: recurse, but SubLinks are not expected */
+        Assert(!IsA(node, SubLink));
+        return expression_tree_walker(node, find_window_functions_walker,
+                                      (void *) lists);
+    }
+
+    wf = (WindowFunc *) node;
+
+    /* winref is unsigned, so checking only the upper bound suffices */
+    if (wf->winref > lists->maxWinRef)
+        elog(ERROR, "WindowFunc contains out-of-range winref %u",
+             wf->winref);
+
+    /* file the node under its window and bump the overall count */
+    lists->windowFuncs[wf->winref] =
+        lappend(lists->windowFuncs[wf->winref], wf);
+    lists->numWindowFuncs += 1;
+
+    /*
+     * A window function's arguments may not themselves contain window
+     * functions; reject such nesting.
+     */
+    if (contain_window_function((Node *) wf->args))
+        ereport(ERROR,
+                (errcode(ERRCODE_WINDOWING_ERROR),
+                 errmsg("window function calls cannot be nested")));
+
+    /*
+     * The arguments have just been verified free of window functions, so
+     * there is nothing further to find by descending into them.
+     */
+    return false;
+}
+
+
+/*****************************************************************************
* Support for expressions returning sets
*****************************************************************************/
@@ -567,6 +677,8 @@ expression_returns_set_rows_walker(Node *node, double *count)
/* Avoid recursion for some cases that can't return a set */
if (IsA(node, Aggref))
return false;
+ if (IsA(node, WindowFunc))
+ return false;
if (IsA(node, DistinctExpr))
return false;
if (IsA(node, ScalarArrayOpExpr))
@@ -897,6 +1009,11 @@ contain_nonstrict_functions_walker(Node *node, void *context)
/* an aggregate could return non-null with null input */
return true;
}
+ if (IsA(node, WindowFunc))
+ {
+ /* a window function could return non-null with null input */
+ return true;
+ }
if (IsA(node, ArrayRef))
{
/* array assignment is nonstrict, but subscripting is strict */
@@ -1589,7 +1706,8 @@ is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK)
* not-constant expressions, namely aggregates (Aggrefs). In current usage
* this is only applied to WHERE clauses and so a check for Aggrefs would be
* a waste of cycles; but be sure to also check contain_agg_clause() if you
- * want to know about pseudo-constness in other contexts.
+ * want to know about pseudo-constness in other contexts. The same goes
+ * for window functions (WindowFuncs).
*/
bool
is_pseudo_constant_clause(Node *clause)
@@ -3472,6 +3590,7 @@ inline_function(Oid funcid, Oid result_type, List *args,
querytree->utilityStmt ||
querytree->intoClause ||
querytree->hasAggs ||
+ querytree->hasWindowFuncs ||
querytree->hasSubLinks ||
querytree->cteList ||
querytree->rtable ||
@@ -3479,6 +3598,7 @@ inline_function(Oid funcid, Oid result_type, List *args,
querytree->jointree->quals ||
querytree->groupClause ||
querytree->havingQual ||
+ querytree->windowClause ||
querytree->distinctClause ||
querytree->sortClause ||
querytree->limitOffset ||
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 968f4ae367a..aab3d032b12 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.83 2008/10/21 20:42:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.84 2008/12/28 18:53:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -101,28 +101,28 @@ flatten_tlist(List *tlist)
/*
* add_to_flat_tlist
- * Add more vars to a flattened tlist (if they're not already in it)
+ * Add more items to a flattened tlist (if they're not already in it)
*
* 'tlist' is the flattened tlist
- * 'vars' is a list of Var and/or PlaceHolderVar nodes
+ * 'exprs' is a list of expressions (usually, but not necessarily, Vars)
*
* Returns the extended tlist.
*/
List *
-add_to_flat_tlist(List *tlist, List *vars)
+add_to_flat_tlist(List *tlist, List *exprs)
{
int next_resno = list_length(tlist) + 1;
- ListCell *v;
+ ListCell *lc;
- foreach(v, vars)
+ foreach(lc, exprs)
{
- Node *var = (Node *) lfirst(v);
+ Node *expr = (Node *) lfirst(lc);
- if (!tlist_member(var, tlist))
+ if (!tlist_member(expr, tlist))
{
TargetEntry *tle;
- tle = makeTargetEntry(copyObject(var), /* copy needed?? */
+ tle = makeTargetEntry(copyObject(expr), /* copy needed?? */
next_resno++,
NULL,
false);
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index cdac02b71db..70688655cce 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -17,7 +17,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.384 2008/12/13 02:00:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.385 2008/12/28 18:53:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -306,6 +306,9 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt)
qry->hasAggs = pstate->p_hasAggs;
if (pstate->p_hasAggs)
parseCheckAggregates(pstate, qry);
+ qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
+ if (pstate->p_hasWindowFuncs)
+ parseCheckWindowFuncs(pstate, qry);
return qry;
}
@@ -673,6 +676,12 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
errmsg("cannot use aggregate function in VALUES"),
parser_errposition(pstate,
locate_agg_of_level((Node *) qry, 0))));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in VALUES"),
+ parser_errposition(pstate,
+ locate_windowfunc((Node *) qry))));
return qry;
}
@@ -764,6 +773,9 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
/* make FOR UPDATE/FOR SHARE info available to addRangeTableEntry */
pstate->p_locking_clause = stmt->lockingClause;
+ /* make WINDOW info available for window functions, too */
+ pstate->p_windowdefs = stmt->windowClause;
+
/* process the WITH clause */
if (stmt->withClause)
{
@@ -803,7 +815,8 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->groupClause = transformGroupClause(pstate,
stmt->groupClause,
&qry->targetList,
- qry->sortClause);
+ qry->sortClause,
+ false);
if (stmt->distinctClause == NIL)
{
@@ -834,6 +847,11 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->limitCount = transformLimitClause(pstate, stmt->limitCount,
"LIMIT");
+ /* transform window clauses after we have seen all window functions */
+ qry->windowClause = transformWindowDefinitions(pstate,
+ pstate->p_windowdefs,
+ &qry->targetList);
+
/* handle any SELECT INTO/CREATE TABLE AS spec */
if (stmt->intoClause)
{
@@ -849,6 +867,9 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
qry->hasAggs = pstate->p_hasAggs;
if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
parseCheckAggregates(pstate, qry);
+ qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
+ if (pstate->p_hasWindowFuncs)
+ parseCheckWindowFuncs(pstate, qry);
foreach(l, stmt->lockingClause)
{
@@ -889,6 +910,7 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
Assert(stmt->whereClause == NULL);
Assert(stmt->groupClause == NIL);
Assert(stmt->havingClause == NULL);
+ Assert(stmt->windowClause == NIL);
Assert(stmt->op == SETOP_NONE);
/* process the WITH clause */
@@ -1061,6 +1083,12 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
errmsg("cannot use aggregate function in VALUES"),
parser_errposition(pstate,
locate_agg_of_level((Node *) newExprsLists, 0))));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in VALUES"),
+ parser_errposition(pstate,
+ locate_windowfunc((Node *) newExprsLists))));
return qry;
}
@@ -1289,6 +1317,9 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
qry->hasAggs = pstate->p_hasAggs;
if (pstate->p_hasAggs || qry->groupClause || qry->havingQual)
parseCheckAggregates(pstate, qry);
+ qry->hasWindowFuncs = pstate->p_hasWindowFuncs;
+ if (pstate->p_hasWindowFuncs)
+ parseCheckWindowFuncs(pstate, qry);
foreach(l, lockingClause)
{
@@ -1623,6 +1654,12 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
errmsg("cannot use aggregate function in UPDATE"),
parser_errposition(pstate,
locate_agg_of_level((Node *) qry, 0))));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in UPDATE"),
+ parser_errposition(pstate,
+ locate_windowfunc((Node *) qry))));
/*
* Now we are done with SELECT-like processing, and can get on with
@@ -1692,6 +1729,7 @@ transformReturningList(ParseState *pstate, List *returningList)
List *rlist;
int save_next_resno;
bool save_hasAggs;
+ bool save_hasWindowFuncs;
int length_rtable;
if (returningList == NIL)
@@ -1708,6 +1746,8 @@ transformReturningList(ParseState *pstate, List *returningList)
/* save other state so that we can detect disallowed stuff */
save_hasAggs = pstate->p_hasAggs;
pstate->p_hasAggs = false;
+ save_hasWindowFuncs = pstate->p_hasWindowFuncs;
+ pstate->p_hasWindowFuncs = false;
length_rtable = list_length(pstate->p_rtable);
/* transform RETURNING identically to a SELECT targetlist */
@@ -1722,6 +1762,12 @@ transformReturningList(ParseState *pstate, List *returningList)
errmsg("cannot use aggregate function in RETURNING"),
parser_errposition(pstate,
locate_agg_of_level((Node *) rlist, 0))));
+ if (pstate->p_hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in RETURNING"),
+ parser_errposition(pstate,
+ locate_windowfunc((Node *) rlist))));
/* no new relation references please */
if (list_length(pstate->p_rtable) != length_rtable)
@@ -1748,6 +1794,7 @@ transformReturningList(ParseState *pstate, List *returningList)
/* restore state */
pstate->p_next_resno = save_next_resno;
pstate->p_hasAggs = save_hasAggs;
+ pstate->p_hasWindowFuncs = save_hasWindowFuncs;
return rlist;
}
@@ -1883,6 +1930,10 @@ CheckSelectLocking(Query *qry)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("SELECT FOR UPDATE/SHARE is not allowed with aggregate functions")));
+ if (qry->hasWindowFuncs)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("SELECT FOR UPDATE/SHARE is not allowed with window functions")));
}
/*
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 29eab503198..59b7ada7b43 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.647 2008/12/20 16:02:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.648 2008/12/28 18:53:58 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -158,6 +158,7 @@ static TypeName *TableFuncTypeName(List *columns);
DefElem *defelt;
OptionDefElem *optdef;
SortBy *sortby;
+ WindowDef *windef;
JoinExpr *jexpr;
IndexElem *ielem;
Alias *alias;
@@ -402,6 +403,10 @@ static TypeName *TableFuncTypeName(List *columns);
%type <with> with_clause
%type <list> cte_list
+%type <list> window_clause window_definition_list opt_partition_clause
+%type <windef> window_definition over_clause window_specification
+%type <str> opt_existing_window_name
+
/*
* If you make any token changes, update the keyword table in
@@ -431,8 +436,8 @@ static TypeName *TableFuncTypeName(List *columns);
DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DESC
DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
- EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT EXCLUDING
- EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT
+ EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT
+ EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT
FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOR FORCE FOREIGN FORWARD
FREEZE FROM FULL FUNCTION
@@ -461,9 +466,9 @@ static TypeName *TableFuncTypeName(List *columns);
NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF NULLS_P NUMERIC
OBJECT_P OF OFF OFFSET OIDS OLD ON ONLY OPERATOR OPTION OPTIONS OR
- ORDER OUT_P OUTER_P OVERLAPS OVERLAY OWNED OWNER
+ ORDER OUT_P OUTER_P OVER OVERLAPS OVERLAY OWNED OWNER
- PARSER PARTIAL PASSWORD PLACING PLANS POSITION
+ PARSER PARTIAL PARTITION PASSWORD PLACING PLANS POSITION
PRECISION PRESERVE PREPARE PREPARED PRIMARY
PRIOR PRIVILEGES PROCEDURAL PROCEDURE
@@ -489,7 +494,7 @@ static TypeName *TableFuncTypeName(List *columns);
VACUUM VALID VALIDATOR VALUE_P VALUES VARCHAR VARIADIC VARYING
VERBOSE VERSION_P VIEW VOLATILE
- WHEN WHERE WHITESPACE_P WITH WITHOUT WORK WRAPPER WRITE
+ WHEN WHERE WHITESPACE_P WINDOW WITH WITHOUT WORK WRAPPER WRITE
XML_P XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLFOREST XMLPARSE
XMLPI XMLROOT XMLSERIALIZE
@@ -523,7 +528,15 @@ static TypeName *TableFuncTypeName(List *columns);
%nonassoc BETWEEN
%nonassoc IN_P
%left POSTFIXOP /* dummy for postfix Op rules */
-%nonassoc IDENT /* to support target_el without AS */
+/*
+ * To support target_el without AS, we must give IDENT an explicit priority
+ * between POSTFIXOP and Op. We can safely assign the same priority to
+ * various unreserved keywords as needed to resolve ambiguities (this can't
+ * have any bad effects since obviously the keywords will still behave the
+ * same as if they weren't keywords). We need to do this for PARTITION
+ * to support opt_existing_window_name.
+ */
+%nonassoc IDENT PARTITION
%left Op OPERATOR /* multi-character ops and user-defined operators */
%nonassoc NOTNULL
%nonassoc ISNULL
@@ -1259,7 +1272,7 @@ opt_boolean:
* - an integer or floating point number
* - a time interval per SQL99
* ColId gives reduce/reduce errors against ConstInterval and LOCAL,
- * so use IDENT and reject anything which is a reserved word.
+ * so use IDENT (meaning we reject anything that is a key word).
*/
zone_value:
Sconst
@@ -3466,6 +3479,11 @@ old_aggr_list: old_aggr_elem { $$ = list_make1($1); }
| old_aggr_list ',' old_aggr_elem { $$ = lappend($1, $3); }
;
+/*
+ * Must use IDENT here to avoid reduce/reduce conflicts; fortunately none of
+ * the item names needed in old aggregate definitions are likely to become
+ * SQL keywords.
+ */
old_aggr_elem: IDENT '=' def_arg
{
$$ = makeDefElem($1, (Node *)$3);
@@ -6825,7 +6843,7 @@ select_clause:
simple_select:
SELECT opt_distinct target_list
into_clause from_clause where_clause
- group_clause having_clause
+ group_clause having_clause window_clause
{
SelectStmt *n = makeNode(SelectStmt);
n->distinctClause = $2;
@@ -6835,6 +6853,7 @@ simple_select:
n->whereClause = $6;
n->groupClause = $7;
n->havingClause = $8;
+ n->windowClause = $9;
$$ = (Node *)n;
}
| values_clause { $$ = $1; }
@@ -8076,6 +8095,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @2;
$$ = (Node *) n;
}
@@ -8135,6 +8155,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @4;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~~", $1, (Node *) n, @2);
}
@@ -8148,6 +8169,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @5;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~~", $1, (Node *) n, @2);
}
@@ -8161,6 +8183,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @4;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~~*", $1, (Node *) n, @2);
}
@@ -8174,6 +8197,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @5;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~~*", $1, (Node *) n, @2);
}
@@ -8186,6 +8210,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @2;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~", $1, (Node *) n, @2);
}
@@ -8197,6 +8222,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @5;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~", $1, (Node *) n, @2);
}
@@ -8208,6 +8234,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @5;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~", $1, (Node *) n, @2);
}
@@ -8219,6 +8246,7 @@ a_expr: c_expr { $$ = $1; }
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @6;
$$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~", $1, (Node *) n, @2);
}
@@ -8622,7 +8650,7 @@ c_expr: columnref { $$ = $1; }
* (Note that many of the special SQL functions wouldn't actually make any
* sense as functional index entries, but we ignore that consideration here.)
*/
-func_expr: func_name '(' ')'
+func_expr: func_name '(' ')' over_clause
{
FuncCall *n = makeNode(FuncCall);
n->funcname = $1;
@@ -8630,10 +8658,11 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = $4;
n->location = @1;
$$ = (Node *)n;
}
- | func_name '(' expr_list ')'
+ | func_name '(' expr_list ')' over_clause
{
FuncCall *n = makeNode(FuncCall);
n->funcname = $1;
@@ -8641,10 +8670,11 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = $5;
n->location = @1;
$$ = (Node *)n;
}
- | func_name '(' VARIADIC a_expr ')'
+ | func_name '(' VARIADIC a_expr ')' over_clause
{
FuncCall *n = makeNode(FuncCall);
n->funcname = $1;
@@ -8652,10 +8682,11 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = TRUE;
+ n->over = $6;
n->location = @1;
$$ = (Node *)n;
}
- | func_name '(' expr_list ',' VARIADIC a_expr ')'
+ | func_name '(' expr_list ',' VARIADIC a_expr ')' over_clause
{
FuncCall *n = makeNode(FuncCall);
n->funcname = $1;
@@ -8663,10 +8694,11 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = TRUE;
+ n->over = $8;
n->location = @1;
$$ = (Node *)n;
}
- | func_name '(' ALL expr_list ')'
+ | func_name '(' ALL expr_list ')' over_clause
{
FuncCall *n = makeNode(FuncCall);
n->funcname = $1;
@@ -8678,10 +8710,11 @@ func_expr: func_name '(' ')'
* for that in FuncCall at the moment.
*/
n->func_variadic = FALSE;
+ n->over = $6;
n->location = @1;
$$ = (Node *)n;
}
- | func_name '(' DISTINCT expr_list ')'
+ | func_name '(' DISTINCT expr_list ')' over_clause
{
FuncCall *n = makeNode(FuncCall);
n->funcname = $1;
@@ -8689,10 +8722,11 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = TRUE;
n->func_variadic = FALSE;
+ n->over = $6;
n->location = @1;
$$ = (Node *)n;
}
- | func_name '(' '*' ')'
+ | func_name '(' '*' ')' over_clause
{
/*
* We consider AGGREGATE(*) to invoke a parameterless
@@ -8710,6 +8744,7 @@ func_expr: func_name '(' ')'
n->agg_star = TRUE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = $5;
n->location = @1;
$$ = (Node *)n;
}
@@ -8769,6 +8804,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8839,6 +8875,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8850,6 +8887,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8861,6 +8899,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8872,6 +8911,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8883,6 +8923,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8894,6 +8935,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8907,6 +8949,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8923,6 +8966,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8935,6 +8979,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8949,6 +8994,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8969,6 +9015,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8983,6 +9030,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -8994,6 +9042,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -9005,6 +9054,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -9016,6 +9066,7 @@ func_expr: func_name '(' ')'
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = @1;
$$ = (Node *)n;
}
@@ -9157,6 +9208,77 @@ xml_whitespace_option: PRESERVE WHITESPACE_P { $$ = TRUE; }
;
/*
+ * Window Definitions
+ */
+window_clause:
+ WINDOW window_definition_list { $$ = $2; }
+ | /*EMPTY*/ { $$ = NIL; }
+ ;
+
+window_definition_list:
+ window_definition { $$ = list_make1($1); }
+ | window_definition_list ',' window_definition
+ { $$ = lappend($1, $3); }
+ ;
+
+window_definition:
+ ColId AS window_specification
+ {
+ WindowDef *n = $3;
+ n->name = $1;
+ $$ = n;
+ }
+ ;
+
+over_clause: OVER window_specification
+ { $$ = $2; }
+ | OVER ColId
+ {
+ WindowDef *n = makeNode(WindowDef);
+ n->name = NULL;
+ n->refname = $2;
+ n->partitionClause = NIL;
+ n->orderClause = NIL;
+ n->location = @2;
+ $$ = n;
+ }
+ | /*EMPTY*/
+ { $$ = NULL; }
+ ;
+
+window_specification: '(' opt_existing_window_name opt_partition_clause
+ opt_sort_clause ')'
+ {
+ WindowDef *n = makeNode(WindowDef);
+ n->name = NULL;
+ n->refname = $2;
+ n->partitionClause = $3;
+ n->orderClause = $4;
+ n->location = @1;
+ $$ = n;
+ }
+ ;
+
+/*
+ * If we see PARTITION, RANGE, or ROWS as the first token after the '('
+ * of a window_specification, we want the assumption to be that there is
+ * no existing_window_name; but those keywords are unreserved and so could
+ * be ColIds. We fix this by making them have the same precedence as IDENT
+ * and giving the empty production here a slightly higher precedence, so
+ * that the shift/reduce conflict is resolved in favor of reducing the rule.
+ * These keywords are thus precluded from being an existing_window_name but
+ * are not reserved for any other purpose.
+ * (RANGE/ROWS are not an issue as of 8.4 for lack of frame_clause support.)
+ */
+opt_existing_window_name: ColId { $$ = $1; }
+ | /*EMPTY*/ %prec Op { $$ = NULL; }
+ ;
+
+opt_partition_clause: PARTITION BY expr_list { $$ = $3; }
+ | /*EMPTY*/ { $$ = NIL; }
+ ;
+
+/*
* Supporting nonterminals for expressions.
*/
@@ -9961,6 +10083,7 @@ unreserved_keyword:
| OWNER
| PARSER
| PARTIAL
+ | PARTITION
| PASSWORD
| PLANS
| PREPARE
@@ -10139,6 +10262,7 @@ type_func_name_keyword:
| NATURAL
| NOTNULL
| OUTER_P
+ | OVER
| OVERLAPS
| RIGHT
| SIMILAR
@@ -10229,6 +10353,7 @@ reserved_keyword:
| VARIADIC
| WHEN
| WHERE
+ | WINDOW
| WITH
;
@@ -10451,6 +10576,7 @@ makeOverlaps(List *largs, List *rargs, int location)
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->func_variadic = FALSE;
+ n->over = NULL;
n->location = location;
return n;
}
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index bf7b1f6ad2e..c3ad852258b 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.206 2008/12/19 16:25:17 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.207 2008/12/28 18:53:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -287,12 +287,14 @@ const ScanKeyword ScanKeywords[] = {
{"order", ORDER, RESERVED_KEYWORD},
{"out", OUT_P, COL_NAME_KEYWORD},
{"outer", OUTER_P, TYPE_FUNC_NAME_KEYWORD},
+ {"over", OVER, TYPE_FUNC_NAME_KEYWORD},
{"overlaps", OVERLAPS, TYPE_FUNC_NAME_KEYWORD},
{"overlay", OVERLAY, COL_NAME_KEYWORD},
{"owned", OWNED, UNRESERVED_KEYWORD},
{"owner", OWNER, UNRESERVED_KEYWORD},
{"parser", PARSER, UNRESERVED_KEYWORD},
{"partial", PARTIAL, UNRESERVED_KEYWORD},
+ {"partition", PARTITION, UNRESERVED_KEYWORD},
{"password", PASSWORD, UNRESERVED_KEYWORD},
{"placing", PLACING, RESERVED_KEYWORD},
{"plans", PLANS, UNRESERVED_KEYWORD},
@@ -411,6 +413,7 @@ const ScanKeyword ScanKeywords[] = {
{"when", WHEN, RESERVED_KEYWORD},
{"where", WHERE, RESERVED_KEYWORD},
{"whitespace", WHITESPACE_P, UNRESERVED_KEYWORD},
+ {"window", WINDOW, RESERVED_KEYWORD},
{"with", WITH, RESERVED_KEYWORD},
{"without", WITHOUT, UNRESERVED_KEYWORD},
{"work", WORK, UNRESERVED_KEYWORD},
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index e2645462d57..6dba470e39f 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* parse_agg.c
- * handle aggregates in parser
+ * handle aggregates and window functions in parser
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.84 2008/10/04 21:56:54 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.85 2008/12/28 18:53:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -67,7 +67,8 @@ transformAggregateCall(ParseState *pstate, Aggref *agg)
*/
if (min_varlevel == 0)
{
- if (checkExprHasAggs((Node *) agg->args))
+ if (pstate->p_hasAggs &&
+ checkExprHasAggs((Node *) agg->args))
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("aggregate function calls cannot be nested"),
@@ -75,6 +76,15 @@ transformAggregateCall(ParseState *pstate, Aggref *agg)
locate_agg_of_level((Node *) agg->args, 0))));
}
+ /* It can't contain window functions either */
+ if (pstate->p_hasWindowFuncs &&
+ checkExprHasWindowFuncs((Node *) agg->args))
+ ereport(ERROR,
+ (errcode(ERRCODE_GROUPING_ERROR),
+ errmsg("aggregate function calls cannot contain window function calls"),
+ parser_errposition(pstate,
+ locate_windowfunc((Node *) agg->args))));
+
if (min_varlevel < 0)
min_varlevel = 0;
agg->agglevelsup = min_varlevel;
@@ -85,6 +95,98 @@ transformAggregateCall(ParseState *pstate, Aggref *agg)
pstate->p_hasAggs = true;
}
+/*
+ * transformWindowFuncCall -
+ * Finish initial transformation of a window function call
+ *
+ * parse_func.c has recognized the function as a window function, and has set
+ * up all the fields of the WindowFunc except winref. Here we must (1) add
+ * the WindowDef to the pstate (if not a duplicate of one already present) and
+ * set winref to link to it; and (2) mark p_hasWindowFuncs true in the pstate.
+ * Unlike aggregates, only the most closely nested pstate level need be
+ * considered --- there are no "outer window functions" per SQL spec.
+ *
+ * winref is the 1-based position of the matching WindowDef within
+ * pstate->p_windowdefs. windef is the OVER clause of the call; it must
+ * not carry a name of its own (only WINDOW-clause entries do).
+ */
+void
+transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
+ WindowDef *windef)
+{
+ /*
+ * A window function call can't contain another one (but aggs are OK).
+ * XXX is this required by spec, or just an unimplemented feature?
+ */
+ if (pstate->p_hasWindowFuncs &&
+ checkExprHasWindowFuncs((Node *) wfunc->args))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window function calls cannot be nested"),
+ parser_errposition(pstate,
+ locate_windowfunc((Node *) wfunc->args))));
+
+ /*
+ * If the OVER clause just specifies a reference name, find that
+ * WINDOW clause (which had better be present). Otherwise, try to
+ * match all the properties of the OVER clause, and make a new entry
+ * in the p_windowdefs list if no luck.
+ */
+ Assert(!windef->name);
+ if (windef->refname &&
+ windef->partitionClause == NIL &&
+ windef->orderClause == NIL)
+ {
+ Index winref = 0;
+ ListCell *lc;
+
+ foreach(lc, pstate->p_windowdefs)
+ {
+ WindowDef *refwin = (WindowDef *) lfirst(lc);
+
+ winref++;
+ /* only named entries can be the target of a bare reference */
+ if (refwin->name && strcmp(refwin->name, windef->refname) == 0)
+ {
+ wfunc->winref = winref;
+ break;
+ }
+ }
+ if (lc == NULL) /* didn't find it? */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("window \"%s\" does not exist", windef->refname),
+ parser_errposition(pstate, windef->location)));
+ }
+ else
+ {
+ Index winref = 0;
+ ListCell *lc;
+
+ foreach(lc, pstate->p_windowdefs)
+ {
+ WindowDef *refwin = (WindowDef *) lfirst(lc);
+
+ winref++;
+ /*
+ * Two specifications can only be duplicates if they inherit from
+ * the same window (or from none). Compare refname with refname:
+ * entries appended below have name == NULL, so refwin->name must
+ * not be handed to strcmp() here.
+ */
+ if (refwin->refname && windef->refname &&
+ strcmp(refwin->refname, windef->refname) == 0)
+ /* matched on refname */ ;
+ else if (!refwin->refname && !windef->refname)
+ /* matched, no refname */ ;
+ else
+ continue;
+ if (equal(refwin->partitionClause, windef->partitionClause) &&
+ equal(refwin->orderClause, windef->orderClause))
+ {
+ /* found a duplicate window specification */
+ wfunc->winref = winref;
+ break;
+ }
+ }
+ if (lc == NULL) /* didn't find it? */
+ {
+ /* new specification: append it; its list position is the winref */
+ pstate->p_windowdefs = lappend(pstate->p_windowdefs, windef);
+ wfunc->winref = list_length(pstate->p_windowdefs);
+ }
+ }
+
+ pstate->p_hasWindowFuncs = true;
+}
/*
* parseCheckAggregates
@@ -207,6 +309,11 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
/*
* Check the targetlist and HAVING clause for ungrouped variables.
+ *
+ * Note: because we check resjunk tlist elements as well as regular ones,
+ * this will also find ungrouped variables that came from ORDER BY and
+ * WINDOW clauses. For that matter, it's also going to examine the
+ * grouping expressions themselves --- but they'll all pass the test ...
*/
clause = (Node *) qry->targetList;
if (hasJoinRTEs)
@@ -226,11 +333,94 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
if (pstate->p_hasAggs && hasSelfRefRTEs)
ereport(ERROR,
(errcode(ERRCODE_INVALID_RECURSION),
- errmsg("aggregates not allowed in a recursive query's recursive term"),
+ errmsg("aggregate functions not allowed in a recursive query's recursive term"),
parser_errposition(pstate,
locate_agg_of_level((Node *) qry, 0))));
}
+/*
+ * parseCheckWindowFuncs
+ * Check for window functions where they shouldn't be.
+ *
+ * We have to forbid window functions in WHERE, JOIN/ON, HAVING, GROUP BY,
+ * and window specifications. (Other clauses, such as RETURNING and LIMIT,
+ * have already been checked.) Transformation of all these clauses must
+ * be completed already.
+ *
+ * pstate supplies the error-position context; its p_hasWindowFuncs flag is
+ * asserted to be set, so this should be invoked only when a window function
+ * was seen. qry is the Query whose jointree, havingQual, groupClause and
+ * windowClause are examined. GROUP BY items and window PARTITION BY /
+ * ORDER BY items are resolved to expressions with
+ * get_sortgroupclause_expr() against qry->targetList before being checked.
+ * Any violation is reported through ereport(ERROR) with
+ * ERRCODE_WINDOWING_ERROR, at the position given by locate_windowfunc().
+ */
+void
+parseCheckWindowFuncs(ParseState *pstate, Query *qry)
+{
+ ListCell *l;
+
+ /* This should only be called if we found window functions */
+ Assert(pstate->p_hasWindowFuncs);
+
+ if (checkExprHasWindowFuncs(qry->jointree->quals))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window functions not allowed in WHERE clause"),
+ parser_errposition(pstate,
+ locate_windowfunc(qry->jointree->quals))));
+ if (checkExprHasWindowFuncs((Node *) qry->jointree->fromlist))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window functions not allowed in JOIN conditions"),
+ parser_errposition(pstate,
+ locate_windowfunc((Node *) qry->jointree->fromlist))));
+ if (checkExprHasWindowFuncs(qry->havingQual))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window functions not allowed in HAVING clause"),
+ parser_errposition(pstate,
+ locate_windowfunc(qry->havingQual))));
+
+ foreach(l, qry->groupClause)
+ {
+ SortGroupClause *grpcl = (SortGroupClause *) lfirst(l);
+ Node *expr;
+
+ expr = get_sortgroupclause_expr(grpcl, qry->targetList);
+ if (checkExprHasWindowFuncs(expr))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window functions not allowed in GROUP BY clause"),
+ parser_errposition(pstate,
+ locate_windowfunc(expr))));
+ }
+
+ foreach(l, qry->windowClause)
+ {
+ WindowClause *wc = (WindowClause *) lfirst(l);
+ ListCell *l2;
+
+ foreach(l2, wc->partitionClause)
+ {
+ SortGroupClause *grpcl = (SortGroupClause *) lfirst(l2);
+ Node *expr;
+
+ expr = get_sortgroupclause_expr(grpcl, qry->targetList);
+ if (checkExprHasWindowFuncs(expr))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window functions not allowed in window definition"),
+ parser_errposition(pstate,
+ locate_windowfunc(expr))));
+ }
+ foreach(l2, wc->orderClause)
+ {
+ SortGroupClause *grpcl = (SortGroupClause *) lfirst(l2);
+ Node *expr;
+
+ expr = get_sortgroupclause_expr(grpcl, qry->targetList);
+ if (checkExprHasWindowFuncs(expr))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window functions not allowed in window definition"),
+ parser_errposition(pstate,
+ locate_windowfunc(expr))));
+ }
+ }
+}
/*
* check_ungrouped_columns -
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 0e5fbfd28ac..df30361f0a5 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.181 2008/10/06 02:12:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.182 2008/12/28 18:53:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -40,8 +40,14 @@
#define ORDER_CLAUSE 0
#define GROUP_CLAUSE 1
#define DISTINCT_ON_CLAUSE 2
+#define PARTITION_CLAUSE 3
-static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"};
+static const char * const clauseText[] = {
+ "ORDER BY",
+ "GROUP BY",
+ "DISTINCT ON",
+ "PARTITION BY"
+};
static void extractRemainingColumns(List *common_colnames,
List *src_colnames, List *src_colvars,
@@ -76,6 +82,7 @@ static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist, int location,
bool resolveUnknown);
+static WindowClause *findWindowClause(List *wclist, const char *name);
/*
@@ -555,15 +562,20 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
* Disallow aggregate functions in the expression. (No reason to postpone
* this check until parseCheckAggregates.)
*/
- if (pstate->p_hasAggs)
- {
- if (checkExprHasAggs(funcexpr))
- ereport(ERROR,
- (errcode(ERRCODE_GROUPING_ERROR),
- errmsg("cannot use aggregate function in function expression in FROM"),
- parser_errposition(pstate,
- locate_agg_of_level(funcexpr, 0))));
- }
+ if (pstate->p_hasAggs &&
+ checkExprHasAggs(funcexpr))
+ ereport(ERROR,
+ (errcode(ERRCODE_GROUPING_ERROR),
+ errmsg("cannot use aggregate function in function expression in FROM"),
+ parser_errposition(pstate,
+ locate_agg_of_level(funcexpr, 0))));
+ if (pstate->p_hasWindowFuncs &&
+ checkExprHasWindowFuncs(funcexpr))
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("cannot use window function in function expression in FROM"),
+ parser_errposition(pstate,
+ locate_windowfunc(funcexpr))));
/*
* OK, build an RTE for the function.
@@ -1156,16 +1168,28 @@ transformLimitClause(ParseState *pstate, Node *clause,
parser_errposition(pstate,
locate_var_of_level(qual, 0))));
}
- if (checkExprHasAggs(qual))
+ if (pstate->p_hasAggs &&
+ checkExprHasAggs(qual))
{
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
/* translator: %s is name of a SQL construct, eg LIMIT */
- errmsg("argument of %s must not contain aggregates",
+ errmsg("argument of %s must not contain aggregate functions",
constructName),
parser_errposition(pstate,
locate_agg_of_level(qual, 0))));
}
+ if (pstate->p_hasWindowFuncs &&
+ checkExprHasWindowFuncs(qual))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ /* translator: %s is name of a SQL construct, eg LIMIT */
+ errmsg("argument of %s must not contain window functions",
+ constructName),
+ parser_errposition(pstate,
+ locate_windowfunc(qual))));
+ }
return qual;
}
@@ -1234,7 +1258,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
char *name = strVal(linitial(((ColumnRef *) node)->fields));
int location = ((ColumnRef *) node)->location;
- if (clause == GROUP_CLAUSE)
+ if (clause == GROUP_CLAUSE || clause == PARTITION_CLAUSE)
{
/*
* In GROUP BY, we must prefer a match against a FROM-clause
@@ -1251,6 +1275,8 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
* SQL99 do not allow GROUPing BY an outer reference, so this
* breaks no cases that are legal per spec, and it seems a more
* self-consistent behavior.
+ *
+ * Window PARTITION BY clauses should act exactly like GROUP BY.
*/
if (colNameToVar(pstate, name, true, location) != NULL)
name = NULL;
@@ -1356,12 +1382,17 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
*
* GROUP BY items will be added to the targetlist (as resjunk columns)
* if not already present, so the targetlist must be passed by reference.
+ *
+ * This is also used for window PARTITION BY clauses (which actually act
+ * just the same, except for the clause name used in error messages).
*/
List *
transformGroupClause(ParseState *pstate, List *grouplist,
- List **targetlist, List *sortClause)
+ List **targetlist, List *sortClause,
+ bool isPartition)
{
List *result = NIL;
+ int clause = isPartition ? PARTITION_CLAUSE : GROUP_CLAUSE;
ListCell *gl;
foreach(gl, grouplist)
@@ -1370,8 +1401,7 @@ transformGroupClause(ParseState *pstate, List *grouplist,
TargetEntry *tle;
bool found = false;
- tle = findTargetlistEntry(pstate, gexpr,
- targetlist, GROUP_CLAUSE);
+ tle = findTargetlistEntry(pstate, gexpr, targetlist, clause);
/* Eliminate duplicates (GROUP BY x, x) */
if (targetIsInSortList(tle, InvalidOid, result))
@@ -1452,6 +1482,125 @@ transformSortClause(ParseState *pstate,
}
/*
+ * transformWindowDefinitions -
+ * transform window definitions (WindowDef to WindowClause)
+ */
+List *
+transformWindowDefinitions(ParseState *pstate,
+ List *windowdefs,
+ List **targetlist)
+{
+ List *result = NIL;
+ Index winref = 0;
+ ListCell *lc;
+
+ foreach(lc, windowdefs)
+ {
+ WindowDef *windef = (WindowDef *) lfirst(lc);
+ WindowClause *refwc = NULL;
+ List *partitionClause;
+ List *orderClause;
+ WindowClause *wc;
+
+ winref++;
+
+ /*
+ * Check for duplicate window names.
+ */
+ if (windef->name &&
+ findWindowClause(result, windef->name) != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_WINDOWING_ERROR),
+ errmsg("window \"%s\" is already defined", windef->name),
+ parser_errposition(pstate, windef->location)));
+
+ /*
+ * If it references a previous window, look that up.
+ */
+ if (windef->refname)
+ {
+ refwc = findWindowClause(result, windef->refname);
+ if (refwc == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("window \"%s\" does not exist",
+ windef->refname),
+ parser_errposition(pstate, windef->location)));
+ }
+
+ /*
+ * Transform PARTITION and ORDER specs, if any. These are treated
+ * exactly like top-level GROUP BY and ORDER BY clauses, including
+ * the special handling of nondefault operator semantics.
+ */
+ orderClause = transformSortClause(pstate,
+ windef->orderClause,
+ targetlist,