diff options
author | Tom Lane | 2008-12-28 18:54:01 +0000 |
---|---|---|
committer | Tom Lane | 2008-12-28 18:54:01 +0000 |
commit | 95b07bc7f5010233f52f9d11da74e2e5b653b0a7 (patch) | |
tree | 48f5858bf4eca1bfb316ef02bb959ca85f568e0a | |
parent | 38e9348282e9d078487147ba8a85aebec54e3a08 (diff) |
Support window functions a la SQL:2008.
Hitoshi Harada, with some kibitzing from Heikki and Tom.
92 files changed, 6716 insertions, 317 deletions
diff --git a/contrib/tsearch2/tsearch2.c b/contrib/tsearch2/tsearch2.c index 7754f574026..bdccba787a9 100644 --- a/contrib/tsearch2/tsearch2.c +++ b/contrib/tsearch2/tsearch2.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.c,v 1.6 2008/03/25 22:42:42 tgl Exp $ + * $PostgreSQL: pgsql/contrib/tsearch2/tsearch2.c,v 1.7 2008/12/28 18:53:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -410,7 +410,15 @@ tsa_rewrite_accum(PG_FUNCTION_ARGS) MemoryContext aggcontext; MemoryContext oldcontext; - aggcontext = ((AggState *) fcinfo->context)->aggcontext; + if (fcinfo->context && IsA(fcinfo->context, AggState)) + aggcontext = ((AggState *) fcinfo->context)->aggcontext; + else if (fcinfo->context && IsA(fcinfo->context, WindowAggState)) + aggcontext = ((WindowAggState *) fcinfo->context)->wincontext; + else + { + elog(ERROR, "tsa_rewrite_accum called in non-aggregate context"); + aggcontext = NULL; /* keep compiler quiet */ + } if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) { diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml index 2ecb2da5c56..ce8ef535dba 100644 --- a/doc/src/sgml/advanced.sgml +++ b/doc/src/sgml/advanced.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/advanced.sgml,v 1.54 2007/02/01 00:28:16 momjian Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/advanced.sgml,v 1.55 2008/12/28 18:53:53 tgl Exp $ --> <chapter id="tutorial-advanced"> <title>Advanced Features</title> @@ -240,7 +240,7 @@ COMMIT; <para> <productname>PostgreSQL</> actually treats every SQL statement as being executed within a transaction. If you do not issue a <command>BEGIN</> - command, + command, then each individual statement has an implicit <command>BEGIN</> and (if successful) <command>COMMIT</> wrapped around it. A group of statements surrounded by <command>BEGIN</> and <command>COMMIT</> @@ -265,7 +265,7 @@ COMMIT; with <command>ROLLBACK TO</>. 
All the transaction's database changes between defining the savepoint and rolling back to it are discarded, but changes earlier than the savepoint are kept. - </para> + </para> <para> After rolling back to a savepoint, it continues to be defined, so you can @@ -274,7 +274,7 @@ COMMIT; system can free some resources. Keep in mind that either releasing or rolling back to a savepoint will automatically release all savepoints that were defined after it. - </para> + </para> <para> All this is happening within the transaction block, so none of it @@ -282,7 +282,7 @@ COMMIT; transaction block, the committed actions become visible as a unit to other sessions, while the rolled-back actions never become visible at all. - </para> + </para> <para> Remembering the bank database, suppose we debit $100.00 from Alice's @@ -317,6 +317,242 @@ COMMIT; </sect1> + <sect1 id="tutorial-window"> + <title id="tutorial-window-title">Window Functions</title> + + <indexterm zone="tutorial-window"> + <primary>window function</primary> + </indexterm> + + <para> + A <firstterm>window function</> performs a calculation across a set of + table rows that are somehow related to the current row. This is comparable + to the type of calculation that can be done with an aggregate function. + But unlike regular aggregate functions, use of a window function does not + cause rows to become grouped into a single output row — the + rows retain their separate identities. Behind the scenes, the window + function is able to access more than just the current row of the query + result. 
+ </para> + + <para> + Here is an example that shows how to compare each employee's salary + with the average salary in his or her department: + +<programlisting> +SELECT depname, empno, salary, avg(salary) OVER (PARTITION BY depname) FROM empsalary; +</programlisting> + +<screen> + depname | empno | salary | avg +-----------+-------+--------+----------------------- + develop | 11 | 5200 | 5020.0000000000000000 + develop | 7 | 4200 | 5020.0000000000000000 + develop | 9 | 4500 | 5020.0000000000000000 + develop | 8 | 6000 | 5020.0000000000000000 + develop | 10 | 5200 | 5020.0000000000000000 + personnel | 5 | 3500 | 3700.0000000000000000 + personnel | 2 | 3900 | 3700.0000000000000000 + sales | 3 | 4800 | 4866.6666666666666667 + sales | 1 | 5000 | 4866.6666666666666667 + sales | 4 | 4800 | 4866.6666666666666667 +(10 rows) +</screen> + + The first three output columns come directly from the table + <structname>empsalary</>, and there is one output row for each row in the + table. The fourth column represents an average taken across all the table + rows that have the same <structfield>depname</> value as the current row. + (This actually is the same function as the regular <function>avg</> + aggregate function, but the <literal>OVER</> clause causes it to be + treated as a window function and computed across an appropriate set of + rows.) + </para> + + <para> + A window function call always contains an <literal>OVER</> clause + following the window function's name and argument(s). This is what + syntactically distinguishes it from a regular function or aggregate + function. The <literal>OVER</> clause determines exactly how the + rows of the query are split up for processing by the window function. + The <literal>PARTITION BY</> list within <literal>OVER</> specifies + dividing the rows into groups, or partitions, that share the same + values of the <literal>PARTITION BY</> expression(s). 
For each row, + the window function is computed across the rows that fall into the + same partition as the current row. + </para> + + <para> + Although <function>avg</> will produce the same result no matter + what order it processes the partition's rows in, this is not true of all + window functions. When needed, you can control that order using + <literal>ORDER BY</> within <literal>OVER</>. Here is an example: + +<programlisting> +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary DESC) FROM empsalary; +</programlisting> + +<screen> + depname | empno | salary | rank +-----------+-------+--------+------ + develop | 8 | 6000 | 1 + develop | 10 | 5200 | 2 + develop | 11 | 5200 | 2 + develop | 9 | 4500 | 4 + develop | 7 | 4200 | 5 + personnel | 2 | 3900 | 1 + personnel | 5 | 3500 | 2 + sales | 1 | 5000 | 1 + sales | 4 | 4800 | 2 + sales | 3 | 4800 | 2 +(10 rows) +</screen> + + As shown here, the <function>rank</> function produces a numerical rank + within the current row's partition for each distinct <literal>ORDER BY</> + value, in the order defined by the <literal>ORDER BY</> clause. + <function>rank</> needs no explicit parameter, because its behavior + is entirely determined by the <literal>OVER</> clause. + </para> + + <para> + The rows considered by a window function are those of the <quote>virtual + table</> produced by the query's <literal>FROM</> clause as filtered by its + <literal>WHERE</>, <literal>GROUP BY</>, and <literal>HAVING</> clauses + if any. For example, a row removed because it does not meet the + <literal>WHERE</> condition is not seen by any window function. + A query can contain multiple window functions that slice up the data + in different ways by means of different <literal>OVER</> clauses, but + they all act on the same collection of rows defined by this virtual table. + </para> + + <para> + We already saw that <literal>ORDER BY</> can be omitted if the ordering + of rows is not important. 
It is also possible to omit <literal>PARTITION + BY</>, in which case the window function is computed over all rows of the + virtual table; that is, there is one partition containing all the rows. + </para> + + <para> + There is another important concept associated with window functions: + for each row, there is a set of rows within its partition called its + <firstterm>window frame</>. When <literal>ORDER BY</> is omitted the + frame is always the same as the partition. If <literal>ORDER BY</> is + supplied, the frame consists of all rows from the start of the partition + up to the current row, plus any following rows that are equal to the + current row according to the <literal>ORDER BY</> clause. + <footnote> + <para> + The SQL standard includes options to define the window frame in + other ways, but this definition is the only one currently supported + by <productname>PostgreSQL</productname>. + </para> + </footnote> + Many window functions act only on the rows of the window frame, rather + than of the whole partition. Here is an example using <function>sum</>: + </para> + +<programlisting> +SELECT salary, sum(salary) OVER () FROM empsalary; +</programlisting> + +<screen> + salary | sum +--------+------- + 5200 | 47100 + 5000 | 47100 + 3500 | 47100 + 4800 | 47100 + 3900 | 47100 + 4200 | 47100 + 4500 | 47100 + 4800 | 47100 + 6000 | 47100 + 5200 | 47100 +(10 rows) +</screen> + + <para> + Above, since there is no <literal>ORDER BY</> in the <literal>OVER</> + clause, the window frame is the same as the partition, which for lack of + <literal>PARTITION BY</> is the whole table; in other words each sum is + taken over the whole table and so we get the same result for each output + row. 
But if we add an <literal>ORDER BY</> clause, we get very different + results: + </para> + +<programlisting> +SELECT salary, sum(salary) OVER (ORDER BY salary) FROM empsalary; +</programlisting> + +<screen> + salary | sum +--------+------- + 3500 | 3500 + 3900 | 7400 + 4200 | 11600 + 4500 | 16100 + 4800 | 25700 + 4800 | 25700 + 5000 | 30700 + 5200 | 41100 + 5200 | 41100 + 6000 | 47100 +(10 rows) +</screen> + + <para> + Here the sum is taken from the first (lowest) salary up through the + current one, including any duplicates of the current one (notice the + results for the duplicated salaries). + </para> + + <para> + Window functions are permitted only in the <literal>SELECT</literal> list + and the <literal>ORDER BY</> clause of the query. They are forbidden + elsewhere, such as in <literal>GROUP BY</>, <literal>HAVING</> + and <literal>WHERE</literal> clauses. This is because they logically + execute after the processing of those clauses. Also, window functions + execute after regular aggregate functions. This means it is valid to + include an aggregate function call in the arguments of a window function, + but not vice versa. + </para> + + <para> + If there is a need to filter or group rows after the window calculations + are performed, you can use a sub-select. For example: + +<programlisting> +SELECT depname, empno, salary, enroll_date +FROM + (SELECT depname, empno, salary, enroll_date, + rank() OVER (PARTITION BY depname ORDER BY salary DESC, empno) AS pos + FROM empsalary + ) AS ss +WHERE pos < 3; +</programlisting> + + The above query only shows the rows from the inner query having + <literal>rank</> less than <literal>3</>. + </para> + + <para> + When a query involves multiple window functions, it is possible to write + out each one with a separate <literal>OVER</> clause, but this is + duplicative and error-prone if the same windowing behavior is wanted + for several functions. 
Instead, each windowing behavior can be named + in a <literal>WINDOW</> clause and then referenced in <literal>OVER</>. + For example: + +<programlisting> +SELECT sum(salary) OVER w, avg(salary) OVER w + FROM empsalary + WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); +</programlisting> + </para> + </sect1> + + <sect1 id="tutorial-inheritance"> <title>Inheritance</title> @@ -391,7 +627,7 @@ CREATE TABLE capitals ( <para> For example, the following query finds the names of all cities, - including state capitals, that are located at an altitude + including state capitals, that are located at an altitude over 500 feet: <programlisting> @@ -455,7 +691,7 @@ SELECT name, altitude <sect1 id="tutorial-conclusion"> <title>Conclusion</title> - + <para> <productname>PostgreSQL</productname> has many features not touched upon in this tutorial introduction, which has been diff --git a/doc/src/sgml/errcodes.sgml b/doc/src/sgml/errcodes.sgml index 574e7f5fbad..e792a74e286 100644 --- a/doc/src/sgml/errcodes.sgml +++ b/doc/src/sgml/errcodes.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.25 2008/10/04 21:56:52 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.26 2008/12/28 18:53:53 tgl Exp $ --> <appendix id="errcodes-appendix"> <title><productname>PostgreSQL</productname> Error Codes</title> @@ -379,6 +379,18 @@ </row> <row> +<entry><literal>22014</literal></entry> +<entry>INVALID ARGUMENT FOR NTILE FUNCTION</entry> +<entry>invalid_argument_for_ntile_function</entry> +</row> + +<row> +<entry><literal>22016</literal></entry> +<entry>INVALID ARGUMENT FOR NTH_VALUE FUNCTION</entry> +<entry>invalid_argument_for_nth_value_function</entry> +</row> + +<row> <entry><literal>2201F</literal></entry> <entry>INVALID ARGUMENT FOR POWER FUNCTION</entry> <entry>invalid_argument_for_power_function</entry> @@ -991,6 +1003,12 @@ </row> <row> +<entry><literal>42P20</literal></entry> +<entry>WINDOWING ERROR</entry> 
+<entry>windowing_error</entry> +</row> + +<row> <entry><literal>42P19</literal></entry> <entry>INVALID RECURSION</entry> <entry>invalid_recursion</entry> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index de50c0e1d56..205b71e9c9e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.463 2008/12/19 16:25:16 petere Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.464 2008/12/28 18:53:53 tgl Exp $ --> <chapter id="functions"> <title>Functions and Operators</title> @@ -10149,6 +10149,278 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; </sect1> + <sect1 id="functions-window"> + <title>Window Functions</title> + + <indexterm zone="functions-window"> + <primary>window function</primary> + <secondary>built-in</secondary> + </indexterm> + + <para> + <firstterm>Window functions</firstterm> provide the ability to perform + calculations across sets of rows that are related to the current query + row. For information about this feature see + <xref linkend="tutorial-window"> and + <xref linkend="syntax-window-functions">. + </para> + + <para> + The built-in window functions are listed in + <xref linkend="functions-window-table">. Note that these functions + <emphasis>must</> be invoked using window function syntax; that is, an + <literal>OVER</> clause is required. + </para> + + <para> + In addition to these functions, any built-in or user-defined aggregate + function can be used as a window function (see + <xref linkend="functions-aggregate"> for a list of the built-in aggregates). + Aggregate functions act as window functions only when an <literal>OVER</> + clause follows the call; otherwise they act as regular aggregates.
+ </para> + + <table id="functions-window-table"> + <title>General-Purpose Window Functions</title> + + <tgroup cols="3"> + <thead> + <row> + <entry>Function</entry> + <entry>Return Type</entry> + <entry>Description</entry> + </row> + </thead> + + <tbody> + <row> + <entry> + <indexterm> + <primary>row_number</primary> + </indexterm> + <function>row_number()</function> + </entry> + <entry> + <type>bigint</type> + </entry> + <entry>number of the current row within its partition, counting from 1</entry> + </row> + + <row> + <entry> + <indexterm> + <primary>rank</primary> + </indexterm> + <function>rank()</function> + </entry> + <entry> + <type>bigint</type> + </entry> + <entry>rank of the current row with gaps; same as <function>row_number</> of its first peer</entry> + </row> + + <row> + <entry> + <indexterm> + <primary>dense_rank</primary> + </indexterm> + <function>dense_rank()</function> + </entry> + <entry> + <type>bigint</type> + </entry> + <entry>rank of the current row without gaps; this function counts peer groups</entry> + </row> + + <row> + <entry> + <indexterm> + <primary>percent_rank</primary> + </indexterm> + <function>percent_rank()</function> + </entry> + <entry> + <type>double precision</type> + </entry> + <entry>relative rank of the current row: (<function>rank</> - 1) / (total rows - 1)</entry> + </row> + + <row> + <entry> + <indexterm> + <primary>cume_dist</primary> + </indexterm> + <function>cume_dist()</function> + </entry> + <entry> + <type>double precision</type> + </entry> + <entry>relative rank of the current row: (number of rows preceding or peer with current row) / (total rows)</entry> + </row> + + <row> + <entry> + <indexterm> + <primary>ntile</primary> + </indexterm> + <function>ntile(<replaceable class="parameter">num_buckets</replaceable> <type>integer</>)</function> + </entry> + <entry> + <type>integer</type> + </entry> + <entry>integer ranging from 1 to the argument value, dividing the + partition as equally as possible</entry> + 
</row> + + <row> + <entry> + <indexterm> + <primary>lag</primary> + </indexterm> + <function> + lag(<replaceable class="parameter">value</replaceable> <type>any</> + [, <replaceable class="parameter">offset</replaceable> <type>integer</> + [, <replaceable class="parameter">default</replaceable> <type>any</> ]]) + </function> + </entry> + <entry> + <type>same type as <replaceable class="parameter">value</replaceable></type> + </entry> + <entry> + returns <replaceable class="parameter">value</replaceable> evaluated at + the row that is <replaceable class="parameter">offset</replaceable> + rows before the current row within the partition; if there is no such + row, instead return <replaceable class="parameter">default</replaceable>. + Both <replaceable class="parameter">offset</replaceable> and + <replaceable class="parameter">default</replaceable> are evaluated + with respect to the current row. If omitted, + <replaceable class="parameter">offset</replaceable> defaults to 1 and + <replaceable class="parameter">default</replaceable> to null + </entry> + </row> + + <row> + <entry> + <indexterm> + <primary>lead</primary> + </indexterm> + <function> + lead(<replaceable class="parameter">value</replaceable> <type>any</> + [, <replaceable class="parameter">offset</replaceable> <type>integer</> + [, <replaceable class="parameter">default</replaceable> <type>any</> ]]) + </function> + </entry> + <entry> + <type>same type as <replaceable class="parameter">value</replaceable></type> + </entry> + <entry> + returns <replaceable class="parameter">value</replaceable> evaluated at + the row that is <replaceable class="parameter">offset</replaceable> + rows after the current row within the partition; if there is no such + row, instead return <replaceable class="parameter">default</replaceable>. + Both <replaceable class="parameter">offset</replaceable> and + <replaceable class="parameter">default</replaceable> are evaluated + with respect to the current row. 
If omitted, + <replaceable class="parameter">offset</replaceable> defaults to 1 and + <replaceable class="parameter">default</replaceable> to null + </entry> + </row> + + <row> + <entry> + <indexterm> + <primary>first_value</primary> + </indexterm> + <function>first_value(<replaceable class="parameter">value</replaceable> <type>any</>)</function> + </entry> + <entry> + <type>same type as <replaceable class="parameter">value</replaceable></type> + </entry> + <entry> + returns <replaceable class="parameter">value</replaceable> evaluated + at the row that is the first row of the window frame + </entry> + </row> + + <row> + <entry> + <indexterm> + <primary>last_value</primary> + </indexterm> + <function>last_value(<replaceable class="parameter">value</replaceable> <type>any</>)</function> + </entry> + <entry> + <type>same type as <replaceable class="parameter">value</replaceable></type> + </entry> + <entry> + returns <replaceable class="parameter">value</replaceable> evaluated + at the row that is the last row of the window frame + </entry> + </row> + + <row> + <entry> + <indexterm> + <primary>nth_value</primary> + </indexterm> + <function> + nth_value(<replaceable class="parameter">value</replaceable> <type>any</>, <replaceable class="parameter">nth</replaceable> <type>integer</>) + </function> + </entry> + <entry> + <type>same type as <replaceable class="parameter">value</replaceable></type> + </entry> + <entry> + returns <replaceable class="parameter">value</replaceable> evaluated + at the row that is the <replaceable class="parameter">nth</replaceable> + row of the window frame (counting from 1); null if no such row + </entry> + </row> + </tbody> + </tgroup> + </table> + + <para> + All of the functions listed in + <xref linkend="functions-window-table"> depend on the sort ordering + specified by the <literal>ORDER BY</> clause of the associated window + definition. 
Rows that are not distinct in the <literal>ORDER BY</> + ordering are said to be <firstterm>peers</>; the four ranking functions + are defined so that they give the same answer for any two peer rows. + </para> + + <para> + Note that <function>first_value</>, <function>last_value</>, and + <function>nth_value</> consider only the rows within the <quote>window + frame</>, that is, the rows from the start of the partition through the + last peer of the current row. This is particularly likely to give + unintuitive results for <function>last_value</>. + </para> + + <para> + When an aggregate function is used as a window function, it aggregates + over the rows within the current row's window frame. To obtain + aggregation over the whole partition, be sure to omit <literal>ORDER BY</> + from the window definition. An aggregate used with <literal>ORDER BY</> + produces a <quote>running sum</> type of behavior, which may or may not + be what's wanted. + </para> + + <note> + <para> + The SQL standard defines a <literal>RESPECT NULLS</> or + <literal>IGNORE NULLS</> option for <function>lead</>, <function>lag</>, + <function>first_value</>, <function>last_value</>, and + <function>nth_value</>. This is not implemented in + <productname>PostgreSQL</productname>: the behavior is always the + same as the standard's default, namely <literal>RESPECT NULLS</>. + Likewise, the standard's <literal>FROM FIRST</> or <literal>FROM LAST</> + option for <function>nth_value</> is not implemented: only the + default <literal>FROM FIRST</> behavior is supported.
+ </para> + </note> + + </sect1> <sect1 id="functions-subquery"> <title>Subquery Expressions</title> diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index 283dd0a73dd..f1db64b273a 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.50 2008/10/14 00:41:34 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/queries.sgml,v 1.51 2008/12/28 18:53:54 tgl Exp $ --> <chapter id="queries"> <title>Queries</title> @@ -949,6 +949,57 @@ SELECT product_id, p.name, (sum(s.units) * (p.price - p.cost)) AS profit 5000. Note that the aggregate expressions do not necessarily need to be the same in all parts of the query. </para> + + <para> + If a query contains aggregate function calls, but no <literal>GROUP BY</> + clause, grouping still occurs: the result is a single group row (or + perhaps no rows at all, if the single row is then eliminated by + <literal>HAVING</>). + The same is true if it contains a <literal>HAVING</> clause, even + without any aggregate function calls or <literal>GROUP BY</> clause. + </para> + </sect2> + + <sect2 id="queries-window"> + <title>Window Function Processing</> + + <indexterm zone="queries-window"> + <primary>window function</primary> + <secondary>order of execution</> + </indexterm> + + <para> + If the query contains any window functions (see + <xref linkend="tutorial-window"> and + <xref linkend="syntax-window-functions">), these functions are evaluated + after any grouping, aggregation, and <literal>HAVING</> filtering is + performed. That is, if the query uses any aggregates, <literal>GROUP + BY</>, or <literal>HAVING</>, then the rows seen by the window functions + are the group rows instead of the original table rows from + <literal>FROM</>/<literal>WHERE</>. 
+ </para> + + <para> + When multiple window functions are used, all the window functions having + syntactically equivalent <literal>PARTITION BY</> and <literal>ORDER BY</> + clauses in their window definitions are guaranteed to be evaluated in a + single pass over the data. Therefore they will see the same sort ordering, + even if the <literal>ORDER BY</> does not uniquely determine an ordering. + However, no guarantees are made about the evaluation of functions having + different <literal>PARTITION BY</> or <literal>ORDER BY</> specifications. + (In such cases a sort step is typically required between the passes of + window function evaluations, and the sort is not guaranteed to preserve + ordering of rows that its <literal>ORDER BY</> sees as equivalent.) + </para> + + <para> + Currently, use of window functions always forces sorting, and so the + query output will be ordered according to one or another of the window + functions' <literal>PARTITION BY</>/<literal>ORDER BY</> clauses. + It is not advisable to rely on this, however. Use an explicit + top-level <literal>ORDER BY</> clause if you want to be sure the + results are sorted in a particular way.
+ </para> </sect2> </sect1> diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml index 442f9ad0068..ffc641b03ad 100644 --- a/doc/src/sgml/query.sgml +++ b/doc/src/sgml/query.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/query.sgml,v 1.50 2007/02/01 00:28:17 momjian Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/query.sgml,v 1.51 2008/12/28 18:53:54 tgl Exp $ --> <chapter id="tutorial-sql"> <title>The <acronym>SQL</acronym> Language</title> @@ -621,7 +621,7 @@ SELECT W1.city, W1.temp_lo AS low, W1.temp_hi AS high, San Francisco | 43 | 57 | San Francisco | 46 | 50 Hayward | 37 | 54 | San Francisco | 46 | 50 (2 rows) -</programlisting> +</programlisting> Here we have relabeled the weather table as <literal>W1</> and <literal>W2</> to be able to distinguish the left and right side @@ -651,9 +651,9 @@ SELECT * <indexterm><primary>min</primary></indexterm> <indexterm><primary>sum</primary></indexterm> - Like most other relational database products, + Like most other relational database products, <productname>PostgreSQL</productname> supports - aggregate functions. + <firstterm>aggregate functions</>. An aggregate function computes a single result from multiple input rows. For example, there are aggregates to compute the <function>count</function>, <function>sum</function>, @@ -815,7 +815,7 @@ SELECT city, max(temp_lo) <para> You can update existing rows using the - <command>UPDATE</command> command. + <command>UPDATE</command> command. Suppose you discover the temperature readings are all off by 2 degrees after November 28. 
You can correct the data as follows: diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 814a6708f00..c9a386f24f3 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.112 2008/12/01 09:38:08 petere Exp $ +$PostgreSQL: pgsql/doc/src/sgml/ref/select.sgml,v 1.113 2008/12/28 18:53:54 tgl Exp $ PostgreSQL documentation --> @@ -39,6 +39,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac [ WHERE <replaceable class="parameter">condition</replaceable> ] [ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...] ] [ HAVING <replaceable class="parameter">condition</replaceable> [, ...] ] + [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ] [ { UNION | INTERSECT | EXCEPT } [ ALL ] <replaceable class="parameter">select</replaceable> ] [ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ] [ LIMIT { <replaceable class="parameter">count</replaceable> | ALL } ] @@ -566,6 +567,67 @@ HAVING <replaceable class="parameter">condition</replaceable> </para> </refsect2> + <refsect2 id="SQL-WINDOW"> + <title id="sql-window-title"><literal>WINDOW</literal> Clause</title> + + <para> + The optional <literal>WINDOW</literal> clause has the general form +<synopsis> +WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] 
+</synopsis> + where <replaceable class="parameter">window_name</replaceable> is + a name that can be referenced from subsequent window definitions or + <literal>OVER</> clauses, and + <replaceable class="parameter">window_definition</replaceable> is +<synopsis> +[ <replaceable class="parameter">existing_window_name</replaceable> ] +[ PARTITION BY <replaceable class="parameter">expression</replaceable> [, ...] ] +[ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ] +</synopsis> + The elements of the <literal>PARTITION BY</> list are interpreted in + the same fashion as elements of a + <xref linkend="sql-groupby" endterm="sql-groupby-title">, and + the elements of the <literal>ORDER BY</> list are interpreted in the + same fashion as elements of an + <xref linkend="sql-orderby" endterm="sql-orderby-title">. + The only difference is that these expressions can contain aggregate + function calls, which are not allowed in a regular <literal>GROUP BY</> + clause. They are allowed here because windowing occurs after grouping + and aggregation. + </para> + + <para> + If an <replaceable class="parameter">existing_window_name</replaceable> + is specified it must refer to an earlier entry in the <literal>WINDOW</> + list; the new window copies its partitioning clause from that entry, + as well as its ordering clause if any. In this case the new window cannot + specify its own <literal>PARTITION BY</> clause, and it can specify + <literal>ORDER BY</> only if the copied window does not have one. + </para> + + <para> + The purpose of a <literal>WINDOW</literal> clause is to specify the + behavior of <firstterm>window functions</> appearing in the query's + <xref linkend="sql-select-list" endterm="sql-select-list-title"> or + <xref linkend="sql-orderby" endterm="sql-orderby-title">. 
These functions + can reference the <literal>WINDOW</literal> clause entries by name + in their <literal>OVER</> clauses. A <literal>WINDOW</literal> clause + entry does not have to be referenced anywhere, however; if it is not + used in the query it is simply ignored. It is possible to use window + functions without any <literal>WINDOW</literal> clause at all, since + a window function call can specify its window definition directly in + its <literal>OVER</> clause. However, the <literal>WINDOW</literal> + clause saves typing when the same window definition is needed for more + than one window function. + </para> + + <para> + Window functions are described in detail in + <xref linkend="tutorial-window"> and + <xref linkend="syntax-window-functions">. + </para> + </refsect2> + <refsect2 id="sql-select-list"> <title id="sql-select-list-title"><command>SELECT</command> List</title> @@ -922,7 +984,7 @@ FETCH { FIRST | NEXT } [ <replaceable class="parameter">count</replaceable> ] { constants for the offset or fetch count, parentheses will be necessary in most cases. If the fetch count is omitted, it defaults to 1. - </para> + </para> <para> When using <literal>LIMIT</>, it is a good idea to use an @@ -1388,6 +1450,19 @@ SELECT distributors.* WHERE distributors.name = 'Westward'; </refsect2> <refsect2> + <title><literal>WINDOW</literal> Clause Restrictions</title> + + <para> + The SQL standard provides for an optional <quote>framing clause</>, + introduced by the key word <literal>RANGE</> or <literal>ROWS</>, + in window definitions. <productname>PostgreSQL</productname> does + not yet implement framing clauses, and always follows the + default framing behavior, which is equivalent to the framing clause + <literal>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</>.
+ </para> + </refsect2> + + <refsect2> <title><literal>LIMIT</literal> and <literal>OFFSET</literal></title> <para> diff --git a/doc/src/sgml/ref/select_into.sgml b/doc/src/sgml/ref/select_into.sgml index 038ae1b333c..057bfb2a9d7 100644 --- a/doc/src/sgml/ref/select_into.sgml +++ b/doc/src/sgml/ref/select_into.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/ref/select_into.sgml,v 1.43 2008/11/14 10:22:47 petere Exp $ +$PostgreSQL: pgsql/doc/src/sgml/ref/select_into.sgml,v 1.44 2008/12/28 18:53:54 tgl Exp $ PostgreSQL documentation --> @@ -29,6 +29,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac [ WHERE <replaceable class="parameter">condition</replaceable> ] [ GROUP BY <replaceable class="parameter">expression</replaceable> [, ...] ] [ HAVING <replaceable class="parameter">condition</replaceable> [, ...] ] + [ WINDOW <replaceable class="parameter">window_name</replaceable> AS ( <replaceable class="parameter">window_definition</replaceable> ) [, ...] ] [ { UNION | INTERSECT | EXCEPT } [ ALL ] <replaceable class="parameter">select</replaceable> ] [ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ] [ LIMIT { <replaceable class="parameter">count</replaceable> | ALL } ] diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index cca44794340..9d0833c2035 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.126 2008/12/09 20:52:03 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/syntax.sgml,v 1.127 2008/12/28 18:53:54 tgl Exp $ --> <chapter id="sql-syntax"> <title>SQL Syntax</title> @@ -1203,6 +1203,12 @@ SELECT 3 OPERATOR(pg_catalog.+) 4; <listitem> <para> + A window function call. + </para> + </listitem> + + <listitem> + <para> A type cast. 
</para> </listitem> @@ -1445,7 +1451,7 @@ $1.somecolumn enclosed in parentheses: <synopsis> -<replaceable>function</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional> ) +<replaceable>function_name</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional> ) </synopsis> </para> @@ -1480,7 +1486,7 @@ sqrt(2) <synopsis> <replaceable>aggregate_name</replaceable> (<replaceable>expression</replaceable> [ , ... ] ) <replaceable>aggregate_name</replaceable> (ALL <replaceable>expression</replaceable> [ , ... ] ) -<replaceable>aggregate_name</replaceable> (DISTINCT <replaceable>expression</replaceable> [ , ... ] ) +<replaceable>aggregate_name</replaceable> (DISTINCT <replaceable>expression</replaceable>) <replaceable>aggregate_name</replaceable> ( * ) </synopsis> @@ -1488,7 +1494,7 @@ sqrt(2) defined aggregate (possibly qualified with a schema name), and <replaceable>expression</replaceable> is any value expression that does not itself contain an aggregate - expression. + expression or a window function call. </para> <para> @@ -1550,6 +1556,89 @@ sqrt(2) </note> </sect2> + <sect2 id="syntax-window-functions"> + <title>Window Function Calls</title> + + <indexterm zone="syntax-window-functions"> + <primary>window function</primary> + <secondary>invocation</secondary> + </indexterm> + + <indexterm zone="syntax-window-functions"> + <primary>OVER clause</primary> + </indexterm> + + <para> + A <firstterm>window function call</firstterm> represents the application + of an aggregate-like function over some portion of the rows selected + by a query. Unlike regular aggregate function calls, this is not tied + to grouping of the selected rows into a single output row — each + row remains separate in the query output. 
However, the window function + is able to scan all the rows that would be part of the current row's + group according to the grouping specification (<literal>PARTITION BY</> + list) of the window function call. + The syntax of a window function call is one of the following: + +<synopsis> +<replaceable>function_name</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional>) OVER ( <replaceable class="parameter">window_definition</replaceable> ) +<replaceable>function_name</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional>) OVER <replaceable>window_name</replaceable> +<replaceable>function_name</replaceable> ( * ) OVER ( <replaceable class="parameter">window_definition</replaceable> ) +<replaceable>function_name</replaceable> ( * ) OVER <replaceable>window_name</replaceable> +</synopsis> + where <replaceable class="parameter">window_definition</replaceable> + has the syntax +<synopsis> +[ <replaceable class="parameter">window_name</replaceable> ] +[ PARTITION BY <replaceable class="parameter">expression</replaceable> [, ...] ] +[ ORDER BY <replaceable class="parameter">expression</replaceable> [ ASC | DESC | USING <replaceable class="parameter">operator</replaceable> ] [ NULLS { FIRST | LAST } ] [, ...] ] +</synopsis> + + Here, <replaceable>expression</replaceable> represents any value + expression that does not itself contain window function calls. + The <literal>PARTITION BY</> and <literal>ORDER BY</> lists have + essentially the same syntax and semantics as <literal>GROUP BY</> + and <literal>ORDER BY</> clauses of the whole query. + <replaceable>window_name</replaceable> is a reference to a named window + specification defined in the query's <literal>WINDOW</literal> clause. 
+ Named window specifications are usually referenced with just + <literal>OVER</> <replaceable>window_name</replaceable>, but it is + also possible to write a window name inside the parentheses and then + optionally override its ordering clause with <literal>ORDER BY</>. + This latter syntax follows the same rules as modifying an existing + window name within the <literal>WINDOW</literal> clause; see the + <xref linkend="sql-select" endterm="sql-select-title"> reference + page for details. + </para> + + <para> + The built-in window functions are described in <xref + linkend="functions-window-table">. Also, any built-in or + user-defined aggregate function can be used as a window function. + Currently, there is no provision for user-defined window functions + other than aggregates. + </para> + + <para> + The syntaxes using <literal>*</> are used for calling parameter-less + aggregate functions as window functions, for example + <literal>count(*) OVER (PARTITION BY x ORDER BY y)</>. + <literal>*</> is customarily not used for non-aggregate window functions. + Aggregate window functions, unlike normal aggregate functions, do not + allow <literal>DISTINCT</> to be used within the function argument list. + </para> + + <para> + Window function calls are permitted only in the <literal>SELECT</literal> + list and the <literal>ORDER BY</> clause of the query. + </para> + + <para> + More information about window functions can be found in + <xref linkend="tutorial-window"> and + <xref linkend="queries-window">. 
+ </para> + </sect2> + <sect2 id="sql-syntax-type-casts"> <title>Type Casts</title> diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml index 3c4ce19258e..b223888f9ed 100644 --- a/doc/src/sgml/xaggr.sgml +++ b/doc/src/sgml/xaggr.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.36 2008/11/20 21:10:44 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.37 2008/12/28 18:53:54 tgl Exp $ --> <sect1 id="xaggr"> <title>User-Defined Aggregates</title> @@ -167,10 +167,13 @@ SELECT attrelid::regclass, array_accum(atttypid::regtype) <para> A function written in C can detect that it is being called as an aggregate transition or final function by seeing if it was passed - an <structname>AggState</> node as the function call <quote>context</>, + an <structname>AggState</> or <structname>WindowAggState</> node + as the function call <quote>context</>, for example by: <programlisting> - if (fcinfo->context && IsA(fcinfo->context, AggState)) + if (fcinfo->context && + (IsA(fcinfo->context, AggState) || + IsA(fcinfo->context, WindowAggState))) </programlisting> One reason for checking this is that when it is true, the first input must be a temporary transition value and can therefore safely be modified diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 2cbc19f5a06..b78bebf506f 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.83 2008/12/19 16:25:17 petere Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.84 2008/12/28 18:53:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1480,6 +1480,14 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } + else if (IsA(node, WindowFunc)) + { + WindowFunc 
*wfunc = (WindowFunc *) node; + + add_object_address(OCLASS_PROC, wfunc->winfnoid, 0, + context->addrs); + /* fall through to examine arguments */ + } else if (IsA(node, SubPlan)) { /* Extra work needed here if we ever need this case */ @@ -1602,6 +1610,7 @@ find_expr_references_walker(Node *node, /* query_tree_walker ignores ORDER BY etc, but we need those opers */ find_expr_references_walker((Node *) query->sortClause, context); find_expr_references_walker((Node *) query->groupClause, context); + find_expr_references_walker((Node *) query->windowClause, context); find_expr_references_walker((Node *) query->distinctClause, context); /* Examine substructure of query */ diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index a711143f86c..af200afaac8 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.347 2008/11/29 00:13:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.348 2008/12/28 18:53:54 tgl Exp $ * * * INTERFACE ROUTINES @@ -2138,6 +2138,10 @@ cookDefault(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("cannot use aggregate function in default expression"))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in default expression"))); /* * Coerce the expression to the correct type and typmod, if given. 
This @@ -2211,6 +2215,10 @@ cookConstraint(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("cannot use aggregate function in check constraint"))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in check constraint"))); return expr; } diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c index 345df0c6a95..8ff22c23c9e 100644 --- a/src/backend/catalog/pg_proc.c +++ b/src/backend/catalog/pg_proc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/pg_proc.c,v 1.157 2008/12/19 18:25:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/pg_proc.c,v 1.158 2008/12/28 18:53:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -80,6 +80,8 @@ ProcedureCreate(const char *procedureName, float4 prorows) { Oid retval; + /* XXX we don't currently have a way to make new window functions */ + bool isWindowFunc = false; int parameterCount; int allParamCount; Oid *allParams; @@ -292,8 +294,7 @@ ProcedureCreate(const char *procedureName, values[Anum_pg_proc_prorows - 1] = Float4GetDatum(prorows); values[Anum_pg_proc_provariadic - 1] = ObjectIdGetDatum(variadicType); values[Anum_pg_proc_proisagg - 1] = BoolGetDatum(isAgg); - /* XXX we don't currently have a way to make new window functions */ - values[Anum_pg_proc_proiswindow - 1] = BoolGetDatum(false); + values[Anum_pg_proc_proiswindow - 1] = BoolGetDatum(isWindowFunc); values[Anum_pg_proc_prosecdef - 1] = BoolGetDatum(security_definer); values[Anum_pg_proc_proisstrict - 1] = BoolGetDatum(isStrict); values[Anum_pg_proc_proretset - 1] = BoolGetDatum(returnsSet); @@ -440,18 +441,31 @@ ProcedureCreate(const char *procedureName, } } - /* Can't change aggregate status, either */ + /* Can't change aggregate or window-function status, either */ if (oldproc->proisagg != isAgg) { if (oldproc->proisagg) ereport(ERROR, 
(errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("function \"%s\" is an aggregate", + errmsg("function \"%s\" is an aggregate function", + procedureName))); + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function \"%s\" is not an aggregate function", + procedureName))); + } + if (oldproc->proiswindow != isWindowFunc) + { + if (oldproc->proiswindow) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function \"%s\" is a window function", procedureName))); else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("function \"%s\" is not an aggregate", + errmsg("function \"%s\" is not a window function", procedureName))); } diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index e5f1b313076..d829cb19235 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994-5, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.181 2008/11/19 01:10:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.182 2008/12/28 18:53:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -590,6 +590,9 @@ explain_outNode(StringInfo str, break; } break; + case T_WindowAgg: + pname = "WindowAgg"; + break; case T_Unique: pname = "Unique"; break; diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index 0a3de53e1e5..8963f981178 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.103 2008/12/18 18:20:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.104 2008/12/28 18:53:55 tgl Exp $ * * DESCRIPTION * These routines take the parse tree and pick out the @@ -321,6 +321,10 @@ examine_parameter_list(List *parameters, Oid languageOid, 
ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("cannot use aggregate function in parameter default value"))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in parameter default value"))); *parameterDefaults = lappend(*parameterDefaults, def); have_defaults = true; @@ -1538,6 +1542,10 @@ CreateCast(CreateCastStmt *stmt) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("cast function must not be an aggregate function"))); + if (procstruct->proiswindow) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cast function must not be a window function"))); if (procstruct->proretset) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 920b328bb30..f1f87abe227 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -10,7 +10,7 @@ * Copyright (c) 2002-2008, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.93 2008/12/13 02:29:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/prepare.c,v 1.94 2008/12/28 18:53:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -347,6 +347,10 @@ EvaluateParams(PreparedStatement *pstmt, List *params, ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("cannot use aggregate function in EXECUTE parameter"))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in EXECUTE parameter"))); given_type_id = exprType(expr); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 9f34c735028..173b24dab82 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.274 2008/12/15 
21:35:31 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.275 2008/12/28 18:53:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -5506,6 +5506,10 @@ ATPrepAlterColumnType(List **wqueue, ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("cannot use aggregate function in transform expression"))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in transform expression"))); } else { diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 38416fa67f2..f99ed813954 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/typecmds.c,v 1.127 2008/11/30 19:01:29 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/typecmds.c,v 1.128 2008/12/28 18:53:55 tgl Exp $ * * DESCRIPTION * The "DefineFoo" routines take the parse tree and pick out the @@ -2255,6 +2255,10 @@ domainAddConstraint(Oid domainOid, Oid domainNamespace, Oid baseTypeOid, ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("cannot use aggregate function in check constraint"))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in check constraint"))); /* * Convert to string form for storage. 
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index b4a0492751c..63c86107782 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -4,7 +4,7 @@ # Makefile for executor # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/executor/Makefile,v 1.28 2008/10/04 21:56:52 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/executor/Makefile,v 1.29 2008/12/28 18:53:55 tgl Exp $ # #------------------------------------------------------------------------- @@ -22,6 +22,6 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \ nodeLimit.o nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \ - tstoreReceiver.o spi.o + nodeWindowAgg.o tstoreReceiver.o spi.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index ef4f6853899..d406a0cec9a 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/executor/execAmi.c,v 1.101 2008/10/28 17:13:51 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execAmi.c,v 1.102 2008/12/28 18:53:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,6 +20,7 @@ #include "executor/nodeBitmapHeapscan.h" #include "executor/nodeBitmapIndexscan.h" #include "executor/nodeBitmapOr.h" +#include "executor/nodeCtescan.h" #include "executor/nodeFunctionscan.h" #include "executor/nodeGroup.h" #include "executor/nodeGroup.h" @@ -40,7 +41,7 @@ #include "executor/nodeTidscan.h" #include "executor/nodeUnique.h" #include "executor/nodeValuesscan.h" -#include "executor/nodeCtescan.h" +#include "executor/nodeWindowAgg.h" #include "executor/nodeWorktablescan.h" 
#include "nodes/nodeFuncs.h" #include "utils/syscache.h" @@ -210,6 +211,10 @@ ExecReScan(PlanState *node, ExprContext *exprCtxt) ExecReScanAgg((AggState *) node, exprCtxt); break; + case T_WindowAggState: + ExecReScanWindowAgg((WindowAggState *) node, exprCtxt); + break; + case T_UniqueState: ExecReScanUnique((UniqueState *) node, exprCtxt); break; diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index e689ec00f8c..cd610c895c1 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.63 2008/10/04 21:56:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.64 2008/12/28 18:53:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -85,6 +85,7 @@ #include "executor/nodeBitmapHeapscan.h" #include "executor/nodeBitmapIndexscan.h" #include "executor/nodeBitmapOr.h" +#include "executor/nodeCtescan.h" #include "executor/nodeFunctionscan.h" #include "executor/nodeGroup.h" #include "executor/nodeHash.h" @@ -104,7 +105,7 @@ #include "executor/nodeTidscan.h" #include "executor/nodeUnique.h" #include "executor/nodeValuesscan.h" -#include "executor/nodeCtescan.h" +#include "executor/nodeWindowAgg.h" #include "executor/nodeWorktablescan.h" #include "miscadmin.h" @@ -260,6 +261,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_WindowAgg: + result = (PlanState *) ExecInitWindowAgg((WindowAgg *) node, + estate, eflags); + break; + case T_Unique: result = (PlanState *) ExecInitUnique((Unique *) node, estate, eflags); @@ -425,6 +431,10 @@ ExecProcNode(PlanState *node) result = ExecAgg((AggState *) node); break; + case T_WindowAggState: + result = ExecWindowAgg((WindowAggState *) node); + break; + case T_UniqueState: result = ExecUnique((UniqueState *) node); break; @@ -601,6 +611,10 @@ ExecCountSlotsNode(Plan 
*node) case T_Agg: return ExecCountSlotsAgg((Agg *) node); + case T_WindowAgg: + return ExecCountSlotsWindowAgg((WindowAgg *) node); + break; + case T_Unique: return ExecCountSlotsUnique((Unique *) node); @@ -749,6 +763,10 @@ ExecEndNode(PlanState *node) ExecEndAgg((AggState *) node); break; + case T_WindowAggState: + ExecEndWindowAgg((WindowAggState *) node); + break; + case T_UniqueState: ExecEndUnique((UniqueState *) node); break; diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 71aad49647d..17606f5204e 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.238 2008/12/18 19:38:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.239 2008/12/28 18:53:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,6 +62,9 @@ static Datum ExecEvalArrayRef(ArrayRefExprState *astate, static Datum ExecEvalAggref(AggrefExprState *aggref, ExprContext *econtext, bool *isNull, ExprDoneCond *isDone); +static Datum ExecEvalWindowFunc(WindowFuncExprState *wfunc, + ExprContext *econtext, + bool *isNull, ExprDoneCond *isDone); static Datum ExecEvalVar(ExprState *exprstate, ExprContext *econtext, bool *isNull, ExprDoneCond *isDone); static Datum ExecEvalScalarVar(ExprState *exprstate, ExprContext *econtext, @@ -444,6 +447,27 @@ ExecEvalAggref(AggrefExprState *aggref, ExprContext *econtext, } /* ---------------------------------------------------------------- + * ExecEvalWindowFunc + * + * Returns a Datum whose value is the value of the precomputed + * window function found in the given expression context. 
+ * ---------------------------------------------------------------- + */ +static Datum +ExecEvalWindowFunc(WindowFuncExprState *wfunc, ExprContext *econtext, + bool *isNull, ExprDoneCond *isDone) +{ + if (isDone) + *isDone = ExprSingleResult; + + if (econtext->ecxt_aggvalues == NULL) /* safety check */ + elog(ERROR, "no window functions in this expression context"); + + *isNull = econtext->ecxt_aggnulls[wfunc->wfuncno]; + return econtext->ecxt_aggvalues[wfunc->wfuncno]; +} + +/* ---------------------------------------------------------------- * ExecEvalVar * * Returns a Datum whose value is the value of a range @@ -4062,12 +4086,12 @@ ExecEvalExprSwitchContext(ExprState *expression, * executions of the expression are needed. Typically the context will be * the same as the per-query context of the associated ExprContext. * - * Any Aggref and SubPlan nodes found in the tree are added to the lists - * of such nodes held by the parent PlanState. Otherwise, we do very little - * initialization here other than building the state-node tree. Any nontrivial - * work associated with initializing runtime info for a node should happen - * during the first actual evaluation of that node. (This policy lets us - * avoid work if the node is never actually evaluated.) + * Any Aggref, WindowFunc, or SubPlan nodes found in the tree are added to the + * lists of such nodes held by the parent PlanState. Otherwise, we do very + * little initialization here other than building the state-node tree. Any + * nontrivial work associated with initializing runtime info for a node should + * happen during the first actual evaluation of that node. (This policy lets + * us avoid work if the node is never actually evaluated.) 
* * Note: there is no ExecEndExpr function; we assume that any resource * cleanup needed will be handled by just releasing the memory context @@ -4145,11 +4169,49 @@ ExecInitExpr(Expr *node, PlanState *parent) else { /* planner messed up */ - elog(ERROR, "aggref found in non-Agg plan node"); + elog(ERROR, "Aggref found in non-Agg plan node"); } state = (ExprState *) astate; } break; + case T_WindowFunc: + { + WindowFunc *wfunc = (WindowFunc *) node; + WindowFuncExprState *wfstate = makeNode(WindowFuncExprState); + + wfstate->xprstate.evalfunc = (ExprStateEvalFunc) ExecEvalWindowFunc; + if (parent && IsA(parent, WindowAggState)) + { + WindowAggState *winstate = (WindowAggState *) parent; + int nfuncs; + + winstate->funcs = lcons(wfstate, winstate->funcs); + nfuncs = ++winstate->numfuncs; + if (wfunc->winagg) + winstate->numaggs++; + + wfstate->args = (List *) ExecInitExpr((Expr *) wfunc->args, + parent); + + /* + * Complain if the windowfunc's arguments contain any + * windowfuncs; nested window functions are semantically + * nonsensical. (This should have been caught earlier, + * but we defend against it here anyway.) + */ + if (nfuncs != winstate->numfuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window function calls cannot be nested"))); + } + else + { + /* planner messed up */ + elog(ERROR, "WindowFunc found in non-WindowAgg plan node"); + } + state = (ExprState *) wfstate; + } + break; case T_ArrayRef: { ArrayRef *aref = (ArrayRef *) node; diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c new file mode 100644 index 00000000000..37ef9a5e830 --- /dev/null +++ b/src/backend/executor/nodeWindowAgg.c @@ -0,0 +1,1854 @@ +/*------------------------------------------------------------------------- + * + * nodeWindowAgg.c + * routines to handle WindowAgg nodes. + * + * A WindowAgg node evaluates "window functions" across suitable partitions + * of the input tuple set. 
Any one WindowAgg works for just a single window + * specification, though it can evaluate multiple window functions sharing + * identical window specifications. The input tuples are required to be + * delivered in sorted order, with the PARTITION BY columns (if any) as + * major sort keys and the ORDER BY columns (if any) as minor sort keys. + * (The planner generates a stack of WindowAggs with intervening Sort nodes + * as needed, if a query involves more than one window specification.) + * + * Since window functions can require access to any or all of the rows in + * the current partition, we accumulate rows of the partition into a + * tuplestore. The window functions are called using the WindowObject API + * so that they can access those rows as needed. + * + * We also support using plain aggregate functions as window functions. + * For these, the regular Agg-node environment is emulated for each partition. + * As required by the SQL spec, the output represents the value of the + * aggregate function over all rows in the current row's window frame. 
+ * + * + * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/executor/nodeWindowAgg.c,v 1.1 2008/12/28 18:53:55 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_aggregate.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "executor/nodeWindowAgg.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "parser/parse_agg.h" +#include "parser/parse_coerce.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" +#include "windowapi.h" + +/* + * All the window function APIs are called with this object, which is passed + * to window functions as fcinfo->context. + */ +typedef struct WindowObjectData +{ + NodeTag type; + WindowAggState *winstate; /* parent WindowAggState */ + List *argstates; /* ExprState trees for fn's arguments */ + void *localmem; /* WinGetPartitionLocalMemory's chunk */ + int markptr; /* tuplestore mark pointer for this fn */ + int readptr; /* tuplestore read pointer for this fn */ + int64 markpos; /* row that markptr is positioned on */ + int64 seekpos; /* row that readptr is positioned on */ +} WindowObjectData; + +/* + * We have one WindowStatePerFunc struct for each window function and + * window aggregate handled by this node. 
+ */ +typedef struct WindowStatePerFuncData +{ + /* Links to WindowFunc expr and state nodes this working state is for */ + WindowFuncExprState *wfuncstate; + WindowFunc *wfunc; + + int numArguments; /* number of arguments */ + + FmgrInfo flinfo; /* fmgr lookup data for window function */ + + /* + * We need the len and byval info for the result of each function + * in order to know how to copy/delete values. + */ + int16 resulttypeLen; + bool resulttypeByVal; + + bool plain_agg; /* is it just a plain aggregate function? */ + int aggno; /* if so, index of its PerAggData */ + + WindowObject winobj; /* object used in window function API */ +} WindowStatePerFuncData; + +/* + * For plain aggregate window functions, we also have one of these. + */ +typedef struct WindowStatePerAggData +{ + /* Oids of transfer functions */ + Oid transfn_oid; + Oid finalfn_oid; /* may be InvalidOid */ + + /* + * fmgr lookup data for transfer functions --- only valid when + * corresponding oid is not InvalidOid. Note in particular that fn_strict + * flags are kept here. + */ + FmgrInfo transfn; + FmgrInfo finalfn; + + /* + * initial value from pg_aggregate entry + */ + Datum initValue; + bool initValueIsNull; + + /* + * cached value for non-moving frame + */ + Datum resultValue; + bool resultValueIsNull; + bool hasResult; + + /* + * We need the len and byval info for the agg's input, result, and + * transition data types in order to know how to copy/delete values. 
+ */ + int16 inputtypeLen, + resulttypeLen, + transtypeLen; + bool inputtypeByVal, + resulttypeByVal, + transtypeByVal; + + int wfuncno; /* index of associated PerFuncData */ + + /* Current transition value */ + Datum transValue; /* current transition value */ + bool transValueIsNull; + + bool noTransValue; /* true if transValue not set yet */ +} WindowStatePerAggData; + +static void initialize_windowaggregate(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate); +static void advance_windowaggregate(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate); +static void finalize_windowaggregate(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate, + Datum *result, bool *isnull); + +static void eval_windowaggregates(WindowAggState *winstate); +static void eval_windowfunction(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + Datum *result, bool *isnull); + +static void begin_partition(WindowAggState *winstate); +static void spool_tuples(WindowAggState *winstate, int64 pos); +static void release_partition(WindowAggState *winstate); + +static WindowStatePerAggData *initialize_peragg(WindowAggState *winstate, + WindowFunc *wfunc, + WindowStatePerAgg peraggstate); +static Datum GetAggInitVal(Datum textInitVal, Oid transtype); + +static bool are_peers(WindowAggState *winstate, TupleTableSlot *slot1, + TupleTableSlot *slot2); +static bool window_gettupleslot(WindowObject winobj, int64 pos, + TupleTableSlot *slot); + + +/* + * initialize_windowaggregate + * parallel to initialize_aggregate in nodeAgg.c + */ +static void +initialize_windowaggregate(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate) +{ + MemoryContext oldContext; + + if (peraggstate->initValueIsNull) + peraggstate->transValue = peraggstate->initValue; + else + { + oldContext = MemoryContextSwitchTo(winstate->wincontext); + 
peraggstate->transValue = datumCopy(peraggstate->initValue, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + MemoryContextSwitchTo(oldContext); + } + peraggstate->transValueIsNull = peraggstate->initValueIsNull; + peraggstate->noTransValue = peraggstate->initValueIsNull; +} + +/* + * advance_windowaggregate + * parallel to advance_aggregate in nodeAgg.c + */ +static void +advance_windowaggregate(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate) +{ + WindowFuncExprState *wfuncstate = perfuncstate->wfuncstate; + int numArguments = perfuncstate->numArguments; + FunctionCallInfoData fcinfodata; + FunctionCallInfo fcinfo = &fcinfodata; + Datum newVal; + ListCell *arg; + int i; + MemoryContext oldContext; + ExprContext *econtext = winstate->tmpcontext; + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + /* We start from 1, since the 0th arg will be the transition value */ + i = 1; + foreach(arg, wfuncstate->args) + { + ExprState *argstate = (ExprState *) lfirst(arg); + + fcinfo->arg[i] = ExecEvalExpr(argstate, econtext, + &fcinfo->argnull[i], NULL); + i++; + } + + if (peraggstate->transfn.fn_strict) + { + /* + * For a strict transfn, nothing happens when there's a NULL input; we + * just keep the prior transValue. + */ + for (i = 1; i <= numArguments; i++) + { + if (fcinfo->argnull[i]) + { + MemoryContextSwitchTo(oldContext); + return; + } + } + if (peraggstate->noTransValue) + { + /* + * transValue has not been initialized. This is the first non-NULL + * input value. We use it as the initial value for transValue. (We + * already checked that the agg's input type is binary-compatible + * with its transtype, so straight copy here is OK.) + * + * We must copy the datum into wincontext if it is pass-by-ref. We + * do not need to pfree the old transValue, since it's NULL. 
+ */ + MemoryContextSwitchTo(winstate->wincontext); + peraggstate->transValue = datumCopy(fcinfo->arg[1], + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + peraggstate->transValueIsNull = false; + peraggstate->noTransValue = false; + MemoryContextSwitchTo(oldContext); + return; + } + if (peraggstate->transValueIsNull) + { + /* + * Don't call a strict function with NULL inputs. Note it is + * possible to get here despite the above tests, if the transfn is + * strict *and* returned a NULL on a prior cycle. If that happens + * we will propagate the NULL all the way to the end. + */ + MemoryContextSwitchTo(oldContext); + return; + } + } + + /* + * OK to call the transition function + */ + InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn), + numArguments + 1, + (void *) winstate, NULL); + fcinfo->arg[0] = peraggstate->transValue; + fcinfo->argnull[0] = peraggstate->transValueIsNull; + newVal = FunctionCallInvoke(fcinfo); + + /* + * If pass-by-ref datatype, must copy the new value into wincontext and + * pfree the prior transValue. But if transfn returned a pointer to its + * first input, we don't need to do anything. 
+ */ + if (!peraggstate->transtypeByVal && + DatumGetPointer(newVal) != DatumGetPointer(peraggstate->transValue)) + { + if (!fcinfo->isnull) + { + MemoryContextSwitchTo(winstate->wincontext); + newVal = datumCopy(newVal, + peraggstate->transtypeByVal, + peraggstate->transtypeLen); + } + if (!peraggstate->transValueIsNull) + pfree(DatumGetPointer(peraggstate->transValue)); + } + + MemoryContextSwitchTo(oldContext); + peraggstate->transValue = newVal; + peraggstate->transValueIsNull = fcinfo->isnull; +} + +/* + * finalize_windowaggregate + * parallel to finalize_aggregate in nodeAgg.c + */ +static void +finalize_windowaggregate(WindowAggState *winstate, + WindowStatePerFunc perfuncstate, + WindowStatePerAgg peraggstate, + Datum *result, bool *isnull) +{ + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory); + + /* + * Apply the agg's finalfn if one is provided, else return transValue. + */ + if (OidIsValid(peraggstate->finalfn_oid)) + { + FunctionCallInfoData fcinfo; + + InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), 1, + (void *) winstate, NULL); + fcinfo.arg[0] = peraggstate->transValue; + fcinfo.argnull[0] = peraggstate->transValueIsNull; + if (fcinfo.flinfo->fn_strict && peraggstate->transValueIsNull) + { + /* don't call a strict function with NULL inputs */ + *result = (Datum) 0; + *isnull = true; + } + else + { + *result = FunctionCallInvoke(&fcinfo); + *isnull = fcinfo.isnull; + } + } + else + { + *result = peraggstate->transValue; + *isnull = peraggstate->transValueIsNull; + } + + /* + * If result is pass-by-ref, make sure it is in the right context. 
+ */ + if (!peraggstate->resulttypeByVal && !*isnull && + !MemoryContextContains(CurrentMemoryContext, + DatumGetPointer(*result))) + *result = datumCopy(*result, + peraggstate->resulttypeByVal, + peraggstate->resulttypeLen); + MemoryContextSwitchTo(oldContext); +} + +/* + * eval_windowaggregates + * evaluate plain aggregates being used as window functions + * + * Much of this is duplicated from nodeAgg.c. But NOTE that we expect to be + * able to call aggregate final functions repeatedly after aggregating more + * data onto the same transition value. This is not a behavior required by + * nodeAgg.c. + */ +static void +eval_windowaggregates(WindowAggState *winstate) +{ + WindowStatePerAgg peraggstate; + int wfuncno, numaggs; + int i; + MemoryContext oldContext; + ExprContext *econtext; + TupleTableSlot *first_peer_slot = winstate->first_peer_slot; + TupleTableSlot *slot; + bool first; + + numaggs = winstate->numaggs; + if (numaggs == 0) + return; /* nothing to do */ + + /* final output execution is in ps_ExprContext */ + econtext = winstate->ss.ps.ps_ExprContext; + + /* + * We don't currently support explicitly-specified window frames. That + * means that the window frame always includes all the rows in the + * partition preceding and including the current row, and all its + * peers. As a special case, if there's no ORDER BY, all rows are peers, + * so the window frame includes all rows in the partition. + * + * When there are peer rows, all rows in a peer group will have the same + * aggregate values. The values will be calculated when current position + * reaches the first peer row, and on all the following peer rows we will + * just return the saved results. + * + * 'aggregatedupto' keeps track of the first row that has not yet been + * accumulated for the aggregates. When the current row has no peers, + * aggregatedupto will be one past the current row after this + * function.
If there are peer rows, all peers will be accumulated in one + * call of this function, and aggregatedupto will be ahead of the current + * position. If there's no ORDER BY, and thus all rows are peers, the + * first call will aggregate all rows in the partition. + * + * TODO: In the future, we could implement sliding frames by recalculating + * the aggregate whenever a row exits the frame. That would be pretty + * slow, though. For aggregates like SUM and COUNT we could implement a + * "negative transition function" that would be called for all the rows + * that exit the frame. + */ + + /* + * If we've already aggregated up through current row, reuse the + * saved result values + */ + if (winstate->aggregatedupto > winstate->currentpos) + { + for (i = 0; i < numaggs; i++) + { + peraggstate = &winstate->peragg[i]; + wfuncno = peraggstate->wfuncno; + econtext->ecxt_aggvalues[wfuncno] = peraggstate->resultValue; + econtext->ecxt_aggnulls[wfuncno] = peraggstate->resultValueIsNull; + } + return; + } + + /* Initialize aggregates on first call for partition */ + for (i = 0; i < numaggs; i++) + { + peraggstate = &winstate->peragg[i]; + wfuncno = peraggstate->wfuncno; + if (!peraggstate->hasResult) + initialize_windowaggregate(winstate, + &winstate->perfunc[wfuncno], + &winstate->peragg[i]); + } + + /* + * If this is the first call for this partition, fetch the first row + * for comparing peer rows. On subsequent calls, we'll always read + * ahead until we reach the first non-peer row, and store that row in + * first_peer_slot, for use in the next call. 
+ */ + if (TupIsNull(first_peer_slot)) + { + spool_tuples(winstate, winstate->aggregatedupto); + tuplestore_select_read_pointer(winstate->buffer, winstate->agg_ptr); + if (!tuplestore_gettupleslot(winstate->buffer, true, first_peer_slot)) + elog(ERROR, "unexpected end of tuplestore"); + } + + /* + * Advance until we reach the next non-peer row + */ + first = true; + for (;;) + { + if (!first) + { + /* Fetch the next row, and see if it's a peer */ + spool_tuples(winstate, winstate->aggregatedupto); + tuplestore_select_read_pointer(winstate->buffer, + winstate->agg_ptr); + slot = winstate->temp_slot_1; + if (!tuplestore_gettupleslot(winstate->buffer, true, slot)) + break; + if (!are_peers(winstate, first_peer_slot, slot)) + { + ExecCopySlot(first_peer_slot, slot); + break; + } + } + else + { + /* + * On first iteration, just accumulate the tuple saved from + * last call + */ + slot = first_peer_slot; + first = false; + } + + /* set tuple context for evaluation of aggregate arguments */ + winstate->tmpcontext->ecxt_outertuple = slot; + + for (i = 0; i < numaggs; i++) + { + wfuncno = winstate->peragg[i].wfuncno; + + advance_windowaggregate(winstate, + &winstate->perfunc[wfuncno], + &winstate->peragg[i]); + + } + /* Reset per-input-tuple context after each tuple */ + ResetExprContext(winstate->tmpcontext); + winstate->aggregatedupto++; + } + + /* + * finalize aggregates and fill result/isnull fields. + */ + for (i = 0; i < numaggs; i++) + { + Datum *result; + bool *isnull; + + peraggstate = &winstate->peragg[i]; + wfuncno = peraggstate->wfuncno; + result = &econtext->ecxt_aggvalues[wfuncno]; + isnull = &econtext->ecxt_aggnulls[wfuncno]; + finalize_windowaggregate(winstate, + &winstate->perfunc[wfuncno], + peraggstate, result, isnull); + + /* + * save the result for the next (non-shrinking frame) call. + */ + if (!peraggstate->resulttypeByVal && !*isnull) + { + /* + * clear old resultValue in order not to leak memory. 
+ */ + if (peraggstate->hasResult && + (DatumGetPointer(peraggstate->resultValue) != + DatumGetPointer(*result)) && + !peraggstate->resultValueIsNull) + pfree(DatumGetPointer(peraggstate->resultValue)); + + /* + * If pass-by-ref, copy it into our global context. + */ + oldContext = MemoryContextSwitchTo(winstate->wincontext); + peraggstate->resultValue = datumCopy(*result, + peraggstate->resulttypeByVal, + peraggstate->resulttypeLen); + MemoryContextSwitchTo(oldContext); + } + else + { + peraggstate->resultValue = *result; + } + peraggstate->resultValueIsNull = *isnull; + peraggstate->hasResult = true; + } +} + +/* + * eval_windowfunction + * + * Arguments of window functions are not evaluated here, because a window + * function can need random access to arbitrary rows in the partition. + * The window function uses the special WinGetFuncArgInPartition and + * WinGetFuncArgInFrame functions to evaluate the arguments for the rows + * it wants. + */ +static void +eval_windowfunction(WindowAggState *winstate, WindowStatePerFunc perfuncstate, + Datum *result, bool *isnull) +{ + FunctionCallInfoData fcinfo; + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory); + + /* + * We don't pass any normal arguments to a window function, but we do + * pass it the number of arguments, in order to permit window function + * implementations to support varying numbers of arguments. The real + * info goes through the WindowObject, which is passed via fcinfo->context. + */ + InitFunctionCallInfoData(fcinfo, &(perfuncstate->flinfo), + perfuncstate->numArguments, + (void *) perfuncstate->winobj, NULL); + /* Just in case, make all the regular argument slots be null */ + memset(fcinfo.argnull, true, perfuncstate->numArguments); + + *result = FunctionCallInvoke(&fcinfo); + *isnull = fcinfo.isnull; + + /* + * Make sure pass-by-ref data is allocated in the appropriate context. 
+ * (We need this in case the function returns a pointer into some + * short-lived tuple, as is entirely possible.) + */ + if (!perfuncstate->resulttypeByVal && !fcinfo.isnull && + !MemoryContextContains(CurrentMemoryContext, + DatumGetPointer(*result))) + *result = datumCopy(*result, + perfuncstate->resulttypeByVal, + perfuncstate->resulttypeLen); + + MemoryContextSwitchTo(oldContext); +} + +/* + * begin_partition + * Start buffering rows of the next partition. + */ +static void +begin_partition(WindowAggState *winstate) +{ + PlanState *outerPlan = outerPlanState(winstate); + int numfuncs = winstate->numfuncs; + int i; + + winstate->partition_spooled = false; + winstate->spooled_rows = 0; + winstate->currentpos = 0; + winstate->frametailpos = -1; + winstate->aggregatedupto = 0; + + /* + * If this is the very first partition, we need to fetch the first + * input row to store in it. + */ + if (TupIsNull(winstate->first_part_slot)) + { + TupleTableSlot *outerslot = ExecProcNode(outerPlan); + + if (!TupIsNull(outerslot)) + ExecCopySlot(winstate->first_part_slot, outerslot); + else + { + /* outer plan is empty, so we have nothing to do */ + winstate->partition_spooled = true; + winstate->more_partitions = false; + return; + } + } + + /* Create new tuplestore for this partition */ + winstate->buffer = tuplestore_begin_heap(false, false, work_mem); + + /* + * Set up read pointers for the tuplestore. The current and agg pointers + * don't need BACKWARD capability, but the per-window-function read + * pointers do. 
+ */ + winstate->current_ptr = 0; /* read pointer 0 is pre-allocated */ + + /* reset default REWIND capability bit for current ptr */ + tuplestore_set_eflags(winstate->buffer, 0); + + /* create a read pointer for aggregates, if needed */ + if (winstate->numaggs > 0) + winstate->agg_ptr = tuplestore_alloc_read_pointer(winstate->buffer, 0); + + /* create mark and read pointers for each real window function */ + for (i = 0; i < numfuncs; i++) + { + WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]); + + if (!perfuncstate->plain_agg) + { + WindowObject winobj = perfuncstate->winobj; + + winobj->markptr = tuplestore_alloc_read_pointer(winstate->buffer, + 0); + winobj->readptr = tuplestore_alloc_read_pointer(winstate->buffer, + EXEC_FLAG_BACKWARD); + winobj->markpos = -1; + winobj->seekpos = -1; + } + } + + /* + * Store the first tuple into the tuplestore (it's always available now; + * we either read it above, or saved it at the end of previous partition) + */ + tuplestore_puttupleslot(winstate->buffer, winstate->first_part_slot); + winstate->spooled_rows++; +} + +/* + * Read tuples from the outer node, up to position 'pos', and store them + * into the tuplestore. If pos is -1, reads the whole partition. + */ +static void +spool_tuples(WindowAggState *winstate, int64 pos) +{ + WindowAgg *node = (WindowAgg *) winstate->ss.ps.plan; + PlanState *outerPlan; + TupleTableSlot *outerslot; + MemoryContext oldcontext; + + if (!winstate->buffer) + return; /* just a safety check */ + if (winstate->partition_spooled) + return; /* whole partition done already */ + + /* + * If the tuplestore has spilled to disk, alternate reading and writing + * becomes quite expensive due to frequent buffer flushes. It's cheaper + * to force the entire partition to get spooled in one go. + * + * XXX this is a horrid kluge --- it'd be better to fix the performance + * problem inside tuplestore. 
FIXME + */ + if (!tuplestore_in_memory(winstate->buffer)) + pos = -1; + + outerPlan = outerPlanState(winstate); + + /* Must be in query context to call outerplan or touch tuplestore */ + oldcontext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory); + + while (winstate->spooled_rows <= pos || pos == -1) + { + outerslot = ExecProcNode(outerPlan); + if (TupIsNull(outerslot)) + { + /* reached the end of the last partition */ + winstate->partition_spooled = true; + winstate->more_partitions = false; + break; + } + + if (node->partNumCols > 0) + { + /* Check if this tuple still belongs to the current partition */ + if (!execTuplesMatch(winstate->first_part_slot, + outerslot, + node->partNumCols, node->partColIdx, + winstate->partEqfunctions, + winstate->tmpcontext->ecxt_per_tuple_memory)) + { + /* + * end of partition; copy the tuple for the next cycle. + */ + ExecCopySlot(winstate->first_part_slot, outerslot); + winstate->partition_spooled = true; + winstate->more_partitions = true; + break; + } + } + + /* Still in partition, so save it into the tuplestore */ + tuplestore_puttupleslot(winstate->buffer, outerslot); + winstate->spooled_rows++; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * release_partition + * clear information kept within a partition, including + * tuplestore and aggregate results. 
+ */ +static void +release_partition(WindowAggState *winstate) +{ + int i; + + for (i = 0; i < winstate->numfuncs; i++) + { + WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]); + + /* Release any partition-local state of this window function */ + if (perfuncstate->winobj) + perfuncstate->winobj->localmem = NULL; + + /* Reset agg result cache */ + if (perfuncstate->plain_agg) + { + int aggno = perfuncstate->aggno; + WindowStatePerAggData *peraggstate = &winstate->peragg[aggno]; + + peraggstate->resultValueIsNull = true; + peraggstate->hasResult = false; + } + } + + /* + * Release all partition-local memory (in particular, any partition-local + * state or aggregate temp data that we might have trashed our pointers + * to in the above loop). We don't rely on retail pfree because some + * aggregates might have allocated data we don't have direct pointers to. + */ + MemoryContextResetAndDeleteChildren(winstate->wincontext); + + /* Ensure eval_windowaggregates will see next call as partition start */ + ExecClearTuple(winstate->first_peer_slot); + + if (winstate->buffer) + tuplestore_end(winstate->buffer); + winstate->buffer = NULL; + winstate->partition_spooled = false; +} + + +/* ----------------- + * ExecWindowAgg + * + * ExecWindowAgg receives tuples from its outer subplan and + * stores them into a tuplestore, then processes window functions. + * This node doesn't reduce nor qualify any row so the number of + * returned rows is exactly the same as its outer subplan's result + * (ignoring the case of SRFs in the targetlist, that is). + * ----------------- + */ +TupleTableSlot * +ExecWindowAgg(WindowAggState *winstate) +{ + TupleTableSlot *result; + ExprDoneCond isDone; + ExprContext *econtext; + int i; + int numfuncs; + + if (winstate->all_done) + return NULL; + + /* + * Check to see if we're still projecting out tuples from a previous output + * tuple (because there is a function-returning-set in the projection + * expressions). 
If so, try to project another one. + */ + if (winstate->ss.ps.ps_TupFromTlist) + { + TupleTableSlot *result; + ExprDoneCond isDone; + + result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone); + if (isDone == ExprMultipleResult) + return result; + /* Done with that source tuple... */ + winstate->ss.ps.ps_TupFromTlist = false; + } + +restart: + if (winstate->buffer == NULL) + { + /* Initialize for first partition and set current row = 0 */ + begin_partition(winstate); + } + else + { + /* Advance current row within partition */ + winstate->currentpos++; + } + + /* + * Spool all tuples up to and including the current row, if we haven't + * already + */ + spool_tuples(winstate, winstate->currentpos); + + /* Move to the next partition if we reached the end of this partition */ + if (winstate->partition_spooled && + winstate->currentpos >= winstate->spooled_rows) + { + release_partition(winstate); + + if (winstate->more_partitions) + { + begin_partition(winstate); + Assert(winstate->spooled_rows > 0); + } + else + { + winstate->all_done = true; + return NULL; + } + } + + /* final output execution is in ps_ExprContext */ + econtext = winstate->ss.ps.ps_ExprContext; + + /* Clear the per-output-tuple context for current row */ + ResetExprContext(econtext); + + /* + * Read the current row from the tuplestore, and save in ScanTupleSlot + * for possible use by WinGetFuncArgCurrent or the final projection step. + * (We can't rely on the outerplan's output slot because we may have to + * read beyond the current row.) + * + * Current row must be in the tuplestore, since we spooled it above. 
+ */ + tuplestore_select_read_pointer(winstate->buffer, winstate->current_ptr); + if (!tuplestore_gettupleslot(winstate->buffer, true, + winstate->ss.ss_ScanTupleSlot)) + elog(ERROR, "unexpected end of tuplestore"); + + /* + * Evaluate true window functions + */ + numfuncs = winstate->numfuncs; + for (i = 0; i < numfuncs; i++) + { + WindowStatePerFunc perfuncstate = &(winstate->perfunc[i]); + + if (perfuncstate->plain_agg) + continue; + eval_windowfunction(winstate, perfuncstate, + &(econtext->ecxt_aggvalues[perfuncstate->wfuncstate->wfuncno]), + &(econtext->ecxt_aggnulls[perfuncstate->wfuncstate->wfuncno])); + } + + /* + * Evaluate aggregates + */ + if (winstate->numaggs > 0) + eval_windowaggregates(winstate); + + /* + * Truncate any no-longer-needed rows from the tuplestore. + */ + tuplestore_trim(winstate->buffer); + + /* + * Form and return a projection tuple using the windowfunc results + * and the current row. Setting ecxt_outertuple arranges that any + * Vars will be evaluated with respect to that row. 
+ */ + econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot; + result = ExecProject(winstate->ss.ps.ps_ProjInfo, &isDone); + + if (isDone == ExprEndResult) + { + /* SRF in tlist returned no rows, so advance to next input tuple */ + goto restart; + } + + winstate->ss.ps.ps_TupFromTlist = + (isDone == ExprMultipleResult); + return result; +} + +/* ----------------- + * ExecInitWindowAgg + * + * Creates the run-time information for the WindowAgg node produced by the + * planner and initializes its outer subtree + * ----------------- + */ +WindowAggState * +ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) +{ + WindowAggState *winstate; + Plan *outerPlan; + ExprContext *econtext; + ExprContext *tmpcontext; + WindowStatePerFunc perfunc; + WindowStatePerAgg peragg; + int numfuncs, + wfuncno, + numaggs, + aggno; + ListCell *l; + + /* check for unsupported flags */ + Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); + + /* + * create state structure + */ + winstate = makeNode(WindowAggState); + winstate->ss.ps.plan = (Plan *) node; + winstate->ss.ps.state = estate; + + /* + * Create expression contexts. We need two, one for per-input-tuple + * processing and one for per-output-tuple processing. We cheat a little + * by using ExecAssignExprContext() to build both. 
+ */ + ExecAssignExprContext(estate, &winstate->ss.ps); + tmpcontext = winstate->ss.ps.ps_ExprContext; + winstate->tmpcontext = tmpcontext; + ExecAssignExprContext(estate, &winstate->ss.ps); + + /* Create long-lived context for storage of aggregate transvalues etc */ + winstate->wincontext = + AllocSetContextCreate(CurrentMemoryContext, + "WindowAggContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + +#define WINDOWAGG_NSLOTS 6 + + /* + * tuple table initialization + */ + ExecInitScanTupleSlot(estate, &winstate->ss); + ExecInitResultTupleSlot(estate, &winstate->ss.ps); + winstate->first_part_slot = ExecInitExtraTupleSlot(estate); + winstate->first_peer_slot = ExecInitExtraTupleSlot(estate); + winstate->temp_slot_1 = ExecInitExtraTupleSlot(estate); + winstate->temp_slot_2 = ExecInitExtraTupleSlot(estate); + + winstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->plan.targetlist, + (PlanState *) winstate); + + /* + * WindowAgg nodes never have quals, since they can only occur at the + * logical top level of a query (ie, after any WHERE or HAVING filters) + */ + Assert(node->plan.qual == NIL); + winstate->ss.ps.qual = NIL; + + /* + * initialize child nodes + */ + outerPlan = outerPlan(node); + outerPlanState(winstate) = ExecInitNode(outerPlan, estate, eflags); + + /* + * initialize source tuple type (which is also the tuple type that we'll + * store in the tuplestore and use in all our working slots). 
+ */ + ExecAssignScanTypeFromOuterPlan(&winstate->ss); + + ExecSetSlotDescriptor(winstate->first_part_slot, + winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor); + ExecSetSlotDescriptor(winstate->first_peer_slot, + winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor); + ExecSetSlotDescriptor(winstate->temp_slot_1, + winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor); + ExecSetSlotDescriptor(winstate->temp_slot_2, + winstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor); + + /* + * Initialize result tuple type and projection info. + */ + ExecAssignResultTypeFromTL(&winstate->ss.ps); + ExecAssignProjectionInfo(&winstate->ss.ps, NULL); + + winstate->ss.ps.ps_TupFromTlist = false; + + /* Set up data for comparing tuples */ + if (node->partNumCols > 0) + winstate->partEqfunctions = execTuplesMatchPrepare(node->partNumCols, + node->partOperators); + if (node->ordNumCols > 0) + winstate->ordEqfunctions = execTuplesMatchPrepare(node->ordNumCols, + node->ordOperators); + + /* + * WindowAgg nodes use aggvalues and aggnulls as well as Agg nodes. + */ + numfuncs = winstate->numfuncs; + numaggs = winstate->numaggs; + econtext = winstate->ss.ps.ps_ExprContext; + econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numfuncs); + econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numfuncs); + + /* + * allocate per-wfunc/per-agg state information. 
+ */ + perfunc = (WindowStatePerFunc) palloc0(sizeof(WindowStatePerFuncData) * numfuncs); + peragg = (WindowStatePerAgg) palloc0(sizeof(WindowStatePerAggData) * numaggs); + winstate->perfunc = perfunc; + winstate->peragg = peragg; + + wfuncno = -1; + aggno = -1; + foreach(l, winstate->funcs) + { + WindowFuncExprState *wfuncstate = (WindowFuncExprState *) lfirst(l); + WindowFunc *wfunc = (WindowFunc *) wfuncstate->xprstate.expr; + WindowStatePerFunc perfuncstate; + AclResult aclresult; + int i; + + /* Look for a previous duplicate window function */ + for (i = 0; i <= wfuncno; i++) + { + if (equal(wfunc, perfunc[i].wfunc) && + !contain_volatile_functions((Node *) wfunc)) + break; + } + if (i <= wfuncno) + { + /* Found a match to an existing entry, so just mark it */ + wfuncstate->wfuncno = i; + continue; + } + + /* Nope, so assign a new PerFunc record */ + perfuncstate = &perfunc[++wfuncno]; + + /* Mark WindowFunc state node with assigned index in the result array */ + wfuncstate->wfuncno = wfuncno; + + /* Check permission to call window function */ + aclresult = pg_proc_aclcheck(wfunc->winfnoid, GetUserId(), + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_PROC, + get_func_name(wfunc->winfnoid)); + + /* Fill in the perfuncstate data */ + perfuncstate->wfuncstate = wfuncstate; + perfuncstate->wfunc = wfunc; + perfuncstate->numArguments = list_length(wfuncstate->args); + + fmgr_info_cxt(wfunc->winfnoid, &perfuncstate->flinfo, + tmpcontext->ecxt_per_query_memory); + perfuncstate->flinfo.fn_expr = (Node *) wfunc; + get_typlenbyval(wfunc->wintype, + &perfuncstate->resulttypeLen, + &perfuncstate->resulttypeByVal); + + /* + * If it's really just a plain aggregate function, + * we'll emulate the Agg environment for it.
+ */ + perfuncstate->plain_agg = wfunc->winagg; + if (wfunc->winagg) + { + WindowStatePerAgg peraggstate; + + perfuncstate->aggno = ++aggno; + peraggstate = &winstate->peragg[aggno]; + initialize_peragg(winstate, wfunc, peraggstate); + peraggstate->wfuncno = wfuncno; + } + else + { + WindowObject winobj = makeNode(WindowObjectData); + + winobj->winstate = winstate; + winobj->argstates = wfuncstate->args; + winobj->localmem = NULL; + perfuncstate->winobj = winobj; + } + } + + /* Update numfuncs, numaggs to match number of unique functions found */ + winstate->numfuncs = wfuncno + 1; + winstate->numaggs = aggno + 1; + + winstate->partition_spooled = false; + winstate->more_partitions = false; + + return winstate; +} + +/* ----------------- + * ExecCountSlotsWindowAgg + * ----------------- + */ +int +ExecCountSlotsWindowAgg(WindowAgg *node) +{ + return ExecCountSlotsNode(outerPlan(node)) + + ExecCountSlotsNode(innerPlan(node)) + + WINDOWAGG_NSLOTS; +} + +/* ----------------- + * ExecEndWindowAgg + * ----------------- + */ +void +ExecEndWindowAgg(WindowAggState *node) +{ + PlanState *outerPlan; + + release_partition(node); + + pfree(node->perfunc); + pfree(node->peragg); + + ExecClearTuple(node->ss.ss_ScanTupleSlot); + ExecClearTuple(node->first_part_slot); + ExecClearTuple(node->first_peer_slot); + ExecClearTuple(node->temp_slot_1); + ExecClearTuple(node->temp_slot_2); + + /* + * Free both the expr contexts. 
+ */ + ExecFreeExprContext(&node->ss.ps); + node->ss.ps.ps_ExprContext = node->tmpcontext; + ExecFreeExprContext(&node->ss.ps); + + MemoryContextDelete(node->wincontext); + + outerPlan = outerPlanState(node); + ExecEndNode(outerPlan); +} + +/* ----------------- + * ExecReScanWindowAgg + * ----------------- + */ +void +ExecReScanWindowAgg(WindowAggState *node, ExprContext *exprCtxt) +{ + ExprContext *econtext = node->ss.ps.ps_ExprContext; + + node->all_done = false; + + node->ss.ps.ps_TupFromTlist = false; + + /* release tuplestore et al */ + release_partition(node); + + /* release all temp tuples, but especially first_part_slot */ + ExecClearTuple(node->ss.ss_ScanTupleSlot); + ExecClearTuple(node->first_part_slot); + ExecClearTuple(node->first_peer_slot); + ExecClearTuple(node->temp_slot_1); + ExecClearTuple(node->temp_slot_2); + + /* Forget current wfunc values */ + MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numfuncs); + MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numfuncs); + + /* + * if chgParam of subnode is not null then plan will be re-scanned by + * first ExecProcNode. + */ + if (((PlanState *) node)->lefttree->chgParam == NULL) + ExecReScan(((PlanState *) node)->lefttree, exprCtxt); +} + +/* + * initialize_peragg + * + * Almost same as in nodeAgg.c, except we don't support DISTINCT currently.
+ *
+ * Fills in *peraggstate for the aggregate identified by wfunc->winfnoid:
+ * the OIDs and call info of its transition and final functions, the storage
+ * properties of its result and transition types, and its initial transition
+ * value.  Returns peraggstate.
+ */
+static WindowStatePerAggData *
+initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc,
+				  WindowStatePerAgg peraggstate)
+{
+	Oid			inputTypes[FUNC_MAX_ARGS];
+	int			numArguments = 0;
+	ListCell   *argcell;
+	HeapTuple	aggTuple;
+	HeapTuple	procTuple;
+	Form_pg_aggregate aggform;
+	Oid			aggOwner;
+	Oid			aggtranstype;
+	Oid			transfn_oid;
+	Oid			finalfn_oid;
+	Expr	   *transfnexpr;
+	Expr	   *finalfnexpr;
+	Datum		textInitVal;
+	AclResult	aclresult;
+
+	/* record the type of every argument expression, counting as we go */
+	foreach(argcell, wfunc->args)
+		inputTypes[numArguments++] = exprType((Node *) lfirst(argcell));
+
+	aggTuple = SearchSysCache(AGGFNOID,
+							  ObjectIdGetDatum(wfunc->winfnoid),
+							  0, 0, 0);
+	if (!HeapTupleIsValid(aggTuple))
+		elog(ERROR, "cache lookup failed for aggregate %u",
+			 wfunc->winfnoid);
+	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+	transfn_oid = aggform->aggtransfn;
+	finalfn_oid = aggform->aggfinalfn;
+	peraggstate->transfn_oid = transfn_oid;
+	peraggstate->finalfn_oid = finalfn_oid;
+
+	/*
+	 * Permission to call the aggregate itself was already checked by
+	 * ExecInitWindowAgg.  What remains is to check that the aggregate's
+	 * owner is allowed to execute the component functions.
+	 */
+	procTuple = SearchSysCache(PROCOID,
+							   ObjectIdGetDatum(wfunc->winfnoid),
+							   0, 0, 0);
+	if (!HeapTupleIsValid(procTuple))
+		elog(ERROR, "cache lookup failed for function %u",
+			 wfunc->winfnoid);
+	aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
+	ReleaseSysCache(procTuple);
+
+	aclresult = pg_proc_aclcheck(transfn_oid, aggOwner, ACL_EXECUTE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, ACL_KIND_PROC,
+					   get_func_name(transfn_oid));
+	if (OidIsValid(finalfn_oid))
+	{
+		aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner, ACL_EXECUTE);
+		if (aclresult != ACLCHECK_OK)
+			aclcheck_error(aclresult, ACL_KIND_PROC,
+						   get_func_name(finalfn_oid));
+	}
+
+	/* a polymorphic transition type must be resolved to an actual type */
+	aggtranstype = aggform->aggtranstype;
+	if (IsPolymorphicType(aggtranstype))
+	{
+		/* this needs the aggregate's declared input types */
+		Oid		   *declaredArgTypes;
+		int			agg_nargs;
+
+		get_func_signature(wfunc->winfnoid,
+						   &declaredArgTypes, &agg_nargs);
+		Assert(agg_nargs == numArguments);
+		aggtranstype = enforce_generic_type_consistency(inputTypes,
+														declaredArgTypes,
+														agg_nargs,
+														aggtranstype,
+														false);
+		pfree(declaredArgTypes);
+	}
+
+	/* expression trees are built from the actual argument & result types */
+	build_aggregate_fnexprs(inputTypes,
+							numArguments,
+							aggtranstype,
+							wfunc->wintype,
+							transfn_oid,
+							finalfn_oid,
+							&transfnexpr,
+							&finalfnexpr);
+
+	fmgr_info(transfn_oid, &peraggstate->transfn);
+	peraggstate->transfn.fn_expr = (Node *) transfnexpr;
+
+	if (OidIsValid(finalfn_oid))
+	{
+		fmgr_info(finalfn_oid, &peraggstate->finalfn);
+		peraggstate->finalfn.fn_expr = (Node *) finalfnexpr;
+	}
+
+	get_typlenbyval(wfunc->wintype,
+					&peraggstate->resulttypeLen,
+					&peraggstate->resulttypeByVal);
+	get_typlenbyval(aggtranstype,
+					&peraggstate->transtypeLen,
+					&peraggstate->transtypeByVal);
+
+	/*
+	 * agginitval may be null, so it cannot be read as a plain struct field;
+	 * go through SysCacheGetAttr instead.
+	 */
+	textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
+								  Anum_pg_aggregate_agginitval,
+								  &peraggstate->initValueIsNull);
+
+	if (!peraggstate->initValueIsNull)
+		peraggstate->initValue = GetAggInitVal(textInitVal, aggtranstype);
+	else
+		peraggstate->initValue = (Datum) 0;
+
+	/*
+	 * A strict transfn with a NULL initval uses the first input value as the
+	 * initial transValue, which is only OK when the input type and the
+	 * transition type are the same or binary-compatible.  That should have
+	 * been checked when the aggregate was defined, but check again anyway.
+	 */
+	if (peraggstate->transfn.fn_strict &&
+		peraggstate->initValueIsNull &&
+		(numArguments < 1 ||
+		 !IsBinaryCoercible(inputTypes[0], aggtranstype)))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+				 errmsg("aggregate %u needs to have compatible input type and transition type",
+						wfunc->winfnoid)));
+
+	ReleaseSysCache(aggTuple);
+
+	return peraggstate;
+}
+
+/*
+ * GetAggInitVal
+ *		Convert the text form of an initial value into a Datum of type
+ *		transtype by running it through that type's input function.
+ */
+static Datum
+GetAggInitVal(Datum textInitVal, Oid transtype)
+{
+	Oid			inputfn;
+	Oid			ioparam;
+	char	   *initstr;
+	Datum		result;
+
+	getTypeInputInfo(transtype, &inputfn, &ioparam);
+	initstr = TextDatumGetCString(textInitVal);
+	result = OidInputFunctionCall(inputfn, initstr, ioparam, -1);
+	pfree(initstr);
+
+	return result;
+}
+
+/*
+ * are_peers
+ *		Report whether two rows are equal on the plan's ORDER BY columns.
+ */
+static bool
+are_peers(WindowAggState *winstate, TupleTableSlot *slot1,
+		  TupleTableSlot *slot2)
+{
+	WindowAgg  *plan = (WindowAgg *) winstate->ss.ps.plan;
+
+	/* with no ORDER BY columns, every row is a peer of every other row */
+	if (plan->ordNumCols == 0)
+		return true;
+
+	return execTuplesMatch(slot1, slot2,
+						   plan->ordNumCols, plan->ordColIdx,
+						   winstate->ordEqfunctions,
+						   winstate->tmpcontext->ecxt_per_tuple_memory);
+}
+
+/*
+ * window_gettupleslot
+ *	Fetch the pos'th tuple of the current partition into the slot
+ *
+ * Returns true if successful, false if no such row
+ */
+static bool
+window_gettupleslot(WindowObject winobj, int64 pos, TupleTableSlot *slot)
+{
+	WindowAggState *winstate = winobj->winstate;
+	MemoryContext oldcontext;
+
+	/* Don't allow passing -1 to spool_tuples here */
+	if (pos < 0)
+		return false;
+
+	/* If necessary, fetch the tuple into the spool */
+	spool_tuples(winstate, pos);
+
+	if (pos >= winstate->spooled_rows)
+		return false;
+
+	if (pos < winobj->markpos)
+		elog(ERROR, "cannot fetch row before WindowObject's mark position");
+
+	oldcontext =
MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory); + + tuplestore_select_read_pointer(winstate->buffer, winobj->readptr); + + /* + * There's no API to refetch the tuple at the current position. We + * have to move one tuple forward, and then one backward. (We don't + * do it the other way because we might try to fetch the row before + * our mark, which isn't allowed.) + */ + if (winobj->seekpos == pos) + { + tuplestore_advance(winstate->buffer, true); + winobj->seekpos++; + } + + while (winobj->seekpos > pos) + { + if (!tuplestore_gettupleslot(winstate->buffer, false, slot)) + elog(ERROR, "unexpected end of tuplestore"); + winobj->seekpos--; + } + + while (winobj->seekpos < pos) + { + if (!tuplestore_gettupleslot(winstate->buffer, true, slot)) + elog(ERROR, "unexpected end of tuplestore"); + winobj->seekpos++; + } + + MemoryContextSwitchTo(oldcontext); + + return true; +} + + +/*********************************************************************** + * API exposed to window functions + ***********************************************************************/ + + +/* + * WinGetPartitionLocalMemory + * Get working memory that lives till end of partition processing + * + * On first call within a given partition, this allocates and zeroes the + * requested amount of space. Subsequent calls just return the same chunk. + * + * Memory obtained this way is normally used to hold state that should be + * automatically reset for each new partition. If a window function wants + * to hold state across the whole query, fcinfo->fn_extra can be used in the + * usual way for that. + */ +void * +WinGetPartitionLocalMemory(WindowObject winobj, Size sz) +{ + Assert(WindowObjectIsValid(winobj)); + if (winobj->localmem == NULL) + winobj->localmem = MemoryContextAllocZero(winobj->winstate->wincontext, + sz); + return winobj->localmem; +} + +/* + * WinGetCurrentPosition + * Return the current row's position (counting from 0) within the current + * partition. 
+ */ +int64 +WinGetCurrentPosition(WindowObject winobj) +{ + Assert(WindowObjectIsValid(winobj)); + return winobj->winstate->currentpos; +} + +/* + * WinGetPartitionRowCount + * Return total number of rows contained in the current partition. + * + * Note: this is a relatively expensive operation because it forces the + * whole partition to be "spooled" into the tuplestore at once. Once + * executed, however, additional calls within the same partition are cheap. + */ +int64 +WinGetPartitionRowCount(WindowObject winobj) +{ + Assert(WindowObjectIsValid(winobj)); + spool_tuples(winobj->winstate, -1); + return winobj->winstate->spooled_rows; +} + +/* + * WinSetMarkPosition + * Set the "mark" position for the window object, which is the oldest row + * number (counting from 0) it is allowed to fetch during all subsequent + * operations within the current partition. + * + * Window functions do not have to call this, but are encouraged to move the + * mark forward when possible to keep the tuplestore size down and prevent + * having to spill rows to disk. + */ +void +WinSetMarkPosition(WindowObject winobj, int64 markpos) +{ + WindowAggState *winstate; + + Assert(WindowObjectIsValid(winobj)); + winstate = winobj->winstate; + + if (markpos < winobj->markpos) + elog(ERROR, "cannot move WindowObject's mark position backward"); + tuplestore_select_read_pointer(winstate->buffer, winobj->markptr); + while (markpos > winobj->markpos) + { + tuplestore_advance(winstate->buffer, true); + winobj->markpos++; + } + tuplestore_select_read_pointer(winstate->buffer, winobj->readptr); + while (markpos > winobj->seekpos) + { + tuplestore_advance(winstate->buffer, true); + winobj->seekpos++; + } +} + +/* + * WinRowsArePeers + * Compare two rows (specified by absolute position in window) to see + * if they are equal according to the ORDER BY clause. 
+ */
+bool
+WinRowsArePeers(WindowObject winobj, int64 pos1, int64 pos2)
+{
+	WindowAggState *winstate;
+	TupleTableSlot *first;
+	TupleTableSlot *second;
+	bool		peers;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	/* without ORDER BY all rows are peers, so there is nothing to fetch */
+	if (((WindowAgg *) winstate->ss.ps.plan)->ordNumCols == 0)
+		return true;
+
+	/* load both rows into the scratch slots; a missing row is an error */
+	first = winstate->temp_slot_1;
+	if (!window_gettupleslot(winobj, pos1, first))
+		elog(ERROR, "specified position is out of window: " INT64_FORMAT,
+			 pos1);
+
+	second = winstate->temp_slot_2;
+	if (!window_gettupleslot(winobj, pos2, second))
+		elog(ERROR, "specified position is out of window: " INT64_FORMAT,
+			 pos2);
+
+	peers = are_peers(winstate, first, second);
+
+	/* leave the scratch slots empty for the next user */
+	ExecClearTuple(first);
+	ExecClearTuple(second);
+
+	return peers;
+}
+
+/*
+ * WinGetFuncArgInPartition
+ *		Evaluate a window function's argument expression on a specified
+ *		row of the partition.  The row is identified in lseek(2) style,
+ *		i.e. relative to the current, first, or last row.
+ *
+ * argno: argument number to evaluate (counted from 0)
+ * relpos: signed rowcount offset from the seek position
+ * seektype: WINDOW_SEEK_CURRENT, WINDOW_SEEK_HEAD, or WINDOW_SEEK_TAIL
+ * set_mark: If the row is found and set_mark is true, the mark is moved to
+ *		the row as a side-effect.
+ * isnull: output argument, receives isnull status of result
+ * isout: output argument, set to indicate whether target row position
+ *		is out of partition (can pass NULL if caller doesn't care about this)
+ *
+ * Specifying a nonexistent row is not an error, it just causes a null result
+ * (plus setting *isout true, if isout isn't NULL).
+ */
+Datum
+WinGetFuncArgInPartition(WindowObject winobj, int argno,
+						 int relpos, int seektype, bool set_mark,
+						 bool *isnull, bool *isout)
+{
+	WindowAggState *winstate;
+	ExprContext *econtext;
+	TupleTableSlot *slot;
+	ExprState  *argstate;
+	int64		abs_pos;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+
+	econtext = winstate->ss.ps.ps_ExprContext;
+	slot = winstate->temp_slot_1;
+
+	/* turn (seektype, relpos) into an absolute row number */
+	if (seektype == WINDOW_SEEK_CURRENT)
+		abs_pos = winstate->currentpos + relpos;
+	else if (seektype == WINDOW_SEEK_HEAD)
+		abs_pos = relpos;
+	else if (seektype == WINDOW_SEEK_TAIL)
+	{
+		/* the last row is only known once the whole partition is spooled */
+		spool_tuples(winstate, -1);
+		abs_pos = winstate->spooled_rows - 1 + relpos;
+	}
+	else
+	{
+		elog(ERROR, "unrecognized window seek type: %d", seektype);
+		abs_pos = 0;			/* keep compiler quiet */
+	}
+
+	/* a row that does not exist yields a null result, not an error */
+	if (abs_pos < 0 || !window_gettupleslot(winobj, abs_pos, slot))
+	{
+		if (isout)
+			*isout = true;
+		*isnull = true;
+		return (Datum) 0;
+	}
+
+	if (isout)
+		*isout = false;
+	if (set_mark)
+		WinSetMarkPosition(winobj, abs_pos);
+
+	/* evaluate the requested argument against the fetched row */
+	econtext->ecxt_outertuple = slot;
+	argstate = (ExprState *) list_nth(winobj->argstates, argno);
+	return ExecEvalExpr(argstate, econtext, isnull, NULL);
+}
+
+/*
+ * WinGetFuncArgInFrame
+ *		Evaluate a window function's argument expression on a specified
+ *		row of the window frame.  The row is identified in lseek(2) style,
+ *		i.e. relative to the current, first, or last row.
+ *
+ * argno: argument number to evaluate (counted from 0)
+ * relpos: signed rowcount offset from the seek position
+ * seektype: WINDOW_SEEK_CURRENT, WINDOW_SEEK_HEAD, or WINDOW_SEEK_TAIL
+ * set_mark: If the row is found and set_mark is true, the mark is moved to
+ *		the row as a side-effect.
+ * isnull: output argument, receives isnull status of result + * isout: output argument, set to indicate whether target row position + * is out of frame (can pass NULL if caller doesn't care about this) + * + * Specifying a nonexistent row is not an error, it just causes a null result + * (plus setting *isout true, if isout isn't NULL). + */ +Datum +WinGetFuncArgInFrame(WindowObject winobj, int argno, + int relpos, int seektype, bool set_mark, + bool *isnull, bool *isout) +{ + ExprContext *econtext; + TupleTableSlot *slot; + bool gottuple; + int64 abs_pos; + int64 frametailpos; + + Assert(WindowObjectIsValid(winobj)); + + /* if no ordering columns, partition and frame are the same thing */ + if (((WindowAgg *) winobj->winstate->ss.ps.plan)->ordNumCols == 0) + return WinGetFuncArgInPartition(winobj, argno, relpos, seektype, + set_mark, isnull, isout); + + econtext = winobj->winstate->ss.ps.ps_ExprContext; + slot = winobj->winstate->temp_slot_1; + frametailpos = winobj->winstate->frametailpos; + + switch (seektype) + { + case WINDOW_SEEK_CURRENT: + abs_pos = winobj->winstate->currentpos + relpos; + break; + case WINDOW_SEEK_HEAD: + abs_pos = relpos; + break; + case WINDOW_SEEK_TAIL: + /* abs_pos is calculated later */ + abs_pos = 0; /* keep compiler quiet */ + break; + default: + elog(ERROR, "unrecognized window seek type: %d", seektype); + abs_pos = 0; /* keep compiler quiet */ + break; + } + + /* + * Seek for frame tail. If the tail position is before current, + * always check if the tail is after the current or not. + */ + if (frametailpos <= winobj->winstate->currentpos) + { + int64 add = 1; + + for (;;) + { + spool_tuples(winobj->winstate, winobj->winstate->currentpos + add); + if (winobj->winstate->spooled_rows > winobj->winstate->currentpos + add) + { + /* + * When seektype is not TAIL, we may optimize not to + * spool unnecessary tuples. In TAIL mode, we need to search + * until we find a row that's definitely not a peer. 
+ */ + if (!WinRowsArePeers(winobj, winobj->winstate->currentpos, + winobj->winstate->currentpos + add) || + (seektype != WINDOW_SEEK_TAIL && + winobj->winstate->currentpos + add < abs_pos)) + break; + add++; + } + else + { + /* + * If hit the partition end, the last row is the frame tail. + */ + break; + } + } + frametailpos = winobj->winstate->currentpos + add - 1; + winobj->winstate->frametailpos = frametailpos; + } + + if (seektype == WINDOW_SEEK_TAIL) + { + abs_pos = frametailpos + relpos; + } + + /* + * If there is an ORDER BY (we don't support other window frame + * specifications yet), the frame runs from first row of the partition + * to the last peer of the current row. Otherwise the frame is the + * whole partition. + */ + if (abs_pos < 0 || abs_pos > frametailpos) + gottuple = false; + else + gottuple = window_gettupleslot(winobj, abs_pos, slot); + + if (!gottuple) + { + if (isout) + *isout = true; + *isnull = true; + return (Datum) 0; + } + else + { + if (isout) + *isout = false; + if (set_mark) + WinSetMarkPosition(winobj, abs_pos); + econtext->ecxt_outertuple = slot; + return ExecEvalExpr((ExprState *) list_nth(winobj->argstates, argno), + econtext, isnull, NULL); + } +} + +/* + * WinGetFuncArgCurrent + * Evaluate a window function's argument expression on the current row. + * + * argno: argument number to evaluate (counted from 0) + * isnull: output argument, receives isnull status of result + * + * Note: this isn't quite equivalent to WinGetFuncArgInPartition or + * WinGetFuncArgInFrame targeting the current row, because it will succeed + * even if the WindowObject's mark has been set beyond the current row. + * This should generally be used for "ordinary" arguments of a window + * function, such as the offset argument of lead() or lag(). 
+ */
+Datum
+WinGetFuncArgCurrent(WindowObject winobj, int argno, bool *isnull)
+{
+	WindowAggState *winstate;
+	ExprContext *econtext;
+	ExprState  *argstate;
+
+	Assert(WindowObjectIsValid(winobj));
+	winstate = winobj->winstate;
+	econtext = winstate->ss.ps.ps_ExprContext;
+
+	/* the argument is evaluated against the node's scan tuple slot */
+	econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot;
+	argstate = (ExprState *) list_nth(winobj->argstates, argno);
+	return ExecEvalExpr(argstate, econtext, isnull, NULL);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 86f555a03a6..412fd96e5bf 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.416 2008/12/19 16:25:17 petere Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.417 2008/12/28 18:53:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -669,6 +669,32 @@ _copyAgg(Agg *from)
 }
 
 /*
+ * _copyWindowAgg
+ */
+static WindowAgg *
+_copyWindowAgg(WindowAgg *from)
+{
+	WindowAgg  *newnode = makeNode(WindowAgg);
+
+	CopyPlanFields((Plan *) from, (Plan *) newnode);
+
+	COPY_SCALAR_FIELD(partNumCols);
+	if (from->partNumCols > 0)
+	{
+		COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber));
+		COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid));
+	}
+	COPY_SCALAR_FIELD(ordNumCols);
+	if (from->ordNumCols > 0)
+	{
+		COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber));
+		COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid));
+	}
+
+	return newnode;
+}
+
+/*
  * _copyUnique
  */
 static Unique *
@@ -932,6 +958,25 @@ _copyAggref(Aggref *from)
 }
 
 /*
+ * _copyWindowFunc
+ */
+static WindowFunc *
+_copyWindowFunc(WindowFunc *from)
+{
+	WindowFunc *newnode = makeNode(WindowFunc);
+
+	COPY_SCALAR_FIELD(winfnoid);
+	COPY_SCALAR_FIELD(wintype);
+	COPY_NODE_FIELD(args);
+	COPY_SCALAR_FIELD(winref);
+
COPY_SCALAR_FIELD(winstar); + COPY_SCALAR_FIELD(winagg); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +/* * _copyArrayRef */ static ArrayRef * @@ -1729,6 +1774,21 @@ _copySortGroupClause(SortGroupClause *from) return newnode; } +static WindowClause * +_copyWindowClause(WindowClause *from) +{ + WindowClause *newnode = makeNode(WindowClause); + + COPY_STRING_FIELD(name); + COPY_STRING_FIELD(refname); + COPY_NODE_FIELD(partitionClause); + COPY_NODE_FIELD(orderClause); + COPY_SCALAR_FIELD(winref); + COPY_SCALAR_FIELD(copiedOrder); + + return newnode; +} + static RowMarkClause * _copyRowMarkClause(RowMarkClause *from) { @@ -1850,6 +1910,7 @@ _copyFuncCall(FuncCall *from) COPY_SCALAR_FIELD(agg_star); COPY_SCALAR_FIELD(agg_distinct); COPY_SCALAR_FIELD(func_variadic); + COPY_NODE_FIELD(over); COPY_LOCATION_FIELD(location); return newnode; @@ -1940,6 +2001,20 @@ _copySortBy(SortBy *from) return newnode; } +static WindowDef * +_copyWindowDef(WindowDef *from) +{ + WindowDef *newnode = makeNode(WindowDef); + + COPY_STRING_FIELD(name); + COPY_STRING_FIELD(refname); + COPY_NODE_FIELD(partitionClause); + COPY_NODE_FIELD(orderClause); + COPY_LOCATION_FIELD(location); + + return newnode; +} + static RangeSubselect * _copyRangeSubselect(RangeSubselect *from) { @@ -2081,6 +2156,7 @@ _copyQuery(Query *from) COPY_SCALAR_FIELD(resultRelation); COPY_NODE_FIELD(intoClause); COPY_SCALAR_FIELD(hasAggs); + COPY_SCALAR_FIELD(hasWindowFuncs); COPY_SCALAR_FIELD(hasSubLinks); COPY_SCALAR_FIELD(hasDistinctOn); COPY_SCALAR_FIELD(hasRecursive); @@ -2091,6 +2167,7 @@ _copyQuery(Query *from) COPY_NODE_FIELD(returningList); COPY_NODE_FIELD(groupClause); COPY_NODE_FIELD(havingQual); + COPY_NODE_FIELD(windowClause); COPY_NODE_FIELD(distinctClause); COPY_NODE_FIELD(sortClause); COPY_NODE_FIELD(limitOffset); @@ -2153,6 +2230,7 @@ _copySelectStmt(SelectStmt *from) COPY_NODE_FIELD(whereClause); COPY_NODE_FIELD(groupClause); COPY_NODE_FIELD(havingClause); + COPY_NODE_FIELD(windowClause); 
COPY_NODE_FIELD(withClause); COPY_NODE_FIELD(valuesLists); COPY_NODE_FIELD(sortClause); @@ -3440,6 +3518,9 @@ copyObject(void *from) case T_Agg: retval = _copyAgg(from); break; + case T_WindowAgg: + retval = _copyWindowAgg(from); + break; case T_Unique: retval = _copyUnique(from); break; @@ -3480,6 +3561,9 @@ copyObject(void *from) case T_Aggref: retval = _copyAggref(from); break; + case T_WindowFunc: + retval = _copyWindowFunc(from); + break; case T_ArrayRef: retval = _copyArrayRef(from); break; @@ -3951,6 +4035,9 @@ copyObject(void *from) case T_SortBy: retval = _copySortBy(from); break; + case T_WindowDef: + retval = _copyWindowDef(from); + break; case T_RangeSubselect: retval = _copyRangeSubselect(from); break; @@ -3984,6 +4071,9 @@ copyObject(void *from) case T_SortGroupClause: retval = _copySortGroupClause(from); break; + case T_WindowClause: + retval = _copyWindowClause(from); + break; case T_RowMarkClause: retval = _copyRowMarkClause(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index e5e2bc44226..e96c66152e8 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -22,7 +22,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.341 2008/12/19 16:25:17 petere Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.342 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -192,6 +192,20 @@ _equalAggref(Aggref *a, Aggref *b) } static bool +_equalWindowFunc(WindowFunc *a, WindowFunc *b) +{ + COMPARE_SCALAR_FIELD(winfnoid); + COMPARE_SCALAR_FIELD(wintype); + COMPARE_NODE_FIELD(args); + COMPARE_SCALAR_FIELD(winref); + COMPARE_SCALAR_FIELD(winstar); + COMPARE_SCALAR_FIELD(winagg); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool _equalArrayRef(ArrayRef *a, ArrayRef *b) { COMPARE_SCALAR_FIELD(refarraytype); @@ 
-839,6 +853,7 @@ _equalQuery(Query *a, Query *b) COMPARE_SCALAR_FIELD(resultRelation); COMPARE_NODE_FIELD(intoClause); COMPARE_SCALAR_FIELD(hasAggs); + COMPARE_SCALAR_FIELD(hasWindowFuncs); COMPARE_SCALAR_FIELD(hasSubLinks); COMPARE_SCALAR_FIELD(hasDistinctOn); COMPARE_SCALAR_FIELD(hasRecursive); @@ -849,6 +864,7 @@ _equalQuery(Query *a, Query *b) COMPARE_NODE_FIELD(returningList); COMPARE_NODE_FIELD(groupClause); COMPARE_NODE_FIELD(havingQual); + COMPARE_NODE_FIELD(windowClause); COMPARE_NODE_FIELD(distinctClause); COMPARE_NODE_FIELD(sortClause); COMPARE_NODE_FIELD(limitOffset); @@ -903,6 +919,7 @@ _equalSelectStmt(SelectStmt *a, SelectStmt *b) COMPARE_NODE_FIELD(whereClause); COMPARE_NODE_FIELD(groupClause); COMPARE_NODE_FIELD(havingClause); + COMPARE_NODE_FIELD(windowClause); COMPARE_NODE_FIELD(withClause); COMPARE_NODE_FIELD(valuesLists); COMPARE_NODE_FIELD(sortClause); @@ -1894,6 +1911,7 @@ _equalFuncCall(FuncCall *a, FuncCall *b) COMPARE_SCALAR_FIELD(agg_star); COMPARE_SCALAR_FIELD(agg_distinct); COMPARE_SCALAR_FIELD(func_variadic); + COMPARE_NODE_FIELD(over); COMPARE_LOCATION_FIELD(location); return true; @@ -1981,6 +1999,18 @@ _equalSortBy(SortBy *a, SortBy *b) } static bool +_equalWindowDef(WindowDef *a, WindowDef *b) +{ + COMPARE_STRING_FIELD(name); + COMPARE_STRING_FIELD(refname); + COMPARE_NODE_FIELD(partitionClause); + COMPARE_NODE_FIELD(orderClause); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool _equalRangeSubselect(RangeSubselect *a, RangeSubselect *b) { COMPARE_NODE_FIELD(subquery); @@ -2107,6 +2137,19 @@ _equalSortGroupClause(SortGroupClause *a, SortGroupClause *b) } static bool +_equalWindowClause(WindowClause *a, WindowClause *b) +{ + COMPARE_STRING_FIELD(name); + COMPARE_STRING_FIELD(refname); + COMPARE_NODE_FIELD(partitionClause); + COMPARE_NODE_FIELD(orderClause); + COMPARE_SCALAR_FIELD(winref); + COMPARE_SCALAR_FIELD(copiedOrder); + + return true; +} + +static bool _equalRowMarkClause(RowMarkClause *a, RowMarkClause 
*b) { COMPARE_SCALAR_FIELD(rti); @@ -2311,6 +2354,9 @@ equal(void *a, void *b) case T_Aggref: retval = _equalAggref(a, b); break; + case T_WindowFunc: + retval = _equalWindowFunc(a, b); + break; case T_ArrayRef: retval = _equalArrayRef(a, b); break; @@ -2769,6 +2815,9 @@ equal(void *a, void *b) case T_SortBy: retval = _equalSortBy(a, b); break; + case T_WindowDef: + retval = _equalWindowDef(a, b); + break; case T_RangeSubselect: retval = _equalRangeSubselect(a, b); break; @@ -2802,6 +2851,9 @@ equal(void *a, void *b) case T_SortGroupClause: retval = _equalSortGroupClause(a, b); break; + case T_WindowClause: + retval = _equalWindowClause(a, b); + break; case T_RowMarkClause: retval = _equalRowMarkClause(a, b); break; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 7236360347c..0284ce4edca 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/nodeFuncs.c,v 1.35 2008/10/21 20:42:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/nodeFuncs.c,v 1.36 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,6 +52,9 @@ exprType(Node *expr) case T_Aggref: type = ((Aggref *) expr)->aggtype; break; + case T_WindowFunc: + type = ((WindowFunc *) expr)->wintype; + break; case T_ArrayRef: { ArrayRef *arrayref = (ArrayRef *) expr; @@ -548,6 +551,8 @@ expression_returns_set_walker(Node *node, void *context) /* Avoid recursion for some cases that can't return a set */ if (IsA(node, Aggref)) return false; + if (IsA(node, WindowFunc)) + return false; if (IsA(node, DistinctExpr)) return false; if (IsA(node, ScalarArrayOpExpr)) @@ -634,6 +639,10 @@ exprLocation(Node *expr) /* function name should always be the first thing */ loc = ((Aggref *) expr)->location; break; + case T_WindowFunc: + /* function name should always be the first thing */ + loc = ((WindowFunc *) expr)->location; + break; 
case T_ArrayRef: /* just use array argument's location */ loc = exprLocation((Node *) ((ArrayRef *) expr)->refexpr); @@ -868,6 +877,9 @@ exprLocation(Node *expr) /* just use argument's location (ignore operator, if any) */ loc = exprLocation(((SortBy *) expr)->node); break; + case T_WindowDef: + loc = ((WindowDef *) expr)->location; + break; case T_TypeName: loc = ((TypeName *) expr)->location; break; @@ -1045,6 +1057,16 @@ expression_tree_walker(Node *node, return true; } break; + case T_WindowFunc: + { + WindowFunc *expr = (WindowFunc *) node; + + /* recurse directly on List */ + if (expression_tree_walker((Node *) expr->args, + walker, context)) + return true; + } + break; case T_ArrayRef: { ArrayRef *aref = (ArrayRef *) node; @@ -1221,6 +1243,16 @@ expression_tree_walker(Node *node, case T_Query: /* Do nothing with a sub-Query, per discussion above */ break; + case T_WindowClause: + { + WindowClause *wc = (WindowClause *) node; + + if (walker(wc->partitionClause, context)) + return true; + if (walker(wc->orderClause, context)) + return true; + } + break; case T_CommonTableExpr: { CommonTableExpr *cte = (CommonTableExpr *) node; @@ -1539,6 +1571,16 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_WindowFunc: + { + WindowFunc *wfunc = (WindowFunc *) node; + WindowFunc *newnode; + + FLATCOPY(newnode, wfunc, WindowFunc); + MUTATE(newnode->args, wfunc->args, List *); + return (Node *) newnode; + } + break; case T_ArrayRef: { ArrayRef *arrayref = (ArrayRef *) node; @@ -1848,6 +1890,17 @@ expression_tree_mutator(Node *node, case T_Query: /* Do nothing with a sub-Query, per discussion above */ return node; + case T_WindowClause: + { + WindowClause *wc = (WindowClause *) node; + WindowClause *newnode; + + FLATCOPY(newnode, wc, WindowClause); + MUTATE(newnode->partitionClause, wc->partitionClause, List *); + MUTATE(newnode->orderClause, wc->orderClause, List *); + return (Node *) newnode; + } + break; case T_CommonTableExpr: { 
CommonTableExpr *cte = (CommonTableExpr *) node; @@ -2280,6 +2333,8 @@ raw_expression_tree_walker(Node *node, bool (*walker) (), void *context) return true; if (walker(stmt->havingClause, context)) return true; + if (walker(stmt->windowClause, context)) + return true; if (walker(stmt->withClause, context)) return true; if (walker(stmt->valuesLists, context)) @@ -2318,6 +2373,8 @@ raw_expression_tree_walker(Node *node, bool (*walker) (), void *context) if (walker(fcall->args, context)) return true; + if (walker(fcall->over, context)) + return true; /* function name is deemed uninteresting */ } break; @@ -2365,6 +2422,16 @@ raw_expression_tree_walker(Node *node, bool (*walker) (), void *context) break; case T_SortBy: return walker(((SortBy *) node)->node, context); + case T_WindowDef: + { + WindowDef *wd = (WindowDef *) node; + + if (walker(wd->partitionClause, context)) + return true; + if (walker(wd->orderClause, context)) + return true; + } + break; case T_RangeSubselect: { RangeSubselect *rs = (RangeSubselect *) node; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 2477a17cfa3..f926f1314cd 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.346 2008/12/01 21:06:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.347 2008/12/28 18:53:56 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -567,6 +567,36 @@ _outAgg(StringInfo str, Agg *node) } static void +_outWindowAgg(StringInfo str, WindowAgg *node) +{ + int i; + + WRITE_NODE_TYPE("WINDOWAGG"); + + _outPlanInfo(str, (Plan *) node); + + WRITE_INT_FIELD(partNumCols); + + appendStringInfo(str, " :partColIdx"); + for (i = 0; i < node->partNumCols; i++) + appendStringInfo(str, " %d", node->partColIdx[i]); + + appendStringInfo(str, " :partOperations"); + for (i = 0; i < node->partNumCols; i++) + appendStringInfo(str, 
" %u", node->partOperators[i]); + + WRITE_INT_FIELD(ordNumCols); + + appendStringInfo(str, " :ordColIdx"); + for (i = 0; i< node->ordNumCols; i++) + appendStringInfo(str, " %d", node->ordColIdx[i]); + + appendStringInfo(str, " :ordOperations"); + for (i = 0; i < node->ordNumCols; i++) + appendStringInfo(str, " %u", node->ordOperators[i]); +} + +static void _outGroup(StringInfo str, Group *node) { int i; @@ -799,6 +829,20 @@ _outAggref(StringInfo str, Aggref *node) } static void +_outWindowFunc(StringInfo str, WindowFunc *node) +{ + WRITE_NODE_TYPE("WINDOWFUNC"); + + WRITE_OID_FIELD(winfnoid); + WRITE_OID_FIELD(wintype); + WRITE_NODE_FIELD(args); + WRITE_UINT_FIELD(winref); + WRITE_BOOL_FIELD(winstar); + WRITE_BOOL_FIELD(winagg); + WRITE_LOCATION_FIELD(location); +} + +static void _outArrayRef(StringInfo str, ArrayRef *node) { WRITE_NODE_TYPE("ARRAYREF"); @@ -1440,6 +1484,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node) WRITE_NODE_FIELD(placeholder_list); WRITE_NODE_FIELD(query_pathkeys); WRITE_NODE_FIELD(group_pathkeys); + WRITE_NODE_FIELD(window_pathkeys); WRITE_NODE_FIELD(distinct_pathkeys); WRITE_NODE_FIELD(sort_pathkeys); WRITE_FLOAT_FIELD(total_table_pages, "%.0f"); @@ -1722,6 +1767,7 @@ _outSelectStmt(StringInfo str, SelectStmt *node) WRITE_NODE_FIELD(whereClause); WRITE_NODE_FIELD(groupClause); WRITE_NODE_FIELD(havingClause); + WRITE_NODE_FIELD(windowClause); WRITE_NODE_FIELD(withClause); WRITE_NODE_FIELD(valuesLists); WRITE_NODE_FIELD(sortClause); @@ -1744,6 +1790,7 @@ _outFuncCall(StringInfo str, FuncCall *node) WRITE_BOOL_FIELD(agg_star); WRITE_BOOL_FIELD(agg_distinct); WRITE_BOOL_FIELD(func_variadic); + WRITE_NODE_FIELD(over); WRITE_LOCATION_FIELD(location); } @@ -1866,6 +1913,7 @@ _outQuery(StringInfo str, Query *node) WRITE_INT_FIELD(resultRelation); WRITE_NODE_FIELD(intoClause); WRITE_BOOL_FIELD(hasAggs); + WRITE_BOOL_FIELD(hasWindowFuncs); WRITE_BOOL_FIELD(hasSubLinks); WRITE_BOOL_FIELD(hasDistinctOn); WRITE_BOOL_FIELD(hasRecursive); @@ 
-1876,6 +1924,7 @@ _outQuery(StringInfo str, Query *node) WRITE_NODE_FIELD(returningList); WRITE_NODE_FIELD(groupClause); WRITE_NODE_FIELD(havingQual); + WRITE_NODE_FIELD(windowClause); WRITE_NODE_FIELD(distinctClause); WRITE_NODE_FIELD(sortClause); WRITE_NODE_FIELD(limitOffset); @@ -1896,6 +1945,19 @@ _outSortGroupClause(StringInfo str, SortGroupClause *node) } static void +_outWindowClause(StringInfo str, WindowClause *node) +{ + WRITE_NODE_TYPE("WINDOWCLAUSE"); + + WRITE_STRING_FIELD(name); + WRITE_STRING_FIELD(refname); + WRITE_NODE_FIELD(partitionClause); + WRITE_NODE_FIELD(orderClause); + WRITE_UINT_FIELD(winref); + WRITE_BOOL_FIELD(copiedOrder); +} + +static void _outRowMarkClause(StringInfo str, RowMarkClause *node) { WRITE_NODE_TYPE("ROWMARKCLAUSE"); @@ -2172,6 +2234,18 @@ _outSortBy(StringInfo str, SortBy *node) } static void +_outWindowDef(StringInfo str, WindowDef *node) +{ + WRITE_NODE_TYPE("WINDOWDEF"); + + WRITE_STRING_FIELD(name); + WRITE_STRING_FIELD(refname); + WRITE_NODE_FIELD(partitionClause); + WRITE_NODE_FIELD(orderClause); + WRITE_LOCATION_FIELD(location); +} + +static void _outRangeSubselect(StringInfo str, RangeSubselect *node) { WRITE_NODE_TYPE("RANGESUBSELECT"); @@ -2347,6 +2421,9 @@ _outNode(StringInfo str, void *obj) case T_Agg: _outAgg(str, obj); break; + case T_WindowAgg: + _outWindowAgg(str, obj); + break; case T_Group: _outGroup(str, obj); break; @@ -2392,6 +2469,9 @@ _outNode(StringInfo str, void *obj) case T_Aggref: _outAggref(str, obj); break; + case T_WindowFunc: + _outWindowFunc(str, obj); + break; case T_ArrayRef: _outArrayRef(str, obj); break; @@ -2616,6 +2696,9 @@ _outNode(StringInfo str, void *obj) case T_SortGroupClause: _outSortGroupClause(str, obj); break; + case T_WindowClause: + _outWindowClause(str, obj); + break; case T_RowMarkClause: _outRowMarkClause(str, obj); break; @@ -2661,6 +2744,9 @@ _outNode(StringInfo str, void *obj) case T_SortBy: _outSortBy(str, obj); break; + case T_WindowDef: + _outWindowDef(str, obj); 
+				break;
 			case T_RangeSubselect:
 				_outRangeSubselect(str, obj);
 				break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index ed5b55fb571..7bcc8e8047d 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.217 2008/11/15 19:43:46 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.218 2008/12/28 18:53:56 tgl Exp $
  *
  * NOTES
  *	  Path and Plan nodes do not have any readfuncs support, because we
@@ -153,6 +153,7 @@ _readQuery(void)
 	READ_INT_FIELD(resultRelation);
 	READ_NODE_FIELD(intoClause);
 	READ_BOOL_FIELD(hasAggs);
+	READ_BOOL_FIELD(hasWindowFuncs);
 	READ_BOOL_FIELD(hasSubLinks);
 	READ_BOOL_FIELD(hasDistinctOn);
 	READ_BOOL_FIELD(hasRecursive);
@@ -163,6 +164,7 @@ _readQuery(void)
 	READ_NODE_FIELD(returningList);
 	READ_NODE_FIELD(groupClause);
 	READ_NODE_FIELD(havingQual);
+	READ_NODE_FIELD(windowClause);
 	READ_NODE_FIELD(distinctClause);
 	READ_NODE_FIELD(sortClause);
 	READ_NODE_FIELD(limitOffset);
@@ -218,6 +220,24 @@ _readSortGroupClause(void)
 }
 
 /*
+ * _readWindowClause
+ */
+static WindowClause *
+_readWindowClause(void)
+{
+	READ_LOCALS(WindowClause);			/* reached from parseNodeString via the "WINDOWCLAUSE" tag */
+
+	READ_STRING_FIELD(name);			/* same six fields, in the same order, as _outWindowClause */
+	READ_STRING_FIELD(refname);
+	READ_NODE_FIELD(partitionClause);
+	READ_NODE_FIELD(orderClause);
+	READ_UINT_FIELD(winref);			/* pairs with WRITE_UINT_FIELD(winref) on the output side */
+	READ_BOOL_FIELD(copiedOrder);
+
+	READ_DONE();
+}
+
+/*
  * _readRowMarkClause
  */
 static RowMarkClause *
@@ -403,6 +423,25 @@ _readAggref(void)
 }
 
 /*
+ * _readWindowFunc
+ */
+static WindowFunc *
+_readWindowFunc(void)
+{
+	READ_LOCALS(WindowFunc);			/* reached from parseNodeString via the "WINDOWFUNC" tag */
+
+	READ_OID_FIELD(winfnoid);
+	READ_OID_FIELD(wintype);
+	READ_NODE_FIELD(args);
+	READ_UINT_FIELD(winref);			/* same winref numbering that WindowClause carries */
+	READ_BOOL_FIELD(winstar);
+	READ_BOOL_FIELD(winagg);
+	READ_LOCATION_FIELD(location);
+
+	READ_DONE();
+}
+
+/*
  * _readArrayRef
  */
 static ArrayRef *
@@ -1091,6 +1130,8 @@ parseNodeString(void)
 		return_value = _readQuery();
 	else if (MATCH("SORTGROUPCLAUSE", 15))
return_value = _readSortGroupClause(); + else if (MATCH("WINDOWCLAUSE", 12)) + return_value = _readWindowClause(); else if (MATCH("ROWMARKCLAUSE", 13)) return_value = _readRowMarkClause(); else if (MATCH("COMMONTABLEEXPR", 15)) @@ -1111,6 +1152,8 @@ parseNodeString(void) return_value = _readParam(); else if (MATCH("AGGREF", 6)) return_value = _readAggref(); + else if (MATCH("WINDOWFUNC", 10)) + return_value = _readWindowFunc(); else if (MATCH("ARRAYREF", 8)) return_value = _readArrayRef(); else if (MATCH("FUNCEXPR", 8)) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b0553894c24..17eebc67647 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.177 2008/11/15 19:43:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.178 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -929,10 +929,13 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * 1. If the subquery has a LIMIT clause, we must not push down any quals, * since that could change the set of rows returned. * - * 2. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push + * 2. If the subquery contains any window functions, we can't push quals + * into it, because that would change the results. + * + * 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push * quals into it, because that would change the results. * - * 3. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can + * 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can * push quals into each component query, but the quals can only reference * subquery columns that suffer no type coercions in the set operation. * Otherwise there are possible semantic gotchas. 
 So, we check the
@@ -950,6 +953,10 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
 	if (subquery->limitOffset != NULL || subquery->limitCount != NULL)
 		return false;
 
+	/* Check point 2 */
+	if (subquery->hasWindowFuncs)
+		return false;
+
 	/* Are we at top level, or looking at a setop component? */
 	if (subquery == topquery)
 	{
@@ -1093,6 +1100,12 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
 		return false;
 
 	/*
+	 * It would be unsafe to push down window function calls, but at least
+	 * for the moment we could never see any in a qual anyhow.
+	 */
+	Assert(!contain_window_function(qual));
+
+	/*
 	 * Examine all Vars used in clause; since it's a restriction clause, all
 	 * such Vars must refer to subselect output columns.
 	 */
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 0b9c5819820..7f30dde869f 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.201 2008/11/22 22:47:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.202 2008/12/28 18:53:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1284,6 +1284,40 @@ cost_agg(Path *path, PlannerInfo *root,
 }
 
 /*
+ * cost_windowagg
+ *		Determines the cost of performing a WindowAgg plan node, including
+ *		the cost of its input; the result is stored into *path (void return).
+ *
+ * Input is assumed already properly sorted.
+ */
+void
+cost_windowagg(Path *path, PlannerInfo *root,	/* root is not referenced in the body */
+			   int numWindowFuncs, int numPartCols, int numOrderCols,
+			   Cost input_startup_cost, Cost input_total_cost,
+			   double input_tuples)
+{
+	Cost		startup_cost;
+	Cost		total_cost;
+
+	startup_cost = input_startup_cost;	/* passed through unchanged: nothing is added to startup */
+	total_cost = input_total_cost;
+
+	/*
+	 * We charge one cpu_operator_cost per window function per tuple (often a
+	 * drastic underestimate, but without a way to gauge how many tuples the
+	 * window function will fetch, it's hard to do better).  We also charge
+	 * cpu_operator_cost per grouping column per tuple for grouping
+	 * comparisons, plus cpu_tuple_cost per tuple for general overhead.
+	 */
+	total_cost += cpu_operator_cost * input_tuples * numWindowFuncs;	/* per window function */
+	total_cost += cpu_operator_cost * input_tuples * (numPartCols + numOrderCols);	/* per partition/order column */
+	total_cost += cpu_tuple_cost * input_tuples;	/* general per-tuple overhead */
+
+	path->startup_cost = startup_cost;	/* only these two Path fields are written */
+	path->total_cost = total_cost;
+}
+
+/*
  * cost_group
  *		Determines and returns the cost of performing a Group plan node,
  *		including the cost of its input.
@@ -2155,6 +2189,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
 	 * Vars and Consts are charged zero, and so are boolean operators (AND,
 	 * OR, NOT). Simplistic, but a lot better than no model at all.
 	 *
+	 * Note that Aggref and WindowFunc nodes are (and should be) treated
+	 * like Vars --- whatever execution cost they have is absorbed into
+	 * plan-node-specific costing.  As far as expression evaluation is
+	 * concerned they're just like Vars.
+	 *
 	 * Should we try to account for the possibility of short-circuit
 	 * evaluation of AND/OR?
Probably *not*, because that would make the * results depend on the clause ordering, and we are not in any position diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 3d35eb605d9..5f6d219a01a 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.14 2008/12/01 21:06:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.15 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -438,14 +438,16 @@ get_eclass_for_sort_expr(PlannerInfo *root, /* * add_eq_member doesn't check for volatile functions, set-returning - * functions, or aggregates, but such could appear in sort expressions; so - * we have to check whether its const-marking was correct. + * functions, aggregates, or window functions, but such could appear + * in sort expressions; so we have to check whether its const-marking + * was correct. 
*/ if (newec->ec_has_const) { if (newec->ec_has_volatile || expression_returns_set((Node *) expr) || - contain_agg_clause((Node *) expr)) + contain_agg_clause((Node *) expr) || + contain_window_function((Node *) expr)) { newec->ec_has_const = false; newem->em_is_const = false; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index f5d4f41c032..b53b5e1470e 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.252 2008/11/20 19:52:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.253 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3237,8 +3237,8 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, * anything for Aggref nodes; this is okay since they are really * comparable to Vars. * - * See notes in grouping_planner about why this routine and make_group are - * the only ones in this file that worry about tlist eval cost. + * See notes in grouping_planner about why only make_agg, make_windowagg + * and make_group worry about tlist eval cost. 
+	 */
 	if (qual)
 	{
@@ -3260,6 +3260,53 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
 	return node;
 }
 
+WindowAgg *
+make_windowagg(PlannerInfo *root, List *tlist, int numWindowFuncs,
+			   int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
+			   int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+			   Plan *lefttree)	/* input plan; installed as plan->lefttree below */
+{
+	WindowAgg  *node = makeNode(WindowAgg);
+	Plan	   *plan = &node->plan;
+	Path		windowagg_path; /* dummy for result of cost_windowagg */
+	QualCost	qual_cost;
+
+	node->partNumCols = partNumCols;	/* the caller's arrays are stored by pointer, not copied */
+	node->partColIdx = partColIdx;
+	node->partOperators = partOperators;
+	node->ordNumCols = ordNumCols;
+	node->ordColIdx = ordColIdx;
+	node->ordOperators = ordOperators;
+
+	copy_plan_costsize(plan, lefttree); /* only care about copying size */
+	cost_windowagg(&windowagg_path, root,
+				   numWindowFuncs, partNumCols, ordNumCols,
+				   lefttree->startup_cost,
+				   lefttree->total_cost,
+				   lefttree->plan_rows);
+	plan->startup_cost = windowagg_path.startup_cost;	/* overwrite the costs just copied from lefttree */
+	plan->total_cost = windowagg_path.total_cost;
+
+	/*
+	 * We also need to account for the cost of evaluation of the tlist.
+	 *
+	 * See notes in grouping_planner about why only make_agg, make_windowagg
+	 * and make_group worry about tlist eval cost.
+	 */
+	cost_qual_eval(&qual_cost, tlist, root);
+	plan->startup_cost += qual_cost.startup;
+	plan->total_cost += qual_cost.startup;	/* the tlist startup charge is added to both cost fields */
+	plan->total_cost += qual_cost.per_tuple * plan->plan_rows;
+
+	plan->targetlist = tlist;
+	plan->lefttree = lefttree;
+	plan->righttree = NULL;
+	/* WindowAgg nodes never have a qual clause */
+	plan->qual = NIL;
+
+	return node;
+}
+
 Group *
 make_group(PlannerInfo *root,
 		   List *tlist,
@@ -3300,8 +3347,8 @@ make_group(PlannerInfo *root,
 	 * lower plan level and will only be copied by the Group node. Worth
 	 * fixing?
 	 *
-	 * See notes in grouping_planner about why this routine and make_agg are
-	 * the only ones in this file that worry about tlist eval cost.
+ * See notes in grouping_planner about why only make_agg, make_windowagg + * and make_group worry about tlist eval cost. */ if (qual) { diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 8a6b2ad0345..f0f17d5f950 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.43 2008/08/25 22:42:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.44 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -95,11 +95,11 @@ optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path) /* * Reject unoptimizable cases. * - * We don't handle GROUP BY, because our current implementations of - * grouping require looking at all the rows anyway, and so there's not - * much point in optimizing MIN/MAX. + * We don't handle GROUP BY or windowing, because our current + * implementations of grouping require looking at all the rows anyway, + * and so there's not much point in optimizing MIN/MAX. */ - if (parse->groupClause) + if (parse->groupClause || parse->hasWindowFuncs) return NULL; /* diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 0a1d1d1559f..a8ea043a697 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.112 2008/10/22 20:17:51 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.113 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,9 +67,9 @@ * PlannerInfo field and not a passed parameter is that the low-level routines * in indxpath.c need to see it.) 
* - * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys, - * and sort_pathkeys, which like query_pathkeys need to be canonicalized once - * the info is available. + * Note: the PlannerInfo node also includes group_pathkeys, window_pathkeys, + * distinct_pathkeys, and sort_pathkeys, which like query_pathkeys need to be + * canonicalized once the info is available. * * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) @@ -121,6 +121,8 @@ query_planner(PlannerInfo *root, List *tlist, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); + root->window_pathkeys = canonicalize_pathkeys(root, + root->window_pathkeys); root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, @@ -228,11 +230,12 @@ query_planner(PlannerInfo *root, List *tlist, /* * We have completed merging equivalence sets, so it's now possible to * convert the requested query_pathkeys to canonical form. Also - * canonicalize the groupClause, distinctClause and sortClause pathkeys - * for use later. + * canonicalize the groupClause, windowClause, distinctClause and + * sortClause pathkeys for use later. */ root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); + root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys); root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys); root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys); @@ -287,10 +290,12 @@ query_planner(PlannerInfo *root, List *tlist, * If both GROUP BY and ORDER BY are specified, we will need two * levels of sort --- and, therefore, certainly need to read all the * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if - * we have both DISTINCT and GROUP BY. 
+ * we have both DISTINCT and GROUP BY, or if we have a window + * specification not compatible with the GROUP BY. */ if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) || - !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys)) + !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) || + !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys)) tuple_fraction = 0.0; } else if (parse->hasAggs || root->hasHavingQual) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 7f91309032a..b4b578d5973 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.247 2008/12/18 18:20:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.248 2008/12/28 18:53:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -82,6 +82,18 @@ static void locate_grouping_columns(PlannerInfo *root, List *sub_tlist, AttrNumber *groupColIdx); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); +static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); +static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, bool canonicalize); +static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, + int numSortCols, AttrNumber *sortColIdx, + int *partNumCols, + AttrNumber **partColIdx, + Oid **partOperators, + int *ordNumCols, + AttrNumber **ordColIdx, + Oid **ordOperators); /***************************************************************************** @@ -852,6 +864,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) AggClauseCounts agg_counts; int numGroupCols; bool use_hashed_grouping = false; + WindowFuncLists *wflists = NULL; + List *activeWindows = NIL; MemSet(&agg_counts, 0, 
sizeof(AggClauseCounts)); @@ -867,6 +881,22 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) tlist = preprocess_targetlist(root, tlist); /* + * Locate any window functions in the tlist. (We don't need to look + * anywhere else, since expressions used in ORDER BY will be in there + * too.) Note that they could all have been eliminated by constant + * folding, in which case we don't need to do any more work. + */ + if (parse->hasWindowFuncs) + { + wflists = find_window_functions((Node *) tlist, + list_length(parse->windowClause)); + if (wflists->numWindowFuncs > 0) + activeWindows = select_active_windows(root, wflists); + else + parse->hasWindowFuncs = false; + } + + /* * Generate appropriate target list for subplan; may be different from * tlist if grouping or aggregation is needed. */ @@ -890,6 +920,19 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) else root->group_pathkeys = NIL; + /* We consider only the first (bottom) window in pathkeys logic */ + if (activeWindows != NIL) + { + WindowClause *wc = (WindowClause *) linitial(activeWindows); + + root->window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist, + false); + } + else + root->window_pathkeys = NIL; + if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) root->distinct_pathkeys = @@ -927,11 +970,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Figure out whether we want a sorted result from query_planner. * * If we have a sortable GROUP BY clause, then we want a result sorted - * properly for grouping. Otherwise, if there's a sortable DISTINCT - * clause that's more rigorous than the ORDER BY clause, we try to - * produce output that's sufficiently well sorted for the DISTINCT. - * Otherwise, if there is an ORDER BY clause, we want to sort by the - * ORDER BY clause. + * properly for grouping. Otherwise, if we have window functions to + * evaluate, we try to sort for the first window. 
Otherwise, if + * there's a sortable DISTINCT clause that's more rigorous than the + * ORDER BY clause, we try to produce output that's sufficiently well + * sorted for the DISTINCT. Otherwise, if there is an ORDER BY + * clause, we want to sort by the ORDER BY clause. * * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a * superset of GROUP BY, it would be tempting to request sort by ORDER @@ -942,6 +986,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; + else if (root->window_pathkeys) + root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) root->query_pathkeys = root->distinct_pathkeys; @@ -1092,10 +1138,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * * Below this point, any tlist eval cost for added-on nodes * should be accounted for as we create those nodes. - * Presently, of the node types we can add on, only Agg and - * Group project new tlists (the rest just copy their input - * tuples) --- so make_agg() and make_group() are responsible - * for computing the added cost. + * Presently, of the node types we can add on, only Agg, + * WindowAgg, and Group project new tlists (the rest just copy + * their input tuples) --- so make_agg(), make_windowagg() and + * make_group() are responsible for computing the added cost. */ cost_qual_eval(&tlist_cost, sub_tlist, root); result_plan->startup_cost += tlist_cost.startup; @@ -1225,6 +1271,142 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) NULL); } } /* end of non-minmax-aggregate case */ + + /* + * Since each window function could require a different sort order, + * we stack up a WindowAgg node for each window, with sort steps + * between them as needed. 
+ */ + if (activeWindows) + { + List *window_tlist; + ListCell *l; + + /* + * If the top-level plan node is one that cannot do expression + * evaluation, we must insert a Result node to project the + * desired tlist. (In some cases this might not really be + * required, but it's not worth trying to avoid it.) Note that + * on second and subsequent passes through the following loop, + * the top-level node will be a WindowAgg which we know can + * project; so we only need to check once. + */ + if (!is_projection_capable_plan(result_plan)) + { + result_plan = (Plan *) make_result(root, + NIL, + NULL, + result_plan); + } + + /* + * The "base" targetlist for all steps of the windowing process + * is a flat tlist of all Vars and Aggs needed in the result. + * (In some cases we wouldn't need to propagate all of these + * all the way to the top, since they might only be needed as + * inputs to WindowFuncs. It's probably not worth trying to + * optimize that though.) As we climb up the stack, we add + * outputs for the WindowFuncs computed at each level. Also, + * each input tlist has to present all the columns needed to + * sort the data for the next WindowAgg step. That's handled + * internally by make_sort_from_pathkeys, but we need the + * copyObject steps here to ensure that each plan node has + * a separately modifiable tlist. + */ + window_tlist = flatten_tlist(tlist); + if (parse->hasAggs) + window_tlist = add_to_flat_tlist(window_tlist, + pull_agg_clause((Node *) tlist)); + result_plan->targetlist = (List *) copyObject(window_tlist); + + foreach(l, activeWindows) + { + WindowClause *wc = (WindowClause *) lfirst(l); + List *window_pathkeys; + int partNumCols; + AttrNumber *partColIdx; + Oid *partOperators; + int ordNumCols; + AttrNumber *ordColIdx; + Oid *ordOperators; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist, + true); + + /* + * This is a bit tricky: we build a sort node even if we don't + * really have to sort. 
Even when no explicit sort is needed, + * we need to have suitable resjunk items added to the input + * plan's tlist for any partitioning or ordering columns that + * aren't plain Vars. Furthermore, this way we can use + * existing infrastructure to identify which input columns are + * the interesting ones. + */ + if (window_pathkeys) + { + Sort *sort_plan; + + sort_plan = make_sort_from_pathkeys(root, + result_plan, + window_pathkeys, + -1.0); + if (!pathkeys_contained_in(window_pathkeys, + current_pathkeys)) + { + /* we do indeed need to sort */ + result_plan = (Plan *) sort_plan; + current_pathkeys = window_pathkeys; + } + /* In either case, extract the per-column information */ + get_column_info_for_window(root, wc, tlist, + sort_plan->numCols, + sort_plan->sortColIdx, + &partNumCols, + &partColIdx, + &partOperators, + &ordNumCols, + &ordColIdx, + &ordOperators); + } + else + { + /* empty window specification, nothing to sort */ + partNumCols = 0; + partColIdx = NULL; + partOperators = NULL; + ordNumCols = 0; + ordColIdx = NULL; + ordOperators = NULL; + } + + if (lnext(l)) + { + /* Add the current WindowFuncs to the running tlist */ + window_tlist = add_to_flat_tlist(window_tlist, + wflists->windowFuncs[wc->winref]); + } + else + { + /* Install the original tlist in the topmost WindowAgg */ + window_tlist = tlist; + } + + /* ... and make the WindowAgg plan node */ + result_plan = (Plan *) + make_windowagg(root, + (List *) copyObject(window_tlist), + list_length(wflists->windowFuncs[wc->winref]), + partNumCols, + partColIdx, + partOperators, + ordNumCols, + ordColIdx, + ordOperators, + result_plan); + } + } } /* end of if (setOperations) */ /* @@ -2030,7 +2212,8 @@ make_subplanTargetList(PlannerInfo *root, * If we're not grouping or aggregating, there's nothing to do here; * query_planner should receive the unmodified target list. 
*/ - if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual) + if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual && + !parse->hasWindowFuncs) { *need_tlist_eval = true; return tlist; @@ -2039,7 +2222,9 @@ make_subplanTargetList(PlannerInfo *root, /* * Otherwise, start with a "flattened" tlist (having just the vars * mentioned in the targetlist and HAVING qual --- but not upper-level - * Vars; they will be replaced by Params later on). + * Vars; they will be replaced by Params later on). Note this includes + * vars used in resjunk items, so we are covering the needs of ORDER BY + * and window specifications. */ sub_tlist = flatten_tlist(tlist); extravars = pull_var_clause(parse->havingQual, true); @@ -2066,7 +2251,7 @@ make_subplanTargetList(PlannerInfo *root, { SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te = NULL; + TargetEntry *te; /* * Find or make a matching sub_tlist entry. If the groupexpr @@ -2074,20 +2259,10 @@ make_subplanTargetList(PlannerInfo *root, * won't make multiple groupClause entries for the same TLE.) */ if (groupexpr && IsA(groupexpr, Var)) - { - ListCell *sl; - - foreach(sl, sub_tlist) - { - TargetEntry *lte = (TargetEntry *) lfirst(sl); + te = tlist_member(groupexpr, sub_tlist); + else + te = NULL; - if (equal(groupexpr, lte->expr)) - { - te = lte; - break; - } - } - } if (!te) { te = makeTargetEntry((Expr *) groupexpr, @@ -2112,7 +2287,7 @@ make_subplanTargetList(PlannerInfo *root, * * This is only needed if we don't use the sub_tlist chosen by * make_subplanTargetList. We have to forget the column indexes found - * by that routine and re-locate the grouping vars in the real sub_tlist. + * by that routine and re-locate the grouping exprs in the real sub_tlist. 
  */
 static void
 locate_grouping_columns(PlannerInfo *root,
@@ -2137,18 +2312,10 @@ locate_grouping_columns(PlannerInfo *root,
 	{
 		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
 		Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
-		TargetEntry *te = NULL;
-		ListCell   *sl;
+		TargetEntry *te = tlist_member(groupexpr, sub_tlist);
 
-		foreach(sl, sub_tlist)
-		{
-			te = (TargetEntry *) lfirst(sl);
-			if (equal(groupexpr, te->expr))
-				break;
-		}
-		if (!sl)
+		if (!te)
 			elog(ERROR, "failed to locate grouping columns");
-
 		groupColIdx[keyno++] = te->resno;
 	}
 }
@@ -2190,3 +2357,219 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
 		elog(ERROR, "resjunk output columns are not implemented");
 	return new_tlist;
 }
+
+/*
+ * select_active_windows
+ *		Create a list of the "active" window clauses (ie, those referenced
+ *		by non-deleted WindowFuncs) in the order they are to be executed.
+ */
+static List *
+select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
+{
+	List	   *result;
+	List	   *actives;
+	ListCell   *lc;
+
+	/* First, make a list of the active windows */
+	actives = NIL;
+	foreach(lc, root->parse->windowClause)
+	{
+		WindowClause *wc = (WindowClause *) lfirst(lc);
+
+		/* It's only active if wflists shows some related WindowFuncs */
+		Assert(wc->winref <= wflists->maxWinRef);	/* winref is used as an index into windowFuncs[] */
+		if (wflists->windowFuncs[wc->winref] != NIL)
+			actives = lappend(actives, wc);
+	}
+
+	/*
+	 * Now, ensure that windows with identical partitioning/ordering clauses
+	 * are adjacent in the list.  This is required by the SQL standard, which
+	 * says that only one sort is to be used for such windows, even if they
+	 * are otherwise distinct (eg, different names or framing clauses).
+	 *
+	 * There is room to be much smarter here, for example detecting whether
+	 * one window's sort keys are a prefix of another's (so that sorting
+	 * for the latter would do for the former), or putting windows first
+	 * that match a sort order available for the underlying query.  For the
+	 * moment we are content with meeting the spec.
+	 */
+	result = NIL;
+	while (actives != NIL)
+	{
+		WindowClause *wc = (WindowClause *) linitial(actives);
+		ListCell   *prev;	/* last cell kept in actives; handed to list_delete_cell */
+		ListCell   *next;	/* successor of lc, saved before lc can be deleted */
+
+		/* Move wc from actives to result */
+		actives = list_delete_first(actives);
+		result = lappend(result, wc);
+
+		/* Now move any matching windows from actives to result */
+		prev = NULL;
+		for (lc = list_head(actives); lc; lc = next)
+		{
+			WindowClause *wc2 = (WindowClause *) lfirst(lc);
+
+			next = lnext(lc);	/* must be fetched first: lc may be removed just below */
+			if (equal(wc->partitionClause, wc2->partitionClause) &&
+				equal(wc->orderClause, wc2->orderClause))
+			{
+				actives = list_delete_cell(actives, lc, prev);	/* prev is left unchanged on deletion */
+				result = lappend(result, wc2);
+			}
+			else
+				prev = lc;	/* lc stays in actives, so it becomes the predecessor */
+		}
+	}
+
+	return result;
+}
+
+/*
+ * make_pathkeys_for_window
+ *		Create a pathkeys list describing the required input ordering
+ *		for the given WindowClause.
+ *
+ * The required ordering is first the PARTITION keys, then the ORDER keys.
+ * In the future we might try to implement windowing using hashing, in which
+ * case the ordering could be relaxed, but for now we always sort.
+ */
+static List *
+make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
+						 List *tlist, bool canonicalize)	/* canonicalize is passed straight to make_pathkeys_for_sortclauses */
+{
+	List	   *window_pathkeys;
+	List	   *window_sortclauses;
+
+	/* Throw error if can't sort: both the PARTITION BY and the ORDER BY lists are checked */
+	if (!grouping_is_sortable(wc->partitionClause))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("could not implement window PARTITION BY"),
+				 errdetail("Window partitioning columns must be of sortable datatypes.")));
+	if (!grouping_is_sortable(wc->orderClause))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("could not implement window ORDER BY"),
+				 errdetail("Window ordering columns must be of sortable datatypes.")));
+
+	/* Okay, make the combined pathkeys */
+	window_sortclauses = list_concat(list_copy(wc->partitionClause),	/* partition keys come first */
+									 list_copy(wc->orderClause));	/* both inputs are copies; the result is freed below */
+	window_pathkeys = make_pathkeys_for_sortclauses(root,
+													window_sortclauses,
+													tlist,
+													canonicalize);
+	list_free(window_sortclauses);	/* discard the temporary combined list */
+	return window_pathkeys;
+}
+
+/*----------
+ * get_column_info_for_window
+ *		Get the partitioning/ordering column numbers and equality operators
+ *		for a WindowAgg node.
+ *
+ * This depends on the behavior of make_pathkeys_for_window()!
+ *
+ * We are given the target WindowClause and an array of the input column
+ * numbers associated with the resulting pathkeys.  In the easy case, there
+ * are the same number of pathkey columns as partitioning + ordering columns
+ * and we just have to copy some data around.  However, it's possible that
+ * some of the original partitioning + ordering columns were eliminated as
+ * redundant during the transformation to pathkeys.  (This can happen even
+ * though the parser gets rid of obvious duplicates.  A typical scenario is a
+ * window specification "PARTITION BY x ORDER BY y" coupled with a clause
+ * "WHERE x = y" that causes the two sort columns to be recognized as
+ * redundant.)  In that unusual case, we have to work a lot harder to
+ * determine which keys are significant.
+ *
+ * The method used here is a bit brute-force: add the sort columns to a list
+ * one at a time and note when the resulting pathkey list gets longer.  But
+ * it's a sufficiently uncommon case that a faster way doesn't seem worth
+ * the amount of code refactoring that'd be needed.
+ *----------
+ */
+static void
+get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
+						   int numSortCols, AttrNumber *sortColIdx,	/* in: column numbers of the matching sort step */
+						   int *partNumCols,			/* out: number of partitioning columns */
+						   AttrNumber **partColIdx,		/* out: their input column numbers */
+						   Oid **partOperators,			/* out: their equality operators */
+						   int *ordNumCols,				/* out: number of ordering columns */
+						   AttrNumber **ordColIdx,		/* out: their input column numbers */
+						   Oid **ordOperators)			/* out: their equality operators */
+{
+	int			numPart = list_length(wc->partitionClause);
+	int			numOrder = list_length(wc->orderClause);
+
+	if (numSortCols == numPart + numOrder)
+	{
+		/* easy case */
+		*partNumCols = numPart;
+		*partColIdx = sortColIdx;	/* aliases the caller's array; nothing is copied */
+		*partOperators = extract_grouping_ops(wc->partitionClause);
+		*ordNumCols = numOrder;
+		*ordColIdx = sortColIdx + numPart;	/* ordering columns follow the partitioning ones */
+		*ordOperators = extract_grouping_ops(wc->orderClause);
+	}
+	else
+	{
+		List	   *sortclauses;
+		List	   *pathkeys;
+		int			scidx;
+		ListCell   *lc;
+
+		/* first, allocate what's certainly enough space for the arrays */
+		*partNumCols = 0;
+		*partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber));
+		*partOperators = (Oid *) palloc(numPart * sizeof(Oid));
+		*ordNumCols = 0;
+		*ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber));
+		*ordOperators = (Oid *) palloc(numOrder * sizeof(Oid));
+		sortclauses = NIL;
+		pathkeys = NIL;
+		scidx = 0;
+		foreach(lc, wc->partitionClause)
+		{
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+			List	   *new_pathkeys;
+
+			sortclauses = lappend(sortclauses, sgc);
+			new_pathkeys = make_pathkeys_for_sortclauses(root,
+														 sortclauses,
+														 tlist,
+														 true);
+			if (list_length(new_pathkeys) > list_length(pathkeys))
+			{
+				/* this sort clause is actually significant */
+				(*partColIdx)[*partNumCols] = sortColIdx[scidx++];	/* parens required: [] binds tighter than unary * */
+				(*partOperators)[*partNumCols] = sgc->eqop;
+				(*partNumCols)++;
+				pathkeys = new_pathkeys;
+			}
+		}
+		foreach(lc, wc->orderClause)
+		{
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+			List	   *new_pathkeys;
+
+			sortclauses = lappend(sortclauses, sgc);
+			new_pathkeys = make_pathkeys_for_sortclauses(root,
+														 sortclauses,
+														 tlist,
+														 true);
+			if (list_length(new_pathkeys) > list_length(pathkeys))
+			{
+				/* this sort clause is actually significant */
+				(*ordColIdx)[*ordNumCols] = sortColIdx[scidx++];	/* index the allocated array, not the out-pointer itself */
+				(*ordOperators)[*ordNumCols] = sgc->eqop;
+				(*ordNumCols)++;
+				pathkeys = new_pathkeys;
+			}
+		}
+		/* complain if we didn't eat exactly the right number of sort cols */
+		if (scidx != numSortCols)
+			elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators");
+	}
+}
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 9bec109f6f5..83447082f5b 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.146 2008/10/21 20:42:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/setrefs.c,v 1.147 2008/12/28 18:53:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -415,6 +415,7 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
 			}
 			break;
 		case T_Agg:
+		case T_WindowAgg:
 		case T_Group:
 			set_upper_references(glob, plan, rtoffset);
 			break;
@@ -679,6 +680,11 @@ fix_expr_common(PlannerGlobal *glob, Node *node)
 		record_plan_function_dependency(glob,
 										((Aggref *) node)->aggfnoid);
 	}
+	else if (IsA(node, WindowFunc))
+	{
+		record_plan_function_dependency(glob,
+										((WindowFunc *) node)->winfnoid);
+	}
 	else if (IsA(node, FuncExpr))
 	{
 		record_plan_function_dependency(glob,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c999fb6419c..a38f8c09ae7 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994,
Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.143 2008/12/08 00:16:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.144 2008/12/28 18:53:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1243,6 +1243,7 @@ simplify_EXISTS_query(Query *query) query->intoClause || query->setOperations || query->hasAggs || + query->hasWindowFuncs || query->havingQual || query->limitOffset || query->limitCount || @@ -1258,13 +1259,14 @@ simplify_EXISTS_query(Query *query) /* * Otherwise, we can throw away the targetlist, as well as any GROUP, - * DISTINCT, and ORDER BY clauses; none of those clauses will change - * a nonzero-rows result to zero rows or vice versa. (Furthermore, + * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will + * change a nonzero-rows result to zero rows or vice versa. (Furthermore, * since our parsetree representation of these clauses depends on the * targetlist, we'd better throw them away if we drop the targetlist.) */ query->targetList = NIL; query->groupClause = NIL; + query->windowClause = NIL; query->distinctClause = NIL; query->sortClause = NIL; query->hasDistinctOn = false; @@ -1321,8 +1323,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, * The rest of the sub-select must not refer to any Vars of the parent * query. (Vars of higher levels should be okay, though.) * - * Note: we need not check for Aggs separately because we know the - * sub-select is as yet unoptimized; any uplevel Agg must therefore + * Note: we need not check for Aggrefs separately because we know the + * sub-select is as yet unoptimized; any uplevel Aggref must therefore * contain an uplevel Var reference. This is not the case below ... 
*/ if (contain_vars_of_level((Node *) subselect, 1)) @@ -1432,7 +1434,8 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, /* * And there can't be any child Vars in the stuff we intend to pull up. * (Note: we'd need to check for child Aggs too, except we know the - * child has no aggs at all because of simplify_EXISTS_query's check.) + * child has no aggs at all because of simplify_EXISTS_query's check. + * The same goes for window functions.) */ if (contain_vars_of_level((Node *) leftargs, 0)) return NULL; @@ -1955,6 +1958,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params) case T_RecursiveUnion: case T_Hash: case T_Agg: + case T_WindowAgg: case T_SeqScan: case T_Material: case T_Sort: diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index e4d508523e1..80a51d80786 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -16,7 +16,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.60 2008/11/11 19:05:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.61 2008/12/28 18:53:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -742,7 +742,10 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, * Miscellaneous housekeeping. */ parse->hasSubLinks |= subquery->hasSubLinks; - /* subquery won't be pulled up if it hasAggs, so no work there */ + /* + * subquery won't be pulled up if it hasAggs or hasWindowFuncs, so no + * work needed on those flags + */ /* * Return the adjusted subquery jointree to replace the RangeTblRef entry @@ -931,6 +934,7 @@ is_simple_subquery(Query *subquery) * limiting, or WITH. 
(XXX WITH could possibly be allowed later) */ if (subquery->hasAggs || + subquery->hasWindowFuncs || subquery->groupClause || subquery->havingQual || subquery->sortClause || diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index bd7c05cc53d..f3a49cf9dee 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.162 2008/11/15 19:43:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.163 2008/12/28 18:53:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -136,6 +136,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, Assert(parse->jointree->quals == NULL); Assert(parse->groupClause == NIL); Assert(parse->havingQual == NULL); + Assert(parse->windowClause == NIL); Assert(parse->distinctClause == NIL); /* diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 3c74831f4da..ee45f32abbb 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.271 2008/12/18 18:20:34 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.272 2008/12/28 18:53:57 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -72,7 +72,9 @@ typedef struct } substitute_actual_srf_parameters_context; static bool contain_agg_clause_walker(Node *node, void *context); +static bool pull_agg_clause_walker(Node *node, List **context); static bool count_agg_clauses_walker(Node *node, AggClauseCounts *counts); +static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); static bool expression_returns_set_rows_walker(Node *node, double *count); static bool contain_subplans_walker(Node *node, void *context); static bool 
contain_mutable_functions_walker(Node *node, void *context); @@ -389,6 +391,41 @@ contain_agg_clause_walker(Node *node, void *context) } /* + * pull_agg_clause + * Recursively search for Aggref nodes within a clause. + * + * Returns a List of all Aggrefs found. + * + * This does not descend into subqueries, and so should be used only after + * reduction of sublinks to subplans, or in contexts where it's known there + * are no subqueries. There mustn't be outer-aggregate references either. + */ +List * +pull_agg_clause(Node *clause) +{ + List *result = NIL; + + (void) pull_agg_clause_walker(clause, &result); + return result; +} + +static bool +pull_agg_clause_walker(Node *node, List **context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Assert(((Aggref *) node)->agglevelsup == 0); + *context = lappend(*context, node); + return false; /* no need to descend into arguments */ + } + Assert(!IsA(node, SubLink)); + return expression_tree_walker(node, pull_agg_clause_walker, + (void *) context); +} + +/* * count_agg_clauses * Recursively count the Aggref nodes in an expression tree. * @@ -520,6 +557,79 @@ count_agg_clauses_walker(Node *node, AggClauseCounts *counts) /***************************************************************************** + * Window-function clause manipulation + *****************************************************************************/ + +/* + * contain_window_function + * Recursively search for WindowFunc nodes within a clause. + * + * Since window functions don't have level fields, but are hard-wired to + * be associated with the current query level, this is just the same as + * rewriteManip.c's function. + */ +bool +contain_window_function(Node *clause) +{ + return checkExprHasWindowFuncs(clause); +} + +/* + * find_window_functions + * Locate all the WindowFunc nodes in an expression tree, and organize + * them by winref ID number. + * + * Caller must provide an upper bound on the winref IDs expected in the tree. 
+ */ +WindowFuncLists * +find_window_functions(Node *clause, Index maxWinRef) +{ + WindowFuncLists *lists = palloc(sizeof(WindowFuncLists)); + + lists->numWindowFuncs = 0; + lists->maxWinRef = maxWinRef; + lists->windowFuncs = (List **) palloc0((maxWinRef + 1) * sizeof(List *)); + (void) find_window_functions_walker(clause, lists); + return lists; +} + +static bool +find_window_functions_walker(Node *node, WindowFuncLists *lists) +{ + if (node == NULL) + return false; + if (IsA(node, WindowFunc)) + { + WindowFunc *wfunc = (WindowFunc *) node; + + /* winref is unsigned, so one-sided test is OK */ + if (wfunc->winref > lists->maxWinRef) + elog(ERROR, "WindowFunc contains out-of-range winref %u", + wfunc->winref); + lists->windowFuncs[wfunc->winref] = + lappend(lists->windowFuncs[wfunc->winref], wfunc); + lists->numWindowFuncs++; + + /* + * Complain if the window function's arguments contain window functions + */ + if (contain_window_function((Node *) wfunc->args)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window function calls cannot be nested"))); + + /* + * Having checked that, we need not recurse into the argument. 
+ */ + return false; + } + Assert(!IsA(node, SubLink)); + return expression_tree_walker(node, find_window_functions_walker, + (void *) lists); +} + + +/***************************************************************************** * Support for expressions returning sets *****************************************************************************/ @@ -567,6 +677,8 @@ expression_returns_set_rows_walker(Node *node, double *count) /* Avoid recursion for some cases that can't return a set */ if (IsA(node, Aggref)) return false; + if (IsA(node, WindowFunc)) + return false; if (IsA(node, DistinctExpr)) return false; if (IsA(node, ScalarArrayOpExpr)) @@ -897,6 +1009,11 @@ contain_nonstrict_functions_walker(Node *node, void *context) /* an aggregate could return non-null with null input */ return true; } + if (IsA(node, WindowFunc)) + { + /* a window function could return non-null with null input */ + return true; + } if (IsA(node, ArrayRef)) { /* array assignment is nonstrict, but subscripting is strict */ @@ -1589,7 +1706,8 @@ is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK) * not-constant expressions, namely aggregates (Aggrefs). In current usage * this is only applied to WHERE clauses and so a check for Aggrefs would be * a waste of cycles; but be sure to also check contain_agg_clause() if you - * want to know about pseudo-constness in other contexts. + * want to know about pseudo-constness in other contexts. The same goes + * for window functions (WindowFuncs). 
*/ bool is_pseudo_constant_clause(Node *clause) @@ -3472,6 +3590,7 @@ inline_function(Oid funcid, Oid result_type, List *args, querytree->utilityStmt || querytree->intoClause || querytree->hasAggs || + querytree->hasWindowFuncs || querytree->hasSubLinks || querytree->cteList || querytree->rtable || @@ -3479,6 +3598,7 @@ inline_function(Oid funcid, Oid result_type, List *args, querytree->jointree->quals || querytree->groupClause || querytree->havingQual || + querytree->windowClause || querytree->distinctClause || querytree->sortClause || querytree->limitOffset || diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 968f4ae367a..aab3d032b12 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.83 2008/10/21 20:42:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.84 2008/12/28 18:53:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -101,28 +101,28 @@ flatten_tlist(List *tlist) /* * add_to_flat_tlist - * Add more vars to a flattened tlist (if they're not already in it) + * Add more items to a flattened tlist (if they're not already in it) * * 'tlist' is the flattened tlist - * 'vars' is a list of Var and/or PlaceHolderVar nodes + * 'exprs' is a list of expressions (usually, but not necessarily, Vars) * * Returns the extended tlist. */ List * -add_to_flat_tlist(List *tlist, List *vars) +add_to_flat_tlist(List *tlist, List *exprs) { int next_resno = list_length(tlist) + 1; - ListCell *v; + ListCell *lc; - foreach(v, vars) + foreach(lc, exprs) { - Node *var = (Node *) lfirst(v); + Node *expr = (Node *) lfirst(lc); - if (!tlist_member(var, tlist)) + if (!tlist_member(expr, tlist)) { TargetEntry *tle; - tle = makeTargetEntry(copyObject(var), /* copy needed?? */ + tle = makeTargetEntry(copyObject(expr), /* copy needed?? 
*/ next_resno++, NULL, false); diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index cdac02b71db..70688655cce 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -17,7 +17,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.384 2008/12/13 02:00:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.385 2008/12/28 18:53:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -306,6 +306,9 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt) qry->hasAggs = pstate->p_hasAggs; if (pstate->p_hasAggs) parseCheckAggregates(pstate, qry); + qry->hasWindowFuncs = pstate->p_hasWindowFuncs; + if (pstate->p_hasWindowFuncs) + parseCheckWindowFuncs(pstate, qry); return qry; } @@ -673,6 +676,12 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) errmsg("cannot use aggregate function in VALUES"), parser_errposition(pstate, locate_agg_of_level((Node *) qry, 0)))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in VALUES"), + parser_errposition(pstate, + locate_windowfunc((Node *) qry)))); return qry; } @@ -764,6 +773,9 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) /* make FOR UPDATE/FOR SHARE info available to addRangeTableEntry */ pstate->p_locking_clause = stmt->lockingClause; + /* make WINDOW info available for window functions, too */ + pstate->p_windowdefs = stmt->windowClause; + /* process the WITH clause */ if (stmt->withClause) { @@ -803,7 +815,8 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->groupClause = transformGroupClause(pstate, stmt->groupClause, &qry->targetList, - qry->sortClause); + qry->sortClause, + false); if (stmt->distinctClause == NIL) { @@ -834,6 +847,11 @@ 
transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->limitCount = transformLimitClause(pstate, stmt->limitCount, "LIMIT"); + /* transform window clauses after we have seen all window functions */ + qry->windowClause = transformWindowDefinitions(pstate, + pstate->p_windowdefs, + &qry->targetList); + /* handle any SELECT INTO/CREATE TABLE AS spec */ if (stmt->intoClause) { @@ -849,6 +867,9 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt) qry->hasAggs = pstate->p_hasAggs; if (pstate->p_hasAggs || qry->groupClause || qry->havingQual) parseCheckAggregates(pstate, qry); + qry->hasWindowFuncs = pstate->p_hasWindowFuncs; + if (pstate->p_hasWindowFuncs) + parseCheckWindowFuncs(pstate, qry); foreach(l, stmt->lockingClause) { @@ -889,6 +910,7 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt) Assert(stmt->whereClause == NULL); Assert(stmt->groupClause == NIL); Assert(stmt->havingClause == NULL); + Assert(stmt->windowClause == NIL); Assert(stmt->op == SETOP_NONE); /* process the WITH clause */ @@ -1061,6 +1083,12 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt) errmsg("cannot use aggregate function in VALUES"), parser_errposition(pstate, locate_agg_of_level((Node *) newExprsLists, 0)))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in VALUES"), + parser_errposition(pstate, + locate_windowfunc((Node *) newExprsLists)))); return qry; } @@ -1289,6 +1317,9 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) qry->hasAggs = pstate->p_hasAggs; if (pstate->p_hasAggs || qry->groupClause || qry->havingQual) parseCheckAggregates(pstate, qry); + qry->hasWindowFuncs = pstate->p_hasWindowFuncs; + if (pstate->p_hasWindowFuncs) + parseCheckWindowFuncs(pstate, qry); foreach(l, lockingClause) { @@ -1623,6 +1654,12 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) errmsg("cannot use aggregate function in UPDATE"), parser_errposition(pstate, 
locate_agg_of_level((Node *) qry, 0)))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in UPDATE"), + parser_errposition(pstate, + locate_windowfunc((Node *) qry)))); /* * Now we are done with SELECT-like processing, and can get on with @@ -1692,6 +1729,7 @@ transformReturningList(ParseState *pstate, List *returningList) List *rlist; int save_next_resno; bool save_hasAggs; + bool save_hasWindowFuncs; int length_rtable; if (returningList == NIL) @@ -1708,6 +1746,8 @@ transformReturningList(ParseState *pstate, List *returningList) /* save other state so that we can detect disallowed stuff */ save_hasAggs = pstate->p_hasAggs; pstate->p_hasAggs = false; + save_hasWindowFuncs = pstate->p_hasWindowFuncs; + pstate->p_hasWindowFuncs = false; length_rtable = list_length(pstate->p_rtable); /* transform RETURNING identically to a SELECT targetlist */ @@ -1722,6 +1762,12 @@ transformReturningList(ParseState *pstate, List *returningList) errmsg("cannot use aggregate function in RETURNING"), parser_errposition(pstate, locate_agg_of_level((Node *) rlist, 0)))); + if (pstate->p_hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in RETURNING"), + parser_errposition(pstate, + locate_windowfunc((Node *) rlist)))); /* no new relation references please */ if (list_length(pstate->p_rtable) != length_rtable) @@ -1748,6 +1794,7 @@ transformReturningList(ParseState *pstate, List *returningList) /* restore state */ pstate->p_next_resno = save_next_resno; pstate->p_hasAggs = save_hasAggs; + pstate->p_hasWindowFuncs = save_hasWindowFuncs; return rlist; } @@ -1883,6 +1930,10 @@ CheckSelectLocking(Query *qry) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SELECT FOR UPDATE/SHARE is not allowed with aggregate functions"))); + if (qry->hasWindowFuncs) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("SELECT FOR UPDATE/SHARE is 
not allowed with window functions"))); } /* diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 29eab503198..59b7ada7b43 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.647 2008/12/20 16:02:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.648 2008/12/28 18:53:58 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -158,6 +158,7 @@ static TypeName *TableFuncTypeName(List *columns); DefElem *defelt; OptionDefElem *optdef; SortBy *sortby; + WindowDef *windef; JoinExpr *jexpr; IndexElem *ielem; Alias *alias; @@ -402,6 +403,10 @@ static TypeName *TableFuncTypeName(List *columns); %type <with> with_clause %type <list> cte_list +%type <list> window_clause window_definition_list opt_partition_clause +%type <windef> window_definition over_clause window_specification +%type <str> opt_existing_window_name + /* * If you make any token changes, update the keyword table in @@ -431,8 +436,8 @@ static TypeName *TableFuncTypeName(List *columns); DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DESC DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP - EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT EXCLUDING - EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT + EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT + EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOR FORCE FOREIGN FORWARD FREEZE FROM FULL FUNCTION @@ -461,9 +466,9 @@ static TypeName *TableFuncTypeName(List *columns); NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF NULLS_P NUMERIC OBJECT_P OF OFF OFFSET OIDS OLD ON ONLY OPERATOR OPTION OPTIONS OR - ORDER OUT_P OUTER_P OVERLAPS OVERLAY OWNED OWNER + ORDER OUT_P OUTER_P OVER OVERLAPS OVERLAY OWNED OWNER - PARSER PARTIAL PASSWORD PLACING PLANS POSITION + PARSER PARTIAL PARTITION PASSWORD PLACING PLANS POSITION 
PRECISION PRESERVE PREPARE PREPARED PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE @@ -489,7 +494,7 @@ static TypeName *TableFuncTypeName(List *columns); VACUUM VALID VALIDATOR VALUE_P VALUES VARCHAR VARIADIC VARYING VERBOSE VERSION_P VIEW VOLATILE - WHEN WHERE WHITESPACE_P WITH WITHOUT WORK WRAPPER WRITE + WHEN WHERE WHITESPACE_P WINDOW WITH WITHOUT WORK WRAPPER WRITE XML_P XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLFOREST XMLPARSE XMLPI XMLROOT XMLSERIALIZE @@ -523,7 +528,15 @@ static TypeName *TableFuncTypeName(List *columns); %nonassoc BETWEEN %nonassoc IN_P %left POSTFIXOP /* dummy for postfix Op rules */ -%nonassoc IDENT /* to support target_el without AS */ +/* + * To support target_el without AS, we must give IDENT an explicit priority + * between POSTFIXOP and Op. We can safely assign the same priority to + * various unreserved keywords as needed to resolve ambiguities (this can't + * have any bad effects since obviously the keywords will still behave the + * same as if they weren't keywords). We need to do this for PARTITION + * to support opt_existing_window_name. + */ +%nonassoc IDENT PARTITION %left Op OPERATOR /* multi-character ops and user-defined operators */ %nonassoc NOTNULL %nonassoc ISNULL @@ -1259,7 +1272,7 @@ opt_boolean: * - an integer or floating point number * - a time interval per SQL99 * ColId gives reduce/reduce errors against ConstInterval and LOCAL, - * so use IDENT and reject anything which is a reserved word. + * so use IDENT (meaning we reject anything that is a key word). */ zone_value: Sconst @@ -3466,6 +3479,11 @@ old_aggr_list: old_aggr_elem { $$ = list_make1($1); } | old_aggr_list ',' old_aggr_elem { $$ = lappend($1, $3); } ; +/* + * Must use IDENT here to avoid reduce/reduce conflicts; fortunately none of + * the item names needed in old aggregate definitions are likely to become + * SQL keywords. 
+ */ old_aggr_elem: IDENT '=' def_arg { $$ = makeDefElem($1, (Node *)$3); @@ -6825,7 +6843,7 @@ select_clause: simple_select: SELECT opt_distinct target_list into_clause from_clause where_clause - group_clause having_clause + group_clause having_clause window_clause { SelectStmt *n = makeNode(SelectStmt); n->distinctClause = $2; @@ -6835,6 +6853,7 @@ simple_select: n->whereClause = $6; n->groupClause = $7; n->havingClause = $8; + n->windowClause = $9; $$ = (Node *)n; } | values_clause { $$ = $1; } @@ -8076,6 +8095,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @2; $$ = (Node *) n; } @@ -8135,6 +8155,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @4; $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~~", $1, (Node *) n, @2); } @@ -8148,6 +8169,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @5; $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~~", $1, (Node *) n, @2); } @@ -8161,6 +8183,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @4; $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~~*", $1, (Node *) n, @2); } @@ -8174,6 +8197,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @5; $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~~*", $1, (Node *) n, @2); } @@ -8186,6 +8210,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @2; $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~", $1, (Node *) n, @2); } @@ -8197,6 +8222,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @5; $$ = (Node *) 
makeSimpleA_Expr(AEXPR_OP, "~", $1, (Node *) n, @2); } @@ -8208,6 +8234,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @5; $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~", $1, (Node *) n, @2); } @@ -8219,6 +8246,7 @@ a_expr: c_expr { $$ = $1; } n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @6; $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "!~", $1, (Node *) n, @2); } @@ -8622,7 +8650,7 @@ c_expr: columnref { $$ = $1; } * (Note that many of the special SQL functions wouldn't actually make any * sense as functional index entries, but we ignore that consideration here.) */ -func_expr: func_name '(' ')' +func_expr: func_name '(' ')' over_clause { FuncCall *n = makeNode(FuncCall); n->funcname = $1; @@ -8630,10 +8658,11 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = $4; n->location = @1; $$ = (Node *)n; } - | func_name '(' expr_list ')' + | func_name '(' expr_list ')' over_clause { FuncCall *n = makeNode(FuncCall); n->funcname = $1; @@ -8641,10 +8670,11 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = $5; n->location = @1; $$ = (Node *)n; } - | func_name '(' VARIADIC a_expr ')' + | func_name '(' VARIADIC a_expr ')' over_clause { FuncCall *n = makeNode(FuncCall); n->funcname = $1; @@ -8652,10 +8682,11 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = TRUE; + n->over = $6; n->location = @1; $$ = (Node *)n; } - | func_name '(' expr_list ',' VARIADIC a_expr ')' + | func_name '(' expr_list ',' VARIADIC a_expr ')' over_clause { FuncCall *n = makeNode(FuncCall); n->funcname = $1; @@ -8663,10 +8694,11 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = TRUE; + n->over = $8; n->location = @1; $$ = (Node *)n; } - | 
func_name '(' ALL expr_list ')' + | func_name '(' ALL expr_list ')' over_clause { FuncCall *n = makeNode(FuncCall); n->funcname = $1; @@ -8678,10 +8710,11 @@ func_expr: func_name '(' ')' * for that in FuncCall at the moment. */ n->func_variadic = FALSE; + n->over = $6; n->location = @1; $$ = (Node *)n; } - | func_name '(' DISTINCT expr_list ')' + | func_name '(' DISTINCT expr_list ')' over_clause { FuncCall *n = makeNode(FuncCall); n->funcname = $1; @@ -8689,10 +8722,11 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = TRUE; n->func_variadic = FALSE; + n->over = $6; n->location = @1; $$ = (Node *)n; } - | func_name '(' '*' ')' + | func_name '(' '*' ')' over_clause { /* * We consider AGGREGATE(*) to invoke a parameterless @@ -8710,6 +8744,7 @@ func_expr: func_name '(' ')' n->agg_star = TRUE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = $5; n->location = @1; $$ = (Node *)n; } @@ -8769,6 +8804,7 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @1; $$ = (Node *)n; } @@ -8839,6 +8875,7 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @1; $$ = (Node *)n; } @@ -8850,6 +8887,7 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @1; $$ = (Node *)n; } @@ -8861,6 +8899,7 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @1; $$ = (Node *)n; } @@ -8872,6 +8911,7 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @1; $$ = (Node *)n; } @@ -8883,6 +8923,7 @@ func_expr: func_name '(' ')' n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = @1; $$ = (Node *)n; } @@ -8894,6 
/*
 * Window Definitions
 */

/* Optional WINDOW clause: yields the list of definitions, or NIL if absent. */
window_clause:
			WINDOW window_definition_list			{ $$ = $2; }
			| /*EMPTY*/								{ $$ = NIL; }
		;

/* Comma-separated list of one or more window definitions. */
window_definition_list:
			window_definition						{ $$ = list_make1($1); }
			| window_definition_list ',' window_definition
													{ $$ = lappend($1, $3); }
		;

/* "name AS ( ... )": label the WindowDef built by window_specification. */
window_definition:
			ColId AS window_specification
				{
					WindowDef *n = $3;
					n->name = $1;
					$$ = n;
				}
		;

/*
 * OVER clause attached to a function call.  It is either an inline
 * specification, a bare reference to a window by name (recorded in refname,
 * with empty partition and order lists), or absent (NULL).
 */
over_clause: OVER window_specification
				{ $$ = $2; }
			| OVER ColId
				{
					WindowDef *n = makeNode(WindowDef);
					n->name = NULL;
					n->refname = $2;
					n->partitionClause = NIL;
					n->orderClause = NIL;
					n->location = @2;
					$$ = n;
				}
			| /*EMPTY*/
				{ $$ = NULL; }
		;

/*
 * Parenthesized window specification.  The resulting WindowDef is unnamed
 * here; window_definition fills in the name when there is one.
 */
window_specification: '(' opt_existing_window_name opt_partition_clause
						opt_sort_clause ')'
				{
					WindowDef *n = makeNode(WindowDef);
					n->name = NULL;
					n->refname = $2;
					n->partitionClause = $3;
					n->orderClause = $4;
					n->location = @1;
					$$ = n;
				}
		;

/*
 * If we see PARTITION, RANGE, or ROWS as the first token after the '('
 * of a window_specification, we want the assumption to be that there is
 * no existing_window_name; but those keywords are unreserved and so could
 * be ColIds.  We fix this by making them have the same precedence as IDENT
 * and giving the empty production here a slightly higher precedence, so
 * that the shift/reduce conflict is resolved in favor of reducing the rule.
 * These keywords are thus precluded from being an existing_window_name but
 * are not reserved for any other purpose.
 * (RANGE/ROWS are not an issue as of 8.4 for lack of frame_clause support.)
 */
opt_existing_window_name: ColId						{ $$ = $1; }
			| /*EMPTY*/				%prec Op		{ $$ = NULL; }
		;

/* Optional PARTITION BY list: the expression list, or NIL if absent. */
opt_partition_clause: PARTITION BY expr_list		{ $$ = $3; }
			| /*EMPTY*/								{ $$ = NIL; }
		;
*/ @@ -9961,6 +10083,7 @@ unreserved_keyword: | OWNER | PARSER | PARTIAL + | PARTITION | PASSWORD | PLANS | PREPARE @@ -10139,6 +10262,7 @@ type_func_name_keyword: | NATURAL | NOTNULL | OUTER_P + | OVER | OVERLAPS | RIGHT | SIMILAR @@ -10229,6 +10353,7 @@ reserved_keyword: | VARIADIC | WHEN | WHERE + | WINDOW | WITH ; @@ -10451,6 +10576,7 @@ makeOverlaps(List *largs, List *rargs, int location) n->agg_star = FALSE; n->agg_distinct = FALSE; n->func_variadic = FALSE; + n->over = NULL; n->location = location; return n; } diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index bf7b1f6ad2e..c3ad852258b 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.206 2008/12/19 16:25:17 petere Exp $ + * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.207 2008/12/28 18:53:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -287,12 +287,14 @@ const ScanKeyword ScanKeywords[] = { {"order", ORDER, RESERVED_KEYWORD}, {"out", OUT_P, COL_NAME_KEYWORD}, {"outer", OUTER_P, TYPE_FUNC_NAME_KEYWORD}, + {"over", OVER, TYPE_FUNC_NAME_KEYWORD}, {"overlaps", OVERLAPS, TYPE_FUNC_NAME_KEYWORD}, {"overlay", OVERLAY, COL_NAME_KEYWORD}, {"owned", OWNED, UNRESERVED_KEYWORD}, {"owner", OWNER, UNRESERVED_KEYWORD}, {"parser", PARSER, UNRESERVED_KEYWORD}, {"partial", PARTIAL, UNRESERVED_KEYWORD}, + {"partition", PARTITION, UNRESERVED_KEYWORD}, {"password", PASSWORD, UNRESERVED_KEYWORD}, {"placing", PLACING, RESERVED_KEYWORD}, {"plans", PLANS, UNRESERVED_KEYWORD}, @@ -411,6 +413,7 @@ const ScanKeyword ScanKeywords[] = { {"when", WHEN, RESERVED_KEYWORD}, {"where", WHERE, RESERVED_KEYWORD}, {"whitespace", WHITESPACE_P, UNRESERVED_KEYWORD}, + {"window", WINDOW, RESERVED_KEYWORD}, {"with", WITH, RESERVED_KEYWORD}, {"without", WITHOUT, UNRESERVED_KEYWORD}, {"work", WORK, UNRESERVED_KEYWORD}, diff --git 
a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index e2645462d57..6dba470e39f 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * parse_agg.c - * handle aggregates in parser + * handle aggregates and window functions in parser * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.84 2008/10/04 21:56:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.85 2008/12/28 18:53:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -67,7 +67,8 @@ transformAggregateCall(ParseState *pstate, Aggref *agg) */ if (min_varlevel == 0) { - if (checkExprHasAggs((Node *) agg->args)) + if (pstate->p_hasAggs && + checkExprHasAggs((Node *) agg->args)) ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("aggregate function calls cannot be nested"), @@ -75,6 +76,15 @@ transformAggregateCall(ParseState *pstate, Aggref *agg) locate_agg_of_level((Node *) agg->args, 0)))); } + /* It can't contain window functions either */ + if (pstate->p_hasWindowFuncs && + checkExprHasWindowFuncs((Node *) agg->args)) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("aggregate function calls cannot contain window function calls"), + parser_errposition(pstate, + locate_windowfunc((Node *) agg->args)))); + if (min_varlevel < 0) min_varlevel = 0; agg->agglevelsup = min_varlevel; @@ -85,6 +95,98 @@ transformAggregateCall(ParseState *pstate, Aggref *agg) pstate->p_hasAggs = true; } +/* + * transformWindowFuncCall - + * Finish initial transformation of a window function call + * + * parse_func.c has recognized the function as a window function, and has set + * up all the fields of the WindowFunc except winref. 
Here we must (1) add + * the WindowDef to the pstate (if not a duplicate of one already present) and + * set winref to link to it; and (2) mark p_hasWindowFuncs true in the pstate. + * Unlike aggregates, only the most closely nested pstate level need be + * considered --- there are no "outer window functions" per SQL spec. + */ +void +transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, + WindowDef *windef) +{ + /* + * A window function call can't contain another one (but aggs are OK). + * XXX is this required by spec, or just an unimplemented feature? + */ + if (pstate->p_hasWindowFuncs && + checkExprHasWindowFuncs((Node *) wfunc->args)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window function calls cannot be nested"), + parser_errposition(pstate, + locate_windowfunc((Node *) wfunc->args)))); + + /* + * If the OVER clause just specifies a reference name, find that + * WINDOW clause (which had better be present). Otherwise, try to + * match all the properties of the OVER clause, and make a new entry + * in the p_windowdefs list if no luck. + */ + Assert(!windef->name); + if (windef->refname && + windef->partitionClause == NIL && + windef->orderClause == NIL) + { + Index winref = 0; + ListCell *lc; + + foreach(lc, pstate->p_windowdefs) + { + WindowDef *refwin = (WindowDef *) lfirst(lc); + + winref++; + if (refwin->name && strcmp(refwin->name, windef->refname) == 0) + { + wfunc->winref = winref; + break; + } + } + if (lc == NULL) /* didn't find it? 
*/ + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("window \"%s\" does not exist", windef->refname), + parser_errposition(pstate, windef->location))); + } + else + { + Index winref = 0; + ListCell *lc; + + foreach(lc, pstate->p_windowdefs) + { + WindowDef *refwin = (WindowDef *) lfirst(lc); + + winref++; + if (refwin->refname && windef->refname && + strcmp(refwin->refname, windef->refname) == 0) + /* matched on refname */ ; + else if (!refwin->refname && !windef->refname) + /* matched, no refname */ ; + else + continue; + if (equal(refwin->partitionClause, windef->partitionClause) && + equal(refwin->orderClause, windef->orderClause)) + { + /* found a duplicate window specification */ + wfunc->winref = winref; + break; + } + } + if (lc == NULL) /* didn't find it? */ + { + pstate->p_windowdefs = lappend(pstate->p_windowdefs, windef); + wfunc->winref = list_length(pstate->p_windowdefs); + } + } + + pstate->p_hasWindowFuncs = true; +} /* * parseCheckAggregates @@ -207,6 +309,11 @@ parseCheckAggregates(ParseState *pstate, Query *qry) /* * Check the targetlist and HAVING clause for ungrouped variables. + * + * Note: because we check resjunk tlist elements as well as regular ones, + * this will also find ungrouped variables that came from ORDER BY and + * WINDOW clauses. For that matter, it's also going to examine the + * grouping expressions themselves --- but they'll all pass the test ... */ clause = (Node *) qry->targetList; if (hasJoinRTEs) @@ -226,11 +333,94 @@ parseCheckAggregates(ParseState *pstate, Query *qry) if (pstate->p_hasAggs && hasSelfRefRTEs) ereport(ERROR, (errcode(ERRCODE_INVALID_RECURSION), - errmsg("aggregates not allowed in a recursive query's recursive term"), + errmsg("aggregate functions not allowed in a recursive query's recursive term"), parser_errposition(pstate, locate_agg_of_level((Node *) qry, 0)))); } +/* + * parseCheckWindowFuncs + * Check for window functions where they shouldn't be. 
+ * + * We have to forbid window functions in WHERE, JOIN/ON, HAVING, GROUP BY, + * and window specifications. (Other clauses, such as RETURNING and LIMIT, + * have already been checked.) Transformation of all these clauses must + * be completed already. + */ +void +parseCheckWindowFuncs(ParseState *pstate, Query *qry) +{ + ListCell *l; + + /* This should only be called if we found window functions */ + Assert(pstate->p_hasWindowFuncs); + + if (checkExprHasWindowFuncs(qry->jointree->quals)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window functions not allowed in WHERE clause"), + parser_errposition(pstate, + locate_windowfunc(qry->jointree->quals)))); + if (checkExprHasWindowFuncs((Node *) qry->jointree->fromlist)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window functions not allowed in JOIN conditions"), + parser_errposition(pstate, + locate_windowfunc((Node *) qry->jointree->fromlist)))); + if (checkExprHasWindowFuncs(qry->havingQual)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window functions not allowed in HAVING clause"), + parser_errposition(pstate, + locate_windowfunc(qry->havingQual)))); + + foreach(l, qry->groupClause) + { + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l); + Node *expr; + + expr = get_sortgroupclause_expr(grpcl, qry->targetList); + if (checkExprHasWindowFuncs(expr)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window functions not allowed in GROUP BY clause"), + parser_errposition(pstate, + locate_windowfunc(expr)))); + } + + foreach(l, qry->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(l); + ListCell *l2; + + foreach(l2, wc->partitionClause) + { + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l2); + Node *expr; + + expr = get_sortgroupclause_expr(grpcl, qry->targetList); + if (checkExprHasWindowFuncs(expr)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window functions not allowed in window definition"), + 
parser_errposition(pstate, + locate_windowfunc(expr)))); + } + foreach(l2, wc->orderClause) + { + SortGroupClause *grpcl = (SortGroupClause *) lfirst(l2); + Node *expr; + + expr = get_sortgroupclause_expr(grpcl, qry->targetList); + if (checkExprHasWindowFuncs(expr)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window functions not allowed in window definition"), + parser_errposition(pstate, + locate_windowfunc(expr)))); + } + } +} /* * check_ungrouped_columns - diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 0e5fbfd28ac..df30361f0a5 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.181 2008/10/06 02:12:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.182 2008/12/28 18:53:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,8 +40,14 @@ #define ORDER_CLAUSE 0 #define GROUP_CLAUSE 1 #define DISTINCT_ON_CLAUSE 2 +#define PARTITION_CLAUSE 3 -static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"}; +static const char * const clauseText[] = { + "ORDER BY", + "GROUP BY", + "DISTINCT ON", + "PARTITION BY" +}; static void extractRemainingColumns(List *common_colnames, List *src_colnames, List *src_colvars, @@ -76,6 +82,7 @@ static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, List *grouplist, List *targetlist, int location, bool resolveUnknown); +static WindowClause *findWindowClause(List *wclist, const char *name); /* @@ -555,15 +562,20 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r) * Disallow aggregate functions in the expression. (No reason to postpone * this check until parseCheckAggregates.) 
*/ - if (pstate->p_hasAggs) - { - if (checkExprHasAggs(funcexpr)) - ereport(ERROR, - (errcode(ERRCODE_GROUPING_ERROR), - errmsg("cannot use aggregate function in function expression in FROM"), - parser_errposition(pstate, - locate_agg_of_level(funcexpr, 0)))); - } + if (pstate->p_hasAggs && + checkExprHasAggs(funcexpr)) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("cannot use aggregate function in function expression in FROM"), + parser_errposition(pstate, + locate_agg_of_level(funcexpr, 0)))); + if (pstate->p_hasWindowFuncs && + checkExprHasWindowFuncs(funcexpr)) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("cannot use window function in function expression in FROM"), + parser_errposition(pstate, + locate_windowfunc(funcexpr)))); /* * OK, build an RTE for the function. @@ -1156,16 +1168,28 @@ transformLimitClause(ParseState *pstate, Node *clause, parser_errposition(pstate, locate_var_of_level(qual, 0)))); } - if (checkExprHasAggs(qual)) + if (pstate->p_hasAggs && + checkExprHasAggs(qual)) { ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), /* translator: %s is name of a SQL construct, eg LIMIT */ - errmsg("argument of %s must not contain aggregates", + errmsg("argument of %s must not contain aggregate functions", constructName), parser_errposition(pstate, locate_agg_of_level(qual, 0)))); } + if (pstate->p_hasWindowFuncs && + checkExprHasWindowFuncs(qual)) + { + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + /* translator: %s is name of a SQL construct, eg LIMIT */ + errmsg("argument of %s must not contain window functions", + constructName), + parser_errposition(pstate, + locate_windowfunc(qual)))); + } return qual; } @@ -1234,7 +1258,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause) char *name = strVal(linitial(((ColumnRef *) node)->fields)); int location = ((ColumnRef *) node)->location; - if (clause == GROUP_CLAUSE) + if (clause == GROUP_CLAUSE || clause == PARTITION_CLAUSE) { /* * In 
GROUP BY, we must prefer a match against a FROM-clause @@ -1251,6 +1275,8 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause) * SQL99 do not allow GROUPing BY an outer reference, so this * breaks no cases that are legal per spec, and it seems a more * self-consistent behavior. + * + * Window PARTITION BY clauses should act exactly like GROUP BY. */ if (colNameToVar(pstate, name, true, location) != NULL) name = NULL; @@ -1356,12 +1382,17 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause) * * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. + * + * This is also used for window PARTITION BY clauses (which actually act + * just the same, except for the clause name used in error messages). */ List * transformGroupClause(ParseState *pstate, List *grouplist, - List **targetlist, List *sortClause) + List **targetlist, List *sortClause, + bool isPartition) { List *result = NIL; + int clause = isPartition ? 
PARTITION_CLAUSE : GROUP_CLAUSE; ListCell *gl; foreach(gl, grouplist) @@ -1370,8 +1401,7 @@ transformGroupClause(ParseState *pstate, List *grouplist, TargetEntry *tle; bool found = false; - tle = findTargetlistEntry(pstate, gexpr, - targetlist, GROUP_CLAUSE); + tle = findTargetlistEntry(pstate, gexpr, targetlist, clause); /* Eliminate duplicates (GROUP BY x, x) */ if (targetIsInSortList(tle, InvalidOid, result)) @@ -1452,6 +1482,125 @@ transformSortClause(ParseState *pstate, } /* + * transformWindowDefinitions - + * transform window definitions (WindowDef to WindowClause) + */ +List * +transformWindowDefinitions(ParseState *pstate, + List *windowdefs, + List **targetlist) +{ + List *result = NIL; + Index winref = 0; + ListCell *lc; + + foreach(lc, windowdefs) + { + WindowDef *windef = (WindowDef *) lfirst(lc); + WindowClause *refwc = NULL; + List *partitionClause; + List *orderClause; + WindowClause *wc; + + winref++; + + /* + * Check for duplicate window names. + */ + if (windef->name && + findWindowClause(result, windef->name) != NULL) + ereport(ERROR, + (errcode(ERRCODE_WINDOWING_ERROR), + errmsg("window \"%s\" is already defined", windef->name), + parser_errposition(pstate, windef->location))); + + /* + * If it references a previous window, look that up. + */ + if (windef->refname) + { + refwc = findWindowClause(result, windef->refname); + if (refwc == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("window \"%s\" does not exist", + windef->refname), + parser_errposition(pstate, windef->location))); + } + + /* + * Transform PARTITION and ORDER specs, if any. These are treated + * exactly like top-level GROUP BY and ORDER BY clauses, including + * the special handling of nondefault operator semantics. + */ + orderClause = transformSortClause(pstate, + windef->orderClause, + targetlist, |