@@ -2809,6 +2809,226 @@ cost_agg(Path *path, PlannerInfo *root,
28092809 path -> total_cost = total_cost ;
28102810}
28112811
2812+ /*
2813+ * get_windowclause_startup_tuples
2814+ * Estimate how many tuples we'll need to fetch from a WindowAgg's
2815+ * subnode before we can output the first WindowAgg tuple.
2816+ *
2817+ * How many tuples need to be read depends on the WindowClause. For example,
2818+ * a WindowClause with no PARTITION BY and no ORDER BY requires that all
2819+ * subnode tuples are read and aggregated before the WindowAgg can output
2820+ * anything. If there's a PARTITION BY, then we only need to look at tuples
2821+ * in the first partition. Here we attempt to estimate just how many
2822+ * 'input_tuples' the WindowAgg will need to read for the given WindowClause
2823+ * before the first tuple can be output.
2824+ */
2825+ static double
2826+ get_windowclause_startup_tuples (PlannerInfo * root , WindowClause * wc ,
2827+ double input_tuples )
2828+ {
2829+ int frameOptions = wc -> frameOptions ;
2830+ double partition_tuples ;
2831+ double return_tuples ;
2832+ double peer_tuples ;
2833+
2834+ /*
2835+ * First, figure out how many partitions there are likely to be and set
2836+ * partition_tuples according to that estimate.
2837+ */
2838+ if (wc -> partitionClause != NIL )
2839+ {
2840+ double num_partitions ;
2841+ List * partexprs = get_sortgrouplist_exprs (wc -> partitionClause ,
2842+ root -> parse -> targetList );
2843+
2844+ num_partitions = estimate_num_groups (root , partexprs , input_tuples ,
2845+ NULL , NULL );
2846+ list_free (partexprs );
2847+
2848+ partition_tuples = input_tuples / num_partitions ;
2849+ }
2850+ else
2851+ {
2852+ /* all tuples belong to the same partition */
2853+ partition_tuples = input_tuples ;
2854+ }
2855+
2856+ /* estimate the number of tuples in each peer group */
2857+ if (wc -> orderClause != NIL )
2858+ {
2859+ double num_groups ;
2860+ List * orderexprs ;
2861+
2862+ orderexprs = get_sortgrouplist_exprs (wc -> orderClause ,
2863+ root -> parse -> targetList );
2864+
2865+ /* estimate out how many peer groups there are in the partition */
2866+ num_groups = estimate_num_groups (root , orderexprs ,
2867+ partition_tuples , NULL ,
2868+ NULL );
2869+ list_free (orderexprs );
2870+ peer_tuples = partition_tuples / num_groups ;
2871+ }
2872+ else
2873+ {
2874+ /* no ORDER BY so only 1 tuple belongs in each peer group */
2875+ peer_tuples = 1.0 ;
2876+ }
2877+
2878+ if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING )
2879+ {
2880+ /* include all partition rows */
2881+ return_tuples = partition_tuples ;
2882+ }
2883+ else if (frameOptions & FRAMEOPTION_END_CURRENT_ROW )
2884+ {
2885+ if (frameOptions & FRAMEOPTION_ROWS )
2886+ {
2887+ /* just count the current row */
2888+ return_tuples = 1.0 ;
2889+ }
2890+ else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS ))
2891+ {
2892+ /*
2893+ * When in RANGE/GROUPS mode, it's more complex. If there's no
2894+ * ORDER BY, then all rows in the partition are peers, otherwise
2895+ * we'll need to read the first group of peers.
2896+ */
2897+ if (wc -> orderClause == NIL )
2898+ return_tuples = partition_tuples ;
2899+ else
2900+ return_tuples = peer_tuples ;
2901+ }
2902+ else
2903+ {
2904+ /*
2905+ * Something new we don't support yet? This needs attention.
2906+ * We'll just return 1.0 in the meantime.
2907+ */
2908+ Assert (false);
2909+ return_tuples = 1.0 ;
2910+ }
2911+ }
2912+ else if (frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING )
2913+ {
2914+ /*
2915+ * BETWEEN ... AND N PRECEDING will only need to read the WindowAgg's
2916+ * subnode after N ROWS/RANGES/GROUPS. N can be 0, but not negative,
2917+ * so we'll just assume only the current row needs to be read to fetch
2918+ * the first WindowAgg row.
2919+ */
2920+ return_tuples = 1.0 ;
2921+ }
2922+ else if (frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING )
2923+ {
2924+ Const * endOffset = (Const * ) wc -> endOffset ;
2925+ double end_offset_value ;
2926+
2927+ /* try and figure out the value specified in the endOffset. */
2928+ if (IsA (endOffset , Const ))
2929+ {
2930+ if (endOffset -> constisnull )
2931+ {
2932+ /*
2933+ * NULLs are not allowed, but currently, there's no code to
2934+ * error out if there's a NULL Const. We'll only discover
2935+ * this during execution. For now, just pretend everything is
2936+ * fine and assume that just the current row/range/group will
2937+ * be needed.
2938+ */
2939+ end_offset_value = 1.0 ;
2940+ }
2941+ else
2942+ {
2943+ switch (endOffset -> consttype )
2944+ {
2945+ case INT2OID :
2946+ end_offset_value =
2947+ (double ) DatumGetInt16 (endOffset -> constvalue );
2948+ break ;
2949+ case INT4OID :
2950+ end_offset_value =
2951+ (double ) DatumGetInt32 (endOffset -> constvalue );
2952+ break ;
2953+ case INT8OID :
2954+ end_offset_value =
2955+ (double ) DatumGetInt64 (endOffset -> constvalue );
2956+ break ;
2957+ default :
2958+ end_offset_value =
2959+ partition_tuples / peer_tuples *
2960+ DEFAULT_INEQ_SEL ;
2961+ break ;
2962+ }
2963+ }
2964+ }
2965+ else
2966+ {
2967+ /*
2968+ * When the end bound is not a Const, we'll just need to guess. We
2969+ * just make use of DEFAULT_INEQ_SEL.
2970+ */
2971+ end_offset_value =
2972+ partition_tuples / peer_tuples * DEFAULT_INEQ_SEL ;
2973+ }
2974+
2975+ if (frameOptions & FRAMEOPTION_ROWS )
2976+ {
2977+ /* include the N FOLLOWING and the current row */
2978+ return_tuples = end_offset_value + 1.0 ;
2979+ }
2980+ else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS ))
2981+ {
2982+ /* include N FOLLOWING ranges/group and the initial range/group */
2983+ return_tuples = peer_tuples * (end_offset_value + 1.0 );
2984+ }
2985+ else
2986+ {
2987+ /*
2988+ * Something new we don't support yet? This needs attention.
2989+ * We'll just return 1.0 in the meantime.
2990+ */
2991+ Assert (false);
2992+ return_tuples = 1.0 ;
2993+ }
2994+ }
2995+ else
2996+ {
2997+ /*
2998+ * Something new we don't support yet? This needs attention. We'll
2999+ * just return 1.0 in the meantime.
3000+ */
3001+ Assert (false);
3002+ return_tuples = 1.0 ;
3003+ }
3004+
3005+ if (wc -> partitionClause != NIL || wc -> orderClause != NIL )
3006+ {
3007+ /*
3008+ * Cap the return value to the estimated partition tuples and account
3009+ * for the extra tuple WindowAgg will need to read to confirm the next
3010+ * tuple does not belong to the same partition or peer group.
3011+ */
3012+ return_tuples = Min (return_tuples + 1.0 , partition_tuples );
3013+ }
3014+ else
3015+ {
3016+ /*
3017+ * Cap the return value so it's never higher than the expected tuples
3018+ * in the partition.
3019+ */
3020+ return_tuples = Min (return_tuples , partition_tuples );
3021+ }
3022+
3023+ /*
3024+ * We needn't worry about any EXCLUDE options as those only exclude rows
3025+ * from being aggregated, not from being read from the WindowAgg's
3026+ * subnode.
3027+ */
3028+
3029+ return clamp_row_est (return_tuples );
3030+ }
3031+
28123032/*
28133033 * cost_windowagg
28143034 * Determines and returns the cost of performing a WindowAgg plan node,
@@ -2818,17 +3038,32 @@ cost_agg(Path *path, PlannerInfo *root,
28183038 */
28193039void
28203040cost_windowagg (Path * path , PlannerInfo * root ,
2821- List * windowFuncs , int numPartCols , int numOrderCols ,
3041+ List * windowFuncs , WindowClause * winclause ,
28223042 Cost input_startup_cost , Cost input_total_cost ,
28233043 double input_tuples )
28243044{
28253045 Cost startup_cost ;
28263046 Cost total_cost ;
3047+ double startup_tuples ;
3048+ int numPartCols ;
3049+ int numOrderCols ;
28273050 ListCell * lc ;
28283051
3052+ numPartCols = list_length (winclause -> partitionClause );
3053+ numOrderCols = list_length (winclause -> orderClause );
3054+
28293055 startup_cost = input_startup_cost ;
28303056 total_cost = input_total_cost ;
28313057
3058+ /*
3059+ * Estimate how many tuples we'll need to read from the subnode before we
3060+ * can output the first WindowAgg row.
3061+ */
3062+ startup_tuples = get_windowclause_startup_tuples (root , winclause ,
3063+ input_tuples );
3064+
3065+ elog (DEBUG1 , "startup_tuples = %g" , startup_tuples ); /* XXX not for commit */
3066+
28323067 /*
28333068 * Window functions are assumed to cost their stated execution cost, plus
28343069 * the cost of evaluating their input expressions, per tuple. Since they
@@ -2880,6 +3115,18 @@ cost_windowagg(Path *path, PlannerInfo *root,
28803115 path -> rows = input_tuples ;
28813116 path -> startup_cost = startup_cost ;
28823117 path -> total_cost = total_cost ;
3118+
3119+ /*
3120+ * Also, take into account how many tuples we need to read from the
3121+ * subnode in order to produce the first tuple from the WindowAgg. To do
3122+ * this we proportion the run cost (total cost not including startup cost)
3123+ * over the estimated startup tuples. We already included the startup
3124+ * cost of the subnode, so we only need to do this when the estimated
3125+ * startup tuples is above 1.0.
3126+ */
3127+ if (startup_tuples > 1.0 )
3128+ path -> startup_cost += (total_cost - startup_cost ) / input_tuples *
3129+ (startup_tuples - 1.0 );
28833130}
28843131
28853132/*
0 commit comments