@@ -504,29 +504,44 @@ var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation,
504
504
505
505
/*
506
506
* Search is for a value that we do not know a priori, but we will
507
- * assume it is not NULL. Estimate the selectivity as non-null
508
- * fraction divided by number of distinct values, so that we get a
509
- * result averaged over all possible values whether common or
510
- * uncommon. (Essentially, we are assuming that the not-yet-known
511
- * comparison value is equally likely to be any of the possible
512
- * values, regardless of their frequency in the table. Is that a good
513
- * idea?)
507
+ * assume it is not NULL. Estimate the selectivity as the quadratic mean
508
+ * of the non-null fraction divided by the number of distinct values and
509
+ * the set of MCV selectivities. Use the quadratic mean because it also
510
+ * includes the squared deviation (error), and here it is desirable to
511
+ * compute an upper limit of the estimate to prevent, for example, a
512
+ * wrong choice of nested loop.
514
513
*/
515
514
selec = 1.0 - nullfrac ;
516
515
ndistinct = get_variable_numdistinct (vardata , & isdefault );
517
516
if (ndistinct > 1 )
518
517
selec /= ndistinct ;
519
518
520
- /*
521
- * Cross-check: selectivity should never be estimated as more than the
522
- * most common value's.
523
- */
524
519
if (get_attstatsslot (& sslot , vardata -> statsTuple ,
525
520
STATISTIC_KIND_MCV , InvalidOid ,
526
- ATTSTATSSLOT_NUMBERS ))
521
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS ))
527
522
{
528
- if (sslot .nnumbers > 0 && selec > sslot .numbers [0 ])
529
- selec = sslot .numbers [0 ];
523
+ int i ;
524
+ double sum_selec = 0.0 ;
525
+
526
+ /*
527
+ * Compute the quadratic mean, walking the array in reverse direction
528
+ * so as not to lose accuracy. We don't special-case sslot.nnumbers
529
+ * being zero, because in that case the formula just yields the same
530
+ * result; a zero count is unlikely anyway.
531
+ */
532
+ for (i = sslot .nnumbers - 1 ; i >=0 ; i -- )
533
+ sum_selec += sslot .numbers [i ] * sslot .numbers [i ];
534
+
535
+ selec = sqrt ((selec * selec + sum_selec ) /
536
+ ((double )sslot .nnumbers + 1.0 ));
537
+
538
+ /*
539
+ * Cross-check: selectivity should never be estimated as
540
+ * more than the most common value's.
541
+ */
542
+ if (sslot .nnumbers > 0 && selec > sslot .numbers [0 ])
543
+ selec = sslot .numbers [0 ];
544
+
530
545
free_attstatsslot (& sslot );
531
546
}
532
547
}
0 commit comments