summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tom Lane 2005-06-15 16:24:07 +0000
committer Tom Lane 2005-06-15 16:24:07 +0000
commit 0c8241e119358e8be6cf0adb3597ecf7346db14f (patch)
tree 2b16f69ba901700956e7e9805bac12cb113b9328
parent eb1ea398d645106af606e6f93a3e89c8fcdfbdb6 (diff)
Improve hash method for bitmapsets: some examination of actual outputs
shows that adding a circular shift between words greatly improves the distribution of hash outputs.
-rw-r--r-- src/backend/nodes/bitmapset.c 26
1 files changed, 20 insertions, 6 deletions
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 5d8327c108..5285293c72 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -769,22 +769,36 @@ bms_first_member(Bitmapset *a)
*
* Note: we must ensure that any two bitmapsets that are bms_equal() will
* hash to the same value; in practice this means that trailing all-zero
- * words cannot affect the result. Longitudinal XOR provides a reasonable
- * hash value that has this property.
+ * words cannot affect the result. The circular-shift-and-XOR hash method
+ * used here has this property, so long as we work from back to front.
+ *
+ * Note: you might wonder why we bother with the circular shift; at first
+ * glance a straight longitudinal XOR seems as good and much simpler. The
+ * reason is empirical: this gives a better distribution of hash values on
+ * the bitmapsets actually generated by the planner. A common way to have
+ * multiword bitmapsets is "a JOIN b JOIN c JOIN d ...", which gives rise
+ * to rangetables in which base tables and JOIN nodes alternate; so
+ * bitmapsets of base table RT indexes tend to use only odd-numbered or only
+ * even-numbered bits. A straight longitudinal XOR would preserve this
+ * property, leading to a much smaller set of possible outputs than if
+ * we include a shift.
*/
uint32
bms_hash_value(const Bitmapset *a)
{
bitmapword result = 0;
- int nwords;
int wordnum;
- if (a == NULL)
+ if (a == NULL || a->nwords <= 0)
return 0; /* All empty sets hash to 0 */
- nwords = a->nwords;
- for (wordnum = 0; wordnum < nwords; wordnum++)
+ for (wordnum = a->nwords; --wordnum > 0; )
{
result ^= a->words[wordnum];
+ if (result & ((bitmapword) 1 << (BITS_PER_BITMAPWORD - 1)))
+ result = (result << 1) | 1;
+ else
+ result = (result << 1);
}
+ result ^= a->words[0];
return (uint32) result;
}