Improve hash_any() to use word-wide fetches when hashing suitably aligned
data. This makes for a significant speedup at the cost that the results now vary between little-endian and big-endian machines, which forces us to add explicit ORDER BYs in a couple of regression tests to preserve machine-independent comparison results. Also, force initdb by bumping catversion, since the contents of hash indexes will change (at least on big-endian machines). Kenneth Marshall and Tom Lane, based on work from Bob Jenkins. This commit does not adopt Bob's new faster mix() algorithm, however, since we still need to convince ourselves that that doesn't degrade the quality of the hashing.
author: Tom Lane 2008-04-06 16:54:49 +0000
committer: Tom Lane 2008-04-06 16:54:49 +0000
commit: ca99e6a2c9468286904c78f01653ca96cd12cc76 (patch)
tree: bd2a85cd6f33a0b943e309926bf72e88af662dcd
parent: 15bd443a6e9f11a0d045b2be7e8ce22f57cdd366 (diff)
6 files changed, 225 insertions, 53 deletions
diff --git a/contrib/dblink/expected/dblink.out b/contrib/dblink/expected/dblink.out
index ec5284daf4..170d69c286 100644
--- a/contrib/dblink/expected/dblink.out
+++ b/contrib/dblink/expected/dblink.out
@@ -711,11 +711,19 @@ UNION
 UNION
 (SELECT * from dblink_get_result('dtest3') as t3(f1 int, f2 text, f3 text[]))
 ORDER by f1;
-SELECT dblink_get_connections();
- dblink_get_connections 
-------------------------
- {dtest1,dtest2,dtest3}
-(1 row)
+-- dblink_get_connections returns an array with elements in a machine-dependent
+-- ordering, so we must resort to unnesting and sorting for a stable result
+create function unnest(anyarray) returns setof anyelement
+language sql strict immutable as $$
+select $1[i] from generate_series(array_lower($1,1), array_upper($1,1)) as i
+$$;
+SELECT * FROM unnest(dblink_get_connections()) ORDER BY 1;
+ unnest 
+--------
+ dtest1
+ dtest2
+ dtest3
+(3 rows)
 
 SELECT dblink_is_busy('dtest1');
  dblink_is_busy 
diff --git a/contrib/dblink/sql/dblink.sql b/contrib/dblink/sql/dblink.sql
index 1d5f962db8..7e91e9c6db 100644
--- a/contrib/dblink/sql/dblink.sql
+++ b/contrib/dblink/sql/dblink.sql
@@ -340,7 +340,15 @@ UNION
 (SELECT * from dblink_get_result('dtest3') as t3(f1 int, f2 text, f3 text[]))
 ORDER by f1;
 
-SELECT dblink_get_connections();
+-- dblink_get_connections returns an array with elements in a machine-dependent
+-- ordering, so we must resort to unnesting and sorting for a stable result
+create function unnest(anyarray) returns setof anyelement
+language sql strict immutable as $$
+select $1[i] from generate_series(array_lower($1,1), array_upper($1,1)) as i
+$$;
+
+SELECT * FROM unnest(dblink_get_connections()) ORDER BY 1;
+
 SELECT dblink_is_busy('dtest1');
 
 SELECT dblink_disconnect('dtest1');
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index e20ed5017b..979065ef8c 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -199,8 +199,18 @@ hashvarlena(PG_FUNCTION_ARGS)
  * for PostgreSQL by Neil Conway. For more information on this
  * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
  * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
+ *
+ * In the current code, we have adopted an idea from Bob's 2006 update
+ * of his hash function, which is to fetch the data a word at a time when
+ * it is suitably aligned.  This makes for a useful speedup, at the cost
+ * of having to maintain four code paths (aligned vs unaligned, and
+ * little-endian vs big-endian).  Note that we have NOT adopted his newer
+ * mix() function, which is faster but may sacrifice some randomness.
  */
 
+/* Get a bit mask of the bits set in non-uint32 aligned addresses */
+#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
+
 /*----------
  * mix -- mix 3 32-bit values reversibly.
  * For every delta with one or two bits set, and the deltas of all three
@@ -235,6 +245,10 @@ hashvarlena(PG_FUNCTION_ARGS)
  * About 6*len+35 instructions. The best hash table sizes are powers
  * of 2.  There is no need to do mod a prime (mod is sooo slow!).
  * If you need less than 32 bits, use a bitmask.
+ *
+ * Note: we could easily change this function to return a 64-bit hash value
+ * by using the final values of both b and c.  b is perhaps a little less
+ * well mixed than c, however.
  */
 Datum
 hash_any(register const unsigned char *k, register int keylen)
@@ -249,46 +263,188 @@ hash_any(register const unsigned char *k, register int keylen)
 	a = b = 0x9e3779b9;			/* the golden ratio; an arbitrary value */
 	c = 3923095;				/* initialize with an arbitrary value */
 
-	/* handle most of the key */
-	while (len >= 12)
+	/* If the source pointer is word-aligned, we use word-wide fetches */
+	if (((long) k & UINT32_ALIGN_MASK) == 0)
 	{
-		a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
-		b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
-		c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
-		mix(a, b, c);
-		k += 12;
-		len -= 12;
+		/* Code path for aligned source data */
+		register const uint32 *ka = (const uint32 *) k;
+
+		/* handle most of the key */
+		while (len >= 12)
+		{
+			a += ka[0];
+			b += ka[1];
+			c += ka[2];
+			mix(a, b, c);
+			ka += 3;
+			len -= 12;
+		}
+
+		/* handle the last 11 bytes */
+		k = (const unsigned char *) ka;
+		c += keylen;
+#ifdef WORDS_BIGENDIAN
+		switch (len)
+		{
+			case 11:
+				c += ((uint32) k[10] << 8);
+				/* fall through */
+			case 10:
+				c += ((uint32) k[9] << 16);
+				/* fall through */
+			case 9:
+				c += ((uint32) k[8] << 24);
+				/* the lowest byte of c is reserved for the length */
+				/* fall through */
+			case 8:
+				b += ka[1];
+				a += ka[0];
+				break;
+			case 7:
+				b += ((uint32) k[6] << 8);
+				/* fall through */
+			case 6:
+				b += ((uint32) k[5] << 16);
+				/* fall through */
+			case 5:
+				b += ((uint32) k[4] << 24);
+				/* fall through */
+			case 4:
+				a += ka[0];
+				break;
+			case 3:
+				a += ((uint32) k[2] << 8);
+				/* fall through */
+			case 2:
+				a += ((uint32) k[1] << 16);
+				/* fall through */
+			case 1:
+				a += ((uint32) k[0] << 24);
+			/* case 0: nothing left to add */
+		}
+#else /* !WORDS_BIGENDIAN */
+		switch (len)
+		{
+			case 11:
+				c += ((uint32) k[10] << 24);
+				/* fall through */
+			case 10:
+				c += ((uint32) k[9] << 16);
+				/* fall through */
+			case 9:
+				c += ((uint32) k[8] << 8);
+				/* the lowest byte of c is reserved for the length */
+				/* fall through */
+			case 8:
+				b += ka[1];
+				a += ka[0];
+				break;
+			case 7:
+				b += ((uint32) k[6] << 16);
+				/* fall through */
+			case 6:
+				b += ((uint32) k[5] << 8);
+				/* fall through */
+			case 5:
+				b += k[4];
+				/* fall through */
+			case 4:
+				a += ka[0];
+				break;
+			case 3:
+				a += ((uint32) k[2] << 16);
+				/* fall through */
+			case 2:
+				a += ((uint32) k[1] << 8);
+				/* fall through */
+			case 1:
+				a += k[0];
+			/* case 0: nothing left to add */
+		}
+#endif /* WORDS_BIGENDIAN */
 	}
-
-	/* handle the last 11 bytes */
-	c += keylen;
-	switch (len)				/* all the case statements fall through */
+	else
 	{
-		case 11:
-			c += ((uint32) k[10] << 24);
-		case 10:
-			c += ((uint32) k[9] << 16);
-		case 9:
-			c += ((uint32) k[8] << 8);
-			/* the first byte of c is reserved for the length */
-		case 8:
-			b += ((uint32) k[7] << 24);
-		case 7:
-			b += ((uint32) k[6] << 16);
-		case 6:
-			b += ((uint32) k[5] << 8);
-		case 5:
-			b += k[4];
-		case 4:
-			a += ((uint32) k[3] << 24);
-		case 3:
-			a += ((uint32) k[2] << 16);
-		case 2:
-			a += ((uint32) k[1] << 8);
-		case 1:
-			a += k[0];
+		/* Code path for non-aligned source data */
+
+		/* handle most of the key */
+		while (len >= 12)
+		{
+#ifdef WORDS_BIGENDIAN
+			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
+			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
+			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
+#else /* !WORDS_BIGENDIAN */
+			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
+			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
+			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
+#endif /* WORDS_BIGENDIAN */
+			mix(a, b, c);
+			k += 12;
+			len -= 12;
+		}
+
+		/* handle the last 11 bytes */
+		c += keylen;
+#ifdef WORDS_BIGENDIAN
+		switch (len)			/* all the case statements fall through */
+		{
+			case 11:
+				c += ((uint32) k[10] << 8);
+			case 10:
+				c += ((uint32) k[9] << 16);
+			case 9:
+				c += ((uint32) k[8] << 24);
+				/* the lowest byte of c is reserved for the length */
+			case 8:
+				b += k[7];
+			case 7:
+				b += ((uint32) k[6] << 8);
+			case 6:
+				b += ((uint32) k[5] << 16);
+			case 5:
+				b += ((uint32) k[4] << 24);
+			case 4:
+				a += k[3];
+			case 3:
+				a += ((uint32) k[2] << 8);
+			case 2:
+				a += ((uint32) k[1] << 16);
+			case 1:
+				a += ((uint32) k[0] << 24);
 			/* case 0: nothing left to add */
+		}
+#else /* !WORDS_BIGENDIAN */
+		switch (len)			/* all the case statements fall through */
+		{
+			case 11:
+				c += ((uint32) k[10] << 24);
+			case 10:
+				c += ((uint32) k[9] << 16);
+			case 9:
+				c += ((uint32) k[8] << 8);
+				/* the lowest byte of c is reserved for the length */
+			case 8:
+				b += ((uint32) k[7] << 24);
+			case 7:
+				b += ((uint32) k[6] << 16);
+			case 6:
+				b += ((uint32) k[5] << 8);
+			case 5:
+				b += k[4];
+			case 4:
+				a += ((uint32) k[3] << 24);
+			case 3:
+				a += ((uint32) k[2] << 16);
+			case 2:
+				a += ((uint32) k[1] << 8);
+			case 1:
+				a += k[0];
+			/* case 0: nothing left to add */
+		}
+#endif /* WORDS_BIGENDIAN */
 	}
+
 	mix(a, b, c);
 
 	/* report the result */
@@ -298,7 +454,7 @@ hash_any(register const unsigned char *k, register int keylen)
 /*
  * hash_uint32() -- hash a 32-bit value
  *
- * This has the same result (at least on little-endian machines) as
+ * This has the same result as
  *		hash_any(&k, sizeof(uint32))
  * but is faster and doesn't force the caller to store k into memory.
  */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index f112a9d976..798ec85e1c 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200804041
+#define CATALOG_VERSION_NO	200804051
 
 #endif
diff --git a/src/test/regress/expected/portals.out b/src/test/regress/expected/portals.out
index 527550eabd..63b8a8be56 100644
--- a/src/test/regress/expected/portals.out
+++ b/src/test/regress/expected/portals.out
@@ -678,20 +678,20 @@ CLOSE foo12;
 -- leave some cursors open, to test that auto-close works.
 -- record this in the system view as well (don't query the time field there
 -- however)
-SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
+SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
  name  |                               statement                               | is_holdable | is_binary | is_scrollable 
 -------+-----------------------------------------------------------------------+-------------+-----------+---------------
  foo13 | DECLARE foo13 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
+ foo14 | DECLARE foo14 SCROLL CURSOR FOR SELECT * FROM tenk2;                  | f           | f         | t
  foo15 | DECLARE foo15 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
- foo19 | DECLARE foo19 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
+ foo16 | DECLARE foo16 SCROLL CURSOR FOR SELECT * FROM tenk2;                  | f           | f         | t
  foo17 | DECLARE foo17 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
- foo14 | DECLARE foo14 SCROLL CURSOR FOR SELECT * FROM tenk2;                  | f           | f         | t
- foo21 | DECLARE foo21 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
- foo23 | DECLARE foo23 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
  foo18 | DECLARE foo18 SCROLL CURSOR FOR SELECT * FROM tenk2;                  | f           | f         | t
+ foo19 | DECLARE foo19 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
  foo20 | DECLARE foo20 SCROLL CURSOR FOR SELECT * FROM tenk2;                  | f           | f         | t
+ foo21 | DECLARE foo21 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
  foo22 | DECLARE foo22 SCROLL CURSOR FOR SELECT * FROM tenk2;                  | f           | f         | t
- foo16 | DECLARE foo16 SCROLL CURSOR FOR SELECT * FROM tenk2;                  | f           | f         | t
+ foo23 | DECLARE foo23 SCROLL CURSOR FOR SELECT * FROM tenk1 ORDER BY unique2; | f           | f         | t
 (11 rows)
 
 END;
@@ -851,11 +851,11 @@ SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
 (1 row)
 
 DECLARE bc BINARY CURSOR FOR SELECT * FROM tenk1;
-SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
+SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
  name |                              statement                               | is_holdable | is_binary | is_scrollable 
 ------+----------------------------------------------------------------------+-------------+-----------+---------------
- c2   | declare c2 cursor with hold for select count_tt1_v(), count_tt1_s(); | t           | f         | f
  bc   | DECLARE bc BINARY CURSOR FOR SELECT * FROM tenk1;                    | f           | t         | t
+ c2   | declare c2 cursor with hold for select count_tt1_v(), count_tt1_s(); | t           | f         | f
 (2 rows)
 
 ROLLBACK;
diff --git a/src/test/regress/sql/portals.sql b/src/test/regress/sql/portals.sql
index 8275ed78c8..63a689666a 100644
--- a/src/test/regress/sql/portals.sql
+++ b/src/test/regress/sql/portals.sql
@@ -170,7 +170,7 @@ CLOSE foo12;
 
 -- record this in the system view as well (don't query the time field there
 -- however)
-SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
+SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
 
 END;
 
@@ -295,7 +295,7 @@ drop function count_tt1_s();
 BEGIN;
 SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
 DECLARE bc BINARY CURSOR FOR SELECT * FROM tenk1;
-SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors;
+SELECT name, statement, is_holdable, is_binary, is_scrollable FROM pg_cursors ORDER BY 1;
 ROLLBACK;
 
 -- We should not see the portal that is created internally to
author	Tom Lane	2008-04-06 16:54:49 +0000
committer	Tom Lane	2008-04-06 16:54:49 +0000
commit	ca99e6a2c9468286904c78f01653ca96cd12cc76 (patch)
tree	bd2a85cd6f33a0b943e309926bf72e88af662dcd
parent	15bd443a6e9f11a0d045b2be7e8ce22f57cdd366 (diff)