author      Tom Lane    2016-07-16 19:30:15 +0000
committer   Tom Lane    2016-07-16 19:30:15 +0000
commit      9563d5b5e4c75e676d73a45546bd47b77c2bd739 (patch)
tree        59b0aacc876ef9710b3f7379d4fee5915f6bfedc
parent      278148907a971ec7fa4ffb24248103d8012155d2 (diff)
Add regression test case exercising the sorting path for hash index build.
We've broken this code path at least twice in the past, so it's prudent to
have a test case that covers it.  To allow exercising the code path without
creating a very large (and slow to run) test case, redefine the sort threshold
to be bounded by maintenance_work_mem as well as the number of available
buffers.  While at it, fix an ancient oversight that when building a temp
index, the number of available buffers is not NBuffers but NLocBuffer.  Also,
if assertions are enabled, apply a direct test that the sort actually does
return the tuples in the expected order.

Peter Geoghegan

Patch: <CAM3SWZTBAo4hjbBd780+MrOKiKp_TMo1N3A0Rw9_im8gbD7fQA@mail.gmail.com>
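As a worked illustration of the new threshold arithmetic (a standalone sketch,
not part of the patch; it assumes the default BLCKSZ of 8192 and the 1MB
maintenance_work_mem used by the new regression test, with the GUC value held
in kilobytes as PostgreSQL stores it):

    #include <stdio.h>

    int
    main(void)
    {
        long    maintenance_work_mem = 1024;    /* kilobytes, i.e. '1MB' */
        long    blcksz = 8192;                  /* assumed default BLCKSZ */
        long    sort_threshold = (maintenance_work_mem * 1024L) / blcksz;

        /* prints 128: builds estimating >= 128 buckets take the sort path */
        printf("sort_threshold = %ld\n", sort_threshold);
        return 0;
    }

With typical buffer-pool defaults this memory bound is far below the buffer
bound, which is what lets the regression test below reach the sort path
cheaply.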
-rw-r--r--  src/backend/access/hash/hash.c               20
-rw-r--r--  src/backend/access/hash/hashsort.c           23
-rw-r--r--  src/test/regress/expected/create_index.out    7
-rw-r--r--  src/test/regress/sql/create_index.sql         7
4 files changed, 51 insertions(+), 6 deletions(-)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 19695ee102..30c82e191c 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -22,6 +22,7 @@
#include "access/relscan.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
+#include "miscadmin.h"
#include "optimizer/plancat.h"
#include "utils/index_selfuncs.h"
#include "utils/rel.h"
@@ -97,6 +98,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
double reltuples;
double allvisfrac;
uint32 num_buckets;
+ long sort_threshold;
HashBuildState buildstate;
/*
@@ -120,12 +122,24 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* then we'll thrash horribly. To prevent that scenario, we can sort the
* tuples by (expected) bucket number. However, such a sort is useless
* overhead when the index does fit in RAM. We choose to sort if the
- * initial index size exceeds NBuffers.
+ * initial index size exceeds maintenance_work_mem, or the number of
+ * buffers usable for the index, whichever is less. (Limiting by the
+ * number of buffers should reduce thrashing between PG buffers and kernel
+ * buffers, which seems useful even if no physical I/O results. Limiting
+ * by maintenance_work_mem is useful to allow easy testing of the sort
+ * code path, and may be useful to DBAs as an additional control knob.)
*
* NOTE: this test will need adjustment if a bucket is ever different from
- * one page.
+ * one page. Also, "initial index size" accounting does not include the
+ * metapage, nor the first bitmap page.
*/
- if (num_buckets >= (uint32) NBuffers)
+ sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
+ if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+ sort_threshold = Min(sort_threshold, NBuffers);
+ else
+ sort_threshold = Min(sort_threshold, NLocBuffer);
+
+ if (num_buckets >= (uint32) sort_threshold)
buildstate.spool = _h_spoolinit(heap, index, num_buckets);
else
buildstate.spool = NULL;
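A minimal standalone sketch of the clamp added above, with hypothetical
stand-ins for the NBuffers and NLocBuffer globals (sized here as if
shared_buffers = 128MB and temp_buffers = 8MB at 8kB pages; the real values
come from the server configuration):

    #include <stdio.h>

    #define Min(x, y)   ((x) < (y) ? (x) : (y))

    static long
    clamp_sort_threshold(long mem_threshold, int is_temp)
    {
        long    nbuffers = 16384;   /* hypothetical: shared_buffers / BLCKSZ */
        long    nlocbuffer = 1024;  /* hypothetical: temp_buffers / BLCKSZ */

        /* temp indexes live in local buffers, so clamp by NLocBuffer */
        return Min(mem_threshold, is_temp ? nlocbuffer : nbuffers);
    }

    int
    main(void)
    {
        /* a large memory threshold shows the buffer clamp taking effect */
        printf("regular index: %ld\n", clamp_sort_threshold(131072, 0)); /* 16384 */
        printf("temp index:    %ld\n", clamp_sort_threshold(131072, 1)); /* 1024 */
        return 0;
    }

Before the fix, a temp index was compared against NBuffers even though it is
cached in the (usually much smaller) local buffer pool, so the sorted path
could be skipped precisely when it would have helped.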
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c
index 51b9f3f792..8938ab5b24 100644
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -37,6 +37,7 @@ struct HSpool
{
Tuplesortstate *sortstate; /* state data for tuplesort.c */
Relation index;
+ uint32 hash_mask; /* bitmask for hash codes */
};
@@ -47,7 +48,6 @@ HSpool *
_h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
{
HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool));
- uint32 hash_mask;
hspool->index = index;
@@ -60,7 +60,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
* we could just compute num_buckets - 1. We prefer not to assume that
* here, though.
*/
- hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
+ hspool->hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
/*
* We size the sort area as maintenance_work_mem rather than work_mem to
@@ -69,7 +69,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
*/
hspool->sortstate = tuplesort_begin_index_hash(heap,
index,
- hash_mask,
+ hspool->hash_mask,
maintenance_work_mem,
false);
@@ -105,12 +105,29 @@ _h_indexbuild(HSpool *hspool)
{
IndexTuple itup;
bool should_free;
+#ifdef USE_ASSERT_CHECKING
+ uint32 hashkey = 0;
+#endif
tuplesort_performsort(hspool->sortstate);
while ((itup = tuplesort_getindextuple(hspool->sortstate,
true, &should_free)) != NULL)
{
+ /*
+ * Technically, it isn't critical that hash keys be found in sorted
+ * order, since this sorting is only used to increase locality of
+ * access as a performance optimization. It still seems like a good
+ * idea to test tuplesort.c's handling of hash index tuple sorts
+ * through an assertion, though.
+ */
+#ifdef USE_ASSERT_CHECKING
+ uint32 lasthashkey = hashkey;
+
+ hashkey = _hash_get_indextuple_hashkey(itup) & hspool->hash_mask;
+ Assert(hashkey >= lasthashkey);
+#endif
+
_hash_doinsert(hspool->index, itup);
if (should_free)
pfree(itup);
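The assertion can be read in isolation as the following sketch, where the
sorted tuple stream is stood in by a plain array of hash codes and the mask
(one less than the bucket count rounded up to a power of two, per
_h_spoolinit) is a hypothetical 0x3:

    #include <assert.h>
    #include <stdint.h>

    static void
    check_bucket_order(const uint32_t *hashcodes, int n, uint32_t hash_mask)
    {
        uint32_t    lasthashkey = 0;

        for (int i = 0; i < n; i++)
        {
            uint32_t    hashkey = hashcodes[i] & hash_mask;

            /* masked keys, i.e. bucket numbers, must never go backwards */
            assert(hashkey >= lasthashkey);
            lasthashkey = hashkey;
        }
    }

    int
    main(void)
    {
        /* masked with 0x3 these yield buckets 0, 1, 2, 3: sorted, so OK */
        uint32_t    codes[] = {0x10000000, 0x20000001, 0x00000002, 0x30000003};

        check_bucket_order(codes, 4, 0x3);
        return 0;
    }

Note that only the masked value, not the raw hash code, needs to be
monotonic; the raw codes above are deliberately out of order.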
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 97cc49e623..0be5cf2dbe 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2346,6 +2346,13 @@ CREATE UNLOGGED TABLE unlogged_hash_table (id int4);
CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops);
DROP TABLE unlogged_hash_table;
-- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
+-- Test hash index build tuplesorting. Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+WARNING: hash indexes are not WAL-logged and their use is discouraged
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
--
-- Test functional index
--
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7c582ea810..7e0bd84ff7 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -690,6 +690,13 @@ DROP TABLE unlogged_hash_table;
-- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
+-- Test hash index build tuplesorting. Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
+
--
-- Test functional index
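The test's sizing works out roughly as follows (rough figures, since the
exact bucket estimate depends on index tuple widths): 1MB of
maintenance_work_mem over 8kB pages gives a sort threshold of 128 pages,
while 10000 NAME keys at fillfactor 10 push the initial bucket estimate well
past that, so the sorted build path is exercised without needing a large or
slow-to-build table.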