summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane, 2009-03-24 22:06:03 +0000
committer: Tom Lane, 2009-03-24 22:06:03 +0000
commit: ab0ec9767579d7fe8db8cb34528c3e265911aec5 (patch)
tree: 832403ba8f9df142f67735c51f4f93ee8b79d34e
parent: 7b77f24e9f26c3e5945bf819ec2860a33031d618 (diff)
Install a search tree depth limit in GIN bulk-insert operations, to prevent
them from degrading badly when the input is sorted or nearly so. In this scenario, the tree is unbalanced to the point of becoming a mere linked list, so insertions become O(N^2). The easiest and most safely back-patchable solution is to stop growing the tree sooner, i.e., limit the growth of N. We might later consider a rebalancing tree algorithm, but it's not clear that the benefit would be worth the cost and complexity. Per report from Sergey Burladyan and an earlier complaint from Heikki. Back-patch to 8.2; older versions didn't have GIN indexes.
-rw-r--r-- src/backend/access/gin/ginfast.c | 7
-rw-r--r-- src/backend/access/gin/gininsert.c | 4
-rw-r--r-- src/include/access/gin.h | 15
3 files changed, 17 insertions, 9 deletions
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 0c3ee2577c..bad62ad81e 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -749,9 +749,10 @@ ginInsertCleanup(Relation index, GinState *ginstate,
* XXX using up maintenance_work_mem here is probably unreasonably
* much, since vacuum might already be using that much.
*/
- if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
- ( GinPageHasFullRow(page) &&
- accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+ if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+ (GinPageHasFullRow(page) &&
+ (accum.allocatedMemory >= maintenance_work_mem * 1024L ||
+ accum.maxdepth > GIN_MAX_TREE_DEPTH)))
{
ItemPointerData *list;
uint32 nlist;
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index b1ecc127a6..0190b2508f 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -245,7 +245,9 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
&htup->t_self);
/* If we've maxed out our available memory, dump everything to the index */
- if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L)
+ /* Also dump if the tree seems to be getting too unbalanced */
+ if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L ||
+ buildstate->accum.maxdepth > GIN_MAX_TREE_DEPTH)
{
ItemPointerData *list;
Datum entry;
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 346597867d..3bec7071b6 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -27,6 +27,14 @@
#define GINNProcs 5
/*
+ * Max depth allowed in search tree during bulk inserts. This is to keep from
+ * degenerating to O(N^2) behavior when the tree is unbalanced due to sorted
+ * or nearly-sorted input. (Perhaps it would be better to use a balanced-tree
+ * algorithm, but in common cases that would only add useless overhead.)
+ */
+#define GIN_MAX_TREE_DEPTH 100
+
+/*
* Page opaque data in an inverted index page.
*
* Note: GIN does not include a page ID word as do the other index types.
@@ -434,12 +442,9 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf);
/* gindatapage.c */
extern int compareItemPointers(ItemPointer a, ItemPointer b);
-extern void
-MergeItemPointers(
- ItemPointerData *dst,
+extern void MergeItemPointers(ItemPointerData *dst,
ItemPointerData *a, uint32 na,
- ItemPointerData *b, uint32 nb
-);
+ ItemPointerData *b, uint32 nb);
extern void GinDataPageAddItem(Page page, void *data, OffsetNumber offset);
extern void PageDeletePostingItem(Page page, OffsetNumber offset);