/*-------------------------------------------------------------------------
*
* spgdoinsert.c
* implementation of insert algorithm
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/access/spgist/spgdoinsert.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/spgist_private.h"
#include "access/spgxlog.h"
#include "access/xloginsert.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
/*
* SPPageDesc tracks all info about a page we are inserting into. In some
* situations it actually identifies a tuple, or even a specific node within
* an inner tuple. But any of the fields can be invalid; each field has its
* own "invalid" marker value, noted below.
*
* If the buffer field is valid, it implies we hold pin and exclusive lock on
* that buffer. The page pointer should be valid exactly when the buffer
* field is.
*/
typedef struct SPPageDesc
{
BlockNumber blkno; /* block number, or InvalidBlockNumber */
Buffer buffer; /* page's buffer number, or InvalidBuffer */
Page page; /* pointer to page buffer, or NULL */
OffsetNumber offnum; /* offset of tuple, or InvalidOffsetNumber */
int node; /* node number within inner tuple, or -1 */
} SPPageDesc;
/*
 * Overwrite the item pointer stored in node number nodeN of inner tuple tup
 * so that it refers to (blkno, offset).  This is how a parent inner tuple's
 * downlink gets repointed after a move or split operation.
 *
 * The tuple's nodes are walked in order until the nodeN'th one is reached.
 * If the walk ends without reaching it, an error is raised.
 */
void
spgUpdateNodeLink(SpGistInnerTuple tup, int nodeN,
				  BlockNumber blkno, OffsetNumber offset)
{
	int			nodeIdx;
	SpGistNodeTuple curNode;

	SGITITERATE(tup, nodeIdx, curNode)
	{
		/* keep walking until we reach the requested node */
		if (nodeIdx != nodeN)
			continue;

		ItemPointerSet(&curNode->t_tid, blkno, offset);
		return;
	}

	/* ran off the end of the node array without a match */
	elog(ERROR, "failed to find requested node %d in SPGiST inner tuple",
		 nodeN);
}
/*
 * Build a new inner tuple that has one more node than the given one.  The
 * extra node carries the specified label datum and is placed at position
 * "offset" in the node array; a negative offset means "append at the end".
 * An offset beyond the current number of nodes is rejected with an error.
 * The new tuple's prefix is the same as the old one's.
 *
 * Note that the new node initially has an invalid downlink.  We'll find a
 * page to point it to later.
 */
static SpGistInnerTuple
addNode(SpGistState *state, SpGistInnerTuple tuple, Datum label, int offset)
{
	int			oldCount = tuple->nNodes;
	int			srcIdx;
	SpGistNodeTuple srcNode;
	SpGistNodeTuple *newNodes;
	SpGistInnerTuple result;

	/* a negative offset requests insertion after the last existing node */
	if (offset < 0)
		offset = oldCount;
	else if (offset > oldCount)
		elog(ERROR, "invalid offset for adding node to SPGiST inner tuple");

	newNodes = palloc(sizeof(SpGistNodeTuple) * (oldCount + 1));

	/*
	 * Copy the existing node pointers, shifting everything at or after the
	 * insertion point up by one to leave slot "offset" free.
	 */
	SGITITERATE(tuple, srcIdx, srcNode)
	{
		int			dstIdx = (srcIdx < offset) ? srcIdx : srcIdx + 1;

		newNodes[dstIdx] = srcNode;
	}

	/* fill the gap with the freshly formed node */
	newNodes[offset] = spgFormNodeTuple(state, label, false);

	result = spgFormInnerTuple(state,
							   (tuple->prefixSize > 0),
							   SGITDATUM(tuple, state),
							   oldCount + 1,
							   newNodes);

	return result;
}
/*
 * qsort comparator for sorting OffsetNumbers.
 *
 * Both arguments point at OffsetNumber values; the ordering is whatever
 * pg_cmp_u16() reports for the pair.
 */
static int
cmpOffsetNumbers(const void *a, const void *b)
{
	const OffsetNumber lhs = *(const OffsetNumber *) a;
	const OffsetNumber rhs = *(const OffsetNumber *) b;

	return pg_cmp_u16(lhs, rhs);
}
/*
 * Delete multiple tuples from an index page, preserving tuple offset numbers.
 *
 * Every listed tuple is removed and a dead tuple is put back at the same
 * offset.  The tuple named by itemnos[0] is replaced with a dead tuple of
 * type "firststate" (REDIRECT/DEAD/PLACEHOLDER); all the others are replaced
 * with dead tuples of type "reststate".  If either firststate or reststate
 * is REDIRECT, blkno/offnum specify where to link to.
 *
 * The page's nRedirection and nPlaceholder counters are incremented for each
 * REDIRECT or PLACEHOLDER tuple that is inserted.
 *
 * NB: this is used during WAL replay, so beware of trying to make it too
 * smart.  In particular, it shouldn't use "state" except for calling
 * spgFormDeadTuple().  This is also used in a critical section, so no
 * pallocs either!
 */
void
spgPageIndexMultiDelete(SpGistState *state, Page page,
						OffsetNumber *itemnos, int nitems,
						int firststate, int reststate,
						BlockNumber blkno, OffsetNumber offnum)
{
	OffsetNumber sortedItems[MaxIndexTuplesPerPage];
	OffsetNumber headItem;
	SpGistDeadTuple deadTuple = NULL;
	int			pos;

	/* nothing to do for an empty list */
	if (nitems == 0)
		return;

	/*
	 * For efficiency we want to use PageIndexMultiDelete, which requires the
	 * targets to be listed in sorted order.  (Working in sorted order also
	 * greatly simplifies the math for reinserting the replacement tuples.)
	 * The caller's array must not be scribbled on, so sort a local copy.
	 */
	memcpy(sortedItems, itemnos, sizeof(OffsetNumber) * nitems);
	if (nitems > 1)
		qsort(sortedItems, nitems, sizeof(OffsetNumber), cmpOffsetNumbers);

	PageIndexMultiDelete(page, sortedItems, nitems);

	/* the caller's first entry, not the lowest offset, gets "firststate" */
	headItem = itemnos[0];

	for (pos = 0; pos < nitems; pos++)
	{
		OffsetNumber target = sortedItems[pos];
		int			wantedState;

		if (target == headItem)
			wantedState = firststate;
		else
			wantedState = reststate;

		/* form a new dead tuple only when the required state changes */
		if (deadTuple == NULL || deadTuple->tupstate != wantedState)
			deadTuple = spgFormDeadTuple(state, wantedState, blkno, offnum);

		/* the replacement must land at exactly the offset just vacated */
		if (PageAddItem(page, (Item) deadTuple, deadTuple->size,
						target, false, false) != target)
			elog(ERROR, "failed to add item of size %u to SPGiST index page",
				 deadTuple->size);

		/* keep the page's dead-tuple bookkeeping in step */
		if (wantedState == SPGIST_REDIRECT)
			SpGistPageGetOpaque(page)->nRedirection++;
		else if (wantedState == SPGIST_PLACEHOLDER)
			SpGistPageGetOpaque(page)->nPlaceholder++;
	}
}
/*
 * Point the parent inner tuple's downlink at (blkno, offnum), then mark the
 * parent buffer dirty.  This must be the last change made to the parent
 * page in the current WAL action.
 *
 * The inner tuple is located via parent->page and parent->offnum, and the
 * node whose downlink is rewritten is parent->node.  The "index" argument
 * is not referenced here.
 */
static void
saveNodeLink(Relation index, SPPageDesc *parent,
			 BlockNumber blkno, OffsetNumber offnum)
{
	SpGistInnerTuple parentTuple =
		(SpGistInnerTuple) PageGetItem(parent->page,
									   PageGetItemId(parent->page,
													 parent->offnum));

	spgUpdateNodeLink(parentTuple, parent->node, blkno, offnum);

	MarkBufferDirty(parent->buffer);
}
/*
* Add a leaf tuple to a leaf page where there is known to be room for it
*/
static void
addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
{
spgxlogAddLeaf xlrec;
xlrec.newPage = isNew;
xlrec.storesNulls = isNulls;
/* these will be filled below as needed */
xlrec.offnumLeaf = InvalidOffsetNumber;
xlrec.offnumHeadLeaf = InvalidOffsetNumber;
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
START_CRIT_SECTION();
if (current->offnum == InvalidOffsetNumber ||
SpGistBlockIsRoot(current->blkno))
{
/* Tuple is not part of a chain */
SGLT_SET_NEXTOFFSET(leafTuple, InvalidOffsetNumber);
current->offnum = SpGistPageAddNewItem(state, current->page,
(Item) leafTuple, leafTuple->size,
NULL, false);
xlrec.offnumLeaf = current->offnum;
/* Must update parent's downlink if any */
if (parent->buffer != InvalidBuffer)
{
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum);
}
}
else
{
/*
* Tuple must be inserted into existing chain. We mustn't change the
* chain's head address, but we don't need to chase the entire chain
* to put the tuple at the end; we can insert it second.
*
* Also, it's possible that the "chain" consists only of a DEAD tuple,
* in which case we should replace the DEAD tuple in-place.
*/
SpGistLeafTuple head;
OffsetNumber offnum;
head = (SpGistLeafTuple) PageGetItem(current->page,
PageGetItemId(current->page, current->offnum));
if (head->tupstate == SPGIST_LIVE)
{
SGLT_SET_NEXTOFFSET(leafTuple, SGLT_GET_NEXTOFFSET(head));
offnum = SpGistPageAddNewItem(state, current->page,
(Item) leafTuple, leafTuple->size,
NULL, false);
/*
* re-get head of list because it could have been moved on page,
* and set new second element
*/
head = (SpGistLeafTuple) PageGetItem(current->page,
PageGetItemId(current->page, current->offnum));
SGLT_SET_NEXTOFFSET(head, offnum);
xlrec.offnumLeaf = offnum;
xlrec.offnumHeadLeaf = current->offnum;
}
else if (head->tupstate == SPGIST_DEAD)
{
SGLT_SET_NEXTOFFSET(leafTuple, InvalidOffsetNumber);
PageIndexTupleDelete(current->page, current->offnum);
if (PageAddItem(current->page,
(Item) leafTuple, leafTuple->size,
current->offnum, false, false) != current->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
leafTuple->size);
/* WAL replay distinguishes this case by equal offnums */
xlrec.offnumLeaf = current->offnum;
xlrec.offnumHeadLeaf = current->offnum;
}
else
elog(ERROR, "unexpected SPGiST tuple state: %d", head->tupstate);
}
MarkBufferDirty(current->buffer);
if (RelationNeedsWAL(index) && !state->isBuild)
{
XLogRecPtr recptr;
int flags;
XLogBeginInsert();
XLogRegisterData(&xlrec, sizeof(xlrec));
XLogRegisterData(leafTuple, leafTuple->size);
flags = REGBUF_STANDARD;
if (xlrec.newPage)
flags |= REGBUF_WILL_INIT;
XLogRegisterBuffer(0, current->buffer, flags);
if (xlrec.offnumParent != InvalidOffsetNumber)
XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
PageSetLSN(current->page, recptr);
/* update parent only if we actually changed it */
if (xlrec.offnumParent != InvalidOffsetNumber)
{
PageSetLSN(parent->page, rec
|