Avoid having two copies of the HOT-chain search logic.
authorRobert Haas <[email protected]>
Mon, 27 Jun 2011 14:27:17 +0000 (10:27 -0400)
committerRobert Haas <[email protected]>
Mon, 27 Jun 2011 14:27:17 +0000 (10:27 -0400)
It's been like this since HOT was originally introduced, but the logic
is complex enough that this is a recipe for bugs, as we've already
found out with SSI.  So refactor heap_hot_search_buffer() so that it
can satisfy the needs of index_getnext(), and make index_getnext() use
that rather than duplicating the logic.

This change was originally proposed by Heikki Linnakangas as part of a
larger refactoring oriented towards allowing index-only scans.  I
extracted and adjusted this part, since it seems to have independent
merit.  Review by Jeff Davis.

src/backend/access/heap/heapam.c
src/backend/access/index/genam.c
src/backend/access/index/indexam.c
src/backend/executor/nodeBitmapHeapscan.c
src/include/access/heapam.h
src/include/access/relscan.h

index 7bb4a874c47e009f8b22714a4cf0d50daac2e07d..a48e0572703de70c76267be42711d466b76484ff 100644 (file)
@@ -1514,6 +1514,10 @@ heap_fetch(Relation relation,
  * found, we update *tid to reference that tuple's offset number, and
  * return TRUE.  If no match, return FALSE without modifying *tid.
  *
+ * heapTuple is a caller-supplied buffer.  When a match is found, we return
+ * the tuple here, in addition to updating *tid.  If no match is found, the
+ * contents of this buffer on return are undefined.
+ *
  * If all_dead is not NULL, we check non-visible tuples to see if they are
  * globally dead; *all_dead is set TRUE if all members of the HOT chain
  * are vacuumable, FALSE if not.
@@ -1524,28 +1528,31 @@ heap_fetch(Relation relation,
  */
 bool
 heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
-                      Snapshot snapshot, bool *all_dead)
+                      Snapshot snapshot, HeapTuple heapTuple,
+                      bool *all_dead, bool first_call)
 {
    Page        dp = (Page) BufferGetPage(buffer);
    TransactionId prev_xmax = InvalidTransactionId;
    OffsetNumber offnum;
    bool        at_chain_start;
    bool        valid;
+   bool        skip;
 
+   /* If this is not the first call, previous call returned a (live!) tuple */
    if (all_dead)
-       *all_dead = true;
+       *all_dead = first_call;
 
    Assert(TransactionIdIsValid(RecentGlobalXmin));
 
    Assert(ItemPointerGetBlockNumber(tid) == BufferGetBlockNumber(buffer));
    offnum = ItemPointerGetOffsetNumber(tid);
-   at_chain_start = true;
+   at_chain_start = first_call;
+   skip = !first_call;
 
    /* Scan through possible multiple members of HOT-chain */
    for (;;)
    {
        ItemId      lp;
-       HeapTupleData heapTuple;
 
        /* check for bogus TID */
        if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
@@ -1568,15 +1575,15 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
            break;
        }
 
-       heapTuple.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
-       heapTuple.t_len = ItemIdGetLength(lp);
-       heapTuple.t_tableOid = relation->rd_id;
-       heapTuple.t_self = *tid;
+       heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
+       heapTuple->t_len = ItemIdGetLength(lp);
+       heapTuple->t_tableOid = relation->rd_id;
+       heapTuple->t_self = *tid;
 
        /*
         * Shouldn't see a HEAP_ONLY tuple at chain start.
         */
-       if (at_chain_start && HeapTupleIsHeapOnly(&heapTuple))
+       if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
            break;
 
        /*
@@ -1585,21 +1592,32 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
         */
        if (TransactionIdIsValid(prev_xmax) &&
            !TransactionIdEquals(prev_xmax,
-                                HeapTupleHeaderGetXmin(heapTuple.t_data)))
+                                HeapTupleHeaderGetXmin(heapTuple->t_data)))
            break;
 
-       /* If it's visible per the snapshot, we must return it */
-       valid = HeapTupleSatisfiesVisibility(&heapTuple, snapshot, buffer);
-       CheckForSerializableConflictOut(valid, relation, &heapTuple, buffer,
-                                       snapshot);
-       if (valid)
+       /*
+        * When first_call is true (and thus, skip is initally false) we'll
+        * return the first tuple we find.  But on later passes, heapTuple
+        * will initially be pointing to the tuple we returned last time.
+        * Returning it again would be incorrect (and would loop forever),
+        * so we skip it and return the next match we find.
+        */
+       if (!skip)
        {
-           ItemPointerSetOffsetNumber(tid, offnum);
-           PredicateLockTuple(relation, &heapTuple, snapshot);
-           if (all_dead)
-               *all_dead = false;
-           return true;
+           /* If it's visible per the snapshot, we must return it */
+           valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
+           CheckForSerializableConflictOut(valid, relation, heapTuple,
+                                           buffer, snapshot);
+           if (valid)
+           {
+               ItemPointerSetOffsetNumber(tid, offnum);
+               PredicateLockTuple(relation, heapTuple, snapshot);
+               if (all_dead)
+                   *all_dead = false;
+               return true;
+           }
        }
+       skip = false;
 
        /*
         * If we can't see it, maybe no one else can either.  At caller
@@ -1607,7 +1625,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
         * transactions.
         */
        if (all_dead && *all_dead &&
-           HeapTupleSatisfiesVacuum(heapTuple.t_data, RecentGlobalXmin,
+           HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
                                     buffer) != HEAPTUPLE_DEAD)
            *all_dead = false;
 
@@ -1615,13 +1633,13 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
         * Check to see if HOT chain continues past this tuple; if so fetch
         * the next offnum and loop around.
         */
-       if (HeapTupleIsHotUpdated(&heapTuple))
+       if (HeapTupleIsHotUpdated(heapTuple))
        {
-           Assert(ItemPointerGetBlockNumber(&heapTuple.t_data->t_ctid) ==
+           Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
                   ItemPointerGetBlockNumber(tid));
-           offnum = ItemPointerGetOffsetNumber(&heapTuple.t_data->t_ctid);
+           offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
            at_chain_start = false;
-           prev_xmax = HeapTupleHeaderGetXmax(heapTuple.t_data);
+           prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
        }
        else
            break;              /* end of chain */
@@ -1643,10 +1661,12 @@ heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot,
 {
    bool        result;
    Buffer      buffer;
+   HeapTupleData   heapTuple;
 
    buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
    LockBuffer(buffer, BUFFER_LOCK_SHARE);
-   result = heap_hot_search_buffer(tid, relation, buffer, snapshot, all_dead);
+   result = heap_hot_search_buffer(tid, relation, buffer, snapshot,
+                                   &heapTuple, all_dead, true);
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buffer);
    return result;
index db04e26a65dba2ab23a7cb76d57b6960f4753575..fe3aa3c9c1d27c75bc06915c5c37190722a48699 100644 (file)
@@ -113,9 +113,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
    ItemPointerSetInvalid(&scan->xs_ctup.t_self);
    scan->xs_ctup.t_data = NULL;
    scan->xs_cbuf = InvalidBuffer;
-   scan->xs_hot_dead = false;
-   scan->xs_next_hot = InvalidOffsetNumber;
-   scan->xs_prev_xmax = InvalidTransactionId;
+   scan->xs_continue_hot = false;
 
    return scan;
 }
index 31d705c0561756598a7d335e6bf5061d7555027d..13e68d644eac1b3cc4ebb21d1aae9f918b3f087a 100644 (file)
@@ -335,7 +335,7 @@ index_rescan(IndexScanDesc scan,
        scan->xs_cbuf = InvalidBuffer;
    }
 
-   scan->xs_next_hot = InvalidOffsetNumber;
+   scan->xs_continue_hot = false;
 
    scan->kill_prior_tuple = false;     /* for safety */
 
@@ -417,7 +417,7 @@ index_restrpos(IndexScanDesc scan)
    SCAN_CHECKS;
    GET_SCAN_PROCEDURE(amrestrpos);
 
-   scan->xs_next_hot = InvalidOffsetNumber;
+   scan->xs_continue_hot = false;
 
    scan->kill_prior_tuple = false;     /* for safety */
 
@@ -443,26 +443,18 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
    HeapTuple   heapTuple = &scan->xs_ctup;
    ItemPointer tid = &heapTuple->t_self;
    FmgrInfo   *procedure;
+   bool        all_dead = false;
 
    SCAN_CHECKS;
    GET_SCAN_PROCEDURE(amgettuple);
 
    Assert(TransactionIdIsValid(RecentGlobalXmin));
 
-   /*
-    * We always reset xs_hot_dead; if we are here then either we are just
-    * starting the scan, or we previously returned a visible tuple, and in
-    * either case it's inappropriate to kill the prior index entry.
-    */
-   scan->xs_hot_dead = false;
-
    for (;;)
    {
-       OffsetNumber offnum;
-       bool        at_chain_start;
-       Page        dp;
+       bool    got_heap_tuple;
 
-       if (scan->xs_next_hot != InvalidOffsetNumber)
+       if (scan->xs_continue_hot)
        {
            /*
             * We are resuming scan of a HOT chain after having returned an
@@ -471,10 +463,6 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
            Assert(BufferIsValid(scan->xs_cbuf));
            Assert(ItemPointerGetBlockNumber(tid) ==
                   BufferGetBlockNumber(scan->xs_cbuf));
-           Assert(TransactionIdIsValid(scan->xs_prev_xmax));
-           offnum = scan->xs_next_hot;
-           at_chain_start = false;
-           scan->xs_next_hot = InvalidOffsetNumber;
        }
        else
        {
@@ -488,7 +476,7 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
             * comments in RelationGetIndexScan().
             */
            if (!scan->xactStartedInRecovery)
-               scan->kill_prior_tuple = scan->xs_hot_dead;
+               scan->kill_prior_tuple = all_dead;
 
            /*
             * The AM's gettuple proc finds the next index entry matching the
@@ -521,151 +509,31 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
            if (prev_buf != scan->xs_cbuf)
                heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
                                    RecentGlobalXmin);
-
-           /* Prepare to scan HOT chain starting at index-referenced offnum */
-           offnum = ItemPointerGetOffsetNumber(tid);
-           at_chain_start = true;
-
-           /* We don't know what the first tuple's xmin should be */
-           scan->xs_prev_xmax = InvalidTransactionId;
-
-           /* Initialize flag to detect if all entries are dead */
-           scan->xs_hot_dead = true;
        }
 
        /* Obtain share-lock on the buffer so we can examine visibility */
        LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
+       got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
+                                               scan->xs_cbuf,
+                                               scan->xs_snapshot,
+                                               &scan->xs_ctup,
+                                               &all_dead,
+                                               !scan->xs_continue_hot);
+       LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
 
-       dp = (Page) BufferGetPage(scan->xs_cbuf);
-
-       /* Scan through possible multiple members of HOT-chain */
-       for (;;)
+       if (got_heap_tuple)
        {
-           ItemId      lp;
-           ItemPointer ctid;
-           bool        valid;
-
-           /* check for bogus TID */
-           if (offnum < FirstOffsetNumber ||
-               offnum > PageGetMaxOffsetNumber(dp))
-               break;
-
-           lp = PageGetItemId(dp, offnum);
-
-           /* check for unused, dead, or redirected items */
-           if (!ItemIdIsNormal(lp))
-           {
-               /* We should only see a redirect at start of chain */
-               if (ItemIdIsRedirected(lp) && at_chain_start)
-               {
-                   /* Follow the redirect */
-                   offnum = ItemIdGetRedirect(lp);
-                   at_chain_start = false;
-                   continue;
-               }
-               /* else must be end of chain */
-               break;
-           }
-
-           /*
-            * We must initialize all of *heapTuple (ie, scan->xs_ctup) since
-            * it is returned to the executor on success.
-            */
-           heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
-           heapTuple->t_len = ItemIdGetLength(lp);
-           ItemPointerSetOffsetNumber(tid, offnum);
-           heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
-           ctid = &heapTuple->t_data->t_ctid;
-
            /*
-            * Shouldn't see a HEAP_ONLY tuple at chain start.  (This test
-            * should be unnecessary, since the chain root can't be removed
-            * while we have pin on the index entry, but let's make it
-            * anyway.)
+            * Only in a non-MVCC snapshot can more than one member of the
+            * HOT chain be visible.
             */
-           if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
-               break;
-
-           /*
-            * The xmin should match the previous xmax value, else chain is
-            * broken.  (Note: this test is not optional because it protects
-            * us against the case where the prior chain member's xmax aborted
-            * since we looked at it.)
-            */
-           if (TransactionIdIsValid(scan->xs_prev_xmax) &&
-               !TransactionIdEquals(scan->xs_prev_xmax,
-                                 HeapTupleHeaderGetXmin(heapTuple->t_data)))
-               break;
-
-           /* If it's visible per the snapshot, we must return it */
-           valid = HeapTupleSatisfiesVisibility(heapTuple, scan->xs_snapshot,
-                                                scan->xs_cbuf);
-
-           CheckForSerializableConflictOut(valid, scan->heapRelation,
-                                           heapTuple, scan->xs_cbuf,
-                                           scan->xs_snapshot);
-
-           if (valid)
-           {
-               /*
-                * If the snapshot is MVCC, we know that it could accept at
-                * most one member of the HOT chain, so we can skip examining
-                * any more members.  Otherwise, check for continuation of the
-                * HOT-chain, and set state for next time.
-                */
-               if (IsMVCCSnapshot(scan->xs_snapshot))
-                   scan->xs_next_hot = InvalidOffsetNumber;
-               else if (HeapTupleIsHotUpdated(heapTuple))
-               {
-                   Assert(ItemPointerGetBlockNumber(ctid) ==
-                          ItemPointerGetBlockNumber(tid));
-                   scan->xs_next_hot = ItemPointerGetOffsetNumber(ctid);
-                   scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
-               }
-               else
-                   scan->xs_next_hot = InvalidOffsetNumber;
-
-               PredicateLockTuple(scan->heapRelation, heapTuple, scan->xs_snapshot);
-
-               LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
-
-               pgstat_count_heap_fetch(scan->indexRelation);
-
-               return heapTuple;
-           }
-
-           /*
-            * If we can't see it, maybe no one else can either.  Check to see
-            * if the tuple is dead to all transactions.  If we find that all
-            * the tuples in the HOT chain are dead, we'll signal the index AM
-            * to not return that TID on future indexscans.
-            */
-           if (scan->xs_hot_dead &&
-               HeapTupleSatisfiesVacuum(heapTuple->t_data, RecentGlobalXmin,
-                                        scan->xs_cbuf) != HEAPTUPLE_DEAD)
-               scan->xs_hot_dead = false;
-
-           /*
-            * Check to see if HOT chain continues past this tuple; if so
-            * fetch the next offnum (we don't bother storing it into
-            * xs_next_hot, but must store xs_prev_xmax), and loop around.
-            */
-           if (HeapTupleIsHotUpdated(heapTuple))
-           {
-               Assert(ItemPointerGetBlockNumber(ctid) ==
-                      ItemPointerGetBlockNumber(tid));
-               offnum = ItemPointerGetOffsetNumber(ctid);
-               at_chain_start = false;
-               scan->xs_prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
-           }
-           else
-               break;          /* end of chain */
-       }                       /* loop over a single HOT chain */
-
-       LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+           scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
+           pgstat_count_heap_fetch(scan->indexRelation);
+           return heapTuple;
+       }
 
        /* Loop around to ask index AM for another TID */
-       scan->xs_next_hot = InvalidOffsetNumber;
+       scan->xs_continue_hot = false;
    }
 
    /* Release any held pin on a heap page */
index e20ef1433ed1d2343e37266cc79e19a413dfa8a2..40ad77575879191e4c8b97321cfb9900cd68feee 100644 (file)
@@ -349,9 +349,11 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
        {
            OffsetNumber offnum = tbmres->offsets[curslot];
            ItemPointerData tid;
+           HeapTupleData   heapTuple;
 
            ItemPointerSet(&tid, page, offnum);
-           if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, NULL))
+           if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
+                                      &heapTuple, NULL, true))
                scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
        }
    }
index fc657612ed06325b48c09cc20effb8e78f00c416..ee474d6c909ea85cbb67ec2361996039748a4a7b 100644 (file)
@@ -83,7 +83,8 @@ extern bool heap_fetch(Relation relation, Snapshot snapshot,
           HeapTuple tuple, Buffer *userbuf, bool keep_buf,
           Relation stats_relation);
 extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
-                      Buffer buffer, Snapshot snapshot, bool *all_dead);
+                      Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
+                      bool *all_dead, bool first_call);
 extern bool heap_hot_search(ItemPointer tid, Relation relation,
                Snapshot snapshot, bool *all_dead);
 
index 7663033723b413c0fb6c035a5b9f68e4e9d815cf..db1131e26000f17ce0a68953e493637108318f8c 100644 (file)
@@ -84,9 +84,7 @@ typedef struct IndexScanDescData
    bool        xs_recheck;     /* T means scan keys must be rechecked */
 
    /* state data for traversing HOT chains in index_getnext */
-   bool        xs_hot_dead;    /* T if all members of HOT chain are dead */
-   OffsetNumber xs_next_hot;   /* next member of HOT chain, if any */
-   TransactionId xs_prev_xmax; /* previous HOT chain member's XMAX, if any */
+   bool        xs_continue_hot;    /* T if must keep walking HOT chain */
 }  IndexScanDescData;
 
 /* Struct for heap-or-index scans of system tables */