@@ -51,20 +51,22 @@ typedef struct
5151 * 1. Otherwise every access would need to subtract 1.
5252 */
5353 bool marked [MaxHeapTuplesPerPage + 1 ];
54+
55+ int ndeleted ; /* Number of tuples deleted from the page */
5456} PruneState ;
5557
5658/* Local functions */
5759static HTSV_Result heap_prune_satisfies_vacuum (PruneState * prstate ,
5860 HeapTuple tup ,
5961 Buffer buffer );
60- static int heap_prune_chain (Page page , BlockNumber blockno , OffsetNumber maxoff ,
62+ static void heap_prune_chain (Page page , BlockNumber blockno , OffsetNumber maxoff ,
6163 OffsetNumber rootoffnum , int8 * htsv , PruneState * prstate );
6264static void heap_prune_record_prunable (PruneState * prstate , TransactionId xid );
6365static void heap_prune_record_redirect (PruneState * prstate ,
64- OffsetNumber offnum , OffsetNumber rdoffnum );
65- static void heap_prune_record_dead (PruneState * prstate , OffsetNumber offnum );
66- static void heap_prune_record_dead_or_unused (PruneState * prstate , OffsetNumber offnum );
67- static void heap_prune_record_unused (PruneState * prstate , OffsetNumber offnum );
66+ OffsetNumber offnum , OffsetNumber rdoffnum , bool was_normal );
67+ static void heap_prune_record_dead (PruneState * prstate , OffsetNumber offnum , bool was_normal );
68+ static void heap_prune_record_dead_or_unused (PruneState * prstate , OffsetNumber offnum , bool was_normal );
69+ static void heap_prune_record_unused (PruneState * prstate , OffsetNumber offnum , bool was_normal );
6870static void page_verify_redirects (Page page );
6971
7072
@@ -241,6 +243,7 @@ heap_page_prune(Relation relation, Buffer buffer,
241243 prstate .snapshotConflictHorizon = InvalidTransactionId ;
242244 prstate .nredirected = prstate .ndead = prstate .nunused = 0 ;
243245 memset (prstate .marked , 0 , sizeof (prstate .marked ));
246+ prstate .ndeleted = 0 ;
244247
245248 /*
246249 * presult->htsv is not initialized here because all ntuple spots in the
@@ -321,8 +324,8 @@ heap_page_prune(Relation relation, Buffer buffer,
321324 continue ;
322325
323326 /* Process this item or chain of items */
324- presult -> ndeleted += heap_prune_chain (page , blockno , maxoff , offnum ,
325- presult -> htsv , & prstate );
327+ heap_prune_chain (page , blockno , maxoff ,
328+ offnum , presult -> htsv , & prstate );
326329 }
327330
328331 /* Clear the offset information once we have processed the given page. */
@@ -394,8 +397,9 @@ heap_page_prune(Relation relation, Buffer buffer,
394397
395398 END_CRIT_SECTION ();
396399
397- /* Record number of newly-set-LP_DEAD items for caller */
400+ /* Copy information back for caller */
398401 presult -> nnewlpdead = prstate .ndead ;
402+ presult -> ndeleted = prstate .ndeleted ;
399403}
400404
401405
@@ -444,22 +448,23 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
444448 * to the redirected[] array (two entries per redirection); items to be set to
445449 * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
446450 * state are added to nowunused[].
447- *
448- * Returns the number of tuples (to be) deleted from the page.
449451 */
450- static int
452+ static void
451453heap_prune_chain (Page page , BlockNumber blockno , OffsetNumber maxoff ,
452454 OffsetNumber rootoffnum , int8 * htsv , PruneState * prstate )
453455{
454- int ndeleted = 0 ;
455456 TransactionId priorXmax = InvalidTransactionId ;
456457 ItemId rootlp ;
457458 HeapTupleHeader htup ;
458- OffsetNumber latestdead = InvalidOffsetNumber ,
459- offnum ;
459+ OffsetNumber offnum ;
460460 OffsetNumber chainitems [MaxHeapTuplesPerPage ];
461- int nchain = 0 ,
462- i ;
461+
462+ /*
463+ * After traversing the HOT chain, ndeadchain is the index in chainitems
464+ * of the first live successor after the last dead item.
465+ */
466+ int ndeadchain = 0 ,
467+ nchain = 0 ;
463468
464469 rootlp = PageGetItemId (page , rootoffnum );
465470
@@ -494,14 +499,12 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
494499 if (htsv [rootoffnum ] == HEAPTUPLE_DEAD &&
495500 !HeapTupleHeaderIsHotUpdated (htup ))
496501 {
497- heap_prune_record_unused (prstate , rootoffnum );
502+ heap_prune_record_unused (prstate , rootoffnum , true );
498503 HeapTupleHeaderAdvanceConflictHorizon (htup ,
499504 & prstate -> snapshotConflictHorizon );
500- ndeleted ++ ;
501505 }
502506
503- /* Nothing more to do */
504- return ndeleted ;
507+ return ;
505508 }
506509 }
507510
@@ -512,8 +515,6 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
512515 for (;;)
513516 {
514517 ItemId lp ;
515- bool tupdead ,
516- recent_dead ;
517518
518519 /* Sanity check (pure paranoia) */
519520 if (offnum < FirstOffsetNumber )
@@ -563,7 +564,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
563564 * the LP was already marked dead.
564565 */
565566 if (unlikely (prstate -> mark_unused_now ))
566- heap_prune_record_unused (prstate , offnum );
567+ heap_prune_record_unused (prstate , offnum , false );
567568
568569 break ;
569570 }
@@ -586,23 +587,41 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
586587 /*
587588 * Check tuple's visibility status.
588589 */
589- tupdead = recent_dead = false;
590-
591590 switch (htsv_get_valid_status (htsv [offnum ]))
592591 {
593592 case HEAPTUPLE_DEAD :
594- tupdead = true;
593+
594+ /* Remember the last DEAD tuple seen */
595+ ndeadchain = nchain ;
596+ HeapTupleHeaderAdvanceConflictHorizon (htup ,
597+ & prstate -> snapshotConflictHorizon );
598+
599+ /* Advance to next chain member */
595600 break ;
596601
597602 case HEAPTUPLE_RECENTLY_DEAD :
598- recent_dead = true;
599603
600604 /*
601605 * This tuple may soon become DEAD. Update the hint field so
602606 * that the page is reconsidered for pruning in future.
607+ *
608+ * We don't need to advance the conflict horizon for
609+ * RECENTLY_DEAD tuples, even if we are removing them. This
610+ * is because we only remove RECENTLY_DEAD tuples if they
611+ * precede a DEAD tuple, and the DEAD tuple must have been
612+ * inserted by a newer transaction than the RECENTLY_DEAD
613+ * tuple by virtue of being later in the chain. We will have
614+ * advanced the conflict horizon for the DEAD tuple.
603615 */
604616 heap_prune_record_prunable (prstate ,
605617 HeapTupleHeaderGetUpdateXid (htup ));
618+
619+ /*
620+ * Advance past RECENTLY_DEAD tuples just in case there's a
621+ * DEAD one after them. We have to make sure that we don't
622+ * miss any DEAD tuples, since DEAD tuples that still have
623+ * tuple storage after pruning will confuse VACUUM.
624+ */
606625 break ;
607626
608627 case HEAPTUPLE_DELETE_IN_PROGRESS :
@@ -613,7 +632,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
613632 */
614633 heap_prune_record_prunable (prstate ,
615634 HeapTupleHeaderGetUpdateXid (htup ));
616- break ;
635+ goto process_chain ;
617636
618637 case HEAPTUPLE_LIVE :
619638 case HEAPTUPLE_INSERT_IN_PROGRESS :
@@ -624,35 +643,19 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
624643 * But we don't. See related decisions about when to mark the
625644 * page prunable in heapam.c.
626645 */
627- break ;
646+ goto process_chain ;
628647
629648 default :
630649 elog (ERROR , "unexpected HeapTupleSatisfiesVacuum result" );
631- break ;
650+ goto process_chain ;
632651 }
633652
634- /*
635- * Remember the last DEAD tuple seen. We will advance past
636- * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
637- * but we can't advance past anything else. We have to make sure that
638- * we don't miss any DEAD tuples, since DEAD tuples that still have
639- * tuple storage after pruning will confuse VACUUM.
640- */
641- if (tupdead )
642- {
643- latestdead = offnum ;
644- HeapTupleHeaderAdvanceConflictHorizon (htup ,
645- & prstate -> snapshotConflictHorizon );
646- }
647- else if (!recent_dead )
648- break ;
649-
650653 /*
651654 * If the tuple is not HOT-updated, then we are at the end of this
652655 * HOT-update chain.
653656 */
654657 if (!HeapTupleHeaderIsHotUpdated (htup ))
655- break ;
658+ goto process_chain ;
656659
657660 /* HOT implies it can't have moved to different partition */
658661 Assert (!HeapTupleHeaderIndicatesMovedPartitions (htup ));
@@ -665,57 +668,52 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
665668 priorXmax = HeapTupleHeaderGetUpdateXid (htup );
666669 }
667670
668- /*
669- * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
670- * the DEAD tuples at the start of the chain are removed and the root line
671- * pointer is appropriately redirected.
672- */
673- if (OffsetNumberIsValid (latestdead ))
671+ if (ItemIdIsRedirected (rootlp ) && nchain < 2 )
674672 {
675673 /*
676- * Mark as unused each intermediate item that we are able to remove
677- * from the chain.
678- *
679- * When the previous item is the last dead tuple seen, we are at the
680- * right candidate for redirection .
674+ * We found a redirect item that doesn't point to a valid follow-on
675+ * item. This can happen if the loop in heap_page_prune caused us to
676+ * visit the dead successor of a redirect item before visiting the
677+ * redirect item. We can clean up by setting the redirect item to
678+ * LP_DEAD state or LP_UNUSED if the caller indicated .
681679 */
682- for (i = 1 ; (i < nchain ) && (chainitems [i - 1 ] != latestdead ); i ++ )
683- {
684- heap_prune_record_unused (prstate , chainitems [i ]);
685- ndeleted ++ ;
686- }
680+ heap_prune_record_dead_or_unused (prstate , rootoffnum , false);
681+ return ;
682+ }
687683
688- /*
689- * If the root entry had been a normal tuple, we are deleting it, so
690- * count it in the result. But changing a redirect (even to DEAD
691- * state) doesn't count.
692- */
693- if (ItemIdIsNormal (rootlp ))
694- ndeleted ++ ;
684+ process_chain :
695685
686+ if (ndeadchain == 0 )
687+ {
696688 /*
697- * If the DEAD tuple is at the end of the chain, the entire chain is
698- * dead and the root line pointer can be marked dead. Otherwise just
699- * redirect the root to the correct chain member.
689+ * No DEAD tuple was found, so the chain is entirely composed of
690+ * normal, unchanged tuples. Leave it alone.
700691 */
701- if (i >= nchain )
702- heap_prune_record_dead_or_unused (prstate , rootoffnum );
703- else
704- heap_prune_record_redirect (prstate , rootoffnum , chainitems [i ]);
705692 }
706- else if (nchain < 2 && ItemIdIsRedirected ( rootlp ) )
693+ else if (ndeadchain == nchain )
707694 {
708695 /*
709- * We found a redirect item that doesn't point to a valid follow-on
710- * item. This can happen if the loop in heap_page_prune caused us to
711- * visit the dead successor of a redirect item before visiting the
712- * redirect item. We can clean up by setting the redirect item to
713- * DEAD state or LP_UNUSED if the caller indicated.
696+ * The entire chain is dead. Mark the root line pointer LP_DEAD, and
697+ * fully remove the other tuples in the chain.
714698 */
715- heap_prune_record_dead_or_unused (prstate , rootoffnum );
699+ heap_prune_record_dead_or_unused (prstate , rootoffnum , ItemIdIsNormal (rootlp ));
700+ for (int i = 1 ; i < nchain ; i ++ )
701+ heap_prune_record_unused (prstate , chainitems [i ], true);
716702 }
703+ else
704+ {
705+ /*
706+ * We found a DEAD tuple in the chain. Redirect the root line pointer
707+ * to the first non-DEAD tuple, and mark as unused each intermediate
708+ * item that we are able to remove from the chain.
709+ */
710+ heap_prune_record_redirect (prstate , rootoffnum , chainitems [ndeadchain ],
711+ ItemIdIsNormal (rootlp ));
712+ for (int i = 1 ; i < ndeadchain ; i ++ )
713+ heap_prune_record_unused (prstate , chainitems [i ], true);
717714
718- return ndeleted ;
715+ /* the rest of tuples in the chain are normal, unchanged tuples */
716+ }
719717}
720718
721719/* Record lowest soon-prunable XID */
@@ -735,7 +733,8 @@ heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
735733/* Record line pointer to be redirected */
736734static void
737735heap_prune_record_redirect (PruneState * prstate ,
738- OffsetNumber offnum , OffsetNumber rdoffnum )
736+ OffsetNumber offnum , OffsetNumber rdoffnum ,
737+ bool was_normal )
739738{
740739 Assert (prstate -> nredirected < MaxHeapTuplesPerPage );
741740 prstate -> redirected [prstate -> nredirected * 2 ] = offnum ;
@@ -745,17 +744,34 @@ heap_prune_record_redirect(PruneState *prstate,
745744 prstate -> marked [offnum ] = true;
746745 Assert (!prstate -> marked [rdoffnum ]);
747746 prstate -> marked [rdoffnum ] = true;
747+
748+ /*
749+ * If the root entry had been a normal tuple, we are deleting it, so count
750+ * it in the result. But changing a redirect (even to DEAD state) doesn't
751+ * count.
752+ */
753+ if (was_normal )
754+ prstate -> ndeleted ++ ;
748755}
749756
750757/* Record line pointer to be marked dead */
751758static void
752- heap_prune_record_dead (PruneState * prstate , OffsetNumber offnum )
759+ heap_prune_record_dead (PruneState * prstate , OffsetNumber offnum ,
760+ bool was_normal )
753761{
754762 Assert (prstate -> ndead < MaxHeapTuplesPerPage );
755763 prstate -> nowdead [prstate -> ndead ] = offnum ;
756764 prstate -> ndead ++ ;
757765 Assert (!prstate -> marked [offnum ]);
758766 prstate -> marked [offnum ] = true;
767+
768+ /*
769+ * If the root entry had been a normal tuple, we are deleting it, so count
770+ * it in the result. But changing a redirect (even to DEAD state) doesn't
771+ * count.
772+ */
773+ if (was_normal )
774+ prstate -> ndeleted ++ ;
759775}
760776
761777/*
@@ -765,7 +781,8 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
765781 * pointers LP_DEAD if mark_unused_now is true.
766782 */
767783static void
768- heap_prune_record_dead_or_unused (PruneState * prstate , OffsetNumber offnum )
784+ heap_prune_record_dead_or_unused (PruneState * prstate , OffsetNumber offnum ,
785+ bool was_normal )
769786{
770787 /*
771788 * If the caller set mark_unused_now to true, we can remove dead tuples
@@ -774,20 +791,28 @@ heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
774791 * likely.
775792 */
776793 if (unlikely (prstate -> mark_unused_now ))
777- heap_prune_record_unused (prstate , offnum );
794+ heap_prune_record_unused (prstate , offnum , was_normal );
778795 else
779- heap_prune_record_dead (prstate , offnum );
796+ heap_prune_record_dead (prstate , offnum , was_normal );
780797}
781798
782799/* Record line pointer to be marked unused */
783800static void
784- heap_prune_record_unused (PruneState * prstate , OffsetNumber offnum )
801+ heap_prune_record_unused (PruneState * prstate , OffsetNumber offnum , bool was_normal )
785802{
786803 Assert (prstate -> nunused < MaxHeapTuplesPerPage );
787804 prstate -> nowunused [prstate -> nunused ] = offnum ;
788805 prstate -> nunused ++ ;
789806 Assert (!prstate -> marked [offnum ]);
790807 prstate -> marked [offnum ] = true;
808+
809+ /*
810+ * If the root entry had been a normal tuple, we are deleting it, so count
811+ * it in the result. But changing a redirect (even to DEAD state) doesn't
812+ * count.
813+ */
814+ if (was_normal )
815+ prstate -> ndeleted ++ ;
791816}
792817
793818
0 commit comments