@@ -204,6 +204,12 @@ typedef struct LVRelState
 	int64		live_tuples;	/* # live tuples remaining */
 	int64		recently_dead_tuples;	/* # dead, but not yet removable */
 	int64		missed_dead_tuples; /* # removable, but not removed */
+
+	/* State maintained by heap_vac_scan_next_block() */
+	BlockNumber current_block;	/* last block returned */
+	BlockNumber next_unskippable_block; /* next unskippable block */
+	bool		next_unskippable_allvis;	/* its visibility status */
+	Buffer		next_unskippable_vmbuffer;	/* buffer containing its VM bit */
 } LVRelState;
 
 /* Struct for saving and restoring vacuum error information. */
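The new fields also carry a pinned visibility-map buffer (next_unskippable_vmbuffer) across calls, so consecutive blocks don't re-read the same VM page. That caching pays off because each VM page covers a large span of heap blocks: the visibility map stores two bits (all-visible, all-frozen) per heap block. A back-of-envelope check, assuming PostgreSQL's default 8 kB block size and 24-byte page header:

#include <stdio.h>

int
main(void)
{
	/* Assumptions: 8 kB pages, 24-byte page header, 2 VM bits per heap block */
	const int	BLCKSZ = 8192;
	const int	SizeOfPageHeaderData = 24;
	const int	HEAPBLOCKS_PER_BYTE = 4;	/* 8 bits / 2 bits per block */

	/* One pinned VM buffer can answer queries for ~32K consecutive blocks */
	printf("heap blocks per VM page: %d\n",
		   (BLCKSZ - SizeOfPageHeaderData) * HEAPBLOCKS_PER_BYTE);
	return 0;
}

With those numbers one VM page maps 32,672 heap blocks (roughly 255 MB of heap), so holding the pin across calls is almost always a hit.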
@@ -217,10 +223,9 @@ typedef struct LVSavedErrInfo
 
 /* non-export function prototypes */
 static void	lazy_scan_heap(LVRelState *vacrel);
-static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
-								  BlockNumber next_block,
-								  bool *next_unskippable_allvis,
-								  bool *skipping_current_range);
+static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
+									 bool *all_visible_according_to_vm);
+static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
 								   BlockNumber blkno, Page page,
 								   bool sharelock, Buffer vmbuffer);
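The prototype change turns the skip logic into a pull-style iterator: instead of asking lazy_scan_skip() for the next skippable range and managing that range in the caller, lazy_scan_heap() now simply asks for the next block to process until the function returns false. A sketch of the new calling contract, matching how a later hunk in this commit applies it:

	BlockNumber blkno;
	bool		all_visible_according_to_vm;

	/* Pull one block at a time; false means the scan is complete */
	while (heap_vac_scan_next_block(vacrel, &blkno,
									&all_visible_according_to_vm))
	{
		/* prune and vacuum block blkno here */
	}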
@@ -803,12 +808,11 @@ lazy_scan_heap(LVRelState *vacrel)
 {
 	BlockNumber rel_pages = vacrel->rel_pages,
 				blkno,
-				next_unskippable_block,
 				next_fsm_block_to_vacuum = 0;
+	bool		all_visible_according_to_vm;
+
 	VacDeadItems *dead_items = vacrel->dead_items;
 	Buffer		vmbuffer = InvalidBuffer;
-	bool		next_unskippable_allvis,
-				skipping_current_range;
 	const int	initprog_index[] = {
 		PROGRESS_VACUUM_PHASE,
 		PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
@@ -822,44 +826,19 @@ lazy_scan_heap(LVRelState *vacrel)
 	initprog_val[2] = dead_items->max_items;
 	pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
 
-	/* Set up an initial range of skippable blocks using the visibility map */
-	next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
-											&next_unskippable_allvis,
-											&skipping_current_range);
-	for (blkno = 0; blkno < rel_pages; blkno++)
+	/* Initialize for the first heap_vac_scan_next_block() call */
+	vacrel->current_block = InvalidBlockNumber;
+	vacrel->next_unskippable_block = InvalidBlockNumber;
+	vacrel->next_unskippable_allvis = false;
+	vacrel->next_unskippable_vmbuffer = InvalidBuffer;
+
+	while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
 	{
 		Buffer		buf;
 		Page		page;
-		bool		all_visible_according_to_vm;
 		bool		has_lpdead_items;
 		bool		got_cleanup_lock = false;
 
-		if (blkno == next_unskippable_block)
-		{
-			/*
-			 * Can't skip this page safely.  Must scan the page.  But
-			 * determine the next skippable range after the page first.
-			 */
-			all_visible_according_to_vm = next_unskippable_allvis;
-			next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
-													blkno + 1,
-													&next_unskippable_allvis,
-													&skipping_current_range);
-
-			Assert(next_unskippable_block >= blkno + 1);
-		}
-		else
-		{
-			/* Last page always scanned (may need to set nonempty_pages) */
-			Assert(blkno < rel_pages - 1);
-
-			if (skipping_current_range)
-				continue;
-
-			/* Current range is too small to skip -- just scan the page */
-			all_visible_according_to_vm = true;
-		}
-
 		vacrel->scanned_pages++;
 
 		/* Report as block scanned, update error traceback information */
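The InvalidBlockNumber initialization above is what makes the first heap_vac_scan_next_block() call start at block 0: BlockNumber is a uint32 and InvalidBlockNumber is 0xFFFFFFFF, so current_block + 1 wraps around to zero (unsigned overflow is well defined in C). A self-contained check of that assumption:

#include <assert.h>
#include <stdint.h>

typedef uint32_t BlockNumber;
#define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)

int
main(void)
{
	BlockNumber current_block = InvalidBlockNumber;

	/* Unsigned arithmetic wraps modulo 2^32, so "invalid" + 1 == block 0 */
	assert(current_block + 1 == 0);
	return 0;
}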
@@ -1077,47 +1056,146 @@ lazy_scan_heap(LVRelState *vacrel)
 }
 
 /*
- * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
+ * heap_vac_scan_next_block() -- get next block for vacuum to process
  *
- * lazy_scan_heap() calls here every time it needs to set up a new range of
- * blocks to skip via the visibility map.  Caller passes the next block in
- * line.  We return a next_unskippable_block for this range.  When there are
- * no skippable blocks we just return caller's next_block.  The all-visible
- * status of the returned block is set in *next_unskippable_allvis for caller,
- * too.  Block usually won't be all-visible (since it's unskippable), but it
- * can be during aggressive VACUUMs (as well as in certain edge cases).
+ * lazy_scan_heap() calls here every time it needs to get the next block to
+ * prune and vacuum.  The function uses the visibility map, vacuum options,
+ * and various thresholds to skip blocks which do not need to be processed and
+ * sets blkno to the next block to process.
  *
- * Sets *skipping_current_range to indicate if caller should skip this range.
- * Costs and benefits drive our decision.  Very small ranges won't be skipped.
+ * The block number and visibility status of the next block to process are set
+ * in *blkno and *all_visible_according_to_vm.  The return value is false if
+ * there are no further blocks to process.
+ *
+ * vacrel is an in/out parameter here.  Vacuum options and information about
+ * the relation are read.  vacrel->skippedallvis is set if we skip a block
+ * that's all-visible but not all-frozen, to ensure that we don't update
+ * relfrozenxid in that case.  vacrel also holds information about the next
+ * unskippable block, as bookkeeping for this function.
+ */
+static bool
+heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
+						 bool *all_visible_according_to_vm)
+{
+	BlockNumber next_block;
+
+	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
+	next_block = vacrel->current_block + 1;
+
+	/* Have we reached the end of the relation? */
+	if (next_block >= vacrel->rel_pages)
+	{
+		if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
+		{
+			ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
+			vacrel->next_unskippable_vmbuffer = InvalidBuffer;
+		}
+		*blkno = vacrel->rel_pages;
+		return false;
+	}
+
+	/*
+	 * We must be in one of the three following states:
+	 */
+	if (next_block > vacrel->next_unskippable_block ||
+		vacrel->next_unskippable_block == InvalidBlockNumber)
+	{
+		/*
+		 * 1. We have just processed an unskippable block (or we're at the
+		 * beginning of the scan).  Find the next unskippable block using the
+		 * visibility map.
+		 */
+		bool		skipsallvis;
+
+		find_next_unskippable_block(vacrel, &skipsallvis);
+
+		/*
+		 * We now know the next block that we must process.  It can be the
+		 * next block after the one we just processed, or something further
+		 * ahead.  If it's further ahead, we can jump to it, but we choose to
+		 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
+		 * pages.  Since we're reading sequentially, the OS should be doing
+		 * readahead for us, so there's no gain in skipping a page now and
+		 * then.  Skipping such a range might even discourage sequential
+		 * detection.
+		 *
+		 * This test also enables more frequent relfrozenxid advancement
+		 * during non-aggressive VACUUMs.  If the range has any all-visible
+		 * pages then skipping makes updating relfrozenxid unsafe, which is a
+		 * real downside.
+		 */
+		if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
+		{
+			next_block = vacrel->next_unskippable_block;
+			if (skipsallvis)
+				vacrel->skippedallvis = true;
+		}
+	}
+
+	/* Now we must be in one of the two remaining states: */
+	if (next_block < vacrel->next_unskippable_block)
+	{
+		/*
+		 * 2. We are processing a range of blocks that we could have skipped
+		 * but chose not to.  We know that they are all-visible in the VM,
+		 * otherwise they would've been unskippable.
+		 */
+		*blkno = vacrel->current_block = next_block;
+		*all_visible_according_to_vm = true;
+		return true;
+	}
+	else
+	{
+		/*
+		 * 3. We reached the next unskippable block.  Process it.  On next
+		 * iteration, we will be back in state 1.
+		 */
+		Assert(next_block == vacrel->next_unskippable_block);
+
+		*blkno = vacrel->current_block = next_block;
+		*all_visible_according_to_vm = vacrel->next_unskippable_allvis;
+		return true;
+	}
+}
+
+/*
+ * Find the next unskippable block in a vacuum scan using the visibility map.
+ * The next unskippable block and its visibility information are updated in
+ * vacrel.
  *
  * Note: our opinion of which blocks can be skipped can go stale immediately.
  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
  * was concurrently cleared, though.  All that matters is that caller scan all
  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
- * older XIDs/MXIDs.  The vacrel->skippedallvis flag will be set here when the
- * choice to skip such a range is actually made, making everything safe.)
+ * older XIDs/MXIDs.  The *skippedallvis flag will be set here when the choice
+ * to skip such a range is actually made, making everything safe.)
  */
-static BlockNumber
-lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
-			   bool *next_unskippable_allvis, bool *skipping_current_range)
+static void
+find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
 {
-	BlockNumber rel_pages = vacrel->rel_pages,
-				next_unskippable_block = next_block,
-				nskippable_blocks = 0;
-	bool		skipsallvis = false;
+	BlockNumber rel_pages = vacrel->rel_pages;
+	BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
+	Buffer		next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
+	bool		next_unskippable_allvis;
+
+	*skipsallvis = false;
 
-	*next_unskippable_allvis = true;
-	while (next_unskippable_block < rel_pages)
+	for (;;)
 	{
 		uint8		mapbits = visibilitymap_get_status(vacrel->rel,
 													   next_unskippable_block,
-													   vmbuffer);
+													   &next_unskippable_vmbuffer);
 
-		if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
+		next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
+
+		/*
+		 * A block is unskippable if it is not all visible according to the
+		 * visibility map.
+		 */
+		if (!next_unskippable_allvis)
 		{
 			Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
-			*next_unskippable_allvis = false;
 			break;
 		}
@@ -1152,34 +1230,17 @@ lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
 			 * All-visible block is safe to skip in non-aggressive case.  But
 			 * remember that the final range contains such a block for later.
 			 */
-			skipsallvis = true;
+			*skipsallvis = true;
 		}
 
 		vacuum_delay_point();
 		next_unskippable_block++;
-		nskippable_blocks++;
-	}
-
-	/*
-	 * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
-	 * pages.  Since we're reading sequentially, the OS should be doing
-	 * readahead for us, so there's no gain in skipping a page now and then.
-	 * Skipping such a range might even discourage sequential detection.
-	 *
-	 * This test also enables more frequent relfrozenxid advancement during
-	 * non-aggressive VACUUMs.  If the range has any all-visible pages then
-	 * skipping makes updating relfrozenxid unsafe, which is a real downside.
-	 */
-	if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
-		*skipping_current_range = false;
-	else
-	{
-		*skipping_current_range = true;
-		if (skipsallvis)
-			vacrel->skippedallvis = true;
 	}
 
-	return next_unskippable_block;
+	/* write the local variables back to vacrel */
+	vacrel->next_unskippable_block = next_unskippable_block;
+	vacrel->next_unskippable_allvis = next_unskippable_allvis;
+	vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
 }
 
 /*
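Taken together, the two functions split the old lazy_scan_skip() into a state machine (heap_vac_scan_next_block) and a VM probe (find_next_unskippable_block), while preserving the SKIP_PAGES_THRESHOLD policy: an all-visible run is only jumped over when it is long enough that giving up OS readahead is worth it. A standalone sketch of just that skip decision, over an in-memory stand-in for the visibility map (hypothetical harness, not PostgreSQL code; it also models the "last page always scanned" rule noted in the removed lazy_scan_heap() branch above):

#include <stdbool.h>
#include <stdint.h>

typedef uint32_t BlockNumber;
#define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)	/* value used in vacuumlazy.c */

/*
 * Return the next block to scan after "current", where all_visible[] stands
 * in for the visibility map.  Mirrors the policy above: jump over a run of
 * all-visible blocks only when it is at least SKIP_PAGES_THRESHOLD pages.
 */
static BlockNumber
toy_next_block(const bool *all_visible, BlockNumber rel_pages,
			   BlockNumber current)
{
	BlockNumber next = current + 1;
	BlockNumber next_unskippable = next;

	/* Find the next not-all-visible block; the last page never skips */
	while (next_unskippable < rel_pages - 1 && all_visible[next_unskippable])
		next_unskippable++;

	if (next_unskippable - next >= SKIP_PAGES_THRESHOLD)
		return next_unskippable;	/* long run: jump over it (state 1 -> 3) */
	return next;				/* short run: scan block by block (state 2) */
}

Calling toy_next_block() repeatedly reproduces the three states: a long all-visible run collapses into a single jump, while a short run is returned one block at a time with the block treated as all-visible according to the VM.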
@@ -1752,8 +1813,8 @@ lazy_scan_prune(LVRelState *vacrel,
 
 	/*
 	 * Handle setting visibility map bit based on information from the VM (as
-	 * of last lazy_scan_skip() call), and from all_visible and all_frozen
-	 * variables
+	 * of last heap_vac_scan_next_block() call), and from all_visible and
+	 * all_frozen variables
 	 */
 	if (!all_visible_according_to_vm && all_visible)
 	{
@@ -1788,8 +1849,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	/*
 	 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
 	 * page-level bit is clear.  However, it's possible that the bit got
-	 * cleared after lazy_scan_skip() was called, so we must recheck with
-	 * buffer lock before concluding that the VM is corrupt.
+	 * cleared after heap_vac_scan_next_block() was called, so we must recheck
+	 * with buffer lock before concluding that the VM is corrupt.
 	 */
 	else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
 			 visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
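The rewritten comment describes a classic stale-cache recheck: the VM bit observed by heap_vac_scan_next_block() may be cleared concurrently by the time lazy_scan_prune() holds the heap buffer lock, so the bit is re-read under the lock before a mismatch is treated as VM corruption. The same shape in a generic, hypothetical form (pthread-based sketch, not PostgreSQL locking):

#include <stdbool.h>
#include <pthread.h>

typedef struct SharedFlag
{
	pthread_mutex_t lock;
	bool		value;			/* may be cleared concurrently */
} SharedFlag;

/*
 * "cached" was read without the lock and may be stale.  Re-read the
 * authoritative value under the lock before drawing conclusions, just as
 * lazy_scan_prune() rechecks the VM bit with the buffer lock held.
 */
static bool
still_set(SharedFlag *sf, bool cached)
{
	bool		current;

	pthread_mutex_lock(&sf->lock);
	current = sf->value;
	pthread_mutex_unlock(&sf->lock);

	return cached && current;
}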