diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index bd5dcaf86a5c..294494877d9d 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -280,25 +280,41 @@ VACUUM [ ( option [, ...] ) ] [ PARALLEL
- Perform index vacuum and index cleanup phases of VACUUM
- in parallel using integer
- background workers (for the details of each vacuum phase, please
- refer to ). The number of workers used
- to perform the operation is equal to the number of indexes on the
- relation that support parallel vacuum which is limited by the number of
- workers specified with PARALLEL option if any which is
- further limited by .
- An index can participate in parallel vacuum if and only if the size of the
- index is more than .
- Please note that it is not guaranteed that the number of parallel workers
- specified in integer will be
- used during execution. It is possible for a vacuum to run with fewer
- workers than specified, or even with no workers at all. Only one worker
- can be used per index. So parallel workers are launched only when there
- are at least 2 indexes in the table. Workers for
- vacuum are launched before the start of each phase and exit at the end of
- the phase. These behaviors might change in a future release. This
- option can't be used with the FULL option.
+ Perform the heap scanning, index vacuum, and index cleanup phases of
+ VACUUM in parallel using
+ integer background workers
+ (for the details of each vacuum phase, please refer to
+ ).
+
+
+ For heap tables, the number of workers used to perform the heap scanning
+ phase is determined based on the size of the table. A table can participate
+ in parallel heap scanning if and only if the size of the table is more than
+ . During the heap scanning phase,
+ the table's blocks are divided into ranges and shared among the
+ cooperating processes. Each worker process completes the scanning of
+ its assigned range of blocks before requesting an additional range of blocks.
+
+
+ The number of workers used to perform parallel index vacuum and index
+ cleanup is equal to the number of indexes on the relation that support
+ parallel vacuum. An index can participate in parallel vacuum if and only
+ if the size of the index is more than .
+ Only one worker can be used per index. So parallel workers for index vacuum
+ and index cleanup are launched only when there are at least 2
+ indexes in the table.
+
+
+ Workers for vacuum are launched before the start of each phase and exit
+ at the end of the phase. The number of workers for each phase is limited by
+ the number of workers specified with the PARALLEL option,
+ if any, which is further limited by .
+ Please note that in any parallel vacuum phase, it is not guaranteed that the
+ number of parallel workers specified in integer
+ will be used during execution. It is possible for a vacuum to run with fewer
+ workers than specified, or even with no workers at all. These behaviors might
+ change in a future release. This option can't be used with the FULL
+ option.
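As a rough sketch of the sizing policy described above -- no scan workers below min_parallel_table_scan_size, more workers as the table grows -- consider the following illustration in C. It is not the patch's heap_parallel_vacuum_compute_workers(); the function name and the geometric scaling are assumptions borrowed from the planner's compute_parallel_worker().

#include "postgres.h"
#include "optimizer/paths.h"	/* min_parallel_table_scan_size */
#include "storage/block.h"

/*
 * Illustrative sketch only, not part of this patch: derive a worker count
 * from the table size, mirroring the documented policy.
 */
static int
example_compute_scan_workers(BlockNumber rel_pages, int max_workers)
{
	int			nworkers = 0;
	BlockNumber threshold = (BlockNumber) min_parallel_table_scan_size;

	if (rel_pages < threshold)
		return 0;				/* too small to benefit from a parallel scan */

	/* Add one worker each time the table passes another size threshold. */
	while (threshold <= rel_pages && nworkers < max_workers)
	{
		nworkers++;
		threshold *= 3;			/* assumed scaling, as in compute_parallel_worker() */
	}

	return nworkers;
}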
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index ac082fefa77a..dc02be807fbc 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2668,7 +2668,13 @@ static const TableAmRoutine heapam_methods = {
.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
.scan_sample_next_block = heapam_scan_sample_next_block,
- .scan_sample_next_tuple = heapam_scan_sample_next_tuple
+ .scan_sample_next_tuple = heapam_scan_sample_next_tuple,
+
+ .parallel_vacuum_compute_workers = heap_parallel_vacuum_compute_workers,
+ .parallel_vacuum_estimate = heap_parallel_vacuum_estimate,
+ .parallel_vacuum_initialize = heap_parallel_vacuum_initialize,
+ .parallel_vacuum_initialize_worker = heap_parallel_vacuum_initialize_worker,
+ .parallel_vacuum_collect_dead_items = heap_parallel_vacuum_collect_dead_items,
};
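The new callbacks above follow the usual table AM dispatch pattern. A minimal sketch of how a caller might route through one of them is shown below; the wrapper name and the callback's signature are assumptions, since neither appears in this excerpt.

/*
 * Sketch only: dispatch through the table AM, in the style of the existing
 * table_* wrappers in tableam.h. The real wrapper and callback signature in
 * the patch may differ.
 */
static inline int
table_parallel_vacuum_compute_workers_sketch(Relation rel, int nworkers_requested)
{
	return rel->rd_tableam->parallel_vacuum_compute_workers(rel,
															nworkers_requested);
}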
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index f28326bad095..88e13eea0fc4 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -99,6 +99,46 @@
* After pruning and freezing, pages that are newly all-visible and all-frozen
* are marked as such in the visibility map.
*
+ * Parallel Vacuum:
+ *
+ * Lazy vacuum on heap tables supports parallel processing for phase I and
+ * phase II. Before starting phase I, we initialize parallel vacuum state,
+ * ParallelVacuumState, and allocate the TID store in a DSA area if we can
+ * use parallel mode for either of these two phases.
+ *
+ * We may require a different number of parallel vacuum workers for each phase,
+ * depending on factors such as table size and the number of indexes. Parallel
+ * workers are launched at the beginning of each phase and exit at the end of
+ * each phase.
+ *
+ * For the parallel lazy heap scan (i.e. parallel phase I), we employ a parallel
+ * block table scan, controlled by ParallelBlockTableScanDesc, in conjunction
+ * with the read stream. The table is split into multiple chunks, which are
+ * then distributed among parallel workers.
+ *
+ * While vacuum cutoffs are shared between leader and worker processes, each
+ * individual process uses its own GlobalVisState, potentially causing some
+ * workers to remove fewer tuples than optimal. During parallel lazy heap scans,
+ * each worker tracks the oldest existing XID and MXID. The leader computes the
+ * globally oldest existing XID and MXID after the parallel scan, when
+ * gathering the workers' scan results.
+ *
+ * The workers' parallel scan descriptions, ParallelBlockTableScanWorkerData,
+ * are stored in the DSM space, enabling different parallel workers to resume
+ * phase I from their previous state. However, due to the potential presence
+ * of pinned buffers loaded by the read stream's look-ahead mechanism, we
+ * cannot abruptly stop phase I even when the space of dead_items TIDs exceeds
+ * the limit. Instead, once this threshold is surpassed, we begin processing
+ * pages without attempting to retrieve additional blocks until the read
+ * stream is exhausted. While this approach may increase the memory usage, it
+ * typically doesn't pose a significant problem, as processing a few tens to
+ * hundreds of buffers doesn't substantially increase the size of dead_items TIDs.
+ *
+ * If the leader launches fewer workers than in the previous cycle when resuming
+ * the parallel lazy heap scan, some blocks within chunks may remain unscanned.
+ * To address this, the leader completes workers' unfinished scans at the end
+ * of the parallel lazy heap scan (see complete_unfinished_lazy_scan_heap()).
+ *
* Dead TID Storage:
*
* The major space usage for vacuuming is storage for the dead tuple IDs that
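The look-ahead/drain behavior described in the new header comment can be pictured with a minimal, hypothetical read-stream block callback; the names and the ExampleScanState type are assumptions, and the patch's real callback is heap_vac_scan_next_block() further below.

#include "postgres.h"
#include "storage/block.h"
#include "storage/read_stream.h"

typedef struct ExampleScanState
{
	BlockNumber next_block;		/* next block in the assigned chunk */
	BlockNumber chunk_end;		/* one past the last block of the chunk */
	Size		dead_items_mem; /* memory currently used for dead TIDs */
	Size		dead_items_limit;	/* configured memory budget */
} ExampleScanState;

/*
 * Sketch of a read-stream block callback: once the dead-item budget is
 * exceeded we simply stop handing out new blocks, which lets the caller
 * drain the buffers the stream has already pinned via look-ahead.
 */
static BlockNumber
example_scan_next_block(ReadStream *stream, void *callback_private_data,
						void *per_buffer_data)
{
	ExampleScanState *scan = (ExampleScanState *) callback_private_data;

	if (scan->dead_items_mem > scan->dead_items_limit)
		return InvalidBlockNumber;	/* stop requesting; drain what is pinned */

	if (scan->next_block < scan->chunk_end)
		return scan->next_block++;	/* hand out the next block of the chunk */

	return InvalidBlockNumber;	/* chunk exhausted */
}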
@@ -147,6 +187,7 @@
#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "miscadmin.h"
+#include "optimizer/paths.h" /* for min_parallel_table_scan_size */
#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/autovacuum.h"
@@ -214,11 +255,21 @@
*/
#define PREFETCH_SIZE ((BlockNumber) 32)
+/*
+ * DSM keys for parallel lazy vacuum. Unlike other parallel execution code, we
+ * don't need to worry about DSM keys conflicting with plan_node_id, but we do
+ * need to avoid conflicting with the DSM keys used in vacuumparallel.c.
+ */
+#define PARALLEL_LV_KEY_SHARED 0xFFFF0001
+#define PARALLEL_LV_KEY_SCANDESC 0xFFFF0002
+#define PARALLEL_LV_KEY_SCANWORKER 0xFFFF0003
+
/*
* Macro to check if we are in a parallel vacuum. If true, we are in the
* parallel mode and the DSM segment is initialized.
*/
#define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
+#define ParallelHeapVacuumIsActive(vacrel) ((vacrel)->plvstate != NULL)
/* Phases of vacuum during which we report error context. */
typedef enum
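For readers unfamiliar with the DSM table-of-contents machinery these keys feed into, here is a minimal sketch of the usual publish/lookup pattern; the helper names are hypothetical, and the patch's actual setup (heap_parallel_vacuum_initialize() and the worker initialization callback) is not shown in this excerpt.

#include "postgres.h"
#include "access/parallel.h"	/* ParallelContext */
#include "storage/shm_toc.h"

/* Leader side: publish the shared state under PARALLEL_LV_KEY_SHARED. */
static void
example_publish_lv_shared(ParallelContext *pcxt, const ParallelLVShared *src)
{
	ParallelLVShared *shared;

	/* Space must have been reserved earlier with shm_toc_estimate_chunk(). */
	shared = (ParallelLVShared *) shm_toc_allocate(pcxt->toc,
												   sizeof(ParallelLVShared));
	memcpy(shared, src, sizeof(ParallelLVShared));
	shm_toc_insert(pcxt->toc, PARALLEL_LV_KEY_SHARED, shared);
}

/* Worker side: attach to the same object by looking up its key. */
static ParallelLVShared *
example_attach_lv_shared(shm_toc *toc)
{
	return (ParallelLVShared *) shm_toc_lookup(toc, PARALLEL_LV_KEY_SHARED, false);
}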
@@ -256,6 +307,130 @@ typedef enum
#define VAC_BLK_WAS_EAGER_SCANNED (1 << 0)
#define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1)
+/*
+ * Data and counters updated during lazy heap scan.
+ */
+typedef struct LVScanData
+{
+ BlockNumber rel_pages; /* total number of pages */
+
+ BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
+
+ /*
+ * Count of all-visible blocks eagerly scanned (for logging only). This
+ * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
+ */
+ BlockNumber eager_scanned_pages;
+
+ BlockNumber removed_pages; /* # pages removed by relation truncation */
+ BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
+
+ /* # pages newly set all-visible in the VM */
+ BlockNumber vm_new_visible_pages;
+
+ /*
+ * # pages newly set all-visible and all-frozen in the VM. This is a
+ * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
+ * all pages set all-visible, but vm_new_visible_frozen_pages includes
+ * only those which were also set all-frozen.
+ */
+ BlockNumber vm_new_visible_frozen_pages;
+
+ /* # all-visible pages newly set all-frozen in the VM */
+ BlockNumber vm_new_frozen_pages;
+
+ BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
+ BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
+ BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
+
+ /* Counters that follow are only for scanned_pages */
+ int64 tuples_deleted; /* # deleted from table */
+ int64 tuples_frozen; /* # newly frozen */
+ int64 lpdead_items; /* # deleted from indexes */
+ int64 live_tuples; /* # live tuples remaining */
+ int64 recently_dead_tuples; /* # dead, but not yet removable */
+ int64 missed_dead_tuples; /* # removable, but not removed */
+
+ /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid. */
+ TransactionId NewRelfrozenXid;
+ MultiXactId NewRelminMxid;
+ bool skippedallvis;
+} LVScanData;
+
+/*
+ * Struct for information that needs to be shared among parallel workers
+ * for parallel lazy vacuum. All fields are static, set by the leader
+ * process.
+ */
+typedef struct ParallelLVShared
+{
+ bool aggressive;
+ bool skipwithvm;
+
+ /* The current oldest extant XID/MXID shared by the leader process */
+ TransactionId NewRelfrozenXid;
+ MultiXactId NewRelminMxid;
+
+ /* VACUUM operation's cutoffs for freezing and pruning */
+ struct VacuumCutoffs cutoffs;
+} ParallelLVShared;
+
+/*
+ * Per-worker data: the scan description, statistics counters, and
+ * miscellaneous data that need to be shared with the leader.
+ */
+typedef struct ParallelLVScanWorker
+{
+ /* Have last_blkno and pbscanworkdata been initialized? */
+ bool scan_inited;
+
+ /* The last processed block number */
+ pg_atomic_uint32 last_blkno;
+
+ /* per-worker parallel table scan state */
+ ParallelBlockTableScanWorkerData pbscanworkdata;
+
+ /* per-worker scan data and counters */
+ LVScanData scandata;
+} ParallelLVScanWorker;
+
+/*
+ * Struct to store parallel lazy vacuum working state.
+ */
+typedef struct ParallelLVState
+{
+ /* Parallel scan description shared among parallel workers */
+ ParallelBlockTableScanDesc pbscan;
+
+ /* Per-worker parallel table scan state */
+ ParallelBlockTableScanWorker pbscanwork;
+
+ /* Shared static information */
+ ParallelLVShared *shared;
+
+ /* Per-worker scan data. NULL for the leader process */
+ ParallelLVScanWorker *scanworker;
+} ParallelLVState;
+
+/*
+ * Struct for the leader process in parallel lazy vacuum.
+ */
+typedef struct ParallelLVLeader
+{
+ /* Shared memory size for each shared object */
+ Size pbscan_len;
+ Size shared_len;
+ Size scanworker_len;
+
+ /* The number of workers launched for parallel lazy heap scan */
+ int nworkers_launched;
+
+ /*
+ * Points to the array of all per-worker scan states stored in the DSM area.
+ */
+ ParallelLVScanWorker *scanworkers;
+} ParallelLVLeader;
+
typedef struct LVRelState
{
/* Target heap relation and its indexes */
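As a hedged illustration of how the *_len fields in ParallelLVLeader above might be filled during the shared-memory estimation step, consider the sketch below; the helper name is hypothetical, and the patch's actual sizing is done by heap_parallel_vacuum_estimate(), whose body is not part of this excerpt.

#include "postgres.h"
#include "access/parallel.h"
#include "access/tableam.h"		/* table_block_parallelscan_estimate() */
#include "storage/shmem.h"		/* mul_size() */

static void
example_estimate_lv_leader(ParallelContext *pcxt, Relation rel,
						   ParallelLVLeader *leader, int nworkers)
{
	/* Shared static information: one copy. */
	leader->shared_len = sizeof(ParallelLVShared);
	shm_toc_estimate_chunk(&pcxt->estimator, leader->shared_len);

	/* Parallel block scan descriptor: one copy, sized by the table AM. */
	leader->pbscan_len = table_block_parallelscan_estimate(rel);
	shm_toc_estimate_chunk(&pcxt->estimator, leader->pbscan_len);

	/* Per-worker scan state: one ParallelLVScanWorker per planned worker. */
	leader->scanworker_len = mul_size(sizeof(ParallelLVScanWorker),
									  (Size) nworkers);
	shm_toc_estimate_chunk(&pcxt->estimator, leader->scanworker_len);

	shm_toc_estimate_keys(&pcxt->estimator, 3);
}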
@@ -282,10 +457,6 @@ typedef struct LVRelState
/* VACUUM operation's cutoffs for freezing and pruning */
struct VacuumCutoffs cutoffs;
GlobalVisState *vistest;
- /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
- TransactionId NewRelfrozenXid;
- MultiXactId NewRelminMxid;
- bool skippedallvis;
/* Error reporting state */
char *dbname;
@@ -310,35 +481,8 @@ typedef struct LVRelState
TidStore *dead_items; /* TIDs whose index tuples we'll delete */
VacDeadItemsInfo *dead_items_info;
- BlockNumber rel_pages; /* total number of pages */
- BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
-
- /*
- * Count of all-visible blocks eagerly scanned (for logging only). This
- * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
- */
- BlockNumber eager_scanned_pages;
-
- BlockNumber removed_pages; /* # pages removed by relation truncation */
- BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
-
- /* # pages newly set all-visible in the VM */
- BlockNumber vm_new_visible_pages;
-
- /*
- * # pages newly set all-visible and all-frozen in the VM. This is a
- * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
- * all pages set all-visible, but vm_new_visible_frozen_pages includes
- * only those which were also set all-frozen.
- */
- BlockNumber vm_new_visible_frozen_pages;
-
- /* # all-visible pages newly set all-frozen in the VM */
- BlockNumber vm_new_frozen_pages;
-
- BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
- BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
- BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
+ /* Data and counters updated during lazy heap scan */
+ LVScanData *scan_data;
/* Statistics output by us, for table */
double new_rel_tuples; /* new estimated total # of tuples */
@@ -348,13 +492,12 @@ typedef struct LVRelState
/* Instrumentation counters */
int num_index_scans;
- /* Counters that follow are only for scanned_pages */
- int64 tuples_deleted; /* # deleted from table */
- int64 tuples_frozen; /* # newly frozen */
- int64 lpdead_items; /* # deleted from indexes */
- int64 live_tuples; /* # live tuples remaining */
- int64 recently_dead_tuples; /* # dead, but not yet removable */
- int64 missed_dead_tuples; /* # removable, but not removed */
+
+ /* Last processed block number */
+ BlockNumber last_blkno;
+
+ /* Next block to check for FSM vacuum */
+ BlockNumber next_fsm_block_to_vacuum;
/* State maintained by heap_vac_scan_next_block() */
BlockNumber current_block; /* last block returned */
@@ -363,6 +506,16 @@ typedef struct LVRelState
bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
+ /* Fields used for parallel lazy vacuum */
+
+ /* Parallel lazy vacuum working state */
+ ParallelLVState *plvstate;
+
+ /*
+ * The leader state for parallel lazy vacuum. NULL for parallel workers.
+ */
+ ParallelLVLeader *leader;
+
/* State related to managing eager scanning of all-visible pages */
/*
@@ -422,12 +575,14 @@ typedef struct LVSavedErrInfo
/* non-export function prototypes */
static void lazy_scan_heap(LVRelState *vacrel);
+static void do_lazy_scan_heap(LVRelState *vacrel, bool do_vacuum);
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
VacuumParams *params);
static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
void *callback_private_data,
void *per_buffer_data);
-static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
+static bool find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis,
+ BlockNumber start_blk, BlockNumber end_blk);
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
bool sharelock, Buffer vmbuffer);
@@ -438,6 +593,12 @@ static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
bool *has_lpdead_items);
+static void do_parallel_lazy_scan_heap(LVRelState *vacrel);
+static BlockNumber parallel_lazy_scan_compute_min_scan_block(LVRelState *vacrel);
+static void complete_unfinished_lazy_scan_heap(LVRelState *vacrel);
+static void parallel_lazy_scan_heap_begin(LVRelState *vacrel);
+static void parallel_lazy_scan_heap_end(LVRelState *vacrel);
+static void parallel_lazy_scan_gather_scan_results(LVRelState *vacrel);
static void lazy_vacuum(LVRelState *vacrel);
static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
static void lazy_vacuum_heap_rel(LVRelState *vacrel);
@@ -462,6 +623,7 @@ static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
static void dead_items_alloc(LVRelState *vacrel, int nworkers);
static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
int num_offsets);
+static bool dead_items_check_memory_limit(LVRelState *vacrel);
static void dead_items_reset(LVRelState *vacrel);
static void dead_items_cleanup(LVRelState *vacrel);
static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
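The new dead_items_check_memory_limit() helper is only declared in this excerpt; judging from the inline check it replaces later in the diff (TidStoreMemoryUsage() compared against dead_items_info->max_bytes), it presumably amounts to something like the following sketch.

/*
 * Sketch only -- the helper's real definition is not part of this hunk; it
 * presumably wraps the memory check that used to be written inline.
 */
static bool
dead_items_check_memory_limit_sketch(LVRelState *vacrel)
{
	return vacrel->dead_items_info->num_items > 0 &&
		TidStoreMemoryUsage(vacrel->dead_items) >
		vacrel->dead_items_info->max_bytes;
}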
@@ -517,6 +679,22 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
if (vacrel->aggressive)
return;
+ /*
+ * Disable eager scanning if parallel lazy vacuum is enabled.
+ *
+ * One might think that it would make sense to use eager scanning even
+ * during parallel lazy vacuum, but parallel vacuum is available only via
+ * the VACUUM command and is not something that happens frequently, which
+ * does not fit the purpose of eager scanning. It would also complicate
+ * the code. So we disable it for now.
+ *
+ * XXX: this limitation might need to be eliminated in the future for
+ * example when we use parallel vacuum also in autovacuum.
+ */
+ if (ParallelHeapVacuumIsActive(vacrel))
+ return;
+
/*
* Aggressively vacuuming a small relation shouldn't take long, so it
* isn't worth amortizing. We use two times the region size as the size
@@ -524,7 +702,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
* the first region, making the second region the first to be eager
* scanned normally.
*/
- if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
+ if (vacrel->scan_data->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
return;
/*
@@ -616,6 +794,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
BufferAccessStrategy bstrategy)
{
LVRelState *vacrel;
+ LVScanData *scan_data;
bool verbose,
instrument,
skipwithvm,
@@ -730,14 +909,25 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
}
/* Initialize page counters explicitly (be tidy) */
- vacrel->scanned_pages = 0;
- vacrel->eager_scanned_pages = 0;
- vacrel->removed_pages = 0;
- vacrel->new_frozen_tuple_pages = 0;
- vacrel->lpdead_item_pages = 0;
- vacrel->missed_dead_pages = 0;
- vacrel->nonempty_pages = 0;
- /* dead_items_alloc allocates vacrel->dead_items later on */
+ scan_data = palloc(sizeof(LVScanData));
+ scan_data->scanned_pages = 0;
+ scan_data->eager_scanned_pages = 0;
+ scan_data->removed_pages = 0;
+ scan_data->new_frozen_tuple_pages = 0;
+ scan_data->lpdead_item_pages = 0;
+ scan_data->missed_dead_pages = 0;
+ scan_data->nonempty_pages = 0;
+ scan_data->tuples_deleted = 0;
+ scan_data->tuples_frozen = 0;
+ scan_data->lpdead_items = 0;
+ scan_data->live_tuples = 0;
+ scan_data->recently_dead_tuples = 0;
+ scan_data->missed_dead_tuples = 0;
+ scan_data->vm_new_visible_pages = 0;
+ scan_data->vm_new_visible_frozen_pages = 0;
+ scan_data->vm_new_frozen_pages = 0;
+ scan_data->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
+ vacrel->scan_data = scan_data;
/* Allocate/initialize output statistics state */
vacrel->new_rel_tuples = 0;
@@ -747,17 +937,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/* Initialize remaining counters (be tidy) */
vacrel->num_index_scans = 0;
- vacrel->tuples_deleted = 0;
- vacrel->tuples_frozen = 0;
- vacrel->lpdead_items = 0;
- vacrel->live_tuples = 0;
- vacrel->recently_dead_tuples = 0;
- vacrel->missed_dead_tuples = 0;
+ vacrel->next_fsm_block_to_vacuum = 0;
- vacrel->vm_new_visible_pages = 0;
- vacrel->vm_new_visible_frozen_pages = 0;
- vacrel->vm_new_frozen_pages = 0;
- vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
+ /* dead_items_alloc allocates vacrel->dead_items later on */
/*
* Get cutoffs that determine which deleted tuples are considered DEAD,
@@ -778,15 +960,15 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
vacrel->vistest = GlobalVisTestFor(rel);
/* Initialize state used to track oldest extant XID/MXID */
- vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
- vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
+ vacrel->scan_data->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
+ vacrel->scan_data->NewRelminMxid = vacrel->cutoffs.OldestMxact;
/*
* Initialize state related to tracking all-visible page skipping. This is
* very important to determine whether or not it is safe to advance the
* relfrozenxid/relminmxid.
*/
- vacrel->skippedallvis = false;
+ vacrel->scan_data->skippedallvis = false;
skipwithvm = true;
if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
{
@@ -800,13 +982,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->skipwithvm = skipwithvm;
- /*
- * Set up eager scan tracking state. This must happen after determining
- * whether or not the vacuum must be aggressive, because only normal
- * vacuums use the eager scan algorithm.
- */
- heap_vacuum_eager_scan_setup(vacrel, params);
-
if (verbose)
{
if (vacrel->aggressive)
@@ -831,6 +1006,13 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
lazy_check_wraparound_failsafe(vacrel);
dead_items_alloc(vacrel, params->nworkers);
+ /*
+ * Set up eager scan tracking state. This must happen after determining
+ * whether or not the vacuum must be aggressive, because only normal
+ * vacuums use the eager scan algorithm.
+ */
+ heap_vacuum_eager_scan_setup(vacrel, params);
+
/*
* Call lazy_scan_heap to perform all required heap pruning, index
* vacuuming, and heap vacuuming (plus related processing)
@@ -874,15 +1056,15 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
* Non-aggressive VACUUMs may advance them by any amount, or not at all.
*/
- Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
+ Assert(vacrel->scan_data->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
vacrel->cutoffs.relfrozenxid,
- vacrel->NewRelfrozenXid));
- Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
+ vacrel->scan_data->NewRelfrozenXid));
+ Assert(vacrel->scan_data->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
vacrel->cutoffs.relminmxid,
- vacrel->NewRelminMxid));
- if (vacrel->skippedallvis)
+ vacrel->scan_data->NewRelminMxid));
+ if (vacrel->scan_data->skippedallvis)
{
/*
* Must keep original relfrozenxid in a non-aggressive VACUUM that
@@ -890,15 +1072,16 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* values will have missed unfrozen XIDs from the pages we skipped.
*/
Assert(!vacrel->aggressive);
- vacrel->NewRelfrozenXid = InvalidTransactionId;
- vacrel->NewRelminMxid = InvalidMultiXactId;
+ vacrel->scan_data->NewRelfrozenXid = InvalidTransactionId;
+ vacrel->scan_data->NewRelminMxid = InvalidMultiXactId;
}
/*
* For safety, clamp relallvisible to be not more than what we're setting
* pg_class.relpages to
*/
- new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
+ new_rel_pages = vacrel->scan_data->rel_pages; /* After possible rel
+ * truncation */
visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;
@@ -921,7 +1104,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
new_rel_allvisible, new_rel_allfrozen,
vacrel->nindexes > 0,
- vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
+ vacrel->scan_data->NewRelfrozenXid,
+ vacrel->scan_data->NewRelminMxid,
&frozenxid_updated, &minmulti_updated, false);
/*
@@ -937,8 +1121,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
pgstat_report_vacuum(RelationGetRelid(rel),
rel->rd_rel->relisshared,
Max(vacrel->new_live_tuples, 0),
- vacrel->recently_dead_tuples +
- vacrel->missed_dead_tuples,
+ vacrel->scan_data->recently_dead_tuples +
+ vacrel->scan_data->missed_dead_tuples,
starttime);
pgstat_progress_end_command();
@@ -1012,23 +1196,23 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->relname,
vacrel->num_index_scans);
appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
- vacrel->removed_pages,
+ vacrel->scan_data->removed_pages,
new_rel_pages,
- vacrel->scanned_pages,
+ vacrel->scan_data->scanned_pages,
orig_rel_pages == 0 ? 100.0 :
- 100.0 * vacrel->scanned_pages /
+ 100.0 * vacrel->scan_data->scanned_pages /
orig_rel_pages,
- vacrel->eager_scanned_pages);
+ vacrel->scan_data->eager_scanned_pages);
appendStringInfo(&buf,
_("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
- vacrel->tuples_deleted,
+ vacrel->scan_data->tuples_deleted,
(int64) vacrel->new_rel_tuples,
- vacrel->recently_dead_tuples);
- if (vacrel->missed_dead_tuples > 0)
+ vacrel->scan_data->recently_dead_tuples);
+ if (vacrel->scan_data->missed_dead_tuples > 0)
appendStringInfo(&buf,
_("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
- vacrel->missed_dead_tuples,
- vacrel->missed_dead_pages);
+ vacrel->scan_data->missed_dead_tuples,
+ vacrel->scan_data->missed_dead_pages);
diff = (int32) (ReadNextTransactionId() -
vacrel->cutoffs.OldestXmin);
appendStringInfo(&buf,
@@ -1036,33 +1220,33 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->cutoffs.OldestXmin, diff);
if (frozenxid_updated)
{
- diff = (int32) (vacrel->NewRelfrozenXid -
+ diff = (int32) (vacrel->scan_data->NewRelfrozenXid -
vacrel->cutoffs.relfrozenxid);
appendStringInfo(&buf,
_("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
- vacrel->NewRelfrozenXid, diff);
+ vacrel->scan_data->NewRelfrozenXid, diff);
}
if (minmulti_updated)
{
- diff = (int32) (vacrel->NewRelminMxid -
+ diff = (int32) (vacrel->scan_data->NewRelminMxid -
vacrel->cutoffs.relminmxid);
appendStringInfo(&buf,
_("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
- vacrel->NewRelminMxid, diff);
+ vacrel->scan_data->NewRelminMxid, diff);
}
appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
- vacrel->new_frozen_tuple_pages,
+ vacrel->scan_data->new_frozen_tuple_pages,
orig_rel_pages == 0 ? 100.0 :
- 100.0 * vacrel->new_frozen_tuple_pages /
+ 100.0 * vacrel->scan_data->new_frozen_tuple_pages /
orig_rel_pages,
- vacrel->tuples_frozen);
+ vacrel->scan_data->tuples_frozen);
appendStringInfo(&buf,
_("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
- vacrel->vm_new_visible_pages,
- vacrel->vm_new_visible_frozen_pages +
- vacrel->vm_new_frozen_pages,
- vacrel->vm_new_frozen_pages);
+ vacrel->scan_data->vm_new_visible_pages,
+ vacrel->scan_data->vm_new_visible_frozen_pages +
+ vacrel->scan_data->vm_new_frozen_pages,
+ vacrel->scan_data->vm_new_frozen_pages);
if (vacrel->do_index_vacuuming)
{
if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
@@ -1082,10 +1266,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
}
appendStringInfo(&buf, msgfmt,
- vacrel->lpdead_item_pages,
+ vacrel->scan_data->lpdead_item_pages,
orig_rel_pages == 0 ? 100.0 :
- 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
- vacrel->lpdead_items);
+ 100.0 * vacrel->scan_data->lpdead_item_pages / orig_rel_pages,
+ vacrel->scan_data->lpdead_items);
for (int i = 0; i < vacrel->nindexes; i++)
{
IndexBulkDeleteResult *istat = vacrel->indstats[i];
@@ -1198,13 +1382,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
static void
lazy_scan_heap(LVRelState *vacrel)
{
- ReadStream *stream;
- BlockNumber rel_pages = vacrel->rel_pages,
- blkno = 0,
- next_fsm_block_to_vacuum = 0;
- BlockNumber orig_eager_scan_success_limit =
- vacrel->eager_scan_remaining_successes; /* for logging */
- Buffer vmbuffer = InvalidBuffer;
+ BlockNumber rel_pages = vacrel->scan_data->rel_pages;
const int initprog_index[] = {
PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
@@ -1225,6 +1403,73 @@ lazy_scan_heap(LVRelState *vacrel)
vacrel->next_unskippable_eager_scanned = false;
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
+ /* Do the actual work */
+ if (ParallelHeapVacuumIsActive(vacrel))
+ do_parallel_lazy_scan_heap(vacrel);
+ else
+ do_lazy_scan_heap(vacrel, true);
+
+ /*
+ * Report that everything is now scanned. We never skip scanning the last
+ * block in the relation, so we can pass rel_pages here.
+ */
+ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
+ rel_pages);
+
+ /* now we can compute the new value for pg_class.reltuples */
+ vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
+ vacrel->scan_data->scanned_pages,
+ vacrel->scan_data->live_tuples);
+
+ /*
+ * Also compute the total number of surviving heap entries. In the
+ * (unlikely) scenario that new_live_tuples is -1, take it as zero.
+ */
+ vacrel->new_rel_tuples =
+ Max(vacrel->new_live_tuples, 0) + vacrel->scan_data->recently_dead_tuples +
+ vacrel->scan_data->missed_dead_tuples;
+
+ /*
+ * Do index vacuuming (call each index's ambulkdelete routine), then do
+ * related heap vacuuming
+ */
+ if (vacrel->dead_items_info->num_items > 0)
+ lazy_vacuum(vacrel);
+
+ /*
+ * Vacuum the remainder of the Free Space Map. We must do this whether or
+ * not there were indexes, and whether or not we bypassed index vacuuming.
+ * We can pass rel_pages here because we never skip scanning the last
+ * block of the relation.
+ */
+ if (rel_pages > vacrel->next_fsm_block_to_vacuum)
+ FreeSpaceMapVacuumRange(vacrel->rel, vacrel->next_fsm_block_to_vacuum, rel_pages);
+
+ /* report all blocks vacuumed */
+ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
+
+ /* Do final index cleanup (call each index's amvacuumcleanup routine) */
+ if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
+ lazy_cleanup_all_indexes(vacrel);
+}
+
+/*
+ * Workhorse for lazy_scan_heap().
+ *
+ * If do_vacuum is true, we stop the lazy heap scan when the space of dead_items
+ * TIDs exceeds the limit, invoke a cycle of index and heap vacuuming, and then
+ * resume the scan. If it's false, we continue scanning until the read stream is
+ * exhausted.
+ */
+static void
+do_lazy_scan_heap(LVRelState *vacrel, bool do_vacuum)
+{
+ ReadStream *stream;
+ BlockNumber blkno = InvalidBlockNumber;
+ BlockNumber orig_eager_scan_success_limit =
+ vacrel->eager_scan_remaining_successes; /* for logging */
+ Buffer vmbuffer = InvalidBuffer;
+
/*
* Set up the read stream for vacuum's first pass through the heap.
*
@@ -1259,21 +1504,21 @@ lazy_scan_heap(LVRelState *vacrel)
* that point. This check also provides failsafe coverage for the
* one-pass strategy, and the two-pass strategy with the index_cleanup
* param set to 'off'.
+ *
+ * The failsafe check is done only by the leader process.
*/
- if (vacrel->scanned_pages > 0 &&
- vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
+ if (!IsParallelWorker() &&
+ vacrel->scan_data->scanned_pages > 0 &&
+ vacrel->scan_data->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
lazy_check_wraparound_failsafe(vacrel);
/*
* Consider if we definitely have enough space to process TIDs on page
* already. If we are close to overrunning the available space for
* dead_items TIDs, pause and do a cycle of vacuuming before we tackle
- * this page. However, let's force at least one page-worth of tuples
- * to be stored as to ensure we do at least some work when the memory
- * configured is so low that we run out before storing anything.
+ * this page.
*/
- if (vacrel->dead_items_info->num_items > 0 &&
- TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
+ if (do_vacuum && dead_items_check_memory_limit(vacrel))
{
/*
* Before beginning index vacuuming, we release any pin we may
@@ -1296,15 +1541,16 @@ lazy_scan_heap(LVRelState *vacrel)
* upper-level FSM pages. Note that blkno is the previously
* processed block.
*/
- FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
+ FreeSpaceMapVacuumRange(vacrel->rel, vacrel->next_fsm_block_to_vacuum,
blkno + 1);
- next_fsm_block_to_vacuum = blkno;
+ vacrel->next_fsm_block_to_vacuum = blkno;
/* Report that we are once again scanning the heap */
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_SCAN_HEAP);
}
+ /* Read the next block to process */
buf = read_stream_next_buffer(stream, &per_buffer_data);
/* The relation is exhausted. */
@@ -1314,11 +1560,11 @@ lazy_scan_heap(LVRelState *vacrel)
blk_info = *((uint8 *) per_buffer_data);
CheckBufferIsPinnedOnce(buf);
page = BufferGetPage(buf);
- blkno = BufferGetBlockNumber(buf);
+ blkno = vacrel->last_blkno = BufferGetBlockNumber(buf);
- vacrel->scanned_pages++;
+ vacrel->scan_data->scanned_pages++;
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
- vacrel->eager_scanned_pages++;
+ vacrel->scan_data->eager_scanned_pages++;
/* Report as block scanned, update error traceback information */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
@@ -1474,13 +1720,36 @@ lazy_scan_heap(LVRelState *vacrel)
* visible on upper FSM pages. This is done after vacuuming if the
* table has indexes. There will only be newly-freed space if we
* held the cleanup lock and lazy_scan_prune() was called.
+ *
+ * During parallel lazy heap scanning, only the leader process
+ * vacuums the FSM. However, we cannot vacuum the FSM for blocks
+ * up to 'blk' because there may be un-scanned blocks or blocks
+ * being processed by workers before this point. Instead, parallel
+ * workers advertise the block numbers they have just processed,
+ * and the leader vacuums the FSM up to the smallest block number
+ * among them. This approach ensures we vacuum the FSM for
+ * consecutive processed blocks.
*/
if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
- blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
+ blkno - vacrel->next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
{
- FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
+ if (IsParallelWorker())
+ {
+ pg_atomic_write_u32(&(vacrel->plvstate->scanworker->last_blkno),
blkno);
- next_fsm_block_to_vacuum = blkno;
+ }
+ else
+ {
+ BlockNumber fsmvac_upto = blkno;
+
+ if (ParallelHeapVacuumIsActive(vacrel))
+ fsmvac_upto = parallel_lazy_scan_compute_min_scan_block(vacrel);
+
+ FreeSpaceMapVacuumRange(vacrel->rel, vacrel->next_fsm_block_to_vacuum,
+ fsmvac_upto);
+ }
+
+ vacrel->next_fsm_block_to_vacuum = blkno;
}
}
else
@@ -1491,50 +1760,7 @@ lazy_scan_heap(LVRelState *vacrel)
if (BufferIsValid(vmbuffer))
ReleaseBuffer(vmbuffer);
- /*
- * Report that everything is now scanned. We never skip scanning the last
- * block in the relation, so we can pass rel_pages here.
- */
- pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
- rel_pages);
-
- /* now we can compute the new value for pg_class.reltuples */
- vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
- vacrel->scanned_pages,
- vacrel->live_tuples);
-
- /*
- * Also compute the total number of surviving heap entries. In the
- * (unlikely) scenario that new_live_tuples is -1, take it as zero.
- */
- vacrel->new_rel_tuples =
- Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
- vacrel->missed_dead_tuples;
-
read_stream_end(stream);
-
- /*
- * Do index vacuuming (call each index's ambulkdelete routine), then do
- * related heap vacuuming
- */
- if (vacrel->dead_items_info->num_items > 0)
- lazy_vacuum(vacrel);
-
- /*
- * Vacuum the remainder of the Free Space Map. We must do this whether or
- * not there were indexes, and whether or not we bypassed index vacuuming.
- * We can pass rel_pages here because we never skip scanning the last
- * block of the relation.
- */
- if (rel_pages > next_fsm_block_to_vacuum)
- FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
-
- /* report all blocks vacuumed */
- pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
-
- /* Do final index cleanup (call each index's amvacuumcleanup routine) */
- if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
- lazy_cleanup_all_indexes(vacrel);
}
/*
@@ -1548,7 +1774,8 @@ lazy_scan_heap(LVRelState *vacrel)
* heap_vac_scan_next_block() uses the visibility map, vacuum options, and
* various thresholds to skip blocks which do not need to be processed and
* returns the next block to process or InvalidBlockNumber if there are no
- * remaining blocks.
+ * remaining blocks or the space of dead_items TIDs reaches the limit (only
+ * in parallel lazy vacuum cases).
*
* The visibility status of the next block to process and whether or not it
* was eager scanned is set in the per_buffer_data.
@@ -1556,7 +1783,7 @@ lazy_scan_heap(LVRelState *vacrel)
* callback_private_data contains a reference to the LVRelState, passed to the
* read stream API during stream setup. The LVRelState is an in/out parameter
* here (locally named `vacrel`). Vacuum options and information about the
- * relation are read from it. vacrel->skippedallvis is set if we skip a block
+ * relation are read from it. vacrel->scan_data->skippedallvis is set if we skip a block
* that's all-visible but not all-frozen (to ensure that we don't update
* relfrozenxid in that case). vacrel also holds information about the next
* unskippable block -- as bookkeeping for this function.
@@ -1570,11 +1797,37 @@ heap_vac_scan_next_block(ReadStream *stream,
LVRelState *vacrel = callback_private_data;
uint8 blk_info = 0;
- /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
- next_block = vacrel->current_block + 1;
+retry:
+ next_block = InvalidBlockNumber;
+
+ /* Get the next block to process */
+ if (ParallelHeapVacuumIsActive(vacrel))
+ {
+ /*
+ * Stop returning new blocks to the read stream if we are close to
+ * overrunning the available space for dead_items TIDs, so that the read
+ * stream only drains the buffers already pinned in its queue until it
+ * is exhausted. See the comments atop this file for details.
+ */
+ if (!dead_items_check_memory_limit(vacrel))
+ {
+ /*
+ * table_block_parallelscan_nextpage() returns InvalidBlockNumber
+ * if there are no remaining blocks.
+ */
+ next_block = table_block_parallelscan_nextpage(vacrel->rel,
+ vacrel->plvstate->pbscanwork,
+ vacrel->plvstate->pbscan);
+ }
+ }
+ else
+ {
+ /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
+ next_block = vacrel->current_block + 1;
+ }
/* Have we reached the end of the relation? */
- if (next_block >= vacrel->rel_pages)
+ if (!BlockNumberIsValid(next_block) || next_block >= vacrel->scan_data->rel_pages)
{
if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
{
@@ -1596,8 +1849,42 @@ heap_vac_scan_next_block(ReadStream *stream,
* visibility map.
*/
bool skipsallvis;
+ bool found;
+ BlockNumber end_block;
+ BlockNumber nblocks_skip;
+
+ if (ParallelHeapVacuumIsActive(vacrel))
+ {
+ /* We look for the next unskippable block within the chunk */
+ end_block = next_block +
+ vacrel->plvstate->pbscanwork->phsw_chunk_remaining + 1;
+ }
+ else
+ end_block = vacrel->scan_data->rel_pages;
- find_next_unskippable_block(vacrel, &skipsallvis);
+ found = find_next_unskippable_block(vacrel, &skipsallvis, next_block, end_block);
+
+ /*
+ * We must have found the next unskippable block within the specified
+ * range in non-parallel cases as the end_block is always the last
+ * block + 1 and we must scan the last block.
+ */
+ Assert(found || ParallelHeapVacuumIsActive(vacrel));
+
+ if (!found)
+ {
+ if (skipsallvis)
+ vacrel->scan_data->skippedallvis = true;
+
+ /*
+ * Skip all remaining blocks in the current chunk, and retry with
+ * the next chunk.
+ */
+ vacrel->plvstate->pbscanwork->phsw_chunk_remaining = 0;
+ goto retry;
+ }
+
+ Assert(vacrel->next_unskippable_block < end_block);
/*
* We now know the next block that we must process. It can be the
@@ -1614,11 +1901,20 @@ heap_vac_scan_next_block(ReadStream *stream,
* pages then skipping makes updating relfrozenxid unsafe, which is a
* real downside.
*/
- if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
+ nblocks_skip = vacrel->next_unskippable_block - next_block;
+ if (nblocks_skip >= SKIP_PAGES_THRESHOLD)
{
- next_block = vacrel->next_unskippable_block;
if (skipsallvis)
- vacrel->skippedallvis = true;
+ vacrel->scan_data->skippedallvis = true;
+
+ /* Tell the parallel scans to skip blocks */
+ if (ParallelHeapVacuumIsActive(vacrel))
+ {
+ vacrel->plvstate->pbscanwork->phsw_chunk_remaining -= nblocks_skip;
+ Assert(vacrel->plvstate->pbscanwork->phsw_chunk_remaining > 0);
+ }
+
+ next_block = vacrel->next_unskippable_block;
}
}
@@ -1654,9 +1950,11 @@ heap_vac_scan_next_block(ReadStream *stream,
}
/*
- * Find the next unskippable block in a vacuum scan using the visibility map.
- * The next unskippable block and its visibility information is updated in
- * vacrel.
+ * Find the next unskippable block in a vacuum scan using the visibility map,
+ * within the range from 'start' (inclusive) to 'end' (exclusive).
+ *
+ * If found, the next unskippable block and its visibility information are
+ * updated in vacrel. Otherwise, return false and reset the information in vacrel.
*
* Note: our opinion of which blocks can be skipped can go stale immediately.
* It's okay if caller "misses" a page whose all-visible or all-frozen marking
@@ -1666,22 +1964,32 @@ heap_vac_scan_next_block(ReadStream *stream,
* older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
* to skip such a range is actually made, making everything safe.)
*/
-static void
-find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
+static bool
+find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis,
+ BlockNumber start, BlockNumber end)
{
- BlockNumber rel_pages = vacrel->rel_pages;
- BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
+ BlockNumber rel_pages = vacrel->scan_data->rel_pages;
+ BlockNumber next_unskippable_block = start;
Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
bool next_unskippable_eager_scanned = false;
bool next_unskippable_allvis;
+ bool found = true;
*skipsallvis = false;
for (;; next_unskippable_block++)
{
- uint8 mapbits = visibilitymap_get_status(vacrel->rel,
- next_unskippable_block,
- &next_unskippable_vmbuffer);
+ uint8 mapbits;
+
+ /* Reach the end of range? */
+ if (next_unskippable_block >= end)
+ {
+ found = false;
+ break;
+ }
+
+ mapbits = visibilitymap_get_status(vacrel->rel, next_unskippable_block,
+ &next_unskippable_vmbuffer);
next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
@@ -1757,11 +2065,274 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
*skipsallvis = true;
}
- /* write the local variables back to vacrel */
- vacrel->next_unskippable_block = next_unskippable_block;
- vacrel->next_unskippable_allvis = next_unskippable_allvis;
- vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
- vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
+ if (found)
+ {
+ /* write the local variables back to vacrel */
+ vacrel->next_unskippable_block = next_unskippable_block;
+ vacrel->next_unskippable_allvis = next_unskippable_allvis;
+ vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
+ vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
+ }
+ else
+ {
+ if (BufferIsValid(next_unskippable_vmbuffer))
+ ReleaseBuffer(next_unskippable_vmbuffer);
+
+ /*
+ * There is no unskippable block in the specified range. Reset the
+ * related fields in vacrel.
+ */
+ vacrel->next_unskippable_block = InvalidBlockNumber;
+ vacrel->next_unskippable_allvis = false;
+ vacrel->next_unskippable_eager_scanned = false;
+ vacrel->next_unskippable_vmbuffer = InvalidBuffer;
+ }
+
+ return found;
+}
+
+/*
+ * A parallel variant of do_lazy_scan_heap(). The leader process launches
+ * parallel workers to scan the heap in parallel.
+ */
+static void
+do_parallel_lazy_scan_heap(LVRelState *vacrel)
+{
+ ParallelBlockTableScanWorkerData pbscanworkdata;
+
+ Assert(ParallelHeapVacuumIsActive(vacrel));
+ Assert(!IsParallelWorker());
+
+ /*
+ * Setup the parallel scan description for the leader to join as a worker.
+ */
+ table_block_parallelscan_startblock_init(vacrel->rel,
+ &pbscanworkdata,
+ vacrel->plvstate->pbscan);
+ vacrel->plvstate->pbscanwork = &pbscanworkdata;
+
+ for (;;)
+ {
+ BlockNumber fsmvac_upto;
+
+ /* Launch parallel workers */
+ parallel_lazy_scan_heap_begin(vacrel);
+
+ /*
+ * Do lazy heap scan until the read stream is exhausted. We will stop
+ * retrieving new blocks for the read stream once the space of
+ * dead_items TIDs exceeds the limit.
+ */
+ do_lazy_scan_heap(vacrel, false);
+
+ /* Wait for parallel workers to finish and gather scan results */
+ parallel_lazy_scan_heap_end(vacrel);
+
+ if (!dead_items_check_memory_limit(vacrel))
+ break;
+
+ /* Perform a round of index and heap vacuuming */
+ vacrel->consider_bypass_optimization = false;
+ lazy_vacuum(vacrel);
+
+ /* Compute the smallest processed block number */
+ fsmvac_upto = parallel_lazy_scan_compute_min_scan_block(vacrel);
+
+ /*
+ * Vacuum the Free Space Map to make newly-freed space visible on
+ * upper-level FSM pages.
+ */
+ if (fsmvac_upto > vacrel->next_fsm_block_to_vacuum)
+ {
+ FreeSpaceMapVacuumRange(vacrel->rel, vacrel->next_fsm_block_to_vacuum,
+ fsmvac_upto);
+ vacrel->next_fsm_block_to_vacuum = fsmvac_upto;
+ }
+
+ /* Report that we are once again scanning the heap */
+ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
+ PROGRESS_VACUUM_PHASE_SCAN_HEAP);
+ }
+
+ /*
+ * The parallel heap scan finished, but it's possible that some workers
+ * have allocated blocks but not processed them yet. This can happen, for
+ * example, when workers exit because their dead_items space is full and the
+ * leader launches fewer workers in the next cycle.
+ */
+ complete_unfinished_lazy_scan_heap(vacrel);
+}
+
+/*
+ * Return the smallest block number that the leader and workers have scanned.
+ */
+static BlockNumber
+parallel_lazy_scan_compute_min_scan_block(LVRelState *vacrel)
+{
+ BlockNumber min_blk;
+
+ Assert(ParallelHeapVacuumIsActive(vacrel));
+
+ /* Initialize with the leader's value */
+ min_blk = vacrel->last_blkno;
+
+ for (int i = 0; i < vacrel->leader->nworkers_launched; i++)
+ {
+ ParallelLVScanWorker *scanworker = &(vacrel->leader->scanworkers[i]);
+ BlockNumber blkno;
+
+ /* Skip if the worker has not initialized its scan state */
+ if (!scanworker->scan_inited)
+ continue;
+
+ blkno = pg_atomic_read_u32(&(scanworker->last_blkno));
+
+ if (!BlockNumberIsValid(min_blk) || min_blk > blkno)
+ min_blk = blkno;
+ }
+
+ Assert(BlockNumberIsValid(min_blk));
+
+ return min_blk;
+}
+
+/*
+ * Complete parallel heap scans that have remaining blocks in their
+ * chunks.
+ */
+static void
+complete_unfinished_lazy_scan_heap(LVRelState *vacrel)
+{
+ int nworkers;
+
+ Assert(!IsParallelWorker());
+
+ nworkers = parallel_vacuum_get_nworkers_table(vacrel->pvs);
+
+ for (int i = 0; i < nworkers; i++)
+ {
+ ParallelLVScanWorker *scanworker = &(vacrel->leader->scanworkers[i]);
+
+ if (!scanworker->scan_inited)
+ continue;
+
+ if (scanworker->pbscanworkdata.phsw_chunk_remaining == 0)
+ continue;
+
+ /* Attach the worker's scan state */
+ vacrel->plvstate->pbscanwork = &(scanworker->pbscanworkdata);
+
+ /*
+ * Complete the unfinished scan. Note that we might perform multiple
+ * cycles of index and heap vacuuming while completing the scans.
+ */
+ vacrel->next_fsm_block_to_vacuum = pg_atomic_read_u32(&(scanworker->last_blkno));
+ do_lazy_scan_heap(vacrel, true);
+ }
+
+ /*
+ * We don't need to gather the scan results here because the leader's scan
+ * state got updated directly.
+ */
+}
+
+/*
+ * Helper routine to launch parallel workers for parallel lazy heap scan.
+ */
+static void
+parallel_lazy_scan_heap_begin(LVRelState *vacrel)
+{
+ Assert(ParallelHeapVacuumIsActive(vacrel));
+ Assert(!IsParallelWorker());
+
+ /* Launch parallel workers */
+ vacrel->leader->nworkers_launched = parallel_vacuum_collect_dead_items_begin(vacrel->pvs);
+
+ ereport(vacrel->verbose ? INFO : DEBUG2,
+ (errmsg(ngettext("launched %d parallel vacuum worker for collecting dead tuples (planned: %d)",
+ "launched %d parallel vacuum workers for collecting dead tuples (planned: %d)",
+ vacrel->leader->nworkers_launched),
+ vacrel->leader->nworkers_launched,
+ parallel_vacuum_get_nworkers_table(vacrel->pvs))));
+}
+
+/*
+ * Helper routine to finish the parallel lazy heap scan.
+ */
+static void
+parallel_lazy_scan_heap_end(LVRelState *vacrel)
+{
+ /* Wait for all parallel workers to finish */
+ parallel_vacuum_collect_dead_items_end(vacrel->pvs);
+
+ /* Gather the workers' scan results */
+ parallel_lazy_scan_gather_scan_results(vacrel);
+}
+
+/*
+ * Accumulate each worker's scan results into the leader's.
+ */
+static void
+parallel_lazy_scan_gather_scan_results(LVRelState *vacrel)
+{
+ Assert(ParallelHeapVacuumIsActive(vacrel));
+ Assert(!IsParallelWorker());
+
+ /* Gather the workers' scan results */
+ for (int i = 0; i < vacrel->leader->nworkers_launched; i++)
+ {
+ LVScanData *data = &(vacrel->leader->scanworkers[i].scandata);
+
+ /* Accumulate the counters collected by workers */
+#define ACCUM_COUNT(item) vacrel->scan_data->item += data->item
+ ACCUM_COUNT(scanned_pages);
+ ACCUM_COUNT(removed_pages);
+ ACCUM_COUNT(new_frozen_tuple_pages);
+ ACCUM_COUNT(vm_new_visible_pages);
+ ACCUM_COUNT(vm_new_visible_frozen_pages);
+ ACCUM_COUNT(vm_new_frozen_pages);
+ ACCUM_COUNT(lpdead_item_pages);
+ ACCUM_COUNT(missed_dead_pages);
+ ACCUM_COUNT(tuples_deleted);
+ ACCUM_COUNT(tuples_frozen);
+ ACCUM_COUNT(lpdead_items);
+ ACCUM_COUNT(live_tuples);
+ ACCUM_COUNT(recently_dead_tuples);
+ ACCUM_COUNT(missed_dead_tuples);
+#undef ACCUM_COUNT
+
+ /*
+ * Track the greatest non-empty page number among the values the workers
+ * collected, as it's used as the cut-off point for heap truncation.
+ */
+ if (vacrel->scan_data->nonempty_pages < data->nonempty_pages)
+ vacrel->scan_data->nonempty_pages = data->nonempty_pages;
+
+ /*
+ * All workers must have initialized both values with the values
+ * passed by the leader.
+ */
+ Assert(TransactionIdIsValid(data->NewRelfrozenXid));
+ Assert(MultiXactIdIsValid(data->NewRelminMxid));
+
+ /*
+ * During parallel lazy scanning, since different workers process
+ * separate blocks, they may observe different existing XIDs and
+ * MXIDs. Therefore, we compute the oldest XID and MXID from the
+ * values observed by each worker (including the leader). These
+ * computations are crucial for correctly advancing both relfrozenxid
+ * and relminmxid values.
+ */
+
+ if (TransactionIdPrecedes(data->NewRelfrozenXid, vacrel->scan_data->NewRelfrozenXid))
+ vacrel->scan_data->NewRelfrozenXid = data->NewRelfrozenXid;
+
+ if (MultiXactIdPrecedesOrEquals(data->NewRelminMxid, vacrel->scan_data->NewRelminMxid))
+ vacrel->scan_data->NewRelminMxid = data->NewRelminMxid;
+
+ /* Has any one of workers skipped all-visible page? */
+ vacrel->scan_data->skippedallvis |= data->skippedallvis;
+ }
}
/*
@@ -1900,11 +2471,11 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
*/
if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
{
- vacrel->vm_new_visible_pages++;
- vacrel->vm_new_visible_frozen_pages++;
+ vacrel->scan_data->vm_new_visible_pages++;
+ vacrel->scan_data->vm_new_visible_frozen_pages++;
}
else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0)
- vacrel->vm_new_frozen_pages++;
+ vacrel->scan_data->vm_new_frozen_pages++;
}
freespace = PageGetHeapFreeSpace(page);
@@ -1979,10 +2550,10 @@ lazy_scan_prune(LVRelState *vacrel,
heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
&vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
&vacrel->offnum,
- &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
+ &vacrel->scan_data->NewRelfrozenXid, &vacrel->scan_data->NewRelminMxid);
- Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
- Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
+ Assert(MultiXactIdIsValid(vacrel->scan_data->NewRelminMxid));
+ Assert(TransactionIdIsValid(vacrel->scan_data->NewRelfrozenXid));
if (presult.nfrozen > 0)
{
@@ -1992,7 +2563,7 @@ lazy_scan_prune(LVRelState *vacrel,
* frozen tuples (don't confuse that with pages newly set all-frozen
* in VM).
*/
- vacrel->new_frozen_tuple_pages++;
+ vacrel->scan_data->new_frozen_tuple_pages++;
}
/*
@@ -2027,7 +2598,7 @@ lazy_scan_prune(LVRelState *vacrel,
*/
if (presult.lpdead_items > 0)
{
- vacrel->lpdead_item_pages++;
+ vacrel->scan_data->lpdead_item_pages++;
/*
* deadoffsets are collected incrementally in
@@ -2042,15 +2613,16 @@ lazy_scan_prune(LVRelState *vacrel,
}
/* Finally, add page-local counts to whole-VACUUM counts */
- vacrel->tuples_deleted += presult.ndeleted;
- vacrel->tuples_frozen += presult.nfrozen;
- vacrel->lpdead_items += presult.lpdead_items;
- vacrel->live_tuples += presult.live_tuples;
- vacrel->recently_dead_tuples += presult.recently_dead_tuples;
+ vacrel->scan_data->tuples_deleted += presult.ndeleted;
+ vacrel->scan_data->tuples_frozen += presult.nfrozen;
+ vacrel->scan_data->lpdead_items += presult.lpdead_items;
+ vacrel->scan_data->live_tuples += presult.live_tuples;
+ vacrel->scan_data->recently_dead_tuples += presult.recently_dead_tuples;
/* Can't truncate this page */
if (presult.hastup)
- vacrel->nonempty_pages = blkno + 1;
+ vacrel->scan_data->nonempty_pages =
+ Max(blkno + 1, vacrel->scan_data->nonempty_pages);
/* Did we find LP_DEAD items? */
*has_lpdead_items = (presult.lpdead_items > 0);
@@ -2099,17 +2671,17 @@ lazy_scan_prune(LVRelState *vacrel,
*/
if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
{
- vacrel->vm_new_visible_pages++;
+ vacrel->scan_data->vm_new_visible_pages++;
if (presult.all_frozen)
{
- vacrel->vm_new_visible_frozen_pages++;
+ vacrel->scan_data->vm_new_visible_frozen_pages++;
*vm_page_frozen = true;
}
}
else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
presult.all_frozen)
{
- vacrel->vm_new_frozen_pages++;
+ vacrel->scan_data->vm_new_frozen_pages++;
*vm_page_frozen = true;
}
}
@@ -2197,8 +2769,8 @@ lazy_scan_prune(LVRelState *vacrel,
*/
if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
{
- vacrel->vm_new_visible_pages++;
- vacrel->vm_new_visible_frozen_pages++;
+ vacrel->scan_data->vm_new_visible_pages++;
+ vacrel->scan_data->vm_new_visible_frozen_pages++;
*vm_page_frozen = true;
}
@@ -2208,7 +2780,7 @@ lazy_scan_prune(LVRelState *vacrel,
*/
else
{
- vacrel->vm_new_frozen_pages++;
+ vacrel->scan_data->vm_new_frozen_pages++;
*vm_page_frozen = true;
}
}
@@ -2249,8 +2821,8 @@ lazy_scan_noprune(LVRelState *vacrel,
missed_dead_tuples;
bool hastup;
HeapTupleHeader tupleheader;
- TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
- MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
+ TransactionId NoFreezePageRelfrozenXid = vacrel->scan_data->NewRelfrozenXid;
+ MultiXactId NoFreezePageRelminMxid = vacrel->scan_data->NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
Assert(BufferGetBlockNumber(buf) == blkno);
@@ -2377,8 +2949,8 @@ lazy_scan_noprune(LVRelState *vacrel,
* this particular page until the next VACUUM. Remember its details now.
* (lazy_scan_prune expects a clean slate, so we have to do this last.)
*/
- vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
- vacrel->NewRelminMxid = NoFreezePageRelminMxid;
+ vacrel->scan_data->NewRelfrozenXid = NoFreezePageRelfrozenXid;
+ vacrel->scan_data->NewRelminMxid = NoFreezePageRelminMxid;
/* Save any LP_DEAD items found on the page in dead_items */
if (vacrel->nindexes == 0)
@@ -2405,25 +2977,26 @@ lazy_scan_noprune(LVRelState *vacrel,
* indexes will be deleted during index vacuuming (and then marked
* LP_UNUSED in the heap)
*/
- vacrel->lpdead_item_pages++;
+ vacrel->scan_data->lpdead_item_pages++;
dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
- vacrel->lpdead_items += lpdead_items;
+ vacrel->scan_data->lpdead_items += lpdead_items;
}
/*
* Finally, add relevant page-local counts to whole-VACUUM counts
*/
- vacrel->live_tuples += live_tuples;
- vacrel->recently_dead_tuples += recently_dead_tuples;
- vacrel->missed_dead_tuples += missed_dead_tuples;
+ vacrel->scan_data->live_tuples += live_tuples;
+ vacrel->scan_data->recently_dead_tuples += recently_dead_tuples;
+ vacrel->scan_data->missed_dead_tuples += missed_dead_tuples;
if (missed_dead_tuples > 0)
- vacrel->missed_dead_pages++;
+ vacrel->scan_data->missed_dead_pages++;
/* Can't truncate this page */
if (hastup)
- vacrel->nonempty_pages = blkno + 1;
+ vacrel->scan_data->nonempty_pages =
+ Max(blkno + 1, vacrel->scan_data->nonempty_pages);
/* Did we find LP_DEAD items? */
*has_lpdead_items = (lpdead_items > 0);
@@ -2452,7 +3025,7 @@ lazy_vacuum(LVRelState *vacrel)
/* Should not end up here with no indexes */
Assert(vacrel->nindexes > 0);
- Assert(vacrel->lpdead_item_pages > 0);
+ Assert(vacrel->scan_data->lpdead_item_pages > 0);
if (!vacrel->do_index_vacuuming)
{
@@ -2481,12 +3054,12 @@ lazy_vacuum(LVRelState *vacrel)
* HOT through careful tuning.
*/
bypass = false;
- if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
+ if (vacrel->consider_bypass_optimization && vacrel->scan_data->rel_pages > 0)
{
BlockNumber threshold;
Assert(vacrel->num_index_scans == 0);
- Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
+ Assert(vacrel->scan_data->lpdead_items == vacrel->dead_items_info->num_items);
Assert(vacrel->do_index_vacuuming);
Assert(vacrel->do_index_cleanup);
@@ -2512,8 +3085,8 @@ lazy_vacuum(LVRelState *vacrel)
* be negligible. If this optimization is ever expanded to cover more
* cases then this may need to be reconsidered.
*/
- threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
- bypass = (vacrel->lpdead_item_pages < threshold &&
+ threshold = (double) vacrel->scan_data->rel_pages * BYPASS_THRESHOLD_PAGES;
+ bypass = (vacrel->scan_data->lpdead_item_pages < threshold &&
TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
}
@@ -2651,7 +3224,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
* place).
*/
Assert(vacrel->num_index_scans > 0 ||
- vacrel->dead_items_info->num_items == vacrel->lpdead_items);
+ vacrel->dead_items_info->num_items == vacrel->scan_data->lpdead_items);
Assert(allindexes || VacuumFailsafeActive);
/*
@@ -2813,8 +3386,8 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
* the second heap pass. No more, no less.
*/
Assert(vacrel->num_index_scans > 1 ||
- (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
- vacuumed_pages == vacrel->lpdead_item_pages));
+ (vacrel->dead_items_info->num_items == vacrel->scan_data->lpdead_items &&
+ vacuumed_pages == vacrel->scan_data->lpdead_item_pages));
ereport(DEBUG2,
(errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
@@ -2930,14 +3503,14 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
*/
if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
{
- vacrel->vm_new_visible_pages++;
+ vacrel->scan_data->vm_new_visible_pages++;
if (all_frozen)
- vacrel->vm_new_visible_frozen_pages++;
+ vacrel->scan_data->vm_new_visible_frozen_pages++;
}
else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
all_frozen)
- vacrel->vm_new_frozen_pages++;
+ vacrel->scan_data->vm_new_frozen_pages++;
}
/* Revert to the previous phase information for error traceback */
@@ -3013,7 +3586,7 @@ static void
lazy_cleanup_all_indexes(LVRelState *vacrel)
{
double reltuples = vacrel->new_rel_tuples;
- bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
+ bool estimated_count = vacrel->scan_data->scanned_pages < vacrel->scan_data->rel_pages;
const int progress_start_index[] = {
PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_INDEXES_TOTAL
@@ -3194,10 +3767,10 @@ should_attempt_truncation(LVRelState *vacrel)
if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
return false;
- possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
+ possibly_freeable = vacrel->scan_data->rel_pages - vacrel->scan_data->nonempty_pages;
if (possibly_freeable > 0 &&
(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
- possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
+ possibly_freeable >= vacrel->scan_data->rel_pages / REL_TRUNCATE_FRACTION))
return true;
return false;
@@ -3209,7 +3782,7 @@ should_attempt_truncation(LVRelState *vacrel)
static void
lazy_truncate_heap(LVRelState *vacrel)
{
- BlockNumber orig_rel_pages = vacrel->rel_pages;
+ BlockNumber orig_rel_pages = vacrel->scan_data->rel_pages;
BlockNumber new_rel_pages;
bool lock_waiter_detected;
int lock_retry;
@@ -3220,7 +3793,7 @@ lazy_truncate_heap(LVRelState *vacrel)
/* Update error traceback information one last time */
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
- vacrel->nonempty_pages, InvalidOffsetNumber);
+ vacrel->scan_data->nonempty_pages, InvalidOffsetNumber);
/*
* Loop until no more truncating can be done.
@@ -3321,15 +3894,15 @@ lazy_truncate_heap(LVRelState *vacrel)
* without also touching reltuples, since the tuple count wasn't
* changed by the truncation.
*/
- vacrel->removed_pages += orig_rel_pages - new_rel_pages;
- vacrel->rel_pages = new_rel_pages;
+ vacrel->scan_data->removed_pages += orig_rel_pages - new_rel_pages;
+ vacrel->scan_data->rel_pages = new_rel_pages;
ereport(vacrel->verbose ? INFO : DEBUG2,
(errmsg("table \"%s\": truncated %u to %u pages",
vacrel->relname,
orig_rel_pages, new_rel_pages)));
orig_rel_pages = new_rel_pages;
- } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
+ } while (new_rel_pages > vacrel->scan_data->nonempty_pages && lock_waiter_detected);
}
/*
@@ -3353,11 +3926,11 @@ count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
* unsigned.) To make the scan faster, we prefetch a few blocks at a time
* in forward direction, so that OS-level readahead can kick in.
*/
- blkno = vacrel->rel_pages;
+ blkno = vacrel->scan_data->rel_pages;
StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
"prefetch size must be power of 2");
prefetchedUntil = InvalidBlockNumber;
- while (blkno > vacrel->nonempty_pages)
+ while (blkno > vacrel->scan_data->nonempty_pages)
{
Buffer buf;
Page page;
@@ -3469,7 +4042,7 @@ count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
* pages still are; we need not bother to look at the last known-nonempty
* page.
*/
- return vacrel->nonempty_pages;
+ return vacrel->scan_data->nonempty_pages;
}
/*
@@ -3487,12 +4060,8 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
autovacuum_work_mem != -1 ?
autovacuum_work_mem : maintenance_work_mem;
- /*
- * Initialize state for a parallel vacuum. As of now, only one worker can
- * be used for an index, so we invoke parallelism only if there are at
- * least two indexes on a table.
- */
- if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
+ /* Initialize state for a parallel vacuum */
+ if (nworkers >= 0)
{
/*
* Since parallel workers cannot access data in temporary tables, we
@@ -3510,11 +4079,17 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
vacrel->relname)));
}
else
+ {
+ /*
+ * We initialize the parallel vacuum state for the lazy heap scan,
+ * index vacuuming, or both.
+ */
vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
vacrel->nindexes, nworkers,
vac_work_mem,
vacrel->verbose ? INFO : DEBUG2,
- vacrel->bstrategy);
+ vacrel->bstrategy, (void *) vacrel);
+ }
/*
* If parallel mode started, dead_items and dead_items_info spaces are
@@ -3554,15 +4129,35 @@ dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
};
int64 prog_val[2];
+ if (ParallelHeapVacuumIsActive(vacrel))
+ TidStoreLockExclusive(vacrel->dead_items);
+
TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
vacrel->dead_items_info->num_items += num_offsets;
+ if (ParallelHeapVacuumIsActive(vacrel))
+ TidStoreUnlock(vacrel->dead_items);
+
/* update the progress information */
prog_val[0] = vacrel->dead_items_info->num_items;
prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
pgstat_progress_update_multi_param(2, prog_index, prog_val);
}
+/*
+ * Check the memory usage of the collected dead items and return true
+ * if we are close to overrunning the available space for dead_items TIDs.
+ * However, force at least one page's worth of tuples to be stored, so that
+ * we do at least some work even when the configured memory is so low that
+ * we would otherwise run out before storing anything.
+ */
+static bool
+dead_items_check_memory_limit(LVRelState *vacrel)
+{
+ return vacrel->dead_items_info->num_items > 0 &&
+ TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes;
+}
+
/*
* Forget all collected dead items.
*/
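The helper above is only meaningful at its call site inside the block-scanning loop, which falls outside this excerpt. A minimal illustrative sketch of the intended serial calling pattern follows (not patch code): get_next_block_to_scan() and scan_one_page() are hypothetical stand-ins for the real scan machinery, while dead_items_check_memory_limit(), dead_items_add(), lazy_vacuum(), and dead_items_reset() are names taken from the patch or existing vacuumlazy.c code.

	while (get_next_block_to_scan(vacrel, &blkno))	/* hypothetical helper */
	{
		/* prune/scan the page; may call dead_items_add() */
		scan_one_page(vacrel, blkno);				/* hypothetical helper */

		if (dead_items_check_memory_limit(vacrel))
		{
			/* remove the accumulated dead items from indexes and heap ... */
			lazy_vacuum(vacrel);
			/* ... then forget them and resume the scan */
			dead_items_reset(vacrel);
		}
	}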
@@ -3756,6 +4351,228 @@ update_relstats_all_indexes(LVRelState *vacrel)
}
}
+/*
+ * Compute the number of workers for parallel heap vacuum.
+ */
+int
+heap_parallel_vacuum_compute_workers(Relation rel, int nworkers_requested,
+ void *state)
+{
+ int parallel_workers = 0;
+
+ if (nworkers_requested == 0)
+ {
+ LVRelState *vacrel = (LVRelState *) state;
+ int heap_parallel_threshold;
+ int heap_pages;
+ BlockNumber allvisible;
+ BlockNumber allfrozen;
+
+ /*
+ * Estimate the number of blocks that we're going to scan during
+ * lazy_scan_heap().
+ */
+ visibilitymap_count(rel, &allvisible, &allfrozen);
+ heap_pages = RelationGetNumberOfBlocks(rel) -
+ (vacrel->aggressive ? allfrozen : allvisible);
+
+ Assert(heap_pages >= 0);
+
+ /*
+ * Select the number of workers based on the log of the number of
+ * pages to scan. Note that the upper limit of the
+ * min_parallel_table_scan_size GUC is chosen to prevent overflow
+ * here.
+ */
+ heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
+ while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
+ {
+ parallel_workers++;
+ heap_parallel_threshold *= 3;
+ if (heap_parallel_threshold > INT_MAX / 3)
+ break;
+ }
+ }
+ else
+ parallel_workers = nworkers_requested;
+
+ return parallel_workers;
+}
+
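To make the scaling above concrete, the following standalone sketch reproduces just the threshold-tripling arithmetic of heap_parallel_vacuum_compute_workers() for the nworkers_requested == 0 case. The threshold value 1024 blocks (8 MB with 8 kB pages) is only an example setting of min_parallel_table_scan_size, and the final cap by max_parallel_maintenance_workers applied in vacuumparallel.c is omitted here.

#include <limits.h>
#include <stdio.h>

/* Mirrors the threshold-tripling loop above; illustrative only. */
static int
scan_workers_for(long heap_pages, long threshold)
{
	int			workers = 0;

	while (heap_pages >= threshold * 3)
	{
		workers++;
		threshold *= 3;
		if (threshold > INT_MAX / 3)
			break;
	}
	return workers;
}

int
main(void)
{
	printf("%d\n", scan_workers_for(2000, 1024));	/* 0 workers */
	printf("%d\n", scan_workers_for(10000, 1024));	/* 2 workers */
	printf("%d\n", scan_workers_for(100000, 1024));	/* 4 workers */
	return 0;
}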
+/*
+ * Estimate shared memory size required for parallel heap vacuum.
+ */
+void
+heap_parallel_vacuum_estimate(Relation rel, ParallelContext *pcxt, int nworkers,
+ void *state)
+{
+ LVRelState *vacrel = (LVRelState *) state;
+ Size size = 0;
+
+ vacrel->leader = palloc(sizeof(ParallelLVLeader));
+
+ /* Estimate space for ParallelLVShared */
+ size = add_size(size, sizeof(ParallelLVShared));
+ vacrel->leader->shared_len = size;
+ shm_toc_estimate_chunk(&pcxt->estimator, vacrel->leader->shared_len);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+ /* Estimate space for ParallelBlockTableScanDesc */
+ vacrel->leader->pbscan_len = table_block_parallelscan_estimate(rel);
+ shm_toc_estimate_chunk(&pcxt->estimator, vacrel->leader->pbscan_len);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+ /* Estimate space for an array of ParallelLVScanWorker */
+ vacrel->leader->scanworker_len = mul_size(sizeof(ParallelLVScanWorker), nworkers);
+ shm_toc_estimate_chunk(&pcxt->estimator, vacrel->leader->scanworker_len);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up shared memory for parallel heap vacuum.
+ */
+void
+heap_parallel_vacuum_initialize(Relation rel, ParallelContext *pcxt, int nworkers,
+ void *state)
+{
+ LVRelState *vacrel = (LVRelState *) state;
+ ParallelLVShared *shared;
+ ParallelBlockTableScanDesc pbscan;
+ ParallelLVScanWorker *scanworkers;
+
+ vacrel->plvstate = palloc0(sizeof(ParallelLVState));
+
+ /* Initialize ParallelLVShared */
+ shared = shm_toc_allocate(pcxt->toc, vacrel->leader->shared_len);
+ MemSet(shared, 0, vacrel->leader->shared_len);
+ shared->aggressive = vacrel->aggressive;
+ shared->skipwithvm = vacrel->skipwithvm;
+ shared->cutoffs = vacrel->cutoffs;
+ shared->NewRelfrozenXid = vacrel->scan_data->NewRelfrozenXid;
+ shared->NewRelminMxid = vacrel->scan_data->NewRelminMxid;
+ shm_toc_insert(pcxt->toc, PARALLEL_LV_KEY_SHARED, shared);
+ vacrel->plvstate->shared = shared;
+
+ /* Initialize ParallelBlockTableScanDesc */
+ pbscan = shm_toc_allocate(pcxt->toc, vacrel->leader->pbscan_len);
+ table_block_parallelscan_initialize(rel, (ParallelTableScanDesc) pbscan);
+ pbscan->base.phs_syncscan = false; /* always start from the first block */
+ shm_toc_insert(pcxt->toc, PARALLEL_LV_KEY_SCANDESC, pbscan);
+ vacrel->plvstate->pbscan = pbscan;
+
+ /* Initialize the array of ParallelLVScanWorker */
+ scanworkers = shm_toc_allocate(pcxt->toc, vacrel->leader->scanworker_len);
+ MemSet(scanworkers, 0, vacrel->leader->scanworker_len);
+ shm_toc_insert(pcxt->toc, PARALLEL_LV_KEY_SCANWORKER, scanworkers);
+ vacrel->leader->scanworkers = scanworkers;
+}
+
+/*
+ * Initialize lazy vacuum state with the information retrieved from
+ * shared memory.
+ */
+void
+heap_parallel_vacuum_initialize_worker(Relation rel, ParallelVacuumState *pvs,
+ ParallelWorkerContext *pwcxt,
+ void **state_out)
+{
+ LVRelState *vacrel;
+ ParallelLVState *plvstate;
+ ParallelLVShared *shared;
+ ParallelLVScanWorker *scanworker;
+ ParallelBlockTableScanDesc pbscan;
+
+ /* Initialize ParallelLVState and prepare the related objects */
+
+ plvstate = palloc0(sizeof(ParallelLVState));
+
+ /* Prepare ParallelLVShared */
+ shared = (ParallelLVShared *) shm_toc_lookup(pwcxt->toc, PARALLEL_LV_KEY_SHARED, false);
+ plvstate->shared = shared;
+
+ /* Prepare ParallelBlockTableScanDesc */
+ pbscan = shm_toc_lookup(pwcxt->toc, PARALLEL_LV_KEY_SCANDESC, false);
+ plvstate->pbscan = pbscan;
+
+ /* Prepare ParallelLVScanWorker */
+ scanworker = shm_toc_lookup(pwcxt->toc, PARALLEL_LV_KEY_SCANWORKER, false);
+ plvstate->scanworker = &(scanworker[ParallelWorkerNumber]);
+ plvstate->pbscanwork = &(plvstate->scanworker->pbscanworkdata);
+
+ /* Initialize LVRelState and prepare fields required by lazy scan heap */
+ vacrel = palloc0(sizeof(LVRelState));
+ vacrel->rel = rel;
+ vacrel->indrels = parallel_vacuum_get_table_indexes(pvs,
+ &vacrel->nindexes);
+ vacrel->bstrategy = parallel_vacuum_get_bstrategy(pvs);
+ vacrel->pvs = pvs;
+ vacrel->aggressive = shared->aggressive;
+ vacrel->skipwithvm = shared->skipwithvm;
+ vacrel->vistest = GlobalVisTestFor(rel);
+ vacrel->cutoffs = shared->cutoffs;
+ vacrel->dead_items = parallel_vacuum_get_dead_items(pvs,
+ &vacrel->dead_items_info);
+ vacrel->plvstate = plvstate;
+ vacrel->scan_data = &(plvstate->scanworker->scandata);
+ MemSet(vacrel->scan_data, 0, sizeof(LVScanData));
+ vacrel->scan_data->NewRelfrozenXid = shared->NewRelfrozenXid;
+ vacrel->scan_data->NewRelminMxid = shared->NewRelminMxid;
+ vacrel->scan_data->skippedallvis = false;
+ vacrel->scan_data->rel_pages = RelationGetNumberOfBlocks(rel);
+
+ /*
+ * Initialize the scan state if it has not been initialized yet. The chunk
+ * of blocks to scan is allocated when a block is first requested.
+ */
+ if (!vacrel->plvstate->scanworker->scan_inited)
+ {
+ vacrel->plvstate->scanworker->scan_inited = true;
+ table_block_parallelscan_startblock_init(rel,
+ vacrel->plvstate->pbscanwork,
+ vacrel->plvstate->pbscan);
+ pg_atomic_init_u32(&(vacrel->plvstate->scanworker->last_blkno),
+ InvalidBlockNumber);
+ }
+
+ *state_out = (void *) vacrel;
+}
+
+/*
+ * Parallel heap vacuum callback for collecting dead items (i.e., lazy heap scan).
+ */
+void
+heap_parallel_vacuum_collect_dead_items(Relation rel, ParallelVacuumState *pvs,
+ void *state)
+{
+ LVRelState *vacrel = (LVRelState *) state;
+ ErrorContextCallback errcallback;
+
+ Assert(ParallelHeapVacuumIsActive(vacrel));
+
+ /*
+ * Set up error traceback support for ereport() for parallel table vacuum
+ * workers.
+ */
+ vacrel->dbname = get_database_name(MyDatabaseId);
+ vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
+ vacrel->relname = pstrdup(RelationGetRelationName(rel));
+ vacrel->indname = NULL;
+ vacrel->phase = VACUUM_ERRCB_PHASE_SCAN_HEAP;
+ errcallback.callback = vacuum_error_callback;
+ errcallback.arg = vacrel;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* Join the parallel heap vacuum */
+ do_lazy_scan_heap(vacrel, false);
+
+ /* Advertise the last processed block number */
+ pg_atomic_write_u32(&(vacrel->plvstate->scanworker->last_blkno), vacrel->last_blkno);
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+}
+
/*
* Error context callback for errors occurring during vacuum. The error
* context messages for index phases should match the messages set in parallel
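The leader-side call site that pairs with this worker callback is not visible in this excerpt. As a rough sketch only, assuming the leader drives the phase through the begin/end functions added to vacuumparallel.c and then joins the scan itself (the second argument of do_lazy_scan_heap() and the surrounding details are assumptions, not the patch's actual code):

	/* assumed leader-side shape; details differ in the real patch */
	(void) parallel_vacuum_collect_dead_items_begin(vacrel->pvs);

	/* the leader participates in the scan as well */
	do_lazy_scan_heap(vacrel, true);

	/* wait for the workers and accumulate their buffer/WAL usage */
	parallel_vacuum_collect_dead_items_end(vacrel->pvs);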
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
index 476663b66aad..c3ee9869e12b 100644
--- a/src/backend/access/table/tableamapi.c
+++ b/src/backend/access/table/tableamapi.c
@@ -81,6 +81,7 @@ GetTableAmRoutine(Oid amhandler)
Assert(routine->relation_copy_data != NULL);
Assert(routine->relation_copy_for_cluster != NULL);
Assert(routine->relation_vacuum != NULL);
+ Assert(routine->parallel_vacuum_compute_workers != NULL);
Assert(routine->scan_analyze_next_block != NULL);
Assert(routine->scan_analyze_next_tuple != NULL);
Assert(routine->index_build_range_scan != NULL);
@@ -94,6 +95,16 @@ GetTableAmRoutine(Oid amhandler)
Assert(routine->scan_sample_next_block != NULL);
Assert(routine->scan_sample_next_tuple != NULL);
+ /*
+ * Callbacks for parallel vacuum are also optional (except for
+ * parallel_vacuum_compute_workers), but if any one of them is provided,
+ * all of them must be.
+ */
+ Assert(((routine->parallel_vacuum_estimate == NULL) ==
+ (routine->parallel_vacuum_initialize == NULL)) &&
+ ((routine->parallel_vacuum_initialize == NULL) ==
+ (routine->parallel_vacuum_initialize_worker == NULL)) &&
+ ((routine->parallel_vacuum_initialize_worker == NULL) ==
+ (routine->parallel_vacuum_collect_dead_items == NULL)));
+
return routine;
}
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 2b9d548cdeb1..770e0395a964 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -4,17 +4,18 @@
* Support routines for parallel vacuum execution.
*
* This file contains routines that are intended to support setting up, using,
- * and tearing down a ParallelVacuumState.
+ * and tearing down a ParallelVacuumState. ParallelVacuumState contains shared
+ * information as well as the memory space for storing dead items allocated in
+ * the DSA area.
*
- * In a parallel vacuum, we perform both index bulk deletion and index cleanup
- * with parallel worker processes. Individual indexes are processed by one
- * vacuum process. ParallelVacuumState contains shared information as well as
- * the memory space for storing dead items allocated in the DSA area. We
- * launch parallel worker processes at the start of parallel index
- * bulk-deletion and index cleanup and once all indexes are processed, the
- * parallel worker processes exit. Each time we process indexes in parallel,
- * the parallel context is re-initialized so that the same DSM can be used for
- * multiple passes of index bulk-deletion and index cleanup.
+ * In a parallel vacuum, we perform the table scan, index bulk-deletion, and
+ * index cleanup, or any subset of them, with parallel worker processes,
+ * depending on how many workers each phase requires; the table scan and the
+ * index processing may therefore use different numbers of workers. We launch
+ * parallel worker processes at the start of a phase, and they exit once all
+ * work in that phase is complete. Each time we process the table or indexes
+ * in parallel, the parallel context is re-initialized so that the same DSM
+ * can be used for multiple passes of each phase.
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -26,8 +27,10 @@
*/
#include "postgres.h"
+#include "access/parallel.h"
#include "access/amapi.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "access/xact.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
@@ -50,6 +53,13 @@
#define PARALLEL_VACUUM_KEY_WAL_USAGE 4
#define PARALLEL_VACUUM_KEY_INDEX_STATS 5
+/* The kind of parallel vacuum phases */
+typedef enum
+{
+ PV_WORK_PHASE_PROCESS_INDEXES, /* index vacuuming or cleanup */
+ PV_WORK_PHASE_COLLECT_DEAD_ITEMS, /* collect dead tuples */
+} PVWorkPhase;
+
/*
* Shared information among parallel workers. So this is allocated in the DSM
* segment.
@@ -65,6 +75,12 @@ typedef struct PVShared
int elevel;
uint64 queryid;
+ /*
+ * Tell parallel workers what phase to perform: processing indexes or
+ * collecting dead tuples from the table.
+ */
+ PVWorkPhase work_phase;
+
/*
* Fields for both index vacuum and cleanup.
*
@@ -164,6 +180,9 @@ struct ParallelVacuumState
/* NULL for worker processes */
ParallelContext *pcxt;
+ /* Do we need to reinitialize parallel DSM? */
+ bool need_reinitialize_dsm;
+
/* Parent Heap Relation */
Relation heaprel;
@@ -178,7 +197,7 @@ struct ParallelVacuumState
* Shared index statistics among parallel vacuum workers. The array
* element is allocated for every index, even those indexes where parallel
* index vacuuming is unsafe or not worthwhile (e.g.,
- * will_parallel_vacuum[] is false). During parallel vacuum,
+ * idx_will_parallel_vacuum[] is false). During parallel vacuum,
* IndexBulkDeleteResult of each index is kept in DSM and is copied into
* local memory at the end of parallel vacuum.
*/
@@ -193,12 +212,18 @@ struct ParallelVacuumState
/* Points to WAL usage area in DSM */
WalUsage *wal_usage;
+ /*
+ * The number of workers for parallel table vacuuming. If 0, parallel
+ * table vacuum is disabled.
+ */
+ int nworkers_for_table;
+
/*
* False if the index is totally unsuitable target for all parallel
* processing. For example, the index could be <
* min_parallel_index_scan_size cutoff.
*/
- bool *will_parallel_vacuum;
+ bool *idx_will_parallel_vacuum;
/*
* The number of indexes that support parallel index bulk-deletion and
@@ -221,8 +246,10 @@ struct ParallelVacuumState
PVIndVacStatus status;
};
-static int parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
- bool *will_parallel_vacuum);
+static int parallel_vacuum_compute_workers(Relation rel, Relation *indrels, int nindexes,
+ int nrequested, int *nworkers_for_table,
+ bool *idx_will_parallel_vacuum,
+ void *state);
static void parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans,
bool vacuum);
static void parallel_vacuum_process_safe_indexes(ParallelVacuumState *pvs);
@@ -231,18 +258,25 @@ static void parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation
PVIndStats *indstats);
static bool parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
bool vacuum);
+static void parallel_vacuum_begin_work_phase(ParallelVacuumState *pvs, int nworkers,
+ PVWorkPhase work_phase);
+static void parallel_vacuum_end_work_phase(ParallelVacuumState *pvs);
static void parallel_vacuum_error_callback(void *arg);
/*
* Try to enter parallel mode and create a parallel context. Then initialize
* shared memory state.
*
+ * nrequested_workers is the requested parallel degree. 0 means that the
+ * parallel degrees for table vacuum and index vacuum are computed separately;
+ * see the comments for parallel_vacuum_compute_workers() for details.
+ *
* On success, return parallel vacuum state. Otherwise return NULL.
*/
ParallelVacuumState *
parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
int nrequested_workers, int vac_work_mem,
- int elevel, BufferAccessStrategy bstrategy)
+ int elevel, BufferAccessStrategy bstrategy, void *state)
{
ParallelVacuumState *pvs;
ParallelContext *pcxt;
@@ -251,38 +285,38 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
PVIndStats *indstats;
BufferUsage *buffer_usage;
WalUsage *wal_usage;
- bool *will_parallel_vacuum;
+ bool *idx_will_parallel_vacuum;
Size est_indstats_len;
Size est_shared_len;
int nindexes_mwm = 0;
int parallel_workers = 0;
+ int nworkers_for_table;
int querylen;
- /*
- * A parallel vacuum must be requested and there must be indexes on the
- * relation
- */
+ /* A parallel vacuum must be requested */
Assert(nrequested_workers >= 0);
- Assert(nindexes > 0);
/*
* Compute the number of parallel vacuum workers to launch
*/
- will_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
- parallel_workers = parallel_vacuum_compute_workers(indrels, nindexes,
+ idx_will_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
+ parallel_workers = parallel_vacuum_compute_workers(rel, indrels, nindexes,
nrequested_workers,
- will_parallel_vacuum);
+ &nworkers_for_table,
+ idx_will_parallel_vacuum,
+ state);
+
if (parallel_workers <= 0)
{
/* Can't perform vacuum in parallel -- return NULL */
- pfree(will_parallel_vacuum);
+ pfree(idx_will_parallel_vacuum);
return NULL;
}
pvs = (ParallelVacuumState *) palloc0(sizeof(ParallelVacuumState));
pvs->indrels = indrels;
pvs->nindexes = nindexes;
- pvs->will_parallel_vacuum = will_parallel_vacuum;
+ pvs->idx_will_parallel_vacuum = idx_will_parallel_vacuum;
pvs->bstrategy = bstrategy;
pvs->heaprel = rel;
@@ -291,6 +325,8 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
parallel_workers);
Assert(pcxt->nworkers > 0);
pvs->pcxt = pcxt;
+ pvs->need_reinitialize_dsm = false;
+ pvs->nworkers_for_table = nworkers_for_table;
/* Estimate size for index vacuum stats -- PARALLEL_VACUUM_KEY_INDEX_STATS */
est_indstats_len = mul_size(sizeof(PVIndStats), nindexes);
@@ -327,6 +363,10 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
else
querylen = 0; /* keep compiler quiet */
+ /* Estimate AM-specific space for parallel table vacuum */
+ if (pvs->nworkers_for_table > 0)
+ table_parallel_vacuum_estimate(rel, pcxt, pvs->nworkers_for_table, state);
+
InitializeParallelDSM(pcxt);
/* Prepare index vacuum stats */
@@ -345,7 +385,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
- if (!will_parallel_vacuum[i])
+ if (!idx_will_parallel_vacuum[i])
continue;
if (indrel->rd_indam->amusemaintenanceworkmem)
@@ -419,6 +459,10 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
}
+ /* Initialize AM-specific DSM space for parallel table vacuum */
+ if (pvs->nworkers_for_table > 0)
+ table_parallel_vacuum_initialize(rel, pcxt, pvs->nworkers_for_table, state);
+
/* Success -- return parallel vacuum state */
return pvs;
}
@@ -456,10 +500,39 @@ parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
DestroyParallelContext(pvs->pcxt);
ExitParallelMode();
- pfree(pvs->will_parallel_vacuum);
+ pfree(pvs->idx_will_parallel_vacuum);
pfree(pvs);
}
+/*
+ * Return the number of parallel workers initialized for parallel table vacuum.
+ */
+int
+parallel_vacuum_get_nworkers_table(ParallelVacuumState *pvs)
+{
+ return pvs->nworkers_for_table;
+}
+
+/*
+ * Return the array of indexes on the table being vacuumed, setting *nindexes
+ * to the length of the array.
+ */
+Relation *
+parallel_vacuum_get_table_indexes(ParallelVacuumState *pvs, int *nindexes)
+{
+ *nindexes = pvs->nindexes;
+
+ return pvs->indrels;
+}
+
+/*
+ * Return the buffer strategy for parallel vacuum.
+ */
+BufferAccessStrategy
+parallel_vacuum_get_bstrategy(ParallelVacuumState *pvs)
+{
+ return pvs->bstrategy;
+}
+
/*
* Returns the dead items space and dead items information.
*/
@@ -533,26 +606,35 @@ parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tup
}
/*
- * Compute the number of parallel worker processes to request. Both index
- * vacuum and index cleanup can be executed with parallel workers.
- * The index is eligible for parallel vacuum iff its size is greater than
- * min_parallel_index_scan_size as invoking workers for very small indexes
- * can hurt performance.
+ * Compute the number of parallel worker processes to request for table
+ * vacuum and index vacuum/cleanup. Return the larger of the two parallel
+ * degrees, i.e. the maximum of the worker counts computed for table
+ * vacuuming and for index vacuuming.
+ *
+ * nrequested is the number of parallel workers that the user requested; it
+ * applies to both the table vacuum and the index vacuum worker counts.
+ * If nrequested is 0, we compute the parallel degree for each of them
+ * separately, as described below.
*
- * nrequested is the number of parallel workers that user requested. If
- * nrequested is 0, we compute the parallel degree based on nindexes, that is
- * the number of indexes that support parallel vacuum. This function also
- * sets will_parallel_vacuum to remember indexes that participate in parallel
- * vacuum.
+ * For parallel table vacuum, we ask the AM-specific routine to compute the
+ * number of parallel worker processes. The result is returned in
+ * *nworkers_table_p.
+ *
+ * For parallel index vacuum, an index is eligible for parallel vacuum iff
+ * its size is greater than min_parallel_index_scan_size, as invoking workers
+ * for very small indexes can hurt performance. This function sets
+ * idx_will_parallel_vacuum[] to remember which indexes participate in
+ * parallel vacuum.
*/
static int
-parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
- bool *will_parallel_vacuum)
+parallel_vacuum_compute_workers(Relation rel, Relation *indrels, int nindexes,
+ int nrequested, int *nworkers_table_p,
+ bool *idx_will_parallel_vacuum, void *state)
{
int nindexes_parallel = 0;
int nindexes_parallel_bulkdel = 0;
int nindexes_parallel_cleanup = 0;
- int parallel_workers;
+ int nworkers_table = 0;
+ int nworkers_index = 0;
+
+ *nworkers_table_p = 0;
/*
* We don't allow performing parallel operation in standalone backend or
@@ -561,6 +643,13 @@ parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
return 0;
+ /* Compute the number of workers for parallel table scan */
+ nworkers_table = table_parallel_vacuum_compute_workers(rel, nrequested,
+ state);
+
+ /* Cap by max_parallel_maintenance_workers */
+ nworkers_table = Min(nworkers_table, max_parallel_maintenance_workers);
+
/*
* Compute the number of indexes that can participate in parallel vacuum.
*/
@@ -574,7 +663,7 @@ parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
RelationGetNumberOfBlocks(indrel) < min_parallel_index_scan_size)
continue;
- will_parallel_vacuum[i] = true;
+ idx_will_parallel_vacuum[i] = true;
if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
nindexes_parallel_bulkdel++;
@@ -589,18 +678,18 @@ parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
/* The leader process takes one index */
nindexes_parallel--;
- /* No index supports parallel vacuum */
- if (nindexes_parallel <= 0)
- return 0;
-
- /* Compute the parallel degree */
- parallel_workers = (nrequested > 0) ?
- Min(nrequested, nindexes_parallel) : nindexes_parallel;
+ if (nindexes_parallel > 0)
+ {
+ /* Take into account the requested number of workers */
+ nworkers_index = (nrequested > 0) ?
+ Min(nrequested, nindexes_parallel) : nindexes_parallel;
- /* Cap by max_parallel_maintenance_workers */
- parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
+ /* Cap by max_parallel_maintenance_workers */
+ nworkers_index = Min(nworkers_index, max_parallel_maintenance_workers);
+ }
- return parallel_workers;
+ *nworkers_table_p = nworkers_table;
+ return Max(nworkers_table, nworkers_index);
}
/*
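As a worked example of the computation above (all values assumed for illustration): with nrequested = 0, max_parallel_maintenance_workers = 4, a table AM callback that computes 2 scan workers, and 5 indexes eligible for parallel processing, the leader takes one index so nindexes_parallel becomes 4 and nworkers_index = Min(4, 4) = 4; *nworkers_table_p is set to 2 and the function returns Max(2, 4) = 4, which sizes the parallel context, while only 2 of those workers are later launched for the collect-dead-items phase.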
@@ -657,7 +746,7 @@ parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scan
Assert(indstats->status == PARALLEL_INDVAC_STATUS_INITIAL);
indstats->status = new_status;
indstats->parallel_workers_can_process =
- (pvs->will_parallel_vacuum[i] &&
+ (pvs->idx_will_parallel_vacuum[i] &&
parallel_vacuum_index_is_parallel_safe(pvs->indrels[i],
num_index_scans,
vacuum));
@@ -669,40 +758,9 @@ parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scan
/* Setup the shared cost-based vacuum delay and launch workers */
if (nworkers > 0)
{
- /* Reinitialize parallel context to relaunch parallel workers */
- if (num_index_scans > 0)
- ReinitializeParallelDSM(pvs->pcxt);
-
- /*
- * Set up shared cost balance and the number of active workers for
- * vacuum delay. We need to do this before launching workers as
- * otherwise, they might not see the updated values for these
- * parameters.
- */
- pg_atomic_write_u32(&(pvs->shared->cost_balance), VacuumCostBalance);
- pg_atomic_write_u32(&(pvs->shared->active_nworkers), 0);
-
- /*
- * The number of workers can vary between bulkdelete and cleanup
- * phase.
- */
- ReinitializeParallelWorkers(pvs->pcxt, nworkers);
-
- LaunchParallelWorkers(pvs->pcxt);
-
- if (pvs->pcxt->nworkers_launched > 0)
- {
- /*
- * Reset the local cost values for leader backend as we have
- * already accumulated the remaining balance of heap.
- */
- VacuumCostBalance = 0;
- VacuumCostBalanceLocal = 0;
-
- /* Enable shared cost balance for leader backend */
- VacuumSharedCostBalance = &(pvs->shared->cost_balance);
- VacuumActiveNWorkers = &(pvs->shared->active_nworkers);
- }
+ /* Start parallel vacuum workers for processing indexes */
+ parallel_vacuum_begin_work_phase(pvs, nworkers,
+ PV_WORK_PHASE_PROCESS_INDEXES);
if (vacuum)
ereport(pvs->shared->elevel,
@@ -732,13 +790,7 @@ parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scan
* to finish, or we might get incomplete data.)
*/
if (nworkers > 0)
- {
- /* Wait for all vacuum workers to finish */
- WaitForParallelWorkersToFinish(pvs->pcxt);
-
- for (int i = 0; i < pvs->pcxt->nworkers_launched; i++)
- InstrAccumParallelQuery(&pvs->buffer_usage[i], &pvs->wal_usage[i]);
- }
+ parallel_vacuum_end_work_phase(pvs);
/*
* Reset all index status back to initial (while checking that we have
@@ -755,15 +807,8 @@ parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scan
indstats->status = PARALLEL_INDVAC_STATUS_INITIAL;
}
- /*
- * Carry the shared balance value to heap scan and disable shared costing
- */
- if (VacuumSharedCostBalance)
- {
- VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
- VacuumSharedCostBalance = NULL;
- VacuumActiveNWorkers = NULL;
- }
+ /* Parallel DSM will need to be reinitialized for the next execution */
+ pvs->need_reinitialize_dsm = true;
}
/*
@@ -979,6 +1024,77 @@ parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
return true;
}
+/*
+ * Begin the parallel scan to collect dead items. Return the number of
+ * launched parallel workers.
+ *
+ * The caller must call parallel_vacuum_collect_dead_items_end() to finish
+ * the parallel scan.
+ */
+int
+parallel_vacuum_collect_dead_items_begin(ParallelVacuumState *pvs)
+{
+ Assert(!IsParallelWorker());
+
+ if (pvs->nworkers_for_table == 0)
+ return 0;
+
+ /* Start parallel vacuum workers for collecting dead items */
+ Assert(pvs->nworkers_for_table <= pvs->pcxt->nworkers);
+ parallel_vacuum_begin_work_phase(pvs, pvs->nworkers_for_table,
+ PV_WORK_PHASE_COLLECT_DEAD_ITEMS);
+
+ /* Count the leader itself as an active worker */
+ if (pvs->pcxt->nworkers_launched > 0)
+ pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
+
+ return pvs->pcxt->nworkers_launched;
+}
+
+/*
+ * Wait for all parallel vacuum workers launched by
+ * parallel_vacuum_collect_dead_items_begin() to finish, and gather the
+ * workers' statistics.
+ */
+void
+parallel_vacuum_collect_dead_items_end(ParallelVacuumState *pvs)
+{
+ Assert(!IsParallelWorker());
+ Assert(pvs->shared->work_phase == PV_WORK_PHASE_COLLECT_DEAD_ITEMS);
+
+ if (pvs->nworkers_for_table == 0)
+ return;
+
+ /* Wait for parallel workers to finish */
+ parallel_vacuum_end_work_phase(pvs);
+
+ /* Decrement the worker count for the leader itself */
+ if (VacuumActiveNWorkers)
+ pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
+}
+
+/*
+ * This function is called by parallel workers to execute the parallel scan
+ * and collect dead tuples.
+ */
+static void
+parallel_vacuum_process_table(ParallelVacuumState *pvs, void *state)
+{
+ Assert(VacuumActiveNWorkers);
+ Assert(pvs->shared->work_phase == PV_WORK_PHASE_COLLECT_DEAD_ITEMS);
+
+ /* Increment the active worker count before starting the table vacuum */
+ pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
+
+ /* Do the parallel scan to collect dead tuples */
+ table_parallel_vacuum_collect_dead_items(pvs->heaprel, pvs, state);
+
+ /*
+ * We have completed the table vacuum so decrement the active worker
+ * count.
+ */
+ pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
+}
+
/*
* Perform work within a launched parallel process.
*
@@ -998,6 +1114,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
WalUsage *wal_usage;
int nindexes;
char *sharedquery;
+ void *state = NULL;
ErrorContextCallback errcallback;
/*
@@ -1030,7 +1147,6 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
* matched to the leader's one.
*/
vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
- Assert(nindexes > 0);
/*
* Apply the desired value of maintenance_work_mem within this process.
@@ -1076,6 +1192,17 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
pvs.bstrategy = GetAccessStrategyWithSize(BAS_VACUUM,
shared->ring_nbuffers * (BLCKSZ / 1024));
+ /* Initialize AM-specific vacuum state for parallel table vacuuming */
+ if (shared->work_phase == PV_WORK_PHASE_COLLECT_DEAD_ITEMS)
+ {
+ ParallelWorkerContext pwcxt;
+
+ pwcxt.toc = toc;
+ pwcxt.seg = seg;
+ table_parallel_vacuum_initialize_worker(rel, &pvs, &pwcxt,
+ &state);
+ }
+
/* Setup error traceback support for ereport() */
errcallback.callback = parallel_vacuum_error_callback;
errcallback.arg = &pvs;
@@ -1085,8 +1212,19 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
/* Prepare to track buffer usage during parallel execution */
InstrStartParallelQuery();
- /* Process indexes to perform vacuum/cleanup */
- parallel_vacuum_process_safe_indexes(&pvs);
+ switch (pvs.shared->work_phase)
+ {
+ case PV_WORK_PHASE_COLLECT_DEAD_ITEMS:
+ /* Scan the table to collect dead items */
+ parallel_vacuum_process_table(&pvs, state);
+ break;
+ case PV_WORK_PHASE_PROCESS_INDEXES:
+ /* Process indexes to perform vacuum/cleanup */
+ parallel_vacuum_process_safe_indexes(&pvs);
+ break;
+ default:
+ elog(ERROR, "unrecognized parallel vacuum phase %d", pvs.shared->work_phase);
+ }
/* Report buffer/WAL usage during parallel execution */
buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
@@ -1109,6 +1247,77 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
FreeAccessStrategy(pvs.bstrategy);
}
+/*
+ * Launch parallel vacuum workers for the given phase. If at least one
+ * worker is launched, enable shared vacuum delay costing.
+ */
+static void
+parallel_vacuum_begin_work_phase(ParallelVacuumState *pvs, int nworkers,
+ PVWorkPhase work_phase)
+{
+ /* Set the work phase */
+ pvs->shared->work_phase = work_phase;
+
+ /* Reinitialize parallel context to relaunch parallel workers */
+ if (pvs->need_reinitialize_dsm)
+ ReinitializeParallelDSM(pvs->pcxt);
+
+ /*
+ * Set up shared cost balance and the number of active workers for vacuum
+ * delay. We need to do this before launching workers as otherwise, they
+ * might not see the updated values for these parameters.
+ */
+ pg_atomic_write_u32(&(pvs->shared->cost_balance), VacuumCostBalance);
+ pg_atomic_write_u32(&(pvs->shared->active_nworkers), 0);
+
+ /*
+ * The number of workers can vary between bulkdelete and cleanup phase.
+ */
+ ReinitializeParallelWorkers(pvs->pcxt, nworkers);
+
+ LaunchParallelWorkers(pvs->pcxt);
+
+ /* Enable shared vacuum costing if we are able to launch any worker */
+ if (pvs->pcxt->nworkers_launched > 0)
+ {
+ /*
+ * Reset the local cost values for leader backend as we have already
+ * accumulated the remaining balance of heap.
+ */
+ VacuumCostBalance = 0;
+ VacuumCostBalanceLocal = 0;
+
+ /* Enable shared cost balance for leader backend */
+ VacuumSharedCostBalance = &(pvs->shared->cost_balance);
+ VacuumActiveNWorkers = &(pvs->shared->active_nworkers);
+ }
+}
+
+/*
+ * Wait for parallel vacuum workers to finish, accumulate the statistics,
+ * and disable shared vacuum delay costing if enabled.
+ */
+static void
+parallel_vacuum_end_work_phase(ParallelVacuumState *pvs)
+{
+ /* Wait for all vacuum workers to finish */
+ WaitForParallelWorkersToFinish(pvs->pcxt);
+
+ for (int i = 0; i < pvs->pcxt->nworkers_launched; i++)
+ InstrAccumParallelQuery(&pvs->buffer_usage[i], &pvs->wal_usage[i]);
+
+ /* Carry the shared balance value and disable shared costing */
+ if (VacuumSharedCostBalance)
+ {
+ VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
+ VacuumSharedCostBalance = NULL;
+ VacuumActiveNWorkers = NULL;
+ }
+
+ /* Parallel DSM will need to be reinitialized for the next execution */
+ pvs->need_reinitialize_dsm = true;
+}
+
/*
* Error context callback for errors occurring during parallel index vacuum.
* The error context messages should match the messages set in the lazy vacuum
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index e48fe434cd39..d09d353af57f 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -15,6 +15,7 @@
#define HEAPAM_H
#include "access/heapam_xlog.h"
+#include "access/parallel.h"
#include "access/relation.h" /* for backward compatibility */
#include "access/relscan.h"
#include "access/sdir.h"
@@ -397,8 +398,20 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
/* in heap/vacuumlazy.c */
struct VacuumParams;
+struct ParallelVacuumState;
extern void heap_vacuum_rel(Relation rel,
struct VacuumParams *params, BufferAccessStrategy bstrategy);
+extern int heap_parallel_vacuum_compute_workers(Relation rel, int nworkers_requested,
+ void *state);
+extern void heap_parallel_vacuum_estimate(Relation rel, ParallelContext *pcxt, int nworkers,
+ void *state);
+extern void heap_parallel_vacuum_initialize(Relation rel, ParallelContext *pcxt,
+ int nworkers, void *state);
+extern void heap_parallel_vacuum_initialize_worker(Relation rel, struct ParallelVacuumState *pvs,
+ ParallelWorkerContext *pwcxt,
+ void **state_out);
+extern void heap_parallel_vacuum_collect_dead_items(Relation rel, struct ParallelVacuumState *pvs,
+ void *state);
/* in heap/heapam_visibility.c */
extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot,
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 8713e12cbfb9..4cecb9c92907 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -35,6 +35,9 @@ extern PGDLLIMPORT bool synchronize_seqscans;
struct BulkInsertStateData;
struct IndexInfo;
+struct ParallelContext;
+struct ParallelVacuumState;
+struct ParallelWorkerContext;
struct SampleScanState;
struct VacuumParams;
struct ValidateIndexState;
@@ -648,6 +651,81 @@ typedef struct TableAmRoutine
struct VacuumParams *params,
BufferAccessStrategy bstrategy);
+ /* ------------------------------------------------------------------------
+ * Callbacks for parallel table vacuum.
+ * ------------------------------------------------------------------------
+ */
+
+ /*
+ * Compute the number of parallel workers for parallel table vacuum. The
+ * parallel degree for parallel vacuum is further limited by
+ * max_parallel_maintenance_workers. Returning 0 disables parallel table
+ * vacuum.
+ *
+ * 'nworkers_requested' is the number of workers requested by the user via
+ * the PARALLEL option (>= 0). 0 means to choose the parallel degree based
+ * on table-AM-specific factors such as table size.
+ */
+ int (*parallel_vacuum_compute_workers) (Relation rel,
+ int nworkers_requested,
+ void *state);
+
+ /*
+ * Estimate the size of shared memory needed for a parallel table vacuum
+ * of this relation.
+ *
+ * Not called if parallel table vacuum is disabled.
+ *
+ * Optional callback, but either all other parallel vacuum callbacks need
+ * to exist, or neither.
+ */
+ void (*parallel_vacuum_estimate) (Relation rel,
+ struct ParallelContext *pcxt,
+ int nworkers,
+ void *state);
+
+ /*
+ * Initialize DSM space for parallel table vacuum.
+ *
+ * Not called if parallel table vacuum is disabled.
+ *
+ * Optional callback, but either all other parallel vacuum callbacks need
+ * to exist, or neither.
+ */
+ void (*parallel_vacuum_initialize) (Relation rel,
+ struct ParallelContext *pctx,
+ int nworkers,
+ void *state);
+
+ /*
+ * Initialize AM-specific vacuum state for worker processes.
+ *
+ * state_out is an output parameter through which arbitrary data can be
+ * passed to the subsequent callback, parallel_vacuum_collect_dead_items.
+ *
+ * Not called if parallel table vacuum is disabled.
+ *
+ * Optional callback, but either all other parallel vacuum callbacks need
+ * to exist, or neither.
+ */
+ void (*parallel_vacuum_initialize_worker) (Relation rel,
+ struct ParallelVacuumState *pvs,
+ struct ParallelWorkerContext *pwcxt,
+ void **state_out);
+
+ /*
+ * Execute a parallel scan to collect dead items.
+ *
+ * Not called if parallel table vacuum is disabled.
+ *
+ * Optional callback, but either all other parallel vacuum callbacks need
+ * to exist, or neither.
+ */
+ void (*parallel_vacuum_collect_dead_items) (Relation rel,
+ struct ParallelVacuumState *pvs,
+ void *state);
+
/*
* Prepare to analyze block `blockno` of `scan`. The scan has been started
* with table_beginscan_analyze(). See also
@@ -1670,6 +1748,68 @@ table_relation_vacuum(Relation rel, struct VacuumParams *params,
rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
}
+/* ----------------------------------------------------------------------------
+ * Parallel vacuum related functions.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * Compute the number of parallel workers for a parallel vacuum scan of this
+ * relation.
+ */
+static inline int
+table_parallel_vacuum_compute_workers(Relation rel, int nworkers_requested,
+ void *state)
+{
+ return rel->rd_tableam->parallel_vacuum_compute_workers(rel,
+ nworkers_requested,
+ state);
+}
+
+/*
+ * Estimate the size of shared memory needed for a parallel vacuum scan of
+ * this relation.
+ */
+static inline void
+table_parallel_vacuum_estimate(Relation rel, struct ParallelContext *pcxt,
+ int nworkers, void *state)
+{
+ Assert(nworkers > 0);
+ rel->rd_tableam->parallel_vacuum_estimate(rel, pcxt, nworkers, state);
+}
+
+/*
+ * Initialize shared memory area for a parallel vacuum scan of this relation.
+ */
+static inline void
+table_parallel_vacuum_initialize(Relation rel, struct ParallelContext *pcxt,
+ int nworkers, void *state)
+{
+ Assert(nworkers > 0);
+ rel->rd_tableam->parallel_vacuum_initialize(rel, pcxt, nworkers, state);
+}
+
+/*
+ * Initialize AM-specific vacuum state for worker processes.
+ */
+static inline void
+table_parallel_vacuum_initialize_worker(Relation rel, struct ParallelVacuumState *pvs,
+ struct ParallelWorkerContext *pwcxt,
+ void **state_out)
+{
+ rel->rd_tableam->parallel_vacuum_initialize_worker(rel, pvs, pwcxt, state_out);
+}
+
+/*
+ * Execute a parallel vacuum scan to collect dead items.
+ */
+static inline void
+table_parallel_vacuum_collect_dead_items(Relation rel, struct ParallelVacuumState *pvs,
+ void *state)
+{
+ rel->rd_tableam->parallel_vacuum_collect_dead_items(rel, pvs, state);
+}
+
/*
* Prepare to analyze the next block in the read stream. The scan needs to
* have been started with table_beginscan_analyze(). Note that this routine
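Since only parallel_vacuum_compute_workers is mandatory, a table AM that does not implement parallel table vacuum can satisfy the assertions in GetTableAmRoutine() by returning 0 and leaving the optional callbacks unset. A minimal sketch with a hypothetical AM name ("mytam") and the unrelated required callbacks elided:

static int
mytam_parallel_vacuum_compute_workers(Relation rel, int nworkers_requested,
									  void *state)
{
	/* returning 0 disables parallel table vacuum for this AM */
	return 0;
}

static const TableAmRoutine mytam_methods = {
	/* ... other required callbacks elided for brevity ... */
	.parallel_vacuum_compute_workers = mytam_parallel_vacuum_compute_workers,
	/* the four optional parallel-vacuum callbacks stay NULL */
};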
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index bc37a80dc74f..849cb4dcc74d 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -382,8 +382,12 @@ extern void VacuumUpdateCosts(void);
extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels,
int nindexes, int nrequested_workers,
int vac_work_mem, int elevel,
- BufferAccessStrategy bstrategy);
+ BufferAccessStrategy bstrategy,
+ void *state);
extern void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats);
+extern int parallel_vacuum_get_nworkers_table(ParallelVacuumState *pvs);
+extern Relation *parallel_vacuum_get_table_indexes(ParallelVacuumState *pvs, int *nindexes);
+extern BufferAccessStrategy parallel_vacuum_get_bstrategy(ParallelVacuumState *pvs);
extern TidStore *parallel_vacuum_get_dead_items(ParallelVacuumState *pvs,
VacDeadItemsInfo **dead_items_info_p);
extern void parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs);
@@ -394,6 +398,8 @@ extern void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs,
long num_table_tuples,
int num_index_scans,
bool estimated_count);
+extern int parallel_vacuum_collect_dead_items_begin(ParallelVacuumState *pvs);
+extern void parallel_vacuum_collect_dead_items_end(ParallelVacuumState *pvs);
extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
/* in commands/analyze.c */
diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index 0abcc99989e0..f92c3f73c29a 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -160,6 +160,11 @@ UPDATE pvactst SET i = i WHERE i < 1000;
VACUUM (PARALLEL 2) pvactst;
UPDATE pvactst SET i = i WHERE i < 1000;
VACUUM (PARALLEL 0) pvactst; -- disable parallel vacuum
+-- VACUUM invokes parallel heap vacuum.
+SET min_parallel_table_scan_size to 0;
+VACUUM (PARALLEL 2, FREEZE) pvactst2;
+UPDATE pvactst2 SET i = i WHERE i < 1000;
+VACUUM (PARALLEL 1) pvactst2;
VACUUM (PARALLEL -1) pvactst; -- error
ERROR: parallel workers for vacuum must be between 0 and 1024
LINE 1: VACUUM (PARALLEL -1) pvactst;
@@ -185,6 +190,7 @@ VACUUM (PARALLEL 1, FULL FALSE) tmp; -- parallel vacuum disabled for temp tables
WARNING: disabling parallel option of vacuum on "tmp" --- cannot vacuum temporary tables in parallel
VACUUM (PARALLEL 0, FULL TRUE) tmp; -- can specify parallel disabled (even though that's implied by FULL)
RESET min_parallel_index_scan_size;
+RESET min_parallel_table_scan_size;
DROP TABLE pvactst;
DROP TABLE pvactst2;
-- INDEX_CLEANUP option
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index a72bdb5b619d..b8abab28ea92 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -129,6 +129,12 @@ VACUUM (PARALLEL 2) pvactst;
UPDATE pvactst SET i = i WHERE i < 1000;
VACUUM (PARALLEL 0) pvactst; -- disable parallel vacuum
+-- VACUUM invokes parallel heap vacuum.
+SET min_parallel_table_scan_size to 0;
+VACUUM (PARALLEL 2, FREEZE) pvactst2;
+UPDATE pvactst2 SET i = i WHERE i < 1000;
+VACUUM (PARALLEL 1) pvactst2;
+
VACUUM (PARALLEL -1) pvactst; -- error
VACUUM (PARALLEL 2, INDEX_CLEANUP FALSE) pvactst;
VACUUM (PARALLEL 2, FULL TRUE) pvactst; -- error, cannot use both PARALLEL and FULL
@@ -148,6 +154,7 @@ CREATE INDEX tmp_idx1 ON tmp (a);
VACUUM (PARALLEL 1, FULL FALSE) tmp; -- parallel vacuum disabled for temp tables
VACUUM (PARALLEL 0, FULL TRUE) tmp; -- can specify parallel disabled (even though that's implied by FULL)
RESET min_parallel_index_scan_size;
+RESET min_parallel_table_scan_size;
DROP TABLE pvactst;
DROP TABLE pvactst2;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e5879e00dffe..bb8eefd34263 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1522,6 +1522,7 @@ LSEG
LUID
LVRelState
LVSavedErrInfo
+LVScanData
LWLock
LWLockHandle
LWLockMode
@@ -1958,6 +1959,10 @@ PLpgSQL_type
PLpgSQL_type_type
PLpgSQL_var
PLpgSQL_variable
+ParallelLVLeader
+ParallelLVScanWorker
+ParallelLVShared
+ParallelLVState
PLwdatum
PLword
PLyArrayToOb
@@ -2031,6 +2036,7 @@ PVIndStats
PVIndVacStatus
PVOID
PVShared
+PVWorkPhase
PX_Alias
PX_Cipher
PX_Combo