summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Rowley 2024-07-09 00:15:47 +0000
committer David Rowley 2024-07-09 00:15:47 +0000
commit 5a1e6df3b84c91957f80b19edb497a5eec83c403 (patch)
tree b8f090995c93d9faa08d28e2a53a1b78f97d1057
parent e41f7130979442362d9053d1ae24b2f87980e842 (diff)
Show Parallel Bitmap Heap Scan worker stats in EXPLAIN ANALYZE

Nodes like Memoize report the cache stats for each parallel worker, so it makes sense to show the exact and lossy pages in Parallel Bitmap Heap Scan in a similar way. Likewise, Sort shows the method and memory used for each worker.

There was some discussion on whether the leader stats should include the totals for each parallel worker or not. I did some analysis on this to see what other parallel node types do and it seems only Parallel Hash does anything like this. All the rest, per what's supported by ExecParallelRetrieveInstrumentation(), are consistent with each other.

Author: David Geier <[email protected]>
Author: Heikki Linnakangas <[email protected]>
Author: Donghang Lin <[email protected]>
Author: Alena Rybakina <[email protected]>
Author: David Rowley <[email protected]>
Reviewed-by: Dmitry Dolgov <[email protected]>
Reviewed-by: Michael Christofides <[email protected]>
Reviewed-by: Robert Haas <[email protected]>
Reviewed-by: Dilip Kumar <[email protected]>
Reviewed-by: Tomas Vondra <[email protected]>
Reviewed-by: Melanie Plageman <[email protected]>
Reviewed-by: Donghang Lin <[email protected]>
Reviewed-by: Masahiro Ikeda <[email protected]>
Discussion: https://postgr.es/m/b3d80961-c2e5-38cc-6a32-61886cdf766d%40gmail.com
-rw-r--r-- src/backend/commands/explain.c 58
-rw-r--r-- src/backend/executor/execParallel.c 3
-rw-r--r-- src/backend/executor/nodeBitmapHeapscan.c 105
-rw-r--r-- src/include/executor/nodeBitmapHeapscan.h 1
-rw-r--r-- src/include/nodes/execnodes.h 35
-rw-r--r-- src/tools/pgindent/typedefs.list 2
6 files changed, 181 insertions, 23 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 6defd26df50..118db12903c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2010,8 +2010,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (plan->qual)
show_instrumentation_count("Rows Removed by Filter", 1,
planstate, es);
- if (es->analyze)
- show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
+ show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
break;
case T_SampleScan:
show_tablesample(((SampleScan *) plan)->tablesample,
@@ -3628,31 +3627,70 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
}
/*
- * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node
+ * Show exact/lossy pages for a BitmapHeapScan node
+ *
+ * Does nothing unless es->analyze is set.
+ *
+ * The node's own counters (planstate->stats) are shown first. In non-text
+ * formats both properties are always emitted; in text format a single
+ * "Heap Blocks:" line is printed only when at least one counter is
+ * non-zero, and a zero counter is left out of that line.
+ *
+ * Then, when shared instrumentation is available, the counters of each
+ * worker slot are shown the same way, wrapped in ExplainOpenWorker() /
+ * ExplainCloseWorker() when es->workers_state is set. Worker slots whose
+ * counters are both zero are skipped in every format.
*/
static void
show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
{
+ if (!es->analyze)
+ return;
+
if (es->format != EXPLAIN_FORMAT_TEXT)
{
ExplainPropertyUInteger("Exact Heap Blocks", NULL,
- planstate->exact_pages, es);
+ planstate->stats.exact_pages, es);
ExplainPropertyUInteger("Lossy Heap Blocks", NULL,
- planstate->lossy_pages, es);
+ planstate->stats.lossy_pages, es);
}
else
{
- if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+ if (planstate->stats.exact_pages > 0 || planstate->stats.lossy_pages > 0)
{
ExplainIndentText(es);
appendStringInfoString(es->str, "Heap Blocks:");
- if (planstate->exact_pages > 0)
- appendStringInfo(es->str, " exact=" UINT64_FORMAT, planstate->exact_pages);
- if (planstate->lossy_pages > 0)
- appendStringInfo(es->str, " lossy=" UINT64_FORMAT, planstate->lossy_pages);
+ if (planstate->stats.exact_pages > 0)
+ appendStringInfo(es->str, " exact=" UINT64_FORMAT, planstate->stats.exact_pages);
+ if (planstate->stats.lossy_pages > 0)
+ appendStringInfo(es->str, " lossy=" UINT64_FORMAT, planstate->stats.lossy_pages);
appendStringInfoChar(es->str, '\n');
}
}
+
+ /*
+ * Display stats for each parallel worker. Test sinstrument as well as
+ * pstate: the loop dereferences sinstrument, and the DSM setup leaves it
+ * NULL when no instrumentation space was allocated.
+ */
+ if (planstate->pstate != NULL && planstate->sinstrument != NULL)
+ {
+ for (int n = 0; n < planstate->sinstrument->num_workers; n++)
+ {
+ BitmapHeapScanInstrumentation *si = &planstate->sinstrument->sinstrument[n];
+
+ if (si->exact_pages == 0 && si->lossy_pages == 0)
+ continue; /* this slot recorded no heap pages */
+
+ if (es->workers_state)
+ ExplainOpenWorker(n, es);
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainIndentText(es);
+ appendStringInfoString(es->str, "Heap Blocks:");
+ if (si->exact_pages > 0)
+ appendStringInfo(es->str, " exact=" UINT64_FORMAT, si->exact_pages);
+ if (si->lossy_pages > 0)
+ appendStringInfo(es->str, " lossy=" UINT64_FORMAT, si->lossy_pages);
+ appendStringInfoChar(es->str, '\n');
+ }
+ else
+ {
+ ExplainPropertyUInteger("Exact Heap Blocks", NULL,
+ si->exact_pages, es);
+ ExplainPropertyUInteger("Lossy Heap Blocks", NULL,
+ si->lossy_pages, es);
+ }
+
+ if (es->workers_state)
+ ExplainCloseWorker(n, es);
+ }
+ }
}
/*
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 8c53d1834e9..bfb3419efb7 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1076,6 +1076,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
case T_MemoizeState:
ExecMemoizeRetrieveInstrumentation((MemoizeState *) planstate);
break;
+ case T_BitmapHeapScanState:
+ ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate);
+ break;
default:
break;
}
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 6b48a6d8350..3c63bdd93df 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -236,9 +236,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
valid_block = table_scan_bitmap_next_block(scan, tbmres);
if (tbmres->ntuples >= 0)
- node->exact_pages++;
+ node->stats.exact_pages++;
else
- node->lossy_pages++;
+ node->stats.lossy_pages++;
if (!valid_block)
{
@@ -628,6 +628,29 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
TableScanDesc scanDesc;
/*
+ * When ending a parallel worker, copy the statistics gathered by the
+ * worker back into shared memory so that it can be picked up by the main
+ * process to report in EXPLAIN ANALYZE.
+ */
+ if (node->sinstrument != NULL && IsParallelWorker())
+ {
+ BitmapHeapScanInstrumentation *si;
+
+ Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
+ si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
+
+ /*
+ * Here we accumulate the stats rather than performing memcpy on
+ * node->stats into si. When a Gather/GatherMerge node finishes it
+ * will perform executor shutdown on the workers. On rescan it will
+ * spin up new workers which will have a new BitmapHeapScanState and
+ * zeroed stats.
+ */
+ si->exact_pages += node->stats.exact_pages;
+ si->lossy_pages += node->stats.lossy_pages;
+ }
+
+ /*
* extract information from the node
*/
scanDesc = node->ss.ss_currentScanDesc;
@@ -694,8 +717,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->tbmiterator = NULL;
scanstate->tbmres = NULL;
scanstate->pvmbuffer = InvalidBuffer;
- scanstate->exact_pages = 0;
- scanstate->lossy_pages = 0;
+
+ /* Zero the statistics counters */
+ memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
+
scanstate->prefetch_iterator = NULL;
scanstate->prefetch_pages = 0;
scanstate->prefetch_target = 0;
@@ -803,7 +828,18 @@ void
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
ParallelContext *pcxt)
{
- shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelBitmapHeapState));
+ Size size;
+
+ size = MAXALIGN(sizeof(ParallelBitmapHeapState));
+
+ /* account for instrumentation, if required */
+ if (node->ss.ps.instrument && pcxt->nworkers > 0)
+ {
+ size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
+ size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
+ }
+
+ shm_toc_estimate_chunk(&pcxt->estimator, size);
shm_toc_estimate_keys(&pcxt->estimator, 1);
}
@@ -818,13 +854,27 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
ParallelContext *pcxt)
{
ParallelBitmapHeapState *pstate;
+ SharedBitmapHeapInstrumentation *sinstrument = NULL;
dsa_area *dsa = node->ss.ps.state->es_query_dsa;
+ char *ptr;
+ Size size;
/* If there's no DSA, there are no workers; initialize nothing. */
if (dsa == NULL)
return;
- pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelBitmapHeapState));
+ size = MAXALIGN(sizeof(ParallelBitmapHeapState));
+ if (node->ss.ps.instrument && pcxt->nworkers > 0)
+ {
+ size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
+ size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
+ }
+
+ ptr = shm_toc_allocate(pcxt->toc, size);
+ pstate = (ParallelBitmapHeapState *) ptr;
+ ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
+ if (node->ss.ps.instrument && pcxt->nworkers > 0)
+ sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
pstate->tbmiterator = 0;
pstate->prefetch_iterator = 0;
@@ -837,8 +887,18 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
ConditionVariableInit(&pstate->cv);
+ if (sinstrument)
+ {
+ sinstrument->num_workers = pcxt->nworkers;
+
+ /* ensure any unfilled slots will contain zeroes */
+ memset(sinstrument->sinstrument, 0,
+ pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
+ }
+
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
node->pstate = pstate;
+ node->sinstrument = sinstrument;
}
/* ----------------------------------------------------------------
@@ -880,10 +940,37 @@ void
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
ParallelWorkerContext *pwcxt)
{
- ParallelBitmapHeapState *pstate;
+ char *ptr;
Assert(node->ss.ps.state->es_query_dsa != NULL);
- pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
- node->pstate = pstate;
+ ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
+
+ node->pstate = (ParallelBitmapHeapState *) ptr;
+ ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
+
+ if (node->ss.ps.instrument)
+ node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
+}
+
+/* ----------------------------------------------------------------
+ * ExecBitmapHeapRetrieveInstrumentation
+ *
+ * Transfer bitmap heap scan statistics from DSM to private memory.
+ *
+ * Replaces node->sinstrument with a palloc'd copy of the structure it
+ * currently points to: the header plus all num_workers per-worker
+ * slots. Does nothing when node->sinstrument is NULL, i.e. when no
+ * shared instrumentation was set up for this node.
+ * ----------------------------------------------------------------
+ */
+void
+ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
+{
+ SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
+ Size size;
+
+ if (sinstrument == NULL)
+ return; /* nothing to copy */
+
+ size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
+ + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation); /* header plus one slot per worker */
+
+ node->sinstrument = palloc(size); /* from here on the node points at the private copy */
+ memcpy(node->sinstrument, sinstrument, size);
}
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index ea003a9caae..446a664590a 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
ParallelContext *pcxt);
extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
ParallelWorkerContext *pwcxt);
+extern void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node);
#endif /* NODEBITMAPHEAPSCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index abfcd5f5905..cac684d9b3a 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1747,6 +1747,19 @@ typedef struct BitmapIndexScanState
} BitmapIndexScanState;
/* ----------------
+ * BitmapHeapScanInstrumentation information
+ *
+ * exact_pages total number of exact pages retrieved
+ * lossy_pages total number of lossy pages retrieved
+ * ----------------
+ */
+typedef struct BitmapHeapScanInstrumentation
+{
+ uint64 exact_pages; /* blocks counted when the iterate result had ntuples >= 0 */
+ uint64 lossy_pages; /* blocks counted when the iterate result had ntuples < 0 */
+} BitmapHeapScanInstrumentation;
+
+/* ----------------
* SharedBitmapState information
*
* BM_INITIAL TIDBitmap creation is not yet started, so first worker
@@ -1790,6 +1803,20 @@ typedef struct ParallelBitmapHeapState
} ParallelBitmapHeapState;
/* ----------------
+ * Instrumentation data for a parallel bitmap heap scan.
+ *
+ * A shared memory struct that each parallel worker copies its
+ * BitmapHeapScanInstrumentation information into at executor shutdown to
+ * allow the leader to display the information in EXPLAIN ANALYZE.
+ * ----------------
+ */
+typedef struct SharedBitmapHeapInstrumentation
+{
+ int num_workers; /* number of slots in sinstrument[]; set from pcxt->nworkers */
+ BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; /* one slot per worker, indexed by ParallelWorkerNumber */
+} SharedBitmapHeapInstrumentation;
+
+/* ----------------
* BitmapHeapScanState information
*
* bitmapqualorig execution state for bitmapqualorig expressions
@@ -1797,8 +1824,7 @@ typedef struct ParallelBitmapHeapState
* tbmiterator iterator for scanning current pages
* tbmres current-page data
* pvmbuffer buffer for visibility-map lookups of prefetched pages
- * exact_pages total number of exact pages retrieved
- * lossy_pages total number of lossy pages retrieved
+ * stats execution statistics
* prefetch_iterator iterator for prefetching ahead of current page
* prefetch_pages # pages prefetch iterator is ahead of current
* prefetch_target current target prefetch distance
@@ -1807,6 +1833,7 @@ typedef struct ParallelBitmapHeapState
* shared_tbmiterator shared iterator
* shared_prefetch_iterator shared iterator for prefetching
* pstate shared state for parallel bitmap scan
+ * sinstrument statistics for parallel workers
* ----------------
*/
typedef struct BitmapHeapScanState
@@ -1817,8 +1844,7 @@ typedef struct BitmapHeapScanState
TBMIterator *tbmiterator;
TBMIterateResult *tbmres;
Buffer pvmbuffer;
- uint64 exact_pages;
- uint64 lossy_pages;
+ BitmapHeapScanInstrumentation stats;
TBMIterator *prefetch_iterator;
int prefetch_pages;
int prefetch_target;
@@ -1827,6 +1853,7 @@ typedef struct BitmapHeapScanState
TBMSharedIterator *shared_tbmiterator;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
+ SharedBitmapHeapInstrumentation *sinstrument;
} BitmapHeapScanState;
/* ----------------
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9320e4d8080..635e6d6e215 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -262,6 +262,7 @@ BitmapAndPath
BitmapAndState
BitmapHeapPath
BitmapHeapScan
+BitmapHeapScanInstrumentation
BitmapHeapScanState
BitmapIndexScan
BitmapIndexScanState
@@ -2603,6 +2604,7 @@ SetToDefault
SetupWorkerPtrType
ShDependObjectInfo
SharedAggInfo
+SharedBitmapHeapInstrumentation
SharedBitmapState
SharedDependencyObjectType
SharedDependencyType