Provide a way for block-level table AMs to re-use acquire_sample_rows()
author: Alexander Korotkov <[email protected]>
Mon, 8 Apr 2024 11:30:30 +0000 (14:30 +0300)
committer: Alexander Korotkov <[email protected]>
Mon, 8 Apr 2024 11:39:48 +0000 (14:39 +0300)
While keeping the API the same, this commit provides a way for block-level
table AMs to re-use the existing acquire_sample_rows() by providing custom
callbacks for getting the next block and the next tuple.

Reported-by: Andres Freund
Discussion: https://postgr.es/m/20240407214001.jgpg5q3yv33ve6y3%40awork3.anarazel.de
Reviewed-by: Pavel Borisov
src/backend/access/heap/heapam_handler.c
src/backend/commands/analyze.c
src/include/access/tableam.h
src/include/commands/vacuum.h
src/tools/pgindent/typedefs.list

index 6b1d2dd7a48a3c88d3741ab7bfea378436dc8094..30095d88b09b4c935eba2b6bc0086bbb4d47195f 100644 (file)
@@ -2666,6 +2666,18 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
    }
 }
 
+/*
+ * heapam_analyze -- implementation of relation_analyze() for heap
+ *                  table access method
+ */
+static void
+heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
+              BlockNumber *totalpages, BufferAccessStrategy bstrategy)
+{
+   block_level_table_analyze(relation, func, totalpages, bstrategy,
+                             heapam_scan_analyze_next_block,
+                             heapam_scan_analyze_next_tuple);
+}
 
 /* ------------------------------------------------------------------------
  * Definition of the heap table access method.
index da27a13a3f0cc9e8d8f57629fb1e9177405ba311..516b43b0e341d2f37b7338c72be43fd0ada5954b 100644 (file)
@@ -76,6 +76,8 @@ int           default_statistics_target = 100;
 /* A few variables that don't seem worth passing around as parameters */
 static MemoryContext anl_context = NULL;
 static BufferAccessStrategy vac_strategy;
+static ScanAnalyzeNextBlockFunc scan_analyze_next_block;
+static ScanAnalyzeNextTupleFunc scan_analyze_next_tuple;
 
 
 static void do_analyze_rel(Relation onerel,
@@ -88,9 +90,6 @@ static void compute_index_stats(Relation onerel, double totalrows,
                                MemoryContext col_context);
 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
                                       Node *index_expr);
-static int acquire_sample_rows(Relation onerel, int elevel,
-                               HeapTuple *rows, int targrows,
-                               double *totalrows, double *totaldeadrows);
 static int compare_rows(const void *a, const void *b, void *arg);
 static int acquire_inherited_sample_rows(Relation onerel, int elevel,
                                          HeapTuple *rows, int targrows,
@@ -191,7 +190,10 @@ analyze_rel(Oid relid, RangeVar *relation,
    if (onerel->rd_rel->relkind == RELKIND_RELATION ||
        onerel->rd_rel->relkind == RELKIND_MATVIEW)
    {
-       /* Use row acquisition function provided by table AM */
+       /*
+        * Get row acquisition function, blocks and tuples iteration callbacks
+        * provided by table AM
+        */
        table_relation_analyze(onerel, &acquirefunc,
                               &relpages, vac_strategy);
    }
@@ -1117,15 +1119,17 @@ block_sampling_read_stream_next(ReadStream *stream,
 }
 
 /*
- * acquire_sample_rows -- acquire a random sample of rows from the heap
+ * acquire_sample_rows -- acquire a random sample of rows from the
+ * block-based relation
  *
  * Selected rows are returned in the caller-allocated array rows[], which
  * must have at least targrows entries.
  * The actual number of rows selected is returned as the function result.
- * We also estimate the total numbers of live and dead rows in the heap,
+ * We also estimate the total numbers of live and dead rows in the relation,
  * and return them into *totalrows and *totaldeadrows, respectively.
  *
- * The returned list of tuples is in order by physical position in the heap.
+ * The returned list of tuples is in order by physical position in the
+ * relation.
  * (We will rely on this later to derive correlation estimates.)
  *
  * As of May 2004 we use a new two-stage method:  Stage one selects up
@@ -1147,7 +1151,7 @@ block_sampling_read_stream_next(ReadStream *stream,
  * look at a statistically unbiased set of blocks, we should get
  * unbiased estimates of the average numbers of live and dead rows per
  * block.  The previous sampling method put too much credence in the row
- * density near the start of the heap.
+ * density near the start of the relation.
  */
 static int
 acquire_sample_rows(Relation onerel, int elevel,
@@ -1188,7 +1192,7 @@ acquire_sample_rows(Relation onerel, int elevel,
    /* Prepare for sampling rows */
    reservoir_init_selection_state(&rstate, targrows);
 
-   scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
+   scan = table_beginscan_analyze(onerel);
    slot = table_slot_create(onerel, NULL);
 
    stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
@@ -1200,11 +1204,11 @@ acquire_sample_rows(Relation onerel, int elevel,
                                        0);
 
    /* Outer loop over blocks to sample */
-   while (heapam_scan_analyze_next_block(scan, stream))
+   while (scan_analyze_next_block(scan, stream))
    {
        vacuum_delay_point();
 
-       while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+       while (scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
        {
            /*
             * The first targrows sample rows are simply copied into the
@@ -1256,7 +1260,7 @@ acquire_sample_rows(Relation onerel, int elevel,
    read_stream_end(stream);
 
    ExecDropSingleTupleTableSlot(slot);
-   heap_endscan(scan);
+   table_endscan(scan);
 
    /*
     * If we didn't find as many tuples as we wanted then we're done. No sort
@@ -1328,16 +1332,22 @@ compare_rows(const void *a, const void *b, void *arg)
 }
 
 /*
- * heapam_analyze -- implementation of relation_analyze() table access method
- *                  callback for heap
+ * block_level_table_analyze -- implementation of relation_analyze() for
+ *                             block-level table access methods
  */
 void
-heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
-              BlockNumber *totalpages, BufferAccessStrategy bstrategy)
+block_level_table_analyze(Relation relation,
+                         AcquireSampleRowsFunc *func,
+                         BlockNumber *totalpages,
+                         BufferAccessStrategy bstrategy,
+                         ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
+                         ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb)
 {
    *func = acquire_sample_rows;
    *totalpages = RelationGetNumberOfBlocks(relation);
    vac_strategy = bstrategy;
+   scan_analyze_next_block = scan_analyze_next_block_cb;
+   scan_analyze_next_tuple = scan_analyze_next_tuple_cb;
 }
 
 
index ec827ac12bf175719ce9f417a26100561bbf6393..d1cd71b7a17d244849ada6b714141973fd1ee41b 100644 (file)
@@ -1020,6 +1020,19 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
    return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
 }
 
+/*
+ * table_beginscan_analyze is an alternative entry point for setting up a
+ * TableScanDesc for an ANALYZE scan.  As with bitmap scans, it's worth using
+ * the same data structure although the behavior is rather different.
+ */
+static inline TableScanDesc
+table_beginscan_analyze(Relation rel)
+{
+   uint32      flags = SO_TYPE_ANALYZE;
+
+   return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
+}
+
 /*
  * End relation scan.
  */
index 9514f8b2fd8ef48090a65aaac587829db13c447c..12a03abb75a56ec9a442666c907bf3999e5705f7 100644 (file)
 #include "catalog/pg_class.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "executor/tuptable.h"
 #include "parser/parse_node.h"
 #include "storage/buf.h"
 #include "storage/lock.h"
+#include "storage/read_stream.h"
 #include "utils/relcache.h"
 
 /*
@@ -390,12 +392,60 @@ extern void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs,
 extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
 
 /* in commands/analyze.c */
+
+struct TableScanDescData;
+
+
+/*
+ * A callback to prepare to analyze a block from `stream` of `scan`. The scan
+ * has been started with table_beginscan_analyze().
+ *
+ * The callback may acquire resources like locks that are held until
+ * ScanAnalyzeNextTupleFunc returns false. In some cases it could be
+ * useful to hold a lock until all tuples in a block have been analyzed by
+ * ScanAnalyzeNextTupleFunc.
+ *
+ * The callback can return false if the block is not suitable for
+ * sampling, e.g. because it's a metapage that could never contain tuples.
+ *
+ * This is primarily suited for block-based AMs. It's not clear what a
+ * good interface for non block-based AMs would be, so there isn't one
+ * yet and sampling using a custom implementation of acquire_sample_rows
+ * may be preferred.
+ */
+typedef bool (*ScanAnalyzeNextBlockFunc) (struct TableScanDescData *scan,
+                                         ReadStream *stream);
+
+/*
+ * A callback to iterate over tuples in the block selected with
+ * ScanAnalyzeNextBlockFunc (which needs to have returned true, and
+ * this routine may not have returned false for the same block before). If
+ * a tuple that's suitable for sampling is found, true is returned and a
+ * tuple is stored in `slot`.
+ *
+ * *liverows and *deadrows are incremented according to the encountered
+ * tuples.
+ *
+ * Not every AM might have a meaningful concept of dead rows, in which
+ * case it's OK to not increment *deadrows - but note that that may
+ * influence autovacuum scheduling (see comment for relation_vacuum
+ * callback).
+ */
+typedef bool (*ScanAnalyzeNextTupleFunc) (struct TableScanDescData *scan,
+                                         TransactionId OldestXmin,
+                                         double *liverows,
+                                         double *deadrows,
+                                         TupleTableSlot *slot);
+
 extern void analyze_rel(Oid relid, RangeVar *relation,
                        VacuumParams *params, List *va_cols, bool in_outer_xact,
                        BufferAccessStrategy bstrategy);
-extern void heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
-                          BlockNumber *totalpages,
-                          BufferAccessStrategy bstrategy);
+extern void block_level_table_analyze(Relation relation,
+                                     AcquireSampleRowsFunc *func,
+                                     BlockNumber *totalpages,
+                                     BufferAccessStrategy bstrategy,
+                                     ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
+                                     ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb);
 
 extern bool std_typanalyze(VacAttrStats *stats);
 
index cb78f11119c86b10106b0203862feb32b9dccd48..704e61dcaa2716100ac7b1b9b5f62a38e637944b 100644 (file)
@@ -2535,6 +2535,8 @@ ScalarIOData
 ScalarItem
 ScalarMCVItem
 Scan
+ScanAnalyzeNextBlockFunc
+ScanAnalyzeNextTupleFunc
 ScanDirection
 ScanKey
 ScanKeyData