From 33069569392761057856a046420f3ecfefd217a3 Mon Sep 17 00:00:00 2001 From: nkey Date: Fri, 13 Dec 2024 00:53:49 +0100 Subject: [PATCH] amcheck: Fix bt_index_parent_check behavior with CREATE INDEX CONCURRENTLY Modify snapshot handling in verify_nbtree to properly handle indexes created with CREATE INDEX CONCURRENTLY. Previously, the function could fail to find some tuples in bloom when verifying such indexes because it was using the SnapshotAny, while indexes were build using MVCC snapshot. This could lead to false alarms in bt_index_parent_check's validation. Add a regression test for the the issue. Discussion: https://postgr.es/m/CANtu0ojmVd27fEhfpST7RG2KZvwkX%3DdMyKUqg0KM87FkOSdz8Q%40mail.gmail.com --- contrib/amcheck/meson.build | 1 + .../t/006_cic_bt_index_parent_check.pl | 39 +++++++++++ contrib/amcheck/verify_nbtree.c | 68 ++++++++----------- 3 files changed, 69 insertions(+), 39 deletions(-) create mode 100644 contrib/amcheck/t/006_cic_bt_index_parent_check.pl diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index b33e8c9b062f..b040000dd55f 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -49,6 +49,7 @@ tests += { 't/003_cic_2pc.pl', 't/004_verify_nbtree_unique.pl', 't/005_pitr.pl', + 't/006_cic_bt_index_parent_check.pl', ], }, } diff --git a/contrib/amcheck/t/006_cic_bt_index_parent_check.pl b/contrib/amcheck/t/006_cic_bt_index_parent_check.pl new file mode 100644 index 000000000000..6e52c5e39ec9 --- /dev/null +++ b/contrib/amcheck/t/006_cic_bt_index_parent_check.pl @@ -0,0 +1,39 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +# Test bt_index_parent_check with index created with CREATE INDEX CONCURRENTLY +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More; + +my ($node, $result); + +# +# Test set-up +# +$node = PostgreSQL::Test::Cluster->new('CIC_bt_index_parent_check_test'); +$node->init; +$node->start; +$node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); +$node->safe_psql('postgres', q(CREATE TABLE tbl(i int primary key))); +# Insert two rows into index +$node->safe_psql('postgres', q(INSERT INTO tbl SELECT i FROM generate_series(1, 2) s(i);)); + +# start background transaction +my $in_progress_h = $node->background_psql('postgres'); +$in_progress_h->query_safe(q(BEGIN; SELECT pg_current_xact_id();)); + +# delete one row from table, while background transaction is in progress +$node->safe_psql('postgres', q(DELETE FROM tbl WHERE i = 1;)); +# create index concurrently, which will skip the deleted row +$node->safe_psql('postgres', q(CREATE INDEX CONCURRENTLY idx ON tbl(i);)); + +# check index using bt_index_parent_check +$result = $node->psql('postgres', q(SELECT bt_index_parent_check('idx', heapallindexed => true))); +is($result, '0', 'bt_index_parent_check for CIC after removed row'); + +$in_progress_h->quit; +done_testing(); diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index f11c43a0ed79..3048e044aecb 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -382,7 +382,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, BTMetaPageData *metad; uint32 previouslevel; BtreeLevel current; - Snapshot snapshot = SnapshotAny; if (!readonly) elog(DEBUG1, "verifying consistency of tree structure for index \"%s\"", @@ -433,38 +432,35 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->heaptuplespresent = 0; /* - * Register our own snapshot in !readonly case, rather than asking + * Register our own snapshot for heapallindexed, rather than asking * table_index_build_scan() to do this for us later. This needs to * happen before index fingerprinting begins, so we can later be * certain that index fingerprinting should have reached all tuples * returned by table_index_build_scan(). */ - if (!state->readonly) - { - snapshot = RegisterSnapshot(GetTransactionSnapshot()); + state->snapshot = RegisterSnapshot(GetTransactionSnapshot()); - /* - * GetTransactionSnapshot() always acquires a new MVCC snapshot in - * READ COMMITTED mode. A new snapshot is guaranteed to have all - * the entries it requires in the index. - * - * We must defend against the possibility that an old xact - * snapshot was returned at higher isolation levels when that - * snapshot is not safe for index scans of the target index. This - * is possible when the snapshot sees tuples that are before the - * index's indcheckxmin horizon. Throwing an error here should be - * very rare. It doesn't seem worth using a secondary snapshot to - * avoid this. - */ - if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), - snapshot->xmin)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("index \"%s\" cannot be verified using transaction snapshot", - RelationGetRelationName(rel)))); - } - } + /* + * GetTransactionSnapshot() always acquires a new MVCC snapshot in + * READ COMMITTED mode. A new snapshot is guaranteed to have all + * the entries it requires in the index. + * + * We must defend against the possibility that an old xact + * snapshot was returned at higher isolation levels when that + * snapshot is not safe for index scans of the target index. This + * is possible when the snapshot sees tuples that are before the + * index's indcheckxmin horizon. Throwing an error here should be + * very rare. It doesn't seem worth using a secondary snapshot to + * avoid this. + */ + if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && + !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), + state->snapshot->xmin)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("index \"%s\" cannot be verified using transaction snapshot", + RelationGetRelationName(rel)))); +} /* * We need a snapshot to check the uniqueness of the index. For better @@ -476,9 +472,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->indexinfo = BuildIndexInfo(state->rel); if (state->indexinfo->ii_Unique) { - if (snapshot != SnapshotAny) - state->snapshot = snapshot; - else + if (state->snapshot == InvalidSnapshot) state->snapshot = RegisterSnapshot(GetTransactionSnapshot()); } } @@ -555,13 +549,12 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, /* * Create our own scan for table_index_build_scan(), rather than * getting it to do so for us. This is required so that we can - * actually use the MVCC snapshot registered earlier in !readonly - * case. + * actually use the MVCC snapshot registered earlier. * * Note that table_index_build_scan() calls heap_endscan() for us. */ scan = table_beginscan_strat(state->heaprel, /* relation */ - snapshot, /* snapshot */ + state->snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ @@ -569,7 +562,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, /* * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY - * behaves in !readonly case. + * behaves. * * It's okay that we don't actually use the same lock strength for the * heap relation as any other ii_Concurrent caller would in !readonly @@ -578,7 +571,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, * that needs to be sure that there was no concurrent recycling of * TIDs. */ - indexinfo->ii_Concurrent = !state->readonly; + indexinfo->ii_Concurrent = true; /* * Don't wait for uncommitted tuple xact commit/abort when index is a @@ -602,14 +595,11 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, state->heaptuplespresent, RelationGetRelationName(heaprel), 100.0 * bloom_prop_bits_set(state->filter)))); - if (snapshot != SnapshotAny) - UnregisterSnapshot(snapshot); - bloom_free(state->filter); } /* Be tidy: */ - if (snapshot == SnapshotAny && state->snapshot != InvalidSnapshot) + if (state->snapshot != InvalidSnapshot) UnregisterSnapshot(state->snapshot); MemoryContextDelete(state->targetcontext); }