Skip to content

Commit 90c885c

Browse files
committed
Increment xactCompletionCount during subtransaction abort.
Snapshot caching, introduced in 623a9ba, did not increment xactCompletionCount during subtransaction abort. That could lead to an older snapshot being reused. That is, at least as far as I can see, not a correctness issue (for MVCC snapshots there's no difference between "in progress" and "aborted"). The only difference between the old and new snapshots would be a newer ->xmax. While HeapTupleSatisfiesMVCC makes the same visibility determination, reusing the old snapshot leads HeapTupleSatisfiesMVCC to not set HEAP_XMIN_INVALID. Which subsequently causes the kill_prior_tuple optimization to not kick in (via HeapTupleIsSurelyDead() returning false). The performance effects of doing the same index-lookups over and over again is how the issue was discovered... Fix the issue by incrementing xactCompletionCount in XidCacheRemoveRunningXids. It already acquires ProcArrayLock exclusively, making that an easy proposition. Add a test to ensure that kill_prior_tuple prevents index growth when it involves aborted subtransaction of the current transaction. Author: Andres Freund Discussion: https://postgr.es/m/[email protected] Discussion: https://postgr.es/m/20210317055718.v6qs3ltzrformqoa%40alap3.anarazel.de
1 parent 8523492 commit 90c885c

File tree

5 files changed

+96
-1
lines changed

5 files changed

+96
-1
lines changed

src/backend/storage/ipc/procarray.c

+8
Original file line numberDiff line numberDiff line change
@@ -1210,6 +1210,11 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
12101210
*/
12111211
MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
12121212

1213+
/*
1214+
* NB: No need to increment ShmemVariableCache->xactCompletionCount here,
1215+
* nobody can see it yet.
1216+
*/
1217+
12131218
LWLockRelease(ProcArrayLock);
12141219

12151220
/* ShmemVariableCache->nextXid must be beyond any observed xid. */
@@ -3915,6 +3920,9 @@ XidCacheRemoveRunningXids(TransactionId xid,
39153920
/* Also advance global latestCompletedXid while holding the lock */
39163921
MaintainLatestCompletedXid(latestXid);
39173922

3923+
/* ... and xactCompletionCount */
3924+
ShmemVariableCache->xactCompletionCount++;
3925+
39183926
LWLockRelease(ProcArrayLock);
39193927
}
39203928

src/test/regress/expected/mvcc.out

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
--
2+
-- Verify that index scans encountering dead rows produced by an
3+
-- aborted subtransaction of the current transaction can utilize the
4+
-- kill_prior_tuple optimization
5+
--
6+
-- NB: The table size is currently *not* expected to stay the same, we
7+
-- don't have logic to trigger opportunistic pruning in cases like
8+
-- this.
9+
BEGIN;
10+
SET LOCAL enable_seqscan = false;
11+
SET LOCAL enable_indexonlyscan = false;
12+
SET LOCAL enable_bitmapscan = false;
13+
-- Can't easily use a unique index, since dead tuples can be found
14+
-- independent of the kill_prior_tuples optimization.
15+
CREATE TABLE clean_aborted_self(key int, data text);
16+
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
17+
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
18+
-- save index size from before the changes, for comparison
19+
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
20+
DO $$
21+
BEGIN
22+
-- iterate often enough to see index growth even on larger-than-default page sizes
23+
FOR i IN 1..100 LOOP
24+
BEGIN
25+
-- perform index scan over all the inserted keys to get them to be seen as dead
26+
IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
27+
RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
28+
END IF;
29+
INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
30+
-- just some error that's not normally thrown
31+
RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
32+
EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
33+
END LOOP;
34+
END;$$;
35+
-- show sizes only if they differ
36+
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
37+
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
38+
size_before | size_after
39+
-------------+------------
40+
(0 rows)
41+
42+
ROLLBACK;

src/test/regress/parallel_schedule

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ test: strings numerology point lseg line box path polygon circle date time timet
2929
# geometry depends on point, lseg, box, path, polygon and circle
3030
# horology depends on interval, timetz, timestamp, timestamptz
3131
# ----------
32-
test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid
32+
test: geometry horology regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc
3333

3434
# ----------
3535
# These four each depend on the previous one

src/test/regress/serial_schedule

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ test: int4
1111
test: int8
1212
test: oid
1313
test: xid
14+
test: mvcc
1415
test: float4
1516
test: float8
1617
test: bit

src/test/regress/sql/mvcc.sql

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
--
2+
-- Verify that index scans encountering dead rows produced by an
3+
-- aborted subtransaction of the current transaction can utilize the
4+
-- kill_prior_tuple optimization
5+
--
6+
-- NB: The table size is currently *not* expected to stay the same, we
7+
-- don't have logic to trigger opportunistic pruning in cases like
8+
-- this.
9+
BEGIN;
10+
11+
SET LOCAL enable_seqscan = false;
12+
SET LOCAL enable_indexonlyscan = false;
13+
SET LOCAL enable_bitmapscan = false;
14+
15+
-- Can't easily use a unique index, since dead tuples can be found
16+
-- independent of the kill_prior_tuples optimization.
17+
CREATE TABLE clean_aborted_self(key int, data text);
18+
CREATE INDEX clean_aborted_self_key ON clean_aborted_self(key);
19+
INSERT INTO clean_aborted_self (key, data) VALUES (-1, 'just to allocate metapage');
20+
21+
-- save index size from before the changes, for comparison
22+
SELECT pg_relation_size('clean_aborted_self_key') AS clean_aborted_self_key_before \gset
23+
24+
DO $$
25+
BEGIN
26+
-- iterate often enough to see index growth even on larger-than-default page sizes
27+
FOR i IN 1..100 LOOP
28+
BEGIN
29+
-- perform index scan over all the inserted keys to get them to be seen as dead
30+
IF EXISTS(SELECT * FROM clean_aborted_self WHERE key > 0 AND key < 100) THEN
31+
RAISE data_corrupted USING MESSAGE = 'these rows should not exist';
32+
END IF;
33+
INSERT INTO clean_aborted_self SELECT g.i, 'rolling back in a sec' FROM generate_series(1, 100) g(i);
34+
-- just some error that's not normally thrown
35+
RAISE reading_sql_data_not_permitted USING MESSAGE = 'round and round again';
36+
EXCEPTION WHEN reading_sql_data_not_permitted THEN END;
37+
END LOOP;
38+
END;$$;
39+
40+
-- show sizes only if they differ
41+
SELECT :clean_aborted_self_key_before AS size_before, pg_relation_size('clean_aborted_self_key') size_after
42+
WHERE :clean_aborted_self_key_before != pg_relation_size('clean_aborted_self_key');
43+
44+
ROLLBACK;

0 commit comments

Comments
 (0)