diff options
| field | value | date |
|---|---|---|
| author | Peter Geoghegan | 2018-08-10 20:01:34 +0000 |
| committer | Peter Geoghegan | 2018-08-10 20:01:34 +0000 |
| commit | 4974d7f87e62a58e80c6524e49677cb25cc10e12 (patch) | |
| tree | dff7c5c0c15c4e407aa0187047cf0dc922460656 /src/backend/access/transam | |
| parent | d4a900458e505092a8013eb77c9631d58c3c2a0a (diff) | |

Handle parallel index builds on mapped relations.

Commit 9da0cc35284, which introduced parallel CREATE INDEX, failed to
propagate relmapper.c backend local cache state to parallel worker
processes. This could result in parallel index builds against mapped
catalog relations where the leader process (participating as a worker)
scans the new, pristine relfilenode, while worker processes scan the
obsolescent relfilenode. When this happened, the final index structure
was typically not consistent with the owning table's structure. The
final index structure could contain entries formed from both heap
relfilenodes. Only rebuilds on mapped catalog relations that occur as
part of a VACUUM FULL or CLUSTER could become corrupt in practice, since
their mapped relation relfilenode swap is what allows the inconsistency
to arise.

On master, fix the problem by propagating the required relmapper.c
backend state as part of standard parallel initialization (Cf. commit
29d58fd3). On v11, simply disallow builds against mapped catalog
relations by deeming them parallel unsafe.

Author: Peter Geoghegan
Reported-By: "death lock"
Reviewed-By: Tom Lane, Amit Kapila
Bug: #15309
Discussion: https://postgr.es/m/[email protected]
Backpatch: 11-, where parallel CREATE INDEX was introduced.

Diffstat (limited to 'src/backend/access/transam')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/access/transam/README.parallel | 4 |
| -rw-r--r-- | src/backend/access/transam/parallel.c | 19 |
| -rw-r--r-- | src/backend/access/transam/xact.c | 4 |

3 files changed, 24 insertions, 3 deletions

```diff
diff --git a/src/backend/access/transam/README.parallel b/src/backend/access/transam/README.parallel
index f09a5806345..85e5840feba 100644
--- a/src/backend/access/transam/README.parallel
+++ b/src/backend/access/transam/README.parallel
@@ -125,6 +125,10 @@ worker. This includes:
   - State related to pending REINDEX operations, which prevents access to
     an index that is currently being rebuilt.
 
+  - Active relmapper.c mapping state. This is needed to allow consistent
+    answers when fetching the current relfilenode for relation oids of
+    mapped relations.
+
 To prevent unprincipled deadlocks when running in parallel mode, this code
 also arranges for the leader and all workers to participate in group
 locking. See src/backend/storage/lmgr/README for more details.
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 30ddf94c952..c1681184670 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -37,6 +37,7 @@
 #include "utils/guc.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
+#include "utils/relmapper.h"
 #include "utils/snapmgr.h"
 #include "utils/typcache.h"
 
@@ -69,6 +70,7 @@
 #define PARALLEL_KEY_ENTRYPOINT UINT64CONST(0xFFFFFFFFFFFF0009)
 #define PARALLEL_KEY_SESSION_DSM UINT64CONST(0xFFFFFFFFFFFF000A)
 #define PARALLEL_KEY_REINDEX_STATE UINT64CONST(0xFFFFFFFFFFFF000B)
+#define PARALLEL_KEY_RELMAPPER_STATE UINT64CONST(0xFFFFFFFFFFFF000C)
 
 /* Fixed-size parallel state. */
 typedef struct FixedParallelState
@@ -205,6 +207,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
 	Size asnaplen = 0;
 	Size tstatelen = 0;
 	Size reindexlen = 0;
+	Size relmapperlen = 0;
 	Size segsize = 0;
 	int i;
 	FixedParallelState *fps;
@@ -256,8 +259,10 @@ InitializeParallelDSM(ParallelContext *pcxt)
 		shm_toc_estimate_chunk(&pcxt->estimator, sizeof(dsm_handle));
 		reindexlen = EstimateReindexStateSpace();
 		shm_toc_estimate_chunk(&pcxt->estimator, reindexlen);
+		relmapperlen = EstimateRelationMapSpace();
+		shm_toc_estimate_chunk(&pcxt->estimator, relmapperlen);
 		/* If you add more chunks here, you probably need to add keys. */
-		shm_toc_estimate_keys(&pcxt->estimator, 8);
+		shm_toc_estimate_keys(&pcxt->estimator, 9);
 
 		/* Estimate space need for error queues. */
 		StaticAssertStmt(BUFFERALIGN(PARALLEL_ERROR_QUEUE_SIZE) ==
@@ -327,6 +332,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
 		char *asnapspace;
 		char *tstatespace;
 		char *reindexspace;
+		char *relmapperspace;
 		char *error_queue_space;
 		char *session_dsm_handle_space;
 		char *entrypointstate;
@@ -373,6 +379,12 @@ InitializeParallelDSM(ParallelContext *pcxt)
 		SerializeReindexState(reindexlen, reindexspace);
 		shm_toc_insert(pcxt->toc, PARALLEL_KEY_REINDEX_STATE, reindexspace);
 
+		/* Serialize relmapper state. */
+		relmapperspace = shm_toc_allocate(pcxt->toc, relmapperlen);
+		SerializeRelationMap(relmapperlen, relmapperspace);
+		shm_toc_insert(pcxt->toc, PARALLEL_KEY_RELMAPPER_STATE,
+					   relmapperspace);
+
 		/* Allocate space for worker information. */
 		pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers);
 
@@ -1205,6 +1217,7 @@ ParallelWorkerMain(Datum main_arg)
 	char *asnapspace;
 	char *tstatespace;
 	char *reindexspace;
+	char *relmapperspace;
 	StringInfoData msgbuf;
 	char *session_dsm_handle_space;
 
@@ -1380,6 +1393,10 @@ ParallelWorkerMain(Datum main_arg)
 	reindexspace = shm_toc_lookup(toc, PARALLEL_KEY_REINDEX_STATE, false);
 	RestoreReindexState(reindexspace);
 
+	/* Restore relmapper state. */
+	relmapperspace = shm_toc_lookup(toc, PARALLEL_KEY_RELMAPPER_STATE, false);
+	RestoreRelationMap(relmapperspace);
+
 	/*
 	 * We've initialized all of our state now; nothing should change
 	 * hereafter.
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 9aa63c8792b..cd8270d5fb0 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2018,7 +2018,7 @@ CommitTransaction(void)
 	HOLD_INTERRUPTS();
 
 	/* Commit updates to the relation map --- do this as late as possible */
-	AtEOXact_RelationMap(true);
+	AtEOXact_RelationMap(true, is_parallel_worker);
 
 	/*
 	 * set the current transaction state information appropriately during
@@ -2539,7 +2539,7 @@ AbortTransaction(void)
 	AtAbort_Portals();
 	AtEOXact_LargeObject(false);
 	AtAbort_Notify();
-	AtEOXact_RelationMap(false);
+	AtEOXact_RelationMap(false, is_parallel_worker);
 	AtAbort_Twophase();
 
 	/*
```