summaryrefslogtreecommitdiff
path: root/src/backend/access/transam
diff options
context:
space:
mode:
author: Peter Geoghegan, 2018-08-10 20:01:34 +0000
committer: Peter Geoghegan, 2018-08-10 20:01:34 +0000
commit: 4974d7f87e62a58e80c6524e49677cb25cc10e12 (patch)
tree: dff7c5c0c15c4e407aa0187047cf0dc922460656 /src/backend/access/transam
parent: d4a900458e505092a8013eb77c9631d58c3c2a0a (diff)
Handle parallel index builds on mapped relations.
Commit 9da0cc35284, which introduced parallel CREATE INDEX, failed to propagate relmapper.c backend local cache state to parallel worker processes. This could result in parallel index builds against mapped catalog relations where the leader process (participating as a worker) scans the new, pristine relfilenode, while worker processes scan the obsolescent relfilenode. When this happened, the final index structure was typically not consistent with the owning table's structure. The final index structure could contain entries formed from both heap relfilenodes.

Only rebuilds on mapped catalog relations that occur as part of a VACUUM FULL or CLUSTER could become corrupt in practice, since their mapped relation relfilenode swap is what allows the inconsistency to arise.

On master, fix the problem by propagating the required relmapper.c backend state as part of standard parallel initialization (Cf. commit 29d58fd3). On v11, simply disallow builds against mapped catalog relations by deeming them parallel unsafe.

Author: Peter Geoghegan
Reported-By: "death lock"
Reviewed-By: Tom Lane, Amit Kapila
Bug: #15309
Discussion: https://postgr.es/m/[email protected]
Backpatch: 11-, where parallel CREATE INDEX was introduced.
Diffstat (limited to 'src/backend/access/transam')
-rw-r--r-- src/backend/access/transam/README.parallel | 4
-rw-r--r-- src/backend/access/transam/parallel.c | 19
-rw-r--r-- src/backend/access/transam/xact.c | 4
3 files changed, 24 insertions, 3 deletions
diff --git a/src/backend/access/transam/README.parallel b/src/backend/access/transam/README.parallel
index f09a5806345..85e5840feba 100644
--- a/src/backend/access/transam/README.parallel
+++ b/src/backend/access/transam/README.parallel
@@ -125,6 +125,10 @@ worker. This includes:
- State related to pending REINDEX operations, which prevents access to
an index that is currently being rebuilt.
+ - Active relmapper.c mapping state. This is needed to allow consistent
+ answers when fetching the current relfilenode for relation oids of
+ mapped relations.
+
To prevent unprincipled deadlocks when running in parallel mode, this code
also arranges for the leader and all workers to participate in group
locking. See src/backend/storage/lmgr/README for more details.
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 30ddf94c952..c1681184670 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -37,6 +37,7 @@
#include "utils/guc.h"
#include "utils/inval.h"
#include "utils/memutils.h"
+#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/typcache.h"
@@ -69,6 +70,7 @@
#define PARALLEL_KEY_ENTRYPOINT UINT64CONST(0xFFFFFFFFFFFF0009)
#define PARALLEL_KEY_SESSION_DSM UINT64CONST(0xFFFFFFFFFFFF000A)
#define PARALLEL_KEY_REINDEX_STATE UINT64CONST(0xFFFFFFFFFFFF000B)
+#define PARALLEL_KEY_RELMAPPER_STATE UINT64CONST(0xFFFFFFFFFFFF000C)
/* Fixed-size parallel state. */
typedef struct FixedParallelState
@@ -205,6 +207,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
Size asnaplen = 0;
Size tstatelen = 0;
Size reindexlen = 0;
+ Size relmapperlen = 0;
Size segsize = 0;
int i;
FixedParallelState *fps;
@@ -256,8 +259,10 @@ InitializeParallelDSM(ParallelContext *pcxt)
shm_toc_estimate_chunk(&pcxt->estimator, sizeof(dsm_handle));
reindexlen = EstimateReindexStateSpace();
shm_toc_estimate_chunk(&pcxt->estimator, reindexlen);
+ relmapperlen = EstimateRelationMapSpace();
+ shm_toc_estimate_chunk(&pcxt->estimator, relmapperlen);
/* If you add more chunks here, you probably need to add keys. */
- shm_toc_estimate_keys(&pcxt->estimator, 8);
+ shm_toc_estimate_keys(&pcxt->estimator, 9);
/* Estimate space need for error queues. */
StaticAssertStmt(BUFFERALIGN(PARALLEL_ERROR_QUEUE_SIZE) ==
@@ -327,6 +332,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
char *asnapspace;
char *tstatespace;
char *reindexspace;
+ char *relmapperspace;
char *error_queue_space;
char *session_dsm_handle_space;
char *entrypointstate;
@@ -373,6 +379,12 @@ InitializeParallelDSM(ParallelContext *pcxt)
SerializeReindexState(reindexlen, reindexspace);
shm_toc_insert(pcxt->toc, PARALLEL_KEY_REINDEX_STATE, reindexspace);
+ /* Serialize relmapper state. */
+ relmapperspace = shm_toc_allocate(pcxt->toc, relmapperlen);
+ SerializeRelationMap(relmapperlen, relmapperspace);
+ shm_toc_insert(pcxt->toc, PARALLEL_KEY_RELMAPPER_STATE,
+ relmapperspace);
+
/* Allocate space for worker information. */
pcxt->worker = palloc0(sizeof(ParallelWorkerInfo) * pcxt->nworkers);
@@ -1205,6 +1217,7 @@ ParallelWorkerMain(Datum main_arg)
char *asnapspace;
char *tstatespace;
char *reindexspace;
+ char *relmapperspace;
StringInfoData msgbuf;
char *session_dsm_handle_space;
@@ -1380,6 +1393,10 @@ ParallelWorkerMain(Datum main_arg)
reindexspace = shm_toc_lookup(toc, PARALLEL_KEY_REINDEX_STATE, false);
RestoreReindexState(reindexspace);
+ /* Restore relmapper state. */
+ relmapperspace = shm_toc_lookup(toc, PARALLEL_KEY_RELMAPPER_STATE, false);
+ RestoreRelationMap(relmapperspace);
+
/*
* We've initialized all of our state now; nothing should change
* hereafter.
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 9aa63c8792b..cd8270d5fb0 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2018,7 +2018,7 @@ CommitTransaction(void)
HOLD_INTERRUPTS();
/* Commit updates to the relation map --- do this as late as possible */
- AtEOXact_RelationMap(true);
+ AtEOXact_RelationMap(true, is_parallel_worker);
/*
* set the current transaction state information appropriately during
@@ -2539,7 +2539,7 @@ AbortTransaction(void)
AtAbort_Portals();
AtEOXact_LargeObject(false);
AtAbort_Notify();
- AtEOXact_RelationMap(false);
+ AtEOXact_RelationMap(false, is_parallel_worker);
AtAbort_Twophase();
/*