summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane2011-08-16 17:12:03 +0000
committerTom Lane2011-08-16 17:12:03 +0000
commit45476031a54c12bebf2f209ea2b6fb3bfc905193 (patch)
treeb85d6590e81df75b7842f6a7cd960de43c3f19a9
parent0615782b23d3e4a9d9acece919bd77f919928953 (diff)
Fix race condition in relcache init file invalidation.
The previous code tried to synchronize by unlinking the init file twice, but that doesn't actually work: it leaves a window wherein a third process could read the already-stale init file but miss the SI messages that would tell it the data is stale. The result would be bizarre failures in catalog accesses, typically "could not read block 0 in file ..." later during startup. Instead, hold RelCacheInitLock across both the unlink and the sending of the SI messages. This is more straightforward, and might even be a bit faster since only one unlink call is needed. This has been wrong since it was put in (in 2002!), so back-patch to all supported releases.
-rw-r--r--src/backend/access/transam/twophase.c4
-rw-r--r--src/backend/utils/cache/inval.c33
-rw-r--r--src/backend/utils/cache/relcache.c66
-rw-r--r--src/include/utils/relcache.h3
4 files changed, 57 insertions, 49 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 281268120ef..54176ee9df9 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* after we send the SI messages. See AtEOXact_Inval()
*/
if (hdr->initfileinval)
- RelationCacheInitFileInvalidate(true);
+ RelationCacheInitFilePreInvalidate();
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
if (hdr->initfileinval)
- RelationCacheInitFileInvalidate(false);
+ RelationCacheInitFilePostInvalidate();
/* And now do the callbacks */
if (isCommit)
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index a5580bd92fb..4249bd33765 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
return numSharedInvalidMessagesArray;
}
-#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \
-{ \
- DatabasePath = GetDatabasePath(dbo, tbo); \
- elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \
- RelationCacheInitFileInvalidate(tf); \
- pfree(DatabasePath); \
-}
-
/*
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
* to process invalidation messages added to commit records.
*
* Relcache init file invalidation requires processing both
* before and after we send the SI messages. See AtEOXact_Inval()
- *
- * We deliberately avoid SetDatabasePath() since it is intended to be used
- * only once by normal backends, so we set DatabasePath directly then
- * pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.
*/
void
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
@@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
(RelcacheInitFileInval ? " and relcache file invalidation" : ""));
if (RelcacheInitFileInval)
- RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true);
+ {
+ /*
+ * RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
+ * but we should not use SetDatabasePath during recovery, since it is
+ * intended to be used only once by normal backends. Hence, a quick
+ * hack: set DatabasePath directly then unset after use.
+ */
+ DatabasePath = GetDatabasePath(dbid, tsid);
+ elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
+ DatabasePath);
+ RelationCacheInitFilePreInvalidate();
+ pfree(DatabasePath);
+ DatabasePath = NULL;
+ }
SendSharedInvalidMessages(msgs, nmsgs);
if (RelcacheInitFileInval)
- RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false);
+ RelationCacheInitFilePostInvalidate();
}
/*
@@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit)
* unless we committed.
*/
if (transInvalInfo->RelcacheInitFileInval)
- RelationCacheInitFileInvalidate(true);
+ RelationCacheInitFilePreInvalidate();
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
&transInvalInfo->CurrentCmdInvalidMsgs);
@@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit)
SendSharedInvalidMessages);
if (transInvalInfo->RelcacheInitFileInval)
- RelationCacheInitFileInvalidate(false);
+ RelationCacheInitFilePostInvalidate();
}
else if (transInvalInfo != NULL)
{
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 1a400a0a576..bc52c56021c 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared)
* updated by SI message processing, but we can't be sure whether what we
* wrote out was up-to-date.)
*
- * This mustn't run concurrently with RelationCacheInitFileInvalidate, so
- * grab a serialization lock for the duration.
+ * This mustn't run concurrently with the code that unlinks an init file
+ * and sends SI messages, so grab a serialization lock for the duration.
*/
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
@@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId)
* changed one or more of the relation cache entries that are kept in the
* local init file.
*
- * We actually need to remove the init file twice: once just before sending
- * the SI messages that include relcache inval for such relations, and once
- * just after sending them. The unlink before ensures that a backend that's
- * currently starting cannot read the now-obsolete init file and then miss
- * the SI messages that will force it to update its relcache entries. (This
- * works because the backend startup sequence gets into the PGPROC array before
- * trying to load the init file.) The unlink after is to synchronize with a
- * backend that may currently be trying to write an init file based on data
- * that we've just rendered invalid. Such a backend will see the SI messages,
- * but we can't leave the init file sitting around to fool later backends.
+ * To be safe against concurrent inspection or rewriting of the init file,
+ * we must take RelCacheInitLock, then remove the old init file, then send
+ * the SI messages that include relcache inval for such relations, and then
+ * release RelCacheInitLock. This serializes the whole affair against
+ * write_relcache_init_file, so that we can be sure that any other process
+ * that's concurrently trying to create a new init file won't move an
+ * already-stale version into place after we unlink. Also, because we unlink
+ * before sending the SI messages, a backend that's currently starting cannot
+ * read the now-obsolete init file and then miss the SI messages that will
+ * force it to update its relcache entries. (This works because the backend
+ * startup sequence gets into the sinval array before trying to load the init
+ * file.)
*
- * Ignore any failure to unlink the file, since it might not be there if
- * no backend has been started since the last removal.
+ * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
+ * then release the lock in RelationCacheInitFilePostInvalidate. Caller must
+ * send any pending SI messages between those calls.
*
* Notice this deals only with the local init file, not the shared init file.
* The reason is that there can never be a "significant" change to the
@@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId)
* be invalid enough to make it necessary to remove it.
*/
void
-RelationCacheInitFileInvalidate(bool beforeSend)
+RelationCacheInitFilePreInvalidate(void)
{
char initfilename[MAXPGPATH];
snprintf(initfilename, sizeof(initfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME);
- if (beforeSend)
- {
- /* no interlock needed here */
- unlink(initfilename);
- }
- else
+ LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
+
+ if (unlink(initfilename) < 0)
{
/*
- * We need to interlock this against write_relcache_init_file, to
- * guard against possibility that someone renames a new-but-
- * already-obsolete init file into place just after we unlink. With
- * the interlock, it's certain that write_relcache_init_file will
- * notice our SI inval message before renaming into place, or else
- * that we will execute second and successfully unlink the file.
+ * The file might not be there if no backend has been started since
+ * the last removal. But complain about failures other than ENOENT.
+ * Fortunately, it's not too late to abort the transaction if we
+ * can't get rid of the would-be-obsolete init file.
*/
- LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
- unlink(initfilename);
- LWLockRelease(RelCacheInitLock);
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not remove cache file \"%s\": %m",
+ initfilename)));
}
}
+void
+RelationCacheInitFilePostInvalidate(void)
+{
+ LWLockRelease(RelCacheInitLock);
+}
+
/*
* Remove the init files during postmaster startup.
*
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 1f4def56845..5de9f359ef2 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -97,7 +97,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
* Routines to help manage rebuilding of relcache init files
*/
extern bool RelationIdIsInInitFile(Oid relationId);
-extern void RelationCacheInitFileInvalidate(bool beforeSend);
+extern void RelationCacheInitFilePreInvalidate(void);
+extern void RelationCacheInitFilePostInvalidate(void);
extern void RelationCacheInitFileRemove(void);
/* should be used only by relcache.c and catcache.c */