Skip to content

Commit 4e87c48

Browse files
committed
Fix handling of WAL segments ready to be archived during crash recovery
78ea8b5 has fixed an issue related to the recycling of WAL segments on standbys depending on archive_mode. However, it has introduced a regression with the handling of WAL segments ready to be archived during crash recovery, causing those files to be recycled without getting archived.

This commit fixes the regression by tracking in shared memory if a live cluster is either in crash recovery or archive recovery as the handling of WAL segments ready to be archived is different in both cases (those WAL segments should not be removed during crash recovery), and by using this new shared memory state to decide if a segment can be recycled or not. Previously, it was not possible to know if a cluster was in crash recovery or archive recovery as the shared state was able to track only if recovery was happening or not, leading to the problem.

A set of TAP tests is added to close the gap here, making sure that WAL segments ready to be archived are correctly handled when a cluster is in archive or crash recovery with archive_mode set to "on" or "always", for both standby and primary.

Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
1 parent 3436c5e commit 4e87c48

File tree

5 files changed

+286
-16
lines changed

5 files changed

+286
-16
lines changed

src/backend/access/transam/xlog.c

+48-9
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,9 @@ static TimeLineID receiveTLI = 0;
221221
static bool lastFullPageWrites;
222222

223223
/*
224-
* Local copy of SharedRecoveryInProgress variable. True actually means "not
225-
* known, need to check the shared state".
224+
* Local copy of the state tracked by SharedRecoveryState in shared memory.
225+
* It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually
226+
* means "not known, need to check the shared state".
226227
*/
227228
static bool LocalRecoveryInProgress = true;
228229

@@ -653,10 +654,10 @@ typedef struct XLogCtlData
653654
TimeLineID PrevTimeLineID;
654655

655656
/*
656-
* SharedRecoveryInProgress indicates if we're still in crash or archive
657+
* SharedRecoveryState indicates if we're still in crash or archive
657658
* recovery. Protected by info_lck.
658659
*/
659-
bool SharedRecoveryInProgress;
660+
RecoveryState SharedRecoveryState;
660661

661662
/*
662663
* SharedHotStandbyActive indicates if we allow hot standby queries to be
@@ -4434,6 +4435,16 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
44344435
updateMinRecoveryPoint = true;
44354436

44364437
UpdateControlFile();
4438+
4439+
/*
4440+
* We update SharedRecoveryState while holding the lock on
4441+
* ControlFileLock so both states are consistent in shared
4442+
* memory.
4443+
*/
4444+
SpinLockAcquire(&XLogCtl->info_lck);
4445+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
4446+
SpinLockRelease(&XLogCtl->info_lck);
4447+
44374448
LWLockRelease(ControlFileLock);
44384449

44394450
CheckRecoveryConsistency();
@@ -5166,7 +5177,7 @@ XLOGShmemInit(void)
51665177
* in additional info.)
51675178
*/
51685179
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
5169-
XLogCtl->SharedRecoveryInProgress = true;
5180+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
51705181
XLogCtl->SharedHotStandbyActive = false;
51715182
XLogCtl->SharedPromoteIsTriggered = false;
51725183
XLogCtl->WalWriterSleeping = false;
@@ -6871,7 +6882,13 @@ StartupXLOG(void)
68716882
*/
68726883
dbstate_at_startup = ControlFile->state;
68736884
if (InArchiveRecovery)
6885+
{
68746886
ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
6887+
6888+
SpinLockAcquire(&XLogCtl->info_lck);
6889+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
6890+
SpinLockRelease(&XLogCtl->info_lck);
6891+
}
68756892
else
68766893
{
68776894
ereport(LOG,
@@ -6884,6 +6901,10 @@ StartupXLOG(void)
68846901
ControlFile->checkPointCopy.ThisTimeLineID,
68856902
recoveryTargetTLI)));
68866903
ControlFile->state = DB_IN_CRASH_RECOVERY;
6904+
6905+
SpinLockAcquire(&XLogCtl->info_lck);
6906+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
6907+
SpinLockRelease(&XLogCtl->info_lck);
68876908
}
68886909
ControlFile->checkPoint = checkPointLoc;
68896910
ControlFile->checkPointCopy = checkPoint;
@@ -7911,7 +7932,7 @@ StartupXLOG(void)
79117932
ControlFile->time = (pg_time_t) time(NULL);
79127933

79137934
SpinLockAcquire(&XLogCtl->info_lck);
7914-
XLogCtl->SharedRecoveryInProgress = false;
7935+
XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE;
79157936
SpinLockRelease(&XLogCtl->info_lck);
79167937

79177938
UpdateControlFile();
@@ -8057,7 +8078,7 @@ RecoveryInProgress(void)
80578078
*/
80588079
volatile XLogCtlData *xlogctl = XLogCtl;
80598080

8060-
LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
8081+
LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE);
80618082

80628083
/*
80638084
* Initialize TimeLineID and RedoRecPtr when we discover that recovery
@@ -8069,8 +8090,8 @@ RecoveryInProgress(void)
80698090
{
80708091
/*
80718092
* If we just exited recovery, make sure we read TimeLineID and
8072-
* RedoRecPtr after SharedRecoveryInProgress (for machines with
8073-
* weak memory ordering).
8093+
* RedoRecPtr after SharedRecoveryState (for machines with weak
8094+
* memory ordering).
80748095
*/
80758096
pg_memory_barrier();
80768097
InitXLOGAccess();
@@ -8086,6 +8107,24 @@ RecoveryInProgress(void)
80868107
}
80878108
}
80888109

8110+
/*
8111+
* Returns current recovery state from shared memory.
8112+
*
8113+
* This returned state is kept consistent with the contents of the control
8114+
* file. See details about the possible values of RecoveryState in xlog.h.
8115+
*/
8116+
RecoveryState
8117+
GetRecoveryState(void)
8118+
{
8119+
RecoveryState retval;
8120+
8121+
SpinLockAcquire(&XLogCtl->info_lck);
8122+
retval = XLogCtl->SharedRecoveryState;
8123+
SpinLockRelease(&XLogCtl->info_lck);
8124+
8125+
return retval;
8126+
}
8127+
80898128
/*
80908129
* Is HotStandby active yet? This is only important in special backends
80918130
* since normal backends won't ever be able to connect until this returns

src/backend/access/transam/xlogarchive.c

+14-7
Original file line numberDiff line numberDiff line change
@@ -572,18 +572,25 @@ XLogArchiveCheckDone(const char *xlog)
572572
{
573573
char archiveStatusPath[MAXPGPATH];
574574
struct stat stat_buf;
575-
bool inRecovery = RecoveryInProgress();
575+
576+
/* The file is always deletable if archive_mode is "off". */
577+
if (!XLogArchivingActive())
578+
return true;
576579

577580
/*
578-
* The file is always deletable if archive_mode is "off". On standbys
579-
* archiving is disabled if archive_mode is "on", and enabled with
580-
* "always". On a primary, archiving is enabled if archive_mode is "on"
581-
* or "always".
581+
* During archive recovery, the file is deletable if archive_mode is not
582+
* "always".
582583
*/
583-
if (!((XLogArchivingActive() && !inRecovery) ||
584-
(XLogArchivingAlways() && inRecovery)))
584+
if (!XLogArchivingAlways() &&
585+
GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
585586
return true;
586587

588+
/*
589+
* At this point of the logic, note that we are either a primary with
590+
* archive_mode set to "on" or "always", or a standby with archive_mode
591+
* set to "always".
592+
*/
593+
587594
/* First check for .done --- this means archiver is done with it */
588595
StatusFilePath(archiveStatusPath, xlog, ".done");
589596
if (stat(archiveStatusPath, &stat_buf) == 0)

src/include/access/xlog.h

+9
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ typedef enum WalLevel
166166
WAL_LEVEL_LOGICAL
167167
} WalLevel;
168168

169+
/* Recovery states */
170+
typedef enum RecoveryState
171+
{
172+
RECOVERY_STATE_CRASH = 0, /* crash recovery */
173+
RECOVERY_STATE_ARCHIVE, /* archive recovery */
174+
RECOVERY_STATE_DONE /* currently in production */
175+
} RecoveryState;
176+
169177
extern PGDLLIMPORT int wal_level;
170178

171179
/* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -291,6 +299,7 @@ extern const char *xlog_identify(uint8 info);
291299
extern void issue_xlog_fsync(int fd, XLogSegNo segno);
292300

293301
extern bool RecoveryInProgress(void);
302+
extern RecoveryState GetRecoveryState(void);
294303
extern bool HotStandbyActive(void);
295304
extern bool HotStandbyActiveInReplay(void);
296305
extern bool XLogInsertAllowed(void);

0 commit comments

Comments
 (0)