*** pgsql/src/backend/access/transam/xlog.c	2009/02/07 10:49:36	1.330
--- pgsql/src/backend/access/transam/xlog.c	2009/02/18 15:58:40	1.331
***************
*** 7,13 ****
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.329 2009/01/23 11:19:34 heikki Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 7,13 ----
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.330 2009/02/07 10:49:36 heikki Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 36,41 ****
--- 36,42 ----
  #include "catalog/pg_control.h"
  #include "catalog/pg_type.h"
  #include "funcapi.h"
+ #include "libpq/pqsignal.h"
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "postmaster/bgwriter.h"
***************
*** 47,52 ****
--- 48,54 ----
  #include "storage/smgr.h"
  #include "storage/spin.h"
  #include "utils/builtins.h"
+ #include "utils/flatfiles.h"
  #include "utils/guc.h"
  #include "utils/ps_status.h"
  #include "pg_trace.h"
*************** CheckpointStatsData CheckpointStats;
*** 119,130 ****
   */
  TimeLineID	ThisTimeLineID = 0;
  
! /* Are we doing recovery from XLOG? */
  bool		InRecovery = false;
  
  /* Are we recovering using offline XLOG archives? */
  static bool InArchiveRecovery = false;
  
  /* Was the last xlog file restored from archive, or local? */
  static bool restoredFromArchive = false;
  
--- 121,147 ----
   */
  TimeLineID	ThisTimeLineID = 0;
  
! /*
!  * Are we doing recovery from XLOG? 
!  *
!  * This is only ever true in the startup process, even if the system is still
!  * in recovery. Prior to 8.4, all activity during recovery was carried out
!  * by Startup process. This local variable continues to be used in functions
!  * that need to act differently when called from a redo function (e.g. skip
!  * WAL logging). To check whether the system is in recovery regardless of what
!  * process you're running in, use RecoveryInProgress().
!  */
  bool		InRecovery = false;
  
  /* Are we recovering using offline XLOG archives? */
  static bool InArchiveRecovery = false;
  
+ /*
+  * Local copy of SharedRecoveryInProgress variable. True actually means "not
+  * known, need to check the shared state"
+  */
+ static bool LocalRecoveryInProgress = true;
+ 
  /* Was the last xlog file restored from archive, or local? */
  static bool restoredFromArchive = false;
  
*************** static char *recoveryRestoreCommand = NU
*** 133,139 ****
  static bool recoveryTarget = false;
  static bool recoveryTargetExact = false;
  static bool recoveryTargetInclusive = true;
- static bool recoveryLogRestartpoints = false;
  static TransactionId recoveryTargetXid;
  static TimestampTz recoveryTargetTime;
  static TimestampTz recoveryLastXTime = 0;
--- 150,155 ----
*************** static XLogRecPtr RedoRecPtr;
*** 242,250 ****
   * ControlFileLock: must be held to read/update control file or create
   * new log file.
   *
!  * CheckpointLock: must be held to do a checkpoint (ensures only one
!  * checkpointer at a time; currently, with all checkpoints done by the
!  * bgwriter, this is just pro forma).
   *
   *----------
   */
--- 258,265 ----
   * ControlFileLock: must be held to read/update control file or create
   * new log file.
   *
!  * CheckpointLock: must be held to do a checkpoint or restartpoint (ensures
!  * only one checkpointer at a time)
   *
   *----------
   */
*************** typedef struct XLogCtlData
*** 313,318 ****
--- 328,352 ----
  	int			XLogCacheBlck;	/* highest allocated xlog buffer index */
  	TimeLineID	ThisTimeLineID;
  
+ 	/*
+ 	 * SharedRecoveryInProgress indicates if we're still in crash or archive
+ 	 * recovery.  It's checked by RecoveryInProgress().
+ 	 */
+ 	bool		SharedRecoveryInProgress;
+ 
+ 	/*
+ 	 * During recovery, we keep a copy of the latest checkpoint record
+ 	 * here.  Used by the background writer when it wants to create
+ 	 * a restartpoint.
+ 	 *
+ 	 * Protected by info_lck.
+ 	 */
+ 	XLogRecPtr	lastCheckPointRecPtr;
+ 	CheckPoint	lastCheckPoint;
+ 
+ 	/* end+1 of the last record replayed (or being replayed) */
+ 	XLogRecPtr	replayEndRecPtr;
+ 
  	slock_t		info_lck;		/* locks shared variables shown above */
  } XLogCtlData;
  
*************** static XLogRecPtr ReadRecPtr;	/* start o
*** 387,395 ****
--- 421,441 ----
  static XLogRecPtr EndRecPtr;	/* end+1 of last record read */
  static XLogRecord *nextRecord = NULL;
  static TimeLineID lastPageTLI = 0;
+ static XLogRecPtr minRecoveryPoint; /* local copy of ControlFile->minRecoveryPoint */
+ static bool	updateMinRecoveryPoint = true;
  
  static bool InRedo = false;
  
+ /*
+  * Flag set by interrupt handlers for later service in the redo loop.
+  */
+ static volatile sig_atomic_t shutdown_requested = false;
+ /*
+  * Flag set when executing a restore command, to tell SIGTERM signal handler
+  * that it's safe to just proc_exit(0).
+  */
+ static volatile sig_atomic_t in_restore_command = false;
+ 
  
  static void XLogArchiveNotify(const char *xlog);
  static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
*************** static void PreallocXlogFiles(XLogRecPtr
*** 420,425 ****
--- 466,472 ----
  static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
  static void ValidateXLOGDirectoryStructure(void);
  static void CleanupBackupHistory(void);
+ static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
  static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode);
  static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
  static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
*************** XLogInsert(RmgrId rmid, uint8 info, XLog
*** 484,489 ****
--- 531,540 ----
  	bool		doPageWrites;
  	bool		isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
  
+ 	/* cross-check on whether we should be here or not */
+ 	if (RecoveryInProgress())
+ 		elog(FATAL, "cannot make new WAL entries during recovery");
+ 
  	/* info's high bits are reserved for use by me */
  	if (info & XLR_INFO_MASK)
  		elog(PANIC, "invalid xlog info mask %02X", info);
*************** XLogSetAsyncCommitLSN(XLogRecPtr asyncCo
*** 1718,1723 ****
--- 1769,1831 ----
  }
  
  /*
+  * Advance minRecoveryPoint in control file.
+  *
+  * If we crash during recovery, we must reach this point again before the
+  * database is consistent.
+  *
+  * If 'force' is true, 'lsn' argument is ignored. Otherwise, minRecoveryPoint
+  * is only updated if it's not already greater than or equal to 'lsn'.
+  */
+ static void
+ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
+ {
+ 	/* Quick exit, before taking any lock, based on our local copies */
+ 	if (!updateMinRecoveryPoint || (!force && XLByteLE(lsn, minRecoveryPoint)))
+ 		return;
+ 
+ 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+ 
+ 	/* refresh local copy from the control file, now that we hold the lock */
+ 	minRecoveryPoint = ControlFile->minRecoveryPoint;
+ 
+ 	/*
+ 	 * An invalid minRecoveryPoint means that we need to recover all the WAL,
+ 	 * ie. crash recovery. Don't update the control file in that case.
+ 	 */
+ 	if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
+ 		updateMinRecoveryPoint = false;
+ 	else if (force || XLByteLT(minRecoveryPoint, lsn))
+ 	{
+ 		/* use volatile pointer to prevent code rearrangement */
+ 		volatile XLogCtlData *xlogctl = XLogCtl;
+ 		XLogRecPtr newMinRecoveryPoint;
+ 
+ 		/*
+ 		 * To avoid having to update the control file too often, we update it
+ 		 * all the way to the last record being replayed, even though 'lsn'
+ 		 * would suffice for correctness.
+ 		 */
+ 		SpinLockAcquire(&xlogctl->info_lck);
+ 		newMinRecoveryPoint = xlogctl->replayEndRecPtr;
+ 		SpinLockRelease(&xlogctl->info_lck);
+ 
+ 		/* update control file, but never move minRecoveryPoint backwards */
+ 		if (XLByteLT(ControlFile->minRecoveryPoint, newMinRecoveryPoint))
+ 		{
+ 			ControlFile->minRecoveryPoint = newMinRecoveryPoint;
+ 			UpdateControlFile();
+ 			minRecoveryPoint = newMinRecoveryPoint;
+ 
+ 			ereport(DEBUG2,
+ 					(errmsg("updated min recovery point to %X/%X",
+ 						minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
+ 		}
+ 	}
+ 	LWLockRelease(ControlFileLock);
+ }
+ 
+ /*
   * Ensure that all XLOG data through the given position is flushed to disk.
   *
   * NOTE: this differs from XLogWrite mainly in that the WALWriteLock is not
*************** XLogFlush(XLogRecPtr record)
*** 1729,1737 ****
  	XLogRecPtr	WriteRqstPtr;
  	XLogwrtRqst WriteRqst;
  
! 	/* Disabled during REDO */
! 	if (InRedo)
  		return;
  
  	/* Quick exit if already known flushed */
  	if (XLByteLE(record, LogwrtResult.Flush))
--- 1837,1851 ----
  	XLogRecPtr	WriteRqstPtr;
  	XLogwrtRqst WriteRqst;
  
! 	/*
! 	 * During REDO, we don't try to flush the WAL, but update minRecoveryPoint
! 	 * instead.
! 	 */
! 	if (RecoveryInProgress())
! 	{
! 		UpdateMinRecoveryPoint(record, false);
  		return;
+ 	}
  
  	/* Quick exit if already known flushed */
  	if (XLByteLE(record, LogwrtResult.Flush))
*************** XLogFlush(XLogRecPtr record)
*** 1818,1826 ****
  	 * the bad page is encountered again during recovery then we would be
  	 * unable to restart the database at all!  (This scenario has actually
  	 * happened in the field several times with 7.1 releases. Note that we
! 	 * cannot get here while InRedo is true, but if the bad page is brought in
! 	 * and marked dirty during recovery then CreateCheckPoint will try to
! 	 * flush it at the end of recovery.)
  	 *
  	 * The current approach is to ERROR under normal conditions, but only
  	 * WARNING during recovery, so that the system can be brought up even if
--- 1932,1940 ----
  	 * the bad page is encountered again during recovery then we would be
  	 * unable to restart the database at all!  (This scenario has actually
  	 * happened in the field several times with 7.1 releases. Note that we
! 	 * cannot get here while RecoveryInProgress(), but if the bad page is
! 	 * brought in and marked dirty during recovery, then a checkpoint
! 	 * performed at the end of recovery will try to flush it.)
  	 *
  	 * The current approach is to ERROR under normal conditions, but only
  	 * WARNING during recovery, so that the system can be brought up even if
*************** XLogBackgroundFlush(void)
*** 1857,1862 ****
--- 1971,1980 ----
  	XLogRecPtr	WriteRqstPtr;
  	bool		flexible = true;
  
+ 	/* XLOG doesn't need flushing during recovery */
+ 	if (RecoveryInProgress())
+ 		return;
+ 
  	/* read LogwrtResult and update local state */
  	{
  		/* use volatile pointer to prevent code rearrangement */
*************** XLogAsyncCommitFlush(void)
*** 1928,1933 ****
--- 2046,2055 ----
  	/* use volatile pointer to prevent code rearrangement */
  	volatile XLogCtlData *xlogctl = XLogCtl;
  
+ 	/* There are no asynchronously committed transactions during recovery */
+ 	if (RecoveryInProgress())
+ 		return;
+ 
  	SpinLockAcquire(&xlogctl->info_lck);
  	WriteRqstPtr = xlogctl->asyncCommitLSN;
  	SpinLockRelease(&xlogctl->info_lck);
*************** XLogAsyncCommitFlush(void)
*** 1944,1949 ****
--- 2066,2075 ----
  bool
  XLogNeedsFlush(XLogRecPtr record)
  {
+ 	/* XLOG doesn't need flushing during recovery */
+ 	if (RecoveryInProgress())
+ 		return false;
+ 
  	/* Quick exit if already known flushed */
  	if (XLByteLE(record, LogwrtResult.Flush))
  		return false;
*************** RestoreArchivedFile(char *path, const ch
*** 2619,2627 ****
--- 2745,2766 ----
  							 xlogRestoreCmd)));
  
  	/*
+ 	 * Set in_restore_command to tell the signal handler that we should exit
+ 	 * right away on SIGTERM. We know that we're in a safe point to do that.
+ 	 * Check if we had already received the signal, so that we don't miss a
+ 	 * shutdown request received just before this.
+ 	 */
+ 	in_restore_command = true;
+ 	if (shutdown_requested)
+ 		proc_exit(0);
+ 
+ 	/*
  	 * Copy xlog from archival storage to XLOGDIR
  	 */
  	rc = system(xlogRestoreCmd);
+ 
+ 	in_restore_command = false;
+ 
  	if (rc == 0)
  	{
  		/*
*************** RestoreArchivedFile(char *path, const ch
*** 2674,2687 ****
  	 * assume that recovery is complete and start up the database!) It's
  	 * essential to abort on child SIGINT and SIGQUIT, because per spec
  	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
! 	 * those it's a good bet we should have gotten it too.  Aborting on other
! 	 * signals such as SIGTERM seems a good idea as well.
  	 *
  	 * Per the Single Unix Spec, shells report exit status > 128 when a called
  	 * command died on a signal.  Also, 126 and 127 are used to report
  	 * problems such as an unfindable command; treat those as fatal errors
  	 * too.
  	 */
  	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
  
  	ereport(signaled ? FATAL : DEBUG2,
--- 2813,2836 ----
  	 * assume that recovery is complete and start up the database!) It's
  	 * essential to abort on child SIGINT and SIGQUIT, because per spec
  	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
! 	 * those it's a good bet we should have gotten it too.
! 	 *
! 	 * On SIGTERM, assume we have received a fast shutdown request, and exit
! 	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
! 	 * child process. If we receive it first, the signal handler will call
! 	 * proc_exit(0), otherwise we do it here. If we or the child process
! 	 * received SIGTERM for any other reason than a fast shutdown request,
! 	 * postmaster will perform an immediate shutdown when it sees us exiting
! 	 * unexpectedly.
  	 *
  	 * Per the Single Unix Spec, shells report exit status > 128 when a called
  	 * command died on a signal.  Also, 126 and 127 are used to report
  	 * problems such as an unfindable command; treat those as fatal errors
  	 * too.
  	 */
+ 	if (WTERMSIG(rc) == SIGTERM)
+ 		proc_exit(0);
+ 
  	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
  
  	ereport(signaled ? FATAL : DEBUG2,
*************** readRecoveryCommandFile(void)
*** 4584,4601 ****
  			ereport(LOG,
  					(errmsg("recovery_target_inclusive = %s", tok2)));
  		}
- 		else if (strcmp(tok1, "log_restartpoints") == 0)
- 		{
- 			/*
- 			 * does nothing if a recovery_target is not also set
- 			 */
- 			if (!parse_bool(tok2, &recoveryLogRestartpoints))
- 				  ereport(ERROR,
- 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- 					  errmsg("parameter \"log_restartpoints\" requires a Boolean value")));
- 			ereport(LOG,
- 					(errmsg("log_restartpoints = %s", tok2)));
- 		}
  		else
  			ereport(FATAL,
  					(errmsg("unrecognized recovery parameter \"%s\"",
--- 4733,4738 ----
*************** StartupXLOG(void)
*** 4877,4883 ****
  	XLogRecPtr	RecPtr,
  				LastRec,
  				checkPointLoc,
! 				minRecoveryLoc,
  				EndOfLog;
  	uint32		endLogId;
  	uint32		endLogSeg;
--- 5014,5020 ----
  	XLogRecPtr	RecPtr,
  				LastRec,
  				checkPointLoc,
! 				backupStopLoc,
  				EndOfLog;
  	uint32		endLogId;
  	uint32		endLogSeg;
*************** StartupXLOG(void)
*** 4885,4890 ****
--- 5022,5029 ----
  	uint32		freespace;
  	TransactionId oldestActiveXID;
  
+ 	XLogCtl->SharedRecoveryInProgress = true;
+ 
  	/*
  	 * Read control file and check XLOG status looks valid.
  	 *
*************** StartupXLOG(void)
*** 4964,4970 ****
  						recoveryTargetTLI,
  						ControlFile->checkPointCopy.ThisTimeLineID)));
  
! 	if (read_backup_label(&checkPointLoc, &minRecoveryLoc))
  	{
  		/*
  		 * When a backup_label file is present, we want to roll forward from
--- 5103,5109 ----
  						recoveryTargetTLI,
  						ControlFile->checkPointCopy.ThisTimeLineID)));
  
! 	if (read_backup_label(&checkPointLoc, &backupStopLoc))
  	{
  		/*
  		 * When a backup_label file is present, we want to roll forward from
*************** StartupXLOG(void)
*** 5102,5112 ****
  		ControlFile->prevCheckPoint = ControlFile->checkPoint;
  		ControlFile->checkPoint = checkPointLoc;
  		ControlFile->checkPointCopy = checkPoint;
! 		if (minRecoveryLoc.xlogid != 0 || minRecoveryLoc.xrecoff != 0)
! 			ControlFile->minRecoveryPoint = minRecoveryLoc;
  		ControlFile->time = (pg_time_t) time(NULL);
  		UpdateControlFile();
  
  		/*
  		 * If there was a backup label file, it's done its job and the info
  		 * has now been propagated into pg_control.  We must get rid of the
--- 5241,5263 ----
  		ControlFile->prevCheckPoint = ControlFile->checkPoint;
  		ControlFile->checkPoint = checkPointLoc;
  		ControlFile->checkPointCopy = checkPoint;
! 		if (backupStopLoc.xlogid != 0 || backupStopLoc.xrecoff != 0)
! 		{
! 			if (XLByteLT(ControlFile->minRecoveryPoint, backupStopLoc))
! 				ControlFile->minRecoveryPoint = backupStopLoc;
! 		}
  		ControlFile->time = (pg_time_t) time(NULL);
+ 		/* No need to hold ControlFileLock yet, we aren't up far enough */
  		UpdateControlFile();
  
+ 		/* update our local copy of minRecoveryPoint */
+ 		minRecoveryPoint = ControlFile->minRecoveryPoint;
+ 
+ 		/*
+ 		 * Reset pgstat data, because it may be invalid after recovery.
+ 		 */
+ 		pgstat_reset_all();
+ 
  		/*
  		 * If there was a backup label file, it's done its job and the info
  		 * has now been propagated into pg_control.  We must get rid of the
*************** StartupXLOG(void)
*** 5151,5162 ****
  		{
  			bool		recoveryContinue = true;
  			bool		recoveryApply = true;
  			ErrorContextCallback errcontext;
  
  			InRedo = true;
! 			ereport(LOG,
! 					(errmsg("redo starts at %X/%X",
! 							ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
  
  			/*
  			 * main redo apply loop
--- 5302,5342 ----
  		{
  			bool		recoveryContinue = true;
  			bool		recoveryApply = true;
+ 			bool		reachedMinRecoveryPoint = false;
  			ErrorContextCallback errcontext;
+ 			/* use volatile pointer to prevent code rearrangement */
+ 			volatile XLogCtlData *xlogctl = XLogCtl;
+ 
+ 			/* Update shared replayEndRecPtr */
+ 			SpinLockAcquire(&xlogctl->info_lck);
+ 			xlogctl->replayEndRecPtr = ReadRecPtr;
+ 			SpinLockRelease(&xlogctl->info_lck);
  
  			InRedo = true;
! 
! 			if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
! 				ereport(LOG,
! 						(errmsg("redo starts at %X/%X",
! 								ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
! 			else
! 				ereport(LOG,
! 						(errmsg("redo starts at %X/%X, consistency will be reached at %X/%X",
! 						ReadRecPtr.xlogid, ReadRecPtr.xrecoff,
! 						minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
! 
! 			/*
! 			 * Let postmaster know we've started redo now, so that it can
! 			 * launch bgwriter to perform restartpoints.  We don't bother
! 			 * during crash recovery as restartpoints can only be performed
! 			 * during archive recovery.  And we'd like to keep crash recovery
+ 			 * simple, to avoid introducing bugs that could prevent you from
+ 			 * recovering after a crash.
! 			 *
! 			 * After this point, we can no longer assume that we're the only
! 			 * process in addition to postmaster!
! 			 */
! 			if (InArchiveRecovery && IsUnderPostmaster)
! 				SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
  
  			/*
  			 * main redo apply loop
*************** StartupXLOG(void)
*** 5183,5188 ****
--- 5363,5392 ----
  #endif
  
  				/*
+ 				 * Check if we were requested to exit without finishing
+ 				 * recovery.
+ 				 */
+ 				if (shutdown_requested)
+ 					proc_exit(0);
+ 
+ 				/*
+ 				 * Have we reached our safe starting point? If so, we can
+ 				 * tell postmaster that the database is consistent now.
+ 				 */
+ 				if (!reachedMinRecoveryPoint && 
+ 					 XLByteLE(minRecoveryPoint, EndRecPtr))
+ 				{
+ 					reachedMinRecoveryPoint = true;
+ 					if (InArchiveRecovery)
+ 					{
+ 						ereport(LOG,
+ 								(errmsg("consistent recovery state reached")));
+ 						if (IsUnderPostmaster)
+ 							SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
+ 					}
+ 				}
+ 
+ 				/*
  				 * Have we reached our recovery target?
  				 */
  				if (recoveryStopsHere(record, &recoveryApply))
*************** StartupXLOG(void)
*** 5207,5212 ****
--- 5411,5425 ----
  					TransactionIdAdvance(ShmemVariableCache->nextXid);
  				}
  
+ 				/*
+ 				 * Update shared replayEndRecPtr before replaying this
+ 				 * record, so that XLogFlush will update minRecoveryPoint
+ 				 * correctly.
+ 				 */
+ 				SpinLockAcquire(&xlogctl->info_lck);
+ 				xlogctl->replayEndRecPtr = EndRecPtr;
+ 				SpinLockRelease(&xlogctl->info_lck);
+ 
  				RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
  
  				/* Pop the error context stack */
*************** StartupXLOG(void)
*** 5250,5263 ****
  	 * Complain if we did not roll forward far enough to render the backup
  	 * dump consistent.
  	 */
! 	if (XLByteLT(EndOfLog, ControlFile->minRecoveryPoint))
  	{
  		if (reachedStopPoint)	/* stopped because of stop request */
  			ereport(FATAL,
! 					(errmsg("requested recovery stop point is before end time of backup dump")));
  		else	/* ran off end of WAL */
  			ereport(FATAL,
! 					(errmsg("WAL ends before end time of backup dump")));
  	}
  
  	/*
--- 5463,5476 ----
  	 * Complain if we did not roll forward far enough to render the backup
  	 * dump consistent.
  	 */
! 	if (InRecovery && XLByteLT(EndOfLog, minRecoveryPoint))
  	{
  		if (reachedStopPoint)	/* stopped because of stop request */
  			ereport(FATAL,
! 					(errmsg("requested recovery stop point is before consistent recovery point")));
  		else	/* ran off end of WAL */
  			ereport(FATAL,
! 					(errmsg("WAL ends before consistent recovery point")));
  	}
  
  	/*
*************** StartupXLOG(void)
*** 5352,5357 ****
--- 5565,5576 ----
  	/* Pre-scan prepared transactions to find out the range of XIDs present */
  	oldestActiveXID = PrescanPreparedTransactions();
  
+ 	/*
+ 	 * Allow writing WAL for us, so that we can create a checkpoint record.
+ 	 * But not yet for other backends!
+ 	 */
+ 	LocalRecoveryInProgress = false;
+ 
  	if (InRecovery)
  	{
  		int			rmid;
*************** StartupXLOG(void)
*** 5372,5382 ****
  		XLogCheckInvalidPages();
  
  		/*
- 		 * Reset pgstat data, because it may be invalid after recovery.
- 		 */
- 		pgstat_reset_all();
- 
- 		/*
  		 * Perform a checkpoint to update all our recovery activity to disk.
  		 *
  		 * Note that we write a shutdown checkpoint rather than an on-line
--- 5591,5596 ----
*************** StartupXLOG(void)
*** 5398,5409 ****
  	 */
  	InRecovery = false;
  
  	ControlFile->state = DB_IN_PRODUCTION;
  	ControlFile->time = (pg_time_t) time(NULL);
  	UpdateControlFile();
  
  	/* start the archive_timeout timer running */
! 	XLogCtl->Write.lastSegSwitchTime = ControlFile->time;
  
  	/* initialize shared-memory copy of latest checkpoint XID/epoch */
  	XLogCtl->ckptXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
--- 5612,5625 ----
  	 */
  	InRecovery = false;
  
+ 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
  	ControlFile->state = DB_IN_PRODUCTION;
  	ControlFile->time = (pg_time_t) time(NULL);
  	UpdateControlFile();
+ 	LWLockRelease(ControlFileLock);
  
  	/* start the archive_timeout timer running */
! 	XLogCtl->Write.lastSegSwitchTime = (pg_time_t) time(NULL);
  
  	/* initialize shared-memory copy of latest checkpoint XID/epoch */
  	XLogCtl->ckptXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
*************** StartupXLOG(void)
*** 5438,5443 ****
--- 5654,5698 ----
  		readRecordBuf = NULL;
  		readRecordBufSize = 0;
  	}
+ 
+ 	/*
+ 	 * All done. Allow others to write WAL.
+ 	 */
+ 	XLogCtl->SharedRecoveryInProgress = false;
+ }
+ 
+ /*
+  * Is the system still in recovery?  Returns true if so, false otherwise.
+  *
+  * As a side-effect, we initialize the local TimeLineID and RedoRecPtr
+  * variables the first time we see that recovery is finished.
+  */
+ bool
+ RecoveryInProgress(void)
+ {
+ 	/*
+ 	 * We check shared state each time only until we leave recovery mode.
+ 	 * We can't re-enter recovery, so once the local flag has been cleared
+ 	 * we rely on it and never look at the shared state again.
+ 	 */
+ 	if (!LocalRecoveryInProgress)
+ 		return false;
+ 	else
+ 	{
+ 		/* use volatile pointer to prevent code rearrangement */
+ 		volatile XLogCtlData *xlogctl = XLogCtl;
+ 
+ 		LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress;
+ 
+ 		/*
+ 		 * Initialize TimeLineID and RedoRecPtr (via InitXLOGAccess) the
+ 		 * first time we see that recovery is finished.
+ 		 */
+ 		if (!LocalRecoveryInProgress)
+ 			InitXLOGAccess();
+ 
+ 		return LocalRecoveryInProgress;
+ 	}
  }
  
  /*
*************** InitXLOGAccess(void)
*** 5569,5574 ****
--- 5824,5831 ----
  {
  	/* ThisTimeLineID doesn't change so we need no lock to copy it */
  	ThisTimeLineID = XLogCtl->ThisTimeLineID;
+ 	Assert(ThisTimeLineID != 0);
+ 
  	/* Use GetRedoRecPtr to copy the RedoRecPtr safely */
  	(void) GetRedoRecPtr();
  }
*************** ShutdownXLOG(int code, Datum arg)
*** 5680,5686 ****
  	ereport(LOG,
  			(errmsg("shutting down")));
  
! 	CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
  	ShutdownCLOG();
  	ShutdownSUBTRANS();
  	ShutdownMultiXact();
--- 5937,5946 ----
  	ereport(LOG,
  			(errmsg("shutting down")));
  
! 	if (RecoveryInProgress())
! 		CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
! 	else
! 		CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
  	ShutdownCLOG();
  	ShutdownSUBTRANS();
  	ShutdownMultiXact();
*************** ShutdownXLOG(int code, Datum arg)
*** 5693,5701 ****
   * Log start of a checkpoint.
   */
  static void
! LogCheckpointStart(int flags)
  {
! 	elog(LOG, "checkpoint starting:%s%s%s%s%s%s",
  		 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
  		 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
  		 (flags & CHECKPOINT_FORCE) ? " force" : "",
--- 5953,5972 ----
   * Log start of a checkpoint.
   */
  static void
! LogCheckpointStart(int flags, bool restartpoint)
  {
! 	char *msg;
! 
! 	/*
! 	 * XXX: This is hopelessly untranslatable. We could call gettext_noop
! 	 * for the main message, but what about all the flags?
! 	 */
! 	if (restartpoint)
! 		msg = "restartpoint starting:%s%s%s%s%s%s";
! 	else
! 		msg = "checkpoint starting:%s%s%s%s%s%s";
! 
! 	elog(LOG, msg,
  		 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
  		 (flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
  		 (flags & CHECKPOINT_FORCE) ? " force" : "",
*************** LogCheckpointStart(int flags)
*** 5708,5714 ****
   * Log end of a checkpoint.
   */
  static void
! LogCheckpointEnd(void)
  {
  	long		write_secs,
  				sync_secs,
--- 5979,5985 ----
   * Log end of a checkpoint.
   */
  static void
! LogCheckpointEnd(bool restartpoint)
  {
  	long		write_secs,
  				sync_secs,
*************** LogCheckpointEnd(void)
*** 5731,5747 ****
  						CheckpointStats.ckpt_sync_end_t,
  						&sync_secs, &sync_usecs);
  
! 	elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
! 		 "%d transaction log file(s) added, %d removed, %d recycled; "
! 		 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
! 		 CheckpointStats.ckpt_bufs_written,
! 		 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
! 		 CheckpointStats.ckpt_segs_added,
! 		 CheckpointStats.ckpt_segs_removed,
! 		 CheckpointStats.ckpt_segs_recycled,
! 		 write_secs, write_usecs / 1000,
! 		 sync_secs, sync_usecs / 1000,
! 		 total_secs, total_usecs / 1000);
  }
  
  /*
--- 6002,6027 ----
  						CheckpointStats.ckpt_sync_end_t,
  						&sync_secs, &sync_usecs);
  
! 	if (restartpoint)
! 		elog(LOG, "restartpoint complete: wrote %d buffers (%.1f%%); "
! 			 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
! 			 CheckpointStats.ckpt_bufs_written,
! 			 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
! 			 write_secs, write_usecs / 1000,
! 			 sync_secs, sync_usecs / 1000,
! 			 total_secs, total_usecs / 1000);
! 	else
! 		elog(LOG, "checkpoint complete: wrote %d buffers (%.1f%%); "
! 			 "%d transaction log file(s) added, %d removed, %d recycled; "
! 			 "write=%ld.%03d s, sync=%ld.%03d s, total=%ld.%03d s",
! 			 CheckpointStats.ckpt_bufs_written,
! 			 (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
! 			 CheckpointStats.ckpt_segs_added,
! 			 CheckpointStats.ckpt_segs_removed,
! 			 CheckpointStats.ckpt_segs_recycled,
! 			 write_secs, write_usecs / 1000,
! 			 sync_secs, sync_usecs / 1000,
! 			 total_secs, total_usecs / 1000);
  }
  
  /*
*************** CreateCheckPoint(int flags)
*** 5772,5784 ****
  	TransactionId *inCommitXids;
  	int			nInCommit;
  
  	/*
  	 * Acquire CheckpointLock to ensure only one checkpoint happens at a time.
! 	 * (This is just pro forma, since in the present system structure there is
! 	 * only one process that is allowed to issue checkpoints at any given
! 	 * time.)
  	 */
! 	LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
  
  	/*
  	 * Prepare to accumulate statistics.
--- 6052,6084 ----
  	TransactionId *inCommitXids;
  	int			nInCommit;
  
+ 	/* shouldn't happen */
+ 	if (RecoveryInProgress())
+ 		elog(ERROR, "can't create a checkpoint during recovery");
+ 
  	/*
  	 * Acquire CheckpointLock to ensure only one checkpoint happens at a time.
! 	 * During normal operation, bgwriter is the only process that creates
! 	 * checkpoints, but at the end of archive recovery, the bgwriter can be
! 	 * busy creating a restartpoint while the startup process tries to perform
! 	 * the startup checkpoint.
  	 */
! 	if (!LWLockConditionalAcquire(CheckpointLock, LW_EXCLUSIVE))
! 	{
! 		Assert(InRecovery);
! 
! 		/*
! 		 * A restartpoint is in progress. Wait until it finishes. This can
! 		 * cause an extra restartpoint to be performed, but that's OK because
! 		 * we're just about to perform a checkpoint anyway. Flushing the
! 		 * buffers in this restartpoint can take some time, but that time is
! 		 * saved from the upcoming checkpoint so the net effect is zero.
! 		 */
! 		ereport(DEBUG2, (errmsg("hurrying in-progress restartpoint")));
! 		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT);
! 
! 		LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
! 	}
  
  	/*
  	 * Prepare to accumulate statistics.
*************** CreateCheckPoint(int flags)
*** 5797,5805 ****
--- 6097,6107 ----
  
  	if (shutdown)
  	{
+ 		LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
  		ControlFile->state = DB_SHUTDOWNING;
  		ControlFile->time = (pg_time_t) time(NULL);
  		UpdateControlFile();
+ 		LWLockRelease(ControlFileLock);
  	}
  
  	/*
*************** CreateCheckPoint(int flags)
*** 5903,5909 ****
  	 * to log anything if we decided to skip the checkpoint.
  	 */
  	if (log_checkpoints)
! 		LogCheckpointStart(flags);
  
  	TRACE_POSTGRESQL_CHECKPOINT_START(flags);
  
--- 6205,6211 ----
  	 * to log anything if we decided to skip the checkpoint.
  	 */
  	if (log_checkpoints)
! 		LogCheckpointStart(flags, false);
  
  	TRACE_POSTGRESQL_CHECKPOINT_START(flags);
  
*************** CreateCheckPoint(int flags)
*** 6070,6076 ****
  
  	/* All real work is done, but log before releasing lock. */
  	if (log_checkpoints)
! 		LogCheckpointEnd();
  
          TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
                                  NBuffers, CheckpointStats.ckpt_segs_added,
--- 6372,6378 ----
  
  	/* All real work is done, but log before releasing lock. */
  	if (log_checkpoints)
! 		LogCheckpointEnd(false);
  
          TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
                                  NBuffers, CheckpointStats.ckpt_segs_added,
*************** CheckPointGuts(XLogRecPtr checkPointRedo
*** 6098,6129 ****
  }
  
  /*
!  * Set a recovery restart point if appropriate
!  *
!  * This is similar to CreateCheckPoint, but is used during WAL recovery
!  * to establish a point from which recovery can roll forward without
!  * replaying the entire recovery log.  This function is called each time
!  * a checkpoint record is read from XLOG; it must determine whether a
!  * restartpoint is needed or not.
   */
  static void
  RecoveryRestartPoint(const CheckPoint *checkPoint)
  {
- 	int			elapsed_secs;
  	int			rmid;
! 
! 	/*
! 	 * Do nothing if the elapsed time since the last restartpoint is less than
! 	 * half of checkpoint_timeout.	(We use a value less than
! 	 * checkpoint_timeout so that variations in the timing of checkpoints on
! 	 * the master, or speed of transmission of WAL segments to a slave, won't
! 	 * make the slave skip a restartpoint once it's synced with the master.)
! 	 * Checking true elapsed time keeps us from doing restartpoints too often
! 	 * while rapidly scanning large amounts of WAL.
! 	 */
! 	elapsed_secs = (pg_time_t) time(NULL) - ControlFile->time;
! 	if (elapsed_secs < CheckPointTimeout / 2)
! 		return;
  
  	/*
  	 * Is it safe to checkpoint?  We must ask each of the resource managers
--- 6400,6416 ----
  }
  
  /*
!  * This is used during WAL recovery to establish a point from which recovery
!  * can roll forward without replaying the entire recovery log.  This function
!  * is called each time a checkpoint record is read from XLOG. The record is
!  * saved in shared memory, so that it can be used as a restartpoint later on.
   */
  static void
  RecoveryRestartPoint(const CheckPoint *checkPoint)
  {
  	int			rmid;
! 	/* use volatile pointer to prevent code rearrangement */
! 	volatile XLogCtlData *xlogctl = XLogCtl;
  
  	/*
  	 * Is it safe to checkpoint?  We must ask each of the resource managers
*************** RecoveryRestartPoint(const CheckPoint *c
*** 6145,6172 ****
  	}
  
  	/*
! 	 * OK, force data out to disk
  	 */
! 	CheckPointGuts(checkPoint->redo, CHECKPOINT_IMMEDIATE);
  
  	/*
! 	 * Update pg_control so that any subsequent crash will restart from this
! 	 * checkpoint.	Note: ReadRecPtr gives the XLOG address of the checkpoint
! 	 * record itself.
  	 */
  	ControlFile->prevCheckPoint = ControlFile->checkPoint;
! 	ControlFile->checkPoint = ReadRecPtr;
! 	ControlFile->checkPointCopy = *checkPoint;
  	ControlFile->time = (pg_time_t) time(NULL);
  	UpdateControlFile();
  
! 	ereport((recoveryLogRestartpoints ? LOG : DEBUG2),
  			(errmsg("recovery restart point at %X/%X",
! 					checkPoint->redo.xlogid, checkPoint->redo.xrecoff)));
  	if (recoveryLastXTime)
! 		ereport((recoveryLogRestartpoints ? LOG : DEBUG2),
! 				(errmsg("last completed transaction was at log time %s",
! 						timestamptz_to_str(recoveryLastXTime))));
  }
  
  /*
--- 6432,6559 ----
  	}
  
  	/*
! 	 * Copy the checkpoint record to shared memory, so that bgwriter can
! 	 * use it the next time it wants to perform a restartpoint.
  	 */
! 	SpinLockAcquire(&xlogctl->info_lck);
! 	XLogCtl->lastCheckPointRecPtr = ReadRecPtr;
! 	memcpy(&XLogCtl->lastCheckPoint, checkPoint, sizeof(CheckPoint));
! 	SpinLockRelease(&xlogctl->info_lck);
! }
! 
! /*
!  * This is similar to CreateCheckPoint, but is used during WAL recovery
!  * to establish a point from which recovery can roll forward without
!  * replaying the entire recovery log.
!  *
!  * Returns true if a new restartpoint was established. We can only establish
!  * a restartpoint if we have replayed a checkpoint record since the last
!  * restartpoint.
!  */
! bool
! CreateRestartPoint(int flags)
! {
! 	XLogRecPtr lastCheckPointRecPtr;
! 	CheckPoint lastCheckPoint;
! 	/* use volatile pointer to prevent code rearrangement */
! 	volatile XLogCtlData *xlogctl = XLogCtl;
  
  	/*
! 	 * Acquire CheckpointLock to ensure only one restartpoint or checkpoint
! 	 * happens at a time.
  	 */
+ 	LWLockAcquire(CheckpointLock, LW_EXCLUSIVE);
+ 
+ 	/* Get a local copy of the last checkpoint record. */
+ 	SpinLockAcquire(&xlogctl->info_lck);
+ 	lastCheckPointRecPtr = xlogctl->lastCheckPointRecPtr;
+ 	memcpy(&lastCheckPoint, &XLogCtl->lastCheckPoint, sizeof(CheckPoint));
+ 	SpinLockRelease(&xlogctl->info_lck);
+ 
+ 	/* 
+ 	 * Check that we're still in recovery mode. It's ok if we exit recovery
+ 	 * mode after this check, the restart point is valid anyway.
+ 	 */
+ 	if (!RecoveryInProgress())
+ 	{
+ 		ereport(DEBUG2,
+ 				(errmsg("skipping restartpoint, recovery has already ended")));
+ 		LWLockRelease(CheckpointLock);
+ 		return false;
+ 	}
+ 
+ 	/*
+ 	 * If the last checkpoint record we've replayed is already our last
+ 	 * restartpoint, we can't perform a new restart point. We still update
+ 	 * minRecoveryPoint in that case, so that if this is a shutdown restart
+ 	 * point, we won't start up earlier than before. That's not strictly
+ 	 * necessary, but when we get hot standby capability, it would be rather
+ 	 * weird if the database opened up for read-only connections at a
+ 	 * point-in-time before the last shutdown. Such time travel is still
+ 	 * possible in case of immediate shutdown, though.
+ 	 *
+ 	 * We don't explicitly advance minRecoveryPoint when we do create a
+ 	 * restartpoint. It's assumed that flushing the buffers will do that
+ 	 * as a side-effect.
+ 	 */
+ 	if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
+ 		XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
+ 	{
+ 		XLogRecPtr InvalidXLogRecPtr = {0, 0};
+ 		ereport(DEBUG2,
+ 				(errmsg("skipping restartpoint, already performed at %X/%X",
+ 						lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+ 
+ 		UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
+ 		LWLockRelease(CheckpointLock);
+ 		return false;
+ 	}
+ 
+ 	if (log_checkpoints)
+ 	{
+ 		/*
+ 		 * Prepare to accumulate statistics.
+ 		 */
+ 		MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
+ 		CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
+ 
+ 		LogCheckpointStart(flags, true);
+ 	}
+ 
+ 	CheckPointGuts(lastCheckPoint.redo, flags);
+ 
+ 	/*
+ 	 * Update pg_control, using current time
+ 	 */
+ 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
  	ControlFile->prevCheckPoint = ControlFile->checkPoint;
! 	ControlFile->checkPoint = lastCheckPointRecPtr;
! 	ControlFile->checkPointCopy = lastCheckPoint;
  	ControlFile->time = (pg_time_t) time(NULL);
  	UpdateControlFile();
+ 	LWLockRelease(ControlFileLock);
+ 
+ 	/*
+ 	 * Currently, there is no need to truncate pg_subtrans during recovery.
+ 	 * If we did that, we would need to have called StartupSUBTRANS()
+ 	 * already, and then TruncateSUBTRANS() would go here.
+ 	 */
+ 
+ 	/* All real work is done, but log before releasing lock. */
+ 	if (log_checkpoints)
+ 		LogCheckpointEnd(true);
  
! 	ereport((log_checkpoints ? LOG : DEBUG2),
  			(errmsg("recovery restart point at %X/%X",
! 					lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
! 
  	if (recoveryLastXTime)
! 		ereport((log_checkpoints ? LOG : DEBUG2),
! 			(errmsg("last completed transaction was at log time %s",
! 					timestamptz_to_str(recoveryLastXTime))));
! 
! 	LWLockRelease(CheckpointLock);
! 	return true;
  }
  
  /*
*************** RequestXLogSwitch(void)
*** 6232,6237 ****
--- 6619,6627 ----
  
  /*
   * XLOG resource manager's routines
+  *
+  * Definitions of info values are in include/catalog/pg_control.h, though
+  * not all record types are related to control file processing.
   */
  void
  xlog_redo(XLogRecPtr lsn, XLogRecord *record)
*************** xlog_redo(XLogRecPtr lsn, XLogRecord *re
*** 6278,6286 ****
  								 (int) checkPoint.ThisTimeLineID))
  				ereport(PANIC,
  						(errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
! 								checkPoint.ThisTimeLineID, ThisTimeLineID)));
! 			/* Following WAL records should be run with new TLI */
! 			ThisTimeLineID = checkPoint.ThisTimeLineID;
  		}
  
  		RecoveryRestartPoint(&checkPoint);
--- 6668,6676 ----
  								 (int) checkPoint.ThisTimeLineID))
  				ereport(PANIC,
  						(errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
!                                checkPoint.ThisTimeLineID, ThisTimeLineID)));
!            /* Following WAL records should be run with new TLI */
!            ThisTimeLineID = checkPoint.ThisTimeLineID;
  		}
  
  		RecoveryRestartPoint(&checkPoint);
*************** CancelBackup(void)
*** 7221,7223 ****
--- 7611,7702 ----
  	}
  }
  
+ /* ------------------------------------------------------
+  *  Startup Process main entry point and signal handlers
+  * ------------------------------------------------------
+  */
+ 
+ /*
+  * startupproc_quickdie() is invoked when the postmaster sends us SIGQUIT.
+  *
+  * Some backend has bought the farm,
+  * so we need to stop what we're doing and exit.
+  */
+ static void
+ startupproc_quickdie(SIGNAL_ARGS)
+ {
+ 	PG_SETMASK(&BlockSig);
+ 
+ 	/*
+ 	 * DO NOT proc_exit() -- we're here because shared memory may be
+ 	 * corrupted, so we don't want to try to clean up our transaction. Just
+ 	 * nail the windows shut and get out of town.
+ 	 *
+ 	 * Note we do exit(2) not exit(0).	This is to force the postmaster into a
+ 	 * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
+ 	 * backend.  This is necessary precisely because we don't clean up our
+ 	 * shared memory state.
+ 	 */
+ 	exit(2);
+ }
+ 
+ 
+ /* SIGTERM: set flag to abort redo and exit */
+ static void
+ StartupProcShutdownHandler(SIGNAL_ARGS)
+ {
+ 	if (in_restore_command)
+ 		proc_exit(0);
+ 	else
+ 		shutdown_requested = true;
+ }
+ 
+ /* Main entry point for startup process */
+ void
+ StartupProcessMain(void)
+ {
+ 	/*
+ 	 * If possible, make this process a group leader, so that the postmaster
+ 	 * can signal any child processes too.
+ 	 */
+ #ifdef HAVE_SETSID
+ 	if (setsid() < 0)
+ 		elog(FATAL, "setsid() failed: %m");
+ #endif
+ 
+ 	/*
+ 	 * Properly accept or ignore signals the postmaster might send us
+ 	 */
+ 	pqsignal(SIGHUP, SIG_IGN);	/* ignore config file updates */
+ 	pqsignal(SIGINT, SIG_IGN);		/* ignore query cancel */
+ 	pqsignal(SIGTERM, StartupProcShutdownHandler); /* request shutdown */
+ 	pqsignal(SIGQUIT, startupproc_quickdie);		/* hard crash time */
+ 	pqsignal(SIGALRM, SIG_IGN);
+ 	pqsignal(SIGPIPE, SIG_IGN);
+ 	pqsignal(SIGUSR1, SIG_IGN);
+ 	pqsignal(SIGUSR2, SIG_IGN);
+ 
+ 	/*
+ 	 * Reset some signals that are accepted by postmaster but not here
+ 	 */
+ 	pqsignal(SIGCHLD, SIG_DFL);
+ 	pqsignal(SIGTTIN, SIG_DFL);
+ 	pqsignal(SIGTTOU, SIG_DFL);
+ 	pqsignal(SIGCONT, SIG_DFL);
+ 	pqsignal(SIGWINCH, SIG_DFL);
+ 
+ 	/*
+ 	 * Unblock signals (they were blocked when the postmaster forked us)
+ 	 */
+ 	PG_SETMASK(&UnBlockSig);
+ 
+ 	StartupXLOG();	
+ 
+ 	BuildFlatFiles(false);
+ 
+ 	/* Let postmaster know that startup is finished */
+ 	SendPostmasterSignal(PMSIGNAL_RECOVERY_COMPLETED);
+ 
+ 	/* exit normally */
+ 	proc_exit(0);
+ }