11/*
2- * $PostgreSQL: pgsql/contrib/pg_standby/pg_standby.c,v 1.21 2009/03/26 22:29:13 tgl Exp $
2+ * $PostgreSQL: pgsql/contrib/pg_standby/pg_standby.c,v 1.22 2009/05/14 20:31:09 heikki Exp $
33 *
44 *
55 * pg_standby.c
2626#include <ctype.h>
2727#include <dirent.h>
2828#include <sys/stat.h>
29+ #include <fcntl.h>
2930#include <signal.h>
3031
3132#ifdef WIN32
@@ -52,7 +53,6 @@ int maxwaittime = 0; /* how long are we prepared to wait for? */
5253int keepfiles = 0 ; /* number of WAL files to keep, 0 keep all */
5354int maxretries = 3 ; /* number of retries on restore command */
5455bool debug = false; /* are we debugging? */
55- bool triggered = false; /* have we been triggered? */
5656bool need_cleanup = false; /* do we need to remove files from
5757 * archive? */
5858
@@ -69,6 +69,30 @@ char restoreCommand[MAXPGPATH]; /* run this to restore */
6969char exclusiveCleanupFileName [MAXPGPATH ]; /* the file we need to
7070 * get from archive */
7171
72+ /*
73+ * Two types of failover are supported (smart and fast failover).
74+ *
75+ * The content of the trigger file determines the type of failover. If the
76+ * trigger file contains the word "smart" (or the file is empty), smart
77+ * failover is chosen: pg_standby acts as cp or ln command itself; on
78+ * successful completion all the available WAL records will be applied,
79+ * resulting in zero data loss. But, it might take a long time to finish
80+ * recovery if there's a lot of unapplied WAL.
81+ *
82+ * On the other hand, if the trigger file contains the word "fast", the
83+ * recovery is finished immediately even if unapplied WAL files remain. Any
84+ * transactions in the unapplied WAL files are lost.
85+ *
86+ * An empty trigger file performs smart failover. SIGUSR or SIGINT triggers
87+ * fast failover. A timeout causes fast failover (smart failover would have
88+ * the same effect, since if the timeout is reached there is no unapplied WAL).
89+ */
90+ #define NoFailover 0
91+ #define SmartFailover 1
92+ #define FastFailover 2
93+
94+ static int Failover = NoFailover ;
95+
7296#define RESTORE_COMMAND_COPY 0
7397#define RESTORE_COMMAND_LINK 1
7498int restoreCommandType ;
@@ -108,7 +132,6 @@ struct stat stat_buf;
108132 *
109133 * As an example, and probably the common case, we use either
110134 * cp/ln commands on *nix, or copy/move command on Windows.
111- *
112135 */
113136static void
114137CustomizableInitialize (void )
@@ -352,41 +375,96 @@ SetWALFileNameForCleanup(void)
352375/*
353376 * CheckForExternalTrigger()
354377 *
355- * Is there a trigger file?
378+ * Is there a trigger file? Sets global 'Failover' variable to indicate
379+ * what kind of a trigger file it was. A "fast" trigger file is turned
380+ * into a "smart" file as a side-effect.
356381 */
357- static bool
382+ static void
358383CheckForExternalTrigger (void )
359384{
360- int rc ;
385+ char buf [32 ];
386+ int fd ;
387+ int len ;
361388
362389 /*
363390 * Look for a trigger file, if that option has been selected
364391 *
365392 * We use stat() here because triggerPath is always a file rather than
366393 * potentially being in an archive
367394 */
368- if (triggerPath && stat (triggerPath , & stat_buf ) == 0 )
395+ if (!triggerPath || stat (triggerPath , & stat_buf ) != 0 )
396+ return ;
397+
398+ /*
399+ * An empty trigger file performs smart failover. There's a little race
400+ * condition here: if the writer of the trigger file has just created
401+ * the file, but not yet written anything to it, we'll treat that as
402+ * smart shutdown even if the other process was just about to write "fast"
403+ * to it. But that's fine: we'll restore one more WAL file, and when we're
404+ * invoked next time, we'll see the word "fast" and fail over immediately.
405+ */
406+ if (stat_buf .st_size == 0 )
369407 {
370- fprintf (stderr , "trigger file found\n" );
408+ Failover = SmartFailover ;
409+ fprintf (stderr , "trigger file found: smart failover\n" );
410+ fflush (stderr );
411+ return ;
412+ }
413+
414+ if ((fd = open (triggerPath , O_RDWR , 0 )) < 0 )
415+ {
416+ fprintf (stderr , "WARNING: could not open \"%s\": %s\n" ,
417+ triggerPath , strerror (errno ));
418+ fflush (stderr );
419+ return ;
420+ }
421+
422+ if ((len = read (fd , buf , sizeof (buf ))) < 0 )
423+ {
424+ fprintf (stderr , "WARNING: could not read \"%s\": %s\n" ,
425+ triggerPath , strerror (errno ));
426+ fflush (stderr );
427+ close (fd );
428+ return ;
429+ }
430+ buf [len ] = '\0' ;
431+
432+ if (strncmp (buf , "smart" , 5 ) == 0 )
433+ {
434+ Failover = SmartFailover ;
435+ fprintf (stderr , "trigger file found: smart failover\n" );
436+ fflush (stderr );
437+ close (fd );
438+ return ;
439+ }
440+
441+ if (strncmp (buf , "fast" , 4 ) == 0 )
442+ {
443+ Failover = FastFailover ;
444+
445+ fprintf (stderr , "trigger file found: fast failover\n" );
371446 fflush (stderr );
372447
373448 /*
374- * If trigger file found, we *must* delete it. Here's why: When
375- * recovery completes, we will be asked again for the same file from
376- * the archive using pg_standby so must remove trigger file so we can
377- * reload file again and come up correctly.
449+ * Turn it into a "smart" trigger by truncating the file. Otherwise
450+ * if the server asks us again to restore a segment that was restored
451+ * restored already, we would return "not found" and upset the server.
378452 */
379- rc = unlink (triggerPath );
380- if (rc != 0 )
453+ if (ftruncate (fd , 0 ) < 0 )
381454 {
382- fprintf (stderr , "\n ERROR: could not remove \"%s\": %s" , triggerPath , strerror (errno ));
455+ fprintf (stderr , "WARNING: could not read \"%s\": %s\n" ,
456+ triggerPath , strerror (errno ));
383457 fflush (stderr );
384- exit (rc );
385458 }
386- return true;
387- }
459+ close (fd );
388460
389- return false;
461+ return ;
462+ }
463+ close (fd );
464+
465+ fprintf (stderr , "WARNING: invalid content in \"%s\"\n" , triggerPath );
466+ fflush (stderr );
467+ return ;
390468}
391469
392470/*
@@ -402,7 +480,7 @@ RestoreWALFileForRecovery(void)
402480
403481 if (debug )
404482 {
405- fprintf (stderr , "\nrunning restore :" );
483+ fprintf (stderr , "running restore :" );
406484 fflush (stderr );
407485 }
408486
@@ -413,7 +491,7 @@ RestoreWALFileForRecovery(void)
413491 {
414492 if (debug )
415493 {
416- fprintf (stderr , " OK" );
494+ fprintf (stderr , " OK\n " );
417495 fflush (stderr );
418496 }
419497 return true;
@@ -425,7 +503,7 @@ RestoreWALFileForRecovery(void)
425503 * Allow caller to add additional info
426504 */
427505 if (debug )
428- fprintf (stderr , "not restored : " );
506+ fprintf (stderr , "not restored\n " );
429507 return false;
430508}
431509
@@ -552,8 +630,6 @@ main(int argc, char **argv)
552630 break ;
553631 case 't' : /* Trigger file */
554632 triggerPath = optarg ;
555- if (CheckForExternalTrigger ())
556- exit (1 ); /* Normal exit, with non-zero */
557633 break ;
558634 case 'w' : /* Max wait time */
559635 maxwaittime = atoi (optarg );
@@ -633,20 +709,20 @@ main(int argc, char **argv)
633709
634710 if (debug )
635711 {
636- fprintf (stderr , "\nTrigger file : %s" , triggerPath ? triggerPath : "<not set>" );
637- fprintf (stderr , "\nWaiting for WAL file : %s" , nextWALFileName );
638- fprintf (stderr , "\nWAL file path : %s" , WALFilePath );
639- fprintf (stderr , "\nRestoring to... : %s" , xlogFilePath );
640- fprintf (stderr , "\nSleep interval : %d second%s" ,
712+ fprintf (stderr , "Trigger file : %s\n " , triggerPath ? triggerPath : "<not set>" );
713+ fprintf (stderr , "Waiting for WAL file : %s\n " , nextWALFileName );
714+ fprintf (stderr , "WAL file path : %s\n " , WALFilePath );
715+ fprintf (stderr , "Restoring to : %s\n " , xlogFilePath );
716+ fprintf (stderr , "Sleep interval : %d second%s\n " ,
641717 sleeptime , (sleeptime > 1 ? "s" : " " ));
642- fprintf (stderr , "\nMax wait interval : %d %s" ,
718+ fprintf (stderr , "Max wait interval : %d %s\n " ,
643719 maxwaittime , (maxwaittime > 0 ? "seconds" : "forever" ));
644- fprintf (stderr , "\nCommand for restore : %s" , restoreCommand );
645- fprintf (stderr , "\nKeep archive history : " );
720+ fprintf (stderr , "Command for restore : %s\n " , restoreCommand );
721+ fprintf (stderr , "Keep archive history : " );
646722 if (need_cleanup )
647- fprintf (stderr , "%s and later" , exclusiveCleanupFileName );
723+ fprintf (stderr , "%s and later\n " , exclusiveCleanupFileName );
648724 else
649- fprintf (stderr , "No cleanup required" );
725+ fprintf (stderr , "No cleanup required\n " );
650726 fflush (stderr );
651727 }
652728
@@ -676,56 +752,74 @@ main(int argc, char **argv)
676752 /*
677753 * Main wait loop
678754 */
679- while (! CustomizableNextWALFileReady () && ! triggered )
755+ for (;; )
680756 {
681- if (sleeptime <= 60 )
682- pg_usleep (sleeptime * 1000000L );
683-
757+ /* Check for trigger file or signal first */
758+ CheckForExternalTrigger ();
684759 if (signaled )
685760 {
686- triggered = true ;
761+ Failover = FastFailover ;
687762 if (debug )
688763 {
689- fprintf (stderr , "\nsignaled to exit\n" );
764+ fprintf (stderr , "signaled to exit: fast failover \n" );
690765 fflush (stderr );
691766 }
692767 }
693- else
768+
769+ /*
770+ * Check for fast failover immediately, before checking if the
771+ * requested WAL file is available
772+ */
773+ if (Failover == FastFailover )
774+ exit (1 );
775+
776+ if (CustomizableNextWALFileReady ())
694777 {
778+ /*
779+ * Once we have restored this file successfully we can remove some
780+ * prior WAL files. If this restore fails we musn't remove any file
781+ * because some of them will be requested again immediately after
782+ * the failed restore, or when we restart recovery.
783+ */
784+ if (RestoreWALFileForRecovery ())
785+ {
786+ if (need_cleanup )
787+ CustomizableCleanupPriorWALFiles ();
695788
696- if (debug )
789+ exit (0 );
790+ }
791+ else
697792 {
698- fprintf (stderr , "\nWAL file not present yet." );
699- if (triggerPath )
700- fprintf (stderr , " Checking for trigger file..." );
701- fflush (stderr );
793+ /* Something went wrong in copying the file */
794+ exit (1 );
702795 }
796+ }
797+
798+ /* Check for smart failover if the next WAL file was not available */
799+ if (Failover == SmartFailover )
800+ exit (1 );
703801
704- waittime += sleeptime ;
802+ if (sleeptime <= 60 )
803+ pg_usleep (sleeptime * 1000000L );
705804
706- if (!triggered && (CheckForExternalTrigger () || (waittime >= maxwaittime && maxwaittime > 0 )))
805+ waittime += sleeptime ;
806+ if (waittime >= maxwaittime && maxwaittime > 0 )
807+ {
808+ Failover = FastFailover ;
809+ if (debug )
707810 {
708- triggered = true;
709- if ( debug && waittime >= maxwaittime && maxwaittime > 0 )
710- fprintf (stderr , "\nTimed out after %d seconds\n" , waittime );
811+ fprintf ( stderr , "Timed out after %d seconds: fast failover\n" ,
812+ waittime );
813+ fflush (stderr );
711814 }
712815 }
816+ if (debug )
817+ {
818+ fprintf (stderr , "WAL file not present yet." );
819+ if (triggerPath )
820+ fprintf (stderr , " Checking for trigger file..." );
821+ fprintf (stderr , "\n" );
822+ fflush (stderr );
823+ }
713824 }
714-
715- /*
716- * Action on exit
717- */
718- if (triggered )
719- exit (1 ); /* Normal exit, with non-zero */
720-
721- /*
722- * Once we have restored this file successfully we can remove some prior
723- * WAL files. If this restore fails we musn't remove any file because some
724- * of them will be requested again immediately after the failed restore,
725- * or when we restart recovery.
726- */
727- if (RestoreWALFileForRecovery () && need_cleanup )
728- CustomizableCleanupPriorWALFiles ();
729-
730- return 0 ;
731825}
0 commit comments