*** pgsql/src/backend/access/transam/xlog.c 2010/04/15 03:05:59 1.396 --- pgsql/src/backend/access/transam/xlog.c 2010/04/16 08:58:16 1.397 *************** *** 7,13 **** * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * ! * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.395 2010/04/14 10:29:07 sriggs Exp $ * *------------------------------------------------------------------------- */ --- 7,13 ---- * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * ! * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.396 2010/04/15 03:05:59 momjian Exp $ * *------------------------------------------------------------------------- */ *************** static int XLogFileReadAnyTLI(uint32 log *** 539,545 **** int sources); static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, bool randAccess); ! static int emode_for_corrupt_record(int emode); static void XLogFileClose(void); static bool RestoreArchivedFile(char *path, const char *xlogfname, const char *recovername, off_t expectedSize); --- 539,545 ---- int sources); static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, bool randAccess); ! static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); static bool RestoreArchivedFile(char *path, const char *xlogfname, const char *recovername, off_t expectedSize); *************** RecordIsValid(XLogRecord *record, XLogRe *** 3543,3549 **** memcpy(&bkpb, blk, sizeof(BkpBlock)); if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ) { ! ereport(emode, (errmsg("incorrect hole size in record at %X/%X", recptr.xlogid, recptr.xrecoff))); return false; --- 3543,3549 ---- memcpy(&bkpb, blk, sizeof(BkpBlock)); if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ) { ! ereport(emode_for_corrupt_record(emode, recptr), (errmsg("incorrect hole size in record at %X/%X", recptr.xlogid, recptr.xrecoff))); return false; *************** RecordIsValid(XLogRecord *record, XLogRe *** 3556,3562 **** /* Check that xl_tot_len agrees with our calculation */ if (blk != (char *) record + record->xl_tot_len) { ! ereport(emode, (errmsg("incorrect total length in record at %X/%X", recptr.xlogid, recptr.xrecoff))); return false; --- 3556,3562 ---- /* Check that xl_tot_len agrees with our calculation */ if (blk != (char *) record + record->xl_tot_len) { ! ereport(emode_for_corrupt_record(emode, recptr), (errmsg("incorrect total length in record at %X/%X", recptr.xlogid, recptr.xrecoff))); return false; *************** RecordIsValid(XLogRecord *record, XLogRe *** 3569,3575 **** if (!EQ_CRC32(record->xl_crc, crc)) { ! ereport(emode, (errmsg("incorrect resource manager data checksum in record at %X/%X", recptr.xlogid, recptr.xrecoff))); return false; --- 3569,3575 ---- if (!EQ_CRC32(record->xl_crc, crc)) { ! ereport(emode_for_corrupt_record(emode, recptr), (errmsg("incorrect resource manager data checksum in record at %X/%X", recptr.xlogid, recptr.xrecoff))); return false; *************** retry: *** 3674,3680 **** } else if (targetRecOff < pageHeaderSize) { ! ereport(emode_for_corrupt_record(emode), (errmsg("invalid record offset at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; --- 3674,3680 ---- } else if (targetRecOff < pageHeaderSize) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("invalid record offset at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; *************** retry: *** 3682,3688 **** if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize) { ! ereport(emode_for_corrupt_record(emode), (errmsg("contrecord is requested by %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; --- 3682,3688 ---- if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("contrecord is requested by %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; *************** retry: *** 3697,3703 **** { if (record->xl_len != 0) { ! ereport(emode_for_corrupt_record(emode), (errmsg("invalid xlog switch record at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; --- 3697,3703 ---- { if (record->xl_len != 0) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("invalid xlog switch record at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; *************** retry: *** 3705,3711 **** } else if (record->xl_len == 0) { ! ereport(emode_for_corrupt_record(emode), (errmsg("record with zero length at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; --- 3705,3711 ---- } else if (record->xl_len == 0) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("record with zero length at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; *************** retry: *** 3714,3727 **** record->xl_tot_len > SizeOfXLogRecord + record->xl_len + XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ)) { ! ereport(emode_for_corrupt_record(emode), (errmsg("invalid record length at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } if (record->xl_rmid > RM_MAX_ID) { ! ereport(emode_for_corrupt_record(emode), (errmsg("invalid resource manager ID %u at %X/%X", record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; --- 3714,3727 ---- record->xl_tot_len > SizeOfXLogRecord + record->xl_len + XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ)) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("invalid record length at %X/%X", RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } if (record->xl_rmid > RM_MAX_ID) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("invalid resource manager ID %u at %X/%X", record->xl_rmid, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; *************** retry: *** 3734,3740 **** */ if (!XLByteLT(record->xl_prev, *RecPtr)) { ! ereport(emode_for_corrupt_record(emode), (errmsg("record with incorrect prev-link %X/%X at %X/%X", record->xl_prev.xlogid, record->xl_prev.xrecoff, RecPtr->xlogid, RecPtr->xrecoff))); --- 3734,3740 ---- */ if (!XLByteLT(record->xl_prev, *RecPtr)) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("record with incorrect prev-link %X/%X at %X/%X", record->xl_prev.xlogid, record->xl_prev.xrecoff, RecPtr->xlogid, RecPtr->xrecoff))); *************** retry: *** 3750,3756 **** */ if (!XLByteEQ(record->xl_prev, ReadRecPtr)) { ! ereport(emode_for_corrupt_record(emode), (errmsg("record with incorrect prev-link %X/%X at %X/%X", record->xl_prev.xlogid, record->xl_prev.xrecoff, RecPtr->xlogid, RecPtr->xrecoff))); --- 3750,3756 ---- */ if (!XLByteEQ(record->xl_prev, ReadRecPtr)) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("record with incorrect prev-link %X/%X at %X/%X", record->xl_prev.xlogid, record->xl_prev.xrecoff, RecPtr->xlogid, RecPtr->xrecoff))); *************** retry: *** 3779,3785 **** { readRecordBufSize = 0; /* We treat this as a "bogus data" condition */ ! ereport(emode_for_corrupt_record(emode), (errmsg("record length %u at %X/%X too long", total_len, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; --- 3779,3785 ---- { readRecordBufSize = 0; /* We treat this as a "bogus data" condition */ ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("record length %u at %X/%X too long", total_len, RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; *************** retry: *** 3819,3825 **** /* Check that the continuation record looks valid */ if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD)) { ! ereport(emode_for_corrupt_record(emode), (errmsg("there is no contrecord flag in log file %u, segment %u, offset %u", readId, readSeg, readOff))); goto next_record_is_invalid; --- 3819,3825 ---- /* Check that the continuation record looks valid */ if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD)) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("there is no contrecord flag in log file %u, segment %u, offset %u", readId, readSeg, readOff))); goto next_record_is_invalid; *************** retry: *** 3829,3835 **** if (contrecord->xl_rem_len == 0 || total_len != (contrecord->xl_rem_len + gotlen)) { ! ereport(emode_for_corrupt_record(emode), (errmsg("invalid contrecord length %u in log file %u, segment %u, offset %u", contrecord->xl_rem_len, readId, readSeg, readOff))); --- 3829,3835 ---- if (contrecord->xl_rem_len == 0 || total_len != (contrecord->xl_rem_len + gotlen)) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errmsg("invalid contrecord length %u in log file %u, segment %u, offset %u", contrecord->xl_rem_len, readId, readSeg, readOff))); *************** retry: *** 3847,3853 **** contrecord->xl_rem_len); break; } ! if (!RecordIsValid(record, *RecPtr, emode_for_corrupt_record(emode))) goto next_record_is_invalid; pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); EndRecPtr.xlogid = readId; --- 3847,3853 ---- contrecord->xl_rem_len); break; } ! if (!RecordIsValid(record, *RecPtr, emode)) goto next_record_is_invalid; pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); EndRecPtr.xlogid = readId; *************** retry: *** 3861,3867 **** } /* Record does not cross a page boundary */ ! if (!RecordIsValid(record, *RecPtr, emode_for_corrupt_record(emode))) goto next_record_is_invalid; EndRecPtr.xlogid = RecPtr->xlogid; EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len); --- 3861,3867 ---- } /* Record does not cross a page boundary */ ! if (!RecordIsValid(record, *RecPtr, emode)) goto next_record_is_invalid; EndRecPtr.xlogid = RecPtr->xlogid; EndRecPtr.xrecoff = RecPtr->xrecoff + MAXALIGN(total_len); *************** ValidXLOGHeader(XLogPageHeader hdr, int *** 3914,3929 **** { XLogRecPtr recaddr; if (hdr->xlp_magic != XLOG_PAGE_MAGIC) { ! ereport(emode, (errmsg("invalid magic number %04X in log file %u, segment %u, offset %u", hdr->xlp_magic, readId, readSeg, readOff))); return false; } if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0) { ! ereport(emode, (errmsg("invalid info bits %04X in log file %u, segment %u, offset %u", hdr->xlp_info, readId, readSeg, readOff))); return false; --- 3914,3932 ---- { XLogRecPtr recaddr; + recaddr.xlogid = readId; + recaddr.xrecoff = readSeg * XLogSegSize + readOff; + if (hdr->xlp_magic != XLOG_PAGE_MAGIC) { ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("invalid magic number %04X in log file %u, segment %u, offset %u", hdr->xlp_magic, readId, readSeg, readOff))); return false; } if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0) { ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("invalid info bits %04X in log file %u, segment %u, offset %u", hdr->xlp_info, readId, readSeg, readOff))); return false; *************** ValidXLOGHeader(XLogPageHeader hdr, int *** 3945,3951 **** longhdr->xlp_sysid); snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, ControlFile->system_identifier); ! ereport(emode, (errmsg("WAL file is from different database system"), errdetail("WAL file database system identifier is %s, pg_control database system identifier is %s.", fhdrident_str, sysident_str))); --- 3948,3954 ---- longhdr->xlp_sysid); snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT, ControlFile->system_identifier); ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("WAL file is from different database system"), errdetail("WAL file database system identifier is %s, pg_control database system identifier is %s.", fhdrident_str, sysident_str))); *************** ValidXLOGHeader(XLogPageHeader hdr, int *** 3953,3966 **** } if (longhdr->xlp_seg_size != XLogSegSize) { ! ereport(emode, (errmsg("WAL file is from different database system"), errdetail("Incorrect XLOG_SEG_SIZE in page header."))); return false; } if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ) { ! ereport(emode, (errmsg("WAL file is from different database system"), errdetail("Incorrect XLOG_BLCKSZ in page header."))); return false; --- 3956,3969 ---- } if (longhdr->xlp_seg_size != XLogSegSize) { ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("WAL file is from different database system"), errdetail("Incorrect XLOG_SEG_SIZE in page header."))); return false; } if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ) { ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("WAL file is from different database system"), errdetail("Incorrect XLOG_BLCKSZ in page header."))); return false; *************** ValidXLOGHeader(XLogPageHeader hdr, int *** 3969,3985 **** else if (readOff == 0) { /* hmm, first page of file doesn't have a long header? */ ! ereport(emode, (errmsg("invalid info bits %04X in log file %u, segment %u, offset %u", hdr->xlp_info, readId, readSeg, readOff))); return false; } - recaddr.xlogid = readId; - recaddr.xrecoff = readSeg * XLogSegSize + readOff; if (!XLByteEQ(hdr->xlp_pageaddr, recaddr)) { ! ereport(emode, (errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u", hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff, readId, readSeg, readOff))); --- 3972,3986 ---- else if (readOff == 0) { /* hmm, first page of file doesn't have a long header? */ ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("invalid info bits %04X in log file %u, segment %u, offset %u", hdr->xlp_info, readId, readSeg, readOff))); return false; } if (!XLByteEQ(hdr->xlp_pageaddr, recaddr)) { ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u", hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff, readId, readSeg, readOff))); *************** ValidXLOGHeader(XLogPageHeader hdr, int *** 3991,3997 **** */ if (!list_member_int(expectedTLIs, (int) hdr->xlp_tli)) { ! ereport(emode, (errmsg("unexpected timeline ID %u in log file %u, segment %u, offset %u", hdr->xlp_tli, readId, readSeg, readOff))); --- 3992,3998 ---- */ if (!list_member_int(expectedTLIs, (int) hdr->xlp_tli)) { ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("unexpected timeline ID %u in log file %u, segment %u, offset %u", hdr->xlp_tli, readId, readSeg, readOff))); *************** ValidXLOGHeader(XLogPageHeader hdr, int *** 4009,4015 **** */ if (hdr->xlp_tli < lastPageTLI) { ! ereport(emode, (errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u", hdr->xlp_tli, lastPageTLI, readId, readSeg, readOff))); --- 4010,4016 ---- */ if (hdr->xlp_tli < lastPageTLI) { ! ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u", hdr->xlp_tli, lastPageTLI, readId, readSeg, readOff))); *************** retry: *** 9245,9258 **** readOff = 0; if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { ! ereport(emode_for_corrupt_record(emode), (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } ! if (!ValidXLOGHeader((XLogPageHeader) readBuf, ! emode_for_corrupt_record(emode))) goto next_record_is_invalid; } --- 9246,9258 ---- readOff = 0; if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } ! if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode)) goto next_record_is_invalid; } *************** retry: *** 9260,9266 **** readOff = targetPageOff; if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0) { ! ereport(emode_for_corrupt_record(emode), (errcode_for_file_access(), errmsg("could not seek in log file %u, segment %u to offset %u: %m", readId, readSeg, readOff))); --- 9260,9266 ---- readOff = targetPageOff; if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errcode_for_file_access(), errmsg("could not seek in log file %u, segment %u to offset %u: %m", readId, readSeg, readOff))); *************** retry: *** 9268,9280 **** } if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { ! ereport(emode_for_corrupt_record(emode), (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } ! if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode_for_corrupt_record(emode))) goto next_record_is_invalid; Assert(targetId == readId); --- 9268,9280 ---- } if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) { ! ereport(emode_for_corrupt_record(emode, *RecPtr), (errcode_for_file_access(), errmsg("could not read from log file %u, segment %u, offset %u: %m", readId, readSeg, readOff))); goto next_record_is_invalid; } ! if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode)) goto next_record_is_invalid; Assert(targetId == readId); *************** triggered: *** 9316,9325 **** * 'emode' is the error mode that would be used to report a file-not-found * or legitimate end-of-WAL situation. It is upgraded to WARNING or PANIC * if a corrupt record is not expected at this point. */ static int ! emode_for_corrupt_record(int emode) { /* * We don't expect any invalid records in archive or in records streamed * from master. Files in the archive should be complete, and we should --- 9316,9332 ---- * 'emode' is the error mode that would be used to report a file-not-found * or legitimate end-of-WAL situation. It is upgraded to WARNING or PANIC * if a corrupt record is not expected at this point. + * + * NOTE: This function remembers the RecPtr value it was last called with, + * to suppress repeated messages about the same record. Only call this when + * you are about to ereport(), or you might cause a later message to be + * erroneously suppressed. */ static int ! emode_for_corrupt_record(int emode, XLogRecPtr RecPtr) { + static XLogRecPtr lastComplaint = {0, 0}; + /* * We don't expect any invalid records in archive or in records streamed * from master. Files in the archive should be complete, and we should *************** emode_for_corrupt_record(int emode) *** 9340,9345 **** --- 9347,9363 ---- if (emode < WARNING) emode = WARNING; } + /* + * If we retry reading a record in pg_xlog, only complain on the first + * time to keep the noise down. + */ + else if (emode == LOG) + { + if (XLByteEQ(RecPtr, lastComplaint)) + emode = DEBUG1; + else + lastComplaint = RecPtr; + } return emode; }