summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Riggs2017-06-23 17:58:46 +0000
committerSimon Riggs2017-06-23 17:58:46 +0000
commit9ea3c64124af039219aa5030d7af675dce5daa60 (patch)
tree5d32787a7dd1973e9cbebc0cdae02f70c9dc540f
parenta79122b06194927d2b79465f335b94f2b4472816 (diff)
Improve replication lag interpolation after idle period
After sitting idle and fully replayed for a while and then encountering a new burst of WAL activity, we interpolated between an ancient sample and the not-yet-reached one for the new traffic. That produced a corner-case report of lag after receiving the first new reply from the standby, which might sometimes be a large spike. Correct this by resetting the last_read time and handling that new case. Author: Thomas Munro
-rw-r--r--src/backend/replication/walsender.c29
1 files changed, 25 insertions, 4 deletions
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 82f7b59373..f845180873 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3443,6 +3443,16 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
(LagTracker.read_heads[head] + 1) % LAG_TRACKER_BUFFER_SIZE;
}
+ /*
+ * If the lag tracker is empty, that means the standby has processed
+ * everything we've ever sent so we should now clear 'last_read'. If we
+ * didn't do that, we'd risk using a stale and irrelevant sample for
+ * interpolation at the beginning of the next burst of WAL after a period
+ * of idleness.
+ */
+ if (LagTracker.read_heads[head] == LagTracker.write_head)
+ LagTracker.last_read[head].time = 0;
+
if (time > now)
{
/* If the clock somehow went backwards, treat as not found. */
@@ -3459,9 +3469,14 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
* eventually start moving again and cross one of our samples before
* we can show the lag increasing.
*/
- if (LagTracker.read_heads[head] != LagTracker.write_head &&
- LagTracker.last_read[head].time != 0)
+ if (LagTracker.read_heads[head] == LagTracker.write_head)
{
+ /* There are no future samples, so we can't interpolate. */
+ return -1;
+ }
+ else if (LagTracker.last_read[head].time != 0)
+ {
+ /* We can interpolate between last_read and the next sample. */
double fraction;
WalTimeSample prev = LagTracker.last_read[head];
WalTimeSample next = LagTracker.buffer[LagTracker.read_heads[head]];
@@ -3494,8 +3509,14 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
}
else
{
- /* Couldn't interpolate due to lack of data. */
- return -1;
+ /*
+ * We have only a future sample, implying that we were entirely
+ * caught up, and now there is a new burst of WAL and the
+ * standby hasn't processed the first sample yet. Until the
+ * standby reaches the future sample the best we can do is report
+ * the hypothetical lag if that sample were to be replayed now.
+ */
+ time = LagTracker.buffer[LagTracker.read_heads[head]].time;
}
}