diff options
author | Simon Riggs | 2017-06-23 17:58:46 +0000 |
---|---|---|
committer | Simon Riggs | 2017-06-23 17:58:46 +0000 |
commit | 9ea3c64124af039219aa5030d7af675dce5daa60 (patch) | |
tree | 5d32787a7dd1973e9cbebc0cdae02f70c9dc540f | |
parent | a79122b06194927d2b79465f335b94f2b4472816 (diff) |
Improve replication lag interpolation after idle period
After a standby has sat idle and fully replayed for a while and then
encounters a new burst of WAL activity, we interpolated between an ancient
sample and the not-yet-reached one for the new traffic. That produced a corner
case in which the lag reported after receiving the first new reply from the
standby could sometimes be a large spike.
Correct this by resetting the last_read time and handling that new case.
Author: Thomas Munro
-rw-r--r-- | src/backend/replication/walsender.c | 29 |
1 files changed, 25 insertions, 4 deletions
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 82f7b59373..f845180873 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -3443,6 +3443,16 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now) (LagTracker.read_heads[head] + 1) % LAG_TRACKER_BUFFER_SIZE; } + /* + * If the lag tracker is empty, that means the standby has processed + * everything we've ever sent so we should now clear 'last_read'. If we + * didn't do that, we'd risk using a stale and irrelevant sample for + * interpolation at the beginning of the next burst of WAL after a period + * of idleness. + */ + if (LagTracker.read_heads[head] == LagTracker.write_head) + LagTracker.last_read[head].time = 0; + if (time > now) { /* If the clock somehow went backwards, treat as not found. */ @@ -3459,9 +3469,14 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now) * eventually start moving again and cross one of our samples before * we can show the lag increasing. */ - if (LagTracker.read_heads[head] != LagTracker.write_head && - LagTracker.last_read[head].time != 0) + if (LagTracker.read_heads[head] == LagTracker.write_head) { + /* There are no future samples, so we can't interpolate. */ + return -1; + } + else if (LagTracker.last_read[head].time != 0) + { + /* We can interpolate between last_read and the next sample. */ double fraction; WalTimeSample prev = LagTracker.last_read[head]; WalTimeSample next = LagTracker.buffer[LagTracker.read_heads[head]]; @@ -3494,8 +3509,14 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now) } else { - /* Couldn't interpolate due to lack of data. */ - return -1; + /* + * We have only a future sample, implying that we were entirely + * caught up but and now there is a new burst of WAL and the + * standby hasn't processed the first sample yet. 
Until the + * standby reaches the future sample the best we can do is report + * the hypothetical lag if that sample were to be replayed now. + */ + time = LagTracker.buffer[LagTracker.read_heads[head]].time; } } |