Skip to content

Commit 1d19699

Browse files
committed
Fix stall on deadlock detection
1 parent 29d7ebf commit 1d19699

File tree

3 files changed

+41
-1
lines changed

3 files changed

+41
-1
lines changed

src/backend/storage/lmgr/deadlock.c

+8-1
Original file line numberDiff line numberDiff line change
@@ -450,10 +450,17 @@ FindLockCycle(PGPROC *checkProc,
450450
EDGE *softEdges, /* output argument */
451451
int *nSoftEdges) /* output argument */
452452
{
453+
bool found;
454+
TimestampTz now;
455+
int i;
453456
nVisitedProcs = 0;
454457
nDeadlockDetails = 0;
455458
*nSoftEdges = 0;
456-
return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
459+
found = FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
460+
now = GetCurrentTimestamp();
461+
for (i = 0; i < nVisitedProcs; i++)
462+
visitedProcs[i]->lastDeadlockCheck = now;
463+
return found;
457464
}
458465

459466
static bool

src/backend/storage/lmgr/proc.c

+29
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ static LOCALLOCK *lockAwaited = NULL;
8686

8787
static DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
8888

89+
static bool inside_deadlock_check = false;
90+
8991
/* Is a deadlock check pending? */
9092
static volatile sig_atomic_t got_deadlock_timeout;
9193

@@ -187,6 +189,7 @@ InitProcGlobal(void)
187189
ProcGlobal->checkpointerLatch = NULL;
188190
pg_atomic_init_u32(&ProcGlobal->procArrayGroupFirst, INVALID_PGPROCNO);
189191
pg_atomic_init_u32(&ProcGlobal->clogGroupFirst, INVALID_PGPROCNO);
192+
pg_atomic_init_flag(&ProcGlobal->activeDeadlockCheck);
190193

191194
/*
192195
* Create and initialize all the PGPROC structures we'll need. There are
@@ -763,6 +766,14 @@ ProcReleaseLocks(bool isCommit)
763766
{
764767
if (!MyProc)
765768
return;
769+
770+
/* Release deadlock detection flag if backend was interrupted inside deadlock check */
771+
if (inside_deadlock_check)
772+
{
773+
pg_atomic_clear_flag(&ProcGlobal->activeDeadlockCheck);
774+
inside_deadlock_check = false;
775+
}
776+
766777
/* If waiting, get off wait queue (should only be needed after error) */
767778
LockErrorCleanup();
768779
/* Release standard locks, including session-level if aborting */
@@ -1665,6 +1676,21 @@ static void
16651676
CheckDeadLock(void)
16661677
{
16671678
int i;
1679+
TimestampTz now = GetCurrentTimestamp();
1680+
1681+
if (now - MyProc->lastDeadlockCheck < DeadlockTimeout*1000)
1682+
return;
1683+
1684+
/*
1685+
* Ensure that only one backend is checking for deadlock.
1686+
* Otherwise, under high load, a cascade of deadlock timeout expirations can cause Postgres to stall.
1687+
*/
1688+
if (!pg_atomic_test_set_flag(&ProcGlobal->activeDeadlockCheck))
1689+
{
1690+
enable_timeout_after(DEADLOCK_TIMEOUT, random() % DeadlockTimeout);
1691+
return;
1692+
}
1693+
inside_deadlock_check = true;
16681694

16691695
/*
16701696
* Acquire exclusive lock on the entire shared lock data structures. Must
@@ -1741,6 +1767,9 @@ CheckDeadLock(void)
17411767
check_done:
17421768
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
17431769
LWLockRelease(LockHashPartitionLockByIndex(i));
1770+
1771+
pg_atomic_clear_flag(&ProcGlobal->activeDeadlockCheck);
1772+
inside_deadlock_check = false;
17441773
}
17451774

17461775
/*

src/include/storage/proc.h

+4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "storage/lock.h"
2222
#include "storage/pg_sema.h"
2323
#include "storage/proclist_types.h"
24+
#include "datatype/timestamp.h"
2425

2526
/*
2627
* Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
@@ -200,6 +201,7 @@ struct PGPROC
200201
PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */
201202
dlist_head lockGroupMembers; /* list of members, if I'm a leader */
202203
dlist_node lockGroupLink; /* my member link, if I'm a member */
204+
TimestampTz lastDeadlockCheck;
203205
};
204206

205207
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
@@ -267,6 +269,8 @@ typedef struct PROC_HDR
267269
int startupProcPid;
268270
/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
269271
int startupBufferPinWaitBufId;
272+
/* Deadlock detection is in progress */
273+
pg_atomic_flag activeDeadlockCheck;
270274
} PROC_HDR;
271275

272276
extern PGDLLIMPORT PROC_HDR *ProcGlobal;

0 commit comments

Comments
 (0)