Skip to content

Commit 4eb2176

Browse files
committed
Fix DROP {DATABASE,TABLESPACE} on Windows.
Previously, it was possible for DROP DATABASE, DROP TABLESPACE and ALTER DATABASE SET TABLESPACE to fail because other backends still had file handles open for dropped tables. Windows won't allow a directory containing unlinked-but-still-open files to be unlinked. Tackle this problem by forcing all backends to close all smgr fds. No change for Unix systems, which don't suffer from the problem, but the new code path can be tested by Unix-based developers by defining USE_BARRIER_SMGRRELEASE explicitly. It's possible that PROCSIGNAL_BARRIER_SMGRRELEASE will have more bug-fixing applications soon (under discussion). Note that this is the first user of the ProcSignalBarrier mechanism from commit 16a4e4a. It could in principle be back-patched as far as 14, but since field complaints are rare and ProcSignalBarrier hasn't been battle-tested, that seems like a bad idea. Fix in master only, where these failures have started to show up in automated testing due to new tests. Suggested-by: Andres Freund <[email protected]> Reviewed-by: Andres Freund <[email protected]> Reviewed-by: Daniel Gustafsson <[email protected]> Reviewed-by: Robert Haas <[email protected]> Discussion: https://postgr.es/m/CA+hUKGLdemy2gBm80kz20GTe6hNVwoErE8KwcJk6-U56oStjtg@mail.gmail.com
1 parent e5691cc commit 4eb2176

File tree

9 files changed

+77
-36
lines changed

9 files changed

+77
-36
lines changed

src/backend/commands/dbcommands.c

+16-3
Original file line numberDiff line numberDiff line change
@@ -997,12 +997,15 @@ dropdb(const char *dbname, bool missing_ok, bool force)
997997

998998
/*
999999
* Force a checkpoint to make sure the checkpointer has received the
1000-
* message sent by ForgetDatabaseSyncRequests. On Windows, this also
1001-
* ensures that background procs don't hold any open files, which would
1002-
* cause rmdir() to fail.
1000+
* message sent by ForgetDatabaseSyncRequests.
10031001
*/
10041002
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
10051003

1004+
#if defined(USE_BARRIER_SMGRRELEASE)
1005+
/* Close all smgr fds in all backends. */
1006+
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
1007+
#endif
1008+
10061009
/*
10071010
* Remove all tablespace subdirs belonging to the database.
10081011
*/
@@ -1251,6 +1254,11 @@ movedb(const char *dbname, const char *tblspcname)
12511254
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
12521255
| CHECKPOINT_FLUSH_ALL);
12531256

1257+
#if defined(USE_BARRIER_SMGRRELEASE)
1258+
/* Close all smgr fds in all backends. */
1259+
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
1260+
#endif
1261+
12541262
/*
12551263
* Now drop all buffers holding data of the target database; they should
12561264
* no longer be dirty so DropDatabaseBuffers is safe.
@@ -2258,6 +2266,11 @@ dbase_redo(XLogReaderState *record)
22582266
/* Clean out the xlog relcache too */
22592267
XLogDropDatabase(xlrec->db_id);
22602268

2269+
#if defined(USE_BARRIER_SMGRRELEASE)
2270+
/* Close all smgr fds in all backends. */
2271+
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
2272+
#endif
2273+
22612274
for (i = 0; i < xlrec->ntablespaces; i++)
22622275
{
22632276
dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]);

src/backend/commands/tablespace.c

+20-6
Original file line numberDiff line numberDiff line change
@@ -536,15 +536,24 @@ DropTableSpace(DropTableSpaceStmt *stmt)
536536
* but we can't tell them apart from important data files that we
537537
* mustn't delete. So instead, we force a checkpoint which will clean
538538
* out any lingering files, and try again.
539-
*
540-
* XXX On Windows, an unlinked file persists in the directory listing
539+
*/
540+
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
541+
542+
/*
543+
* On Windows, an unlinked file persists in the directory listing
541544
* until no process retains an open handle for the file. The DDL
542545
* commands that schedule files for unlink send invalidation messages
543-
* directing other PostgreSQL processes to close the files. DROP
544-
* TABLESPACE should not give up on the tablespace becoming empty
545-
* until all relevant invalidation processing is complete.
546+
* directing other PostgreSQL processes to close the files, but
547+
* nothing guarantees they'll be processed in time. So, we'll also
548+
* use a global barrier to ask all backends to close all files, and
549+
* wait until they're finished.
546550
*/
547-
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
551+
#if defined(USE_BARRIER_SMGRRELEASE)
552+
LWLockRelease(TablespaceCreateLock);
553+
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
554+
LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
555+
#endif
556+
/* And now try again. */
548557
if (!destroy_tablespace_directories(tablespaceoid, false))
549558
{
550559
/* Still not empty, the files must be important then */
@@ -1582,6 +1591,11 @@ tblspc_redo(XLogReaderState *record)
15821591
*/
15831592
if (!destroy_tablespace_directories(xlrec->ts_id, true))
15841593
{
1594+
#if defined(USE_BARRIER_SMGRRELEASE)
1595+
/* Close all smgr fds in all backends. */
1596+
WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
1597+
#endif
1598+
15851599
ResolveRecoveryConflictWithTablespace(xlrec->ts_id);
15861600

15871601
/*

src/backend/storage/ipc/procsignal.c

+3-21
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "storage/latch.h"
2929
#include "storage/proc.h"
3030
#include "storage/shmem.h"
31+
#include "storage/smgr.h"
3132
#include "storage/sinval.h"
3233
#include "tcop/tcopprot.h"
3334
#include "utils/memutils.h"
@@ -94,7 +95,6 @@ static ProcSignalSlot *MyProcSignalSlot = NULL;
9495
static bool CheckProcSignal(ProcSignalReason reason);
9596
static void CleanupProcSignalState(int status, Datum arg);
9697
static void ResetProcSignalBarrierBits(uint32 flags);
97-
static bool ProcessBarrierPlaceholder(void);
9898
static inline int GetNumProcSignalSlots(void);
9999

100100
/*
@@ -536,8 +536,8 @@ ProcessProcSignalBarrier(void)
536536
type = (ProcSignalBarrierType) pg_rightmost_one_pos32(flags);
537537
switch (type)
538538
{
539-
case PROCSIGNAL_BARRIER_PLACEHOLDER:
540-
processed = ProcessBarrierPlaceholder();
539+
case PROCSIGNAL_BARRIER_SMGRRELEASE:
540+
processed = ProcessBarrierSmgrRelease();
541541
break;
542542
}
543543

@@ -603,24 +603,6 @@ ResetProcSignalBarrierBits(uint32 flags)
603603
InterruptPending = true;
604604
}
605605

606-
static bool
607-
ProcessBarrierPlaceholder(void)
608-
{
609-
/*
610-
* XXX. This is just a placeholder until the first real user of this
611-
* machinery gets committed. Rename PROCSIGNAL_BARRIER_PLACEHOLDER to
612-
* PROCSIGNAL_BARRIER_SOMETHING_ELSE where SOMETHING_ELSE is something
613-
* appropriately descriptive. Get rid of this function and instead have
614-
* ProcessBarrierSomethingElse. Most likely, that function should live in
615-
* the file pertaining to that subsystem, rather than here.
616-
*
617-
* The return value should be 'true' if the barrier was successfully
618-
* absorbed and 'false' if not. Note that returning 'false' can lead to
619-
* very frequent retries, so try hard to make that an uncommon case.
620-
*/
621-
return true;
622-
}
623-
624606
/*
625607
* CheckProcSignal - check to see if a particular reason has been
626608
* signaled, and clear the signal flag. Should be called after receiving

src/backend/storage/smgr/md.c

+6
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,12 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
549549
}
550550
}
551551

552+
void
553+
mdrelease(void)
554+
{
555+
closeAllVfds();
556+
}
557+
552558
/*
553559
* mdprefetch() -- Initiate asynchronous read of the specified block of a relation
554560
*/

src/backend/storage/smgr/smgr.c

+18
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ typedef struct f_smgr
4141
{
4242
void (*smgr_init) (void); /* may be NULL */
4343
void (*smgr_shutdown) (void); /* may be NULL */
44+
void (*smgr_release) (void); /* may be NULL */
4445
void (*smgr_open) (SMgrRelation reln);
4546
void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
4647
void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
@@ -69,6 +70,7 @@ static const f_smgr smgrsw[] = {
6970
{
7071
.smgr_init = mdinit,
7172
.smgr_shutdown = NULL,
73+
.smgr_release = mdrelease,
7274
.smgr_open = mdopen,
7375
.smgr_close = mdclose,
7476
.smgr_create = mdcreate,
@@ -693,3 +695,19 @@ AtEOXact_SMgr(void)
693695
smgrclose(rel);
694696
}
695697
}
698+
699+
/*
700+
* This routine is called when we are ordered to release all open files by a
701+
* ProcSignalBarrier.
702+
*/
703+
bool
704+
ProcessBarrierSmgrRelease(void)
705+
{
706+
for (int i = 0; i < NSmgr; i++)
707+
{
708+
if (smgrsw[i].smgr_release)
709+
smgrsw[i].smgr_release();
710+
}
711+
712+
return true;
713+
}

src/include/pg_config_manual.h

+11
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,17 @@
152152
#define EXEC_BACKEND
153153
#endif
154154

155+
/*
156+
* If USE_BARRIER_SMGRRELEASE is defined, certain code paths that unlink
157+
* directories will ask other backends to close all smgr file descriptors.
158+
* This is enabled on Windows, because otherwise unlinked but still open files
159+
* can prevent rmdir(containing_directory) from succeeding. On other
160+
* platforms, it can be defined to exercise those code paths.
161+
*/
162+
#if defined(WIN32)
163+
#define USE_BARRIER_SMGRRELEASE
164+
#endif
165+
155166
/*
156167
* Define this if your operating system supports link()
157168
*/

src/include/storage/md.h

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
extern void mdinit(void);
2424
extern void mdopen(SMgrRelation reln);
2525
extern void mdclose(SMgrRelation reln, ForkNumber forknum);
26+
extern void mdrelease(void);
2627
extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
2728
extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
2829
extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);

src/include/storage/procsignal.h

+1-6
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,7 @@ typedef enum
4949

5050
typedef enum
5151
{
52-
/*
53-
* XXX. PROCSIGNAL_BARRIER_PLACEHOLDER should be replaced when the first
54-
* real user of the ProcSignalBarrier mechanism is added. It's just here
55-
* for now because we can't have an empty enum.
56-
*/
57-
PROCSIGNAL_BARRIER_PLACEHOLDER = 0
52+
PROCSIGNAL_BARRIER_SMGRRELEASE /* ask smgr to close files */
5853
} ProcSignalBarrierType;
5954

6055
/*

src/include/storage/smgr.h

+1
Original file line numberDiff line numberDiff line change
@@ -104,5 +104,6 @@ extern void smgrtruncate(SMgrRelation reln, ForkNumber *forknum,
104104
int nforks, BlockNumber *nblocks);
105105
extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
106106
extern void AtEOXact_SMgr(void);
107+
extern bool ProcessBarrierSmgrRelease(void);
107108

108109
#endif /* SMGR_H */

0 commit comments

Comments
 (0)