Repair longstanding bug in slru/clog logic: it is possible for two backends
author: Tom Lane <[email protected]>
Sat, 21 Jan 2006 04:38:54 +0000 (04:38 +0000)
committer: Tom Lane <[email protected]>
Sat, 21 Jan 2006 04:38:54 +0000 (04:38 +0000)
Repair longstanding bug in slru/clog logic: it is possible for two backends
to try to create a log segment file concurrently, but the code erroneously
specified O_EXCL to open(), resulting in a needless failure.  Before 7.4,
it was even a PANIC condition :-(.  Correct code is actually simpler than
what we had, because we can just say O_CREAT to start with and not need a
second open() call.  I believe this accounts for several recent reports of
hard-to-reproduce "could not create file ...: File exists" errors in both
pg_clog and pg_subtrans.

src/backend/access/transam/clog.c

index f0f52a98ce777694db6fca94e8a4f6024a969f8c..16a8445e3aacb803f2d089931feb68c1230a0d9c 100644 (file)
@@ -598,17 +598,15 @@ CLOGPhysicalWritePage(int pageno, int slotno)
         * that have already been truncated from the commit log.  Easiest way
         * to deal with that is to accept references to nonexistent files here
         * and in CLOGPhysicalReadPage.)
+        *
+        * Note: it is possible for more than one backend to be executing
+        * this code simultaneously for different pages of the same file.
+        * Hence, don't use O_EXCL or O_TRUNC or anything like that.
         */
-       fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
+       fd = BasicOpenFile(path, O_RDWR | O_CREAT | PG_BINARY,
+                                          S_IRUSR | S_IWUSR);
        if (fd < 0)
-       {
-               if (errno != ENOENT)
-                       elog(PANIC, "open of %s failed: %m", path);
-               fd = BasicOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
-                                                  S_IRUSR | S_IWUSR);
-               if (fd < 0)
-                       elog(PANIC, "creation of file %s failed: %m", path);
-       }
+               elog(PANIC, "open of %s failed: %m", path);
 
        if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
                elog(PANIC, "lseek of clog file %u, offset %u failed: %m",