summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Lane 2010-12-09 01:01:33 +0000
committer Tom Lane 2010-12-09 01:01:33 +0000
commit 6b2c0eab1f2f0b86ebf2721749689e6b839bc279 (patch)
tree 4d0b6acf0aa109771edef8f382248045f69c7cc3
parent 0ae63a4fb2db1131e0715810199de0c0a8e7c2d8 (diff)
Force default wal_sync_method to be fdatasync on Linux.
Recent versions of the Linux system header files cause xlogdefs.h to believe that open_datasync should be the default sync method, whereas formerly fdatasync was the default on Linux. open_datasync is a bad choice, first because it doesn't actually outperform fdatasync (in fact the reverse), and second because we try to use O_DIRECT with it, causing failures on certain filesystems (e.g., ext4 with data=journal option). This part of the patch is largely per a proposal from Marti Raudsepp. More extensive changes are likely to follow in HEAD, but this is as much change as we want to back-patch.

Also clean up confusing code and incorrect documentation surrounding the fsync_writethrough option. Those changes shouldn't result in any actual behavioral change, but I chose to back-patch them anyway to keep the branches looking similar in this area.

In 9.0 and HEAD, also do some copy-editing on the WAL Reliability documentation section.

Back-patch to all supported branches, since any of them might get used on modern Linux versions.
-rw-r--r-- doc/src/sgml/config.sgml 8
-rw-r--r-- src/backend/access/transam/xlog.c 10
-rw-r--r-- src/backend/storage/file/fd.c 9
-rw-r--r-- src/backend/utils/misc/postgresql.conf.sample 2
-rw-r--r-- src/include/port/linux.h 10
-rw-r--r-- src/include/port/win32.h 14
6 files changed, 35 insertions, 18 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index ea9192625cc..ecee5507c26 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1347,12 +1347,12 @@ SET ENABLE_SEQSCAN TO OFF;
</listitem>
<listitem>
<para>
- <literal>fsync_writethrough</> (call <function>fsync()</> at each commit, forcing write-through of any disk write cache)
+ <literal>fsync</> (call <function>fsync()</> at each commit)
</para>
</listitem>
<listitem>
<para>
- <literal>fsync</> (call <function>fsync()</> at each commit)
+ <literal>fsync_writethrough</> (call <function>fsync()</> at each commit, forcing write-through of any disk write cache)
</para>
</listitem>
<listitem>
@@ -1363,7 +1363,9 @@ SET ENABLE_SEQSCAN TO OFF;
</itemizedlist>
<para>
Not all of these choices are available on all platforms.
- The default is the first method in the above list that is supported.
+ The default is the first method in the above list that is supported
+ by the platform, except that <literal>fdatasync</> is the default on
+ Linux.
This option can be set at server start or in the
<filename>postgresql.conf</filename> file.
</para>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a8fdf407809..c3097a28a50 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -92,7 +92,11 @@
#endif
#endif
-#if defined(OPEN_DATASYNC_FLAG)
+#if defined(PLATFORM_DEFAULT_SYNC_METHOD)
+#define DEFAULT_SYNC_METHOD_STR PLATFORM_DEFAULT_SYNC_METHOD_STR
+#define DEFAULT_SYNC_METHOD PLATFORM_DEFAULT_SYNC_METHOD
+#define DEFAULT_SYNC_FLAGBIT PLATFORM_DEFAULT_SYNC_FLAGBIT
+#elif defined(OPEN_DATASYNC_FLAG)
#define DEFAULT_SYNC_METHOD_STR "open_datasync"
#define DEFAULT_SYNC_METHOD SYNC_METHOD_OPEN
#define DEFAULT_SYNC_FLAGBIT OPEN_DATASYNC_FLAG
@@ -100,10 +104,6 @@
#define DEFAULT_SYNC_METHOD_STR "fdatasync"
#define DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC
#define DEFAULT_SYNC_FLAGBIT 0
-#elif defined(HAVE_FSYNC_WRITETHROUGH_ONLY)
-#define DEFAULT_SYNC_METHOD_STR "fsync_writethrough"
-#define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC_WRITETHROUGH
-#define DEFAULT_SYNC_FLAGBIT 0
#else
#define DEFAULT_SYNC_METHOD_STR "fsync"
#define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 3e3932b861e..87b6865ee67 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -237,12 +237,13 @@ static void RemovePgTempFilesInDir(const char *tmpdirname);
int
pg_fsync(int fd)
{
-#ifndef HAVE_FSYNC_WRITETHROUGH_ONLY
- if (sync_method != SYNC_METHOD_FSYNC_WRITETHROUGH)
- return pg_fsync_no_writethrough(fd);
+ /* #if is to skip the sync_method test if there's no need for it */
+#if defined(HAVE_FSYNC_WRITETHROUGH) && !defined(FSYNC_WRITETHROUGH_IS_FSYNC)
+ if (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH)
+ return pg_fsync_writethrough(fd);
else
#endif
- return pg_fsync_writethrough(fd);
+ return pg_fsync_no_writethrough(fd);
}
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 29d7c078524..df62b67830c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -138,7 +138,7 @@
#wal_sync_method = fsync # the default is the first option
# supported by the operating system:
# open_datasync
- # fdatasync
+ # fdatasync (default on Linux)
# fsync
# fsync_writethrough
# open_sync
diff --git a/src/include/port/linux.h b/src/include/port/linux.h
index 6feb22e1d5c..562b5fbf045 100644
--- a/src/include/port/linux.h
+++ b/src/include/port/linux.h
@@ -10,3 +10,13 @@
* to have a kernel version test here.
*/
#define HAVE_LINUX_EIDRM_BUG
+
+/*
+ * Set the default wal_sync_method to fdatasync. With recent Linux versions,
+ * xlogdefs.h's normal rules will prefer open_datasync, which (a) doesn't
+ * perform better and (b) causes outright failures on ext4 data=journal
+ * filesystems, because those don't support O_DIRECT.
+ */
+#define PLATFORM_DEFAULT_SYNC_METHOD_STR "fdatasync"
+#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC
+#define PLATFORM_DEFAULT_SYNC_FLAGBIT 0
diff --git a/src/include/port/win32.h b/src/include/port/win32.h
index 93d6e2a74c3..cefafd0bd98 100644
--- a/src/include/port/win32.h
+++ b/src/include/port/win32.h
@@ -16,14 +16,18 @@
#define mkdir(a,b) mkdir(a)
-#define HAVE_FSYNC_WRITETHROUGH
-#define HAVE_FSYNC_WRITETHROUGH_ONLY
#define ftruncate(a,b) chsize(a,b)
+
+/* Windows doesn't have fsync() as such, use _commit() */
+#define fsync(fd) _commit(fd)
+
/*
- * Even though we don't support 'fsync' as a wal_sync_method,
- * we do fsync() a few other places where _commit() is just fine.
+ * For historical reasons, we allow setting wal_sync_method to
+ * fsync_writethrough on Windows, even though it's really identical to fsync
+ * (both code paths wind up at _commit()).
*/
-#define fsync(fd) _commit(fd)
+#define HAVE_FSYNC_WRITETHROUGH
+#define FSYNC_WRITETHROUGH_IS_FSYNC
#define USES_WINSOCK